Support I64->I32 casts in the NCG, and use them for I64->Integer conversions
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check, the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *less* than the size of a word, make sure we
42  * round up to the nearest word for the size of the array.
43  */
44
/* newByteArray#: allocate a MutableByteArray# of R1 bytes.
   Result (a pointer to the fresh ARR_WORDS closure) is returned via RET_P. */
45 newByteArrayzh_fast
46 {
47     W_ words, payload_words, n, p;
48     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
49     n = R1;
50     payload_words = ROUNDUP_BYTES_TO_WDS(n); // round byte count up to whole words
51     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
52     "ptr" p = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
53     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
54     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
55     StgArrWords_words(p) = payload_words;
56     RET_P(p);
57 }
58
/* newPinnedByteArray#: like newByteArray#, but the storage is allocated
   with allocatePinned() so the GC will never move it, and the payload is
   8-byte aligned (needed so the data can be handed to foreign code). */
59 newPinnedByteArrayzh_fast
60 {
61     W_ words, payload_words, n, p;
62
63     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
64     n = R1;
65     payload_words = ROUNDUP_BYTES_TO_WDS(n);
66
67     // We want an 8-byte aligned array.  allocatePinned() gives us
68     // 8-byte aligned memory by default, but we want to align the
69     // *goods* inside the ArrWords object, so we have to check the
70     // size of the ArrWords header and adjust our size accordingly.
71     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
72     if ((SIZEOF_StgArrWords & 7) != 0) {
73         words = words + 1; // extra slack word for the alignment shift below
74     }
75
76     "ptr" p = foreign "C" allocatePinned(words) [];
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78
79     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
80     // have to push the object forward one word so that the goods
81     // fall on an 8-byte boundary.
82     if ((SIZEOF_StgArrWords & 7) != 0) {
83         p = p + WDS(1);
84     }
85
86     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
87     StgArrWords_words(p) = payload_words;
88     RET_P(p);
89 }
90
/* newArray#: allocate a MutableArray# of R1 elements, every slot
   initialised to the closure in R2.  Returns the new array via RET_P. */
91 newArrayzh_fast
92 {
93     W_ words, n, init, arr, p;
94     /* Args: R1 = words, R2 = initialisation value */
95
96     n = R1;
97     MAYBE_GC(R2_PTR,newArrayzh_fast);
98
99     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
100     "ptr" arr = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
101     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
102
103     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
104     StgMutArrPtrs_ptrs(arr) = n;
105
106     // Initialise all elements of the array with the value in R2
107     init = R2;
108     p = arr + SIZEOF_StgMutArrPtrs;
109   for:
110     if (p < arr + WDS(words)) {
111         W_[p] = init;
112         p = p + WDS(1);
113         goto for;
114     }
115
116     RET_P(arr);
117 }
118
/* unsafeThawArray#: turn an immutable (frozen) array back into a mutable
   one in place.  The only subtlety is the mutable-list bookkeeping,
   explained at length below. */
119 unsafeThawArrayzh_fast
120 {
121   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
122   //
123   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
124   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
125   // it on the mutable list for the GC to remove (removing something from
126   // the mutable list is not easy, because the mut_list is only singly-linked).
127   // 
128   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
129   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
130   // to indicate that it is still on the mutable list.
131   //
132   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
133   // either it is on a mut_list, or it isn't.  We adopt the convention that
134   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
135   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
136   // we put it on the mutable list more than once, but it would get scavenged
137   // multiple times during GC, which would be unnecessarily slow.
138   //
139   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
140         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
141         recordMutable(R1, R1);
142         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
143         RET_P(R1);
144   } else {
145         // already on the mutable list: just flip the info pointer back
146         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
147         RET_P(R1);
148   }
149 }
149
150 /* -----------------------------------------------------------------------------
151    MutVar primitives
152    -------------------------------------------------------------------------- */
153
/* newMutVar#: allocate a MutVar# holding the closure in R1. */
154 newMutVarzh_fast
155 {
156     W_ mv;
157     /* Args: R1 = initialisation value */
158
159     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
160
161     mv = Hp - SIZEOF_StgMutVar + WDS(1); // start of the just-allocated closure
162     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
163     StgMutVar_var(mv) = R1;
164     
165     RET_P(mv);
166 }
167
/* atomicModifyMutVar#: atomically replace the contents x of a MutVar#
   with a thunk (sel_0 (f x)) and return the thunk (sel_1 (f x)).
   All three thunks are allocated up front, then the swap is done under
   a global lock on THREADED_RTS. */
168 atomicModifyMutVarzh_fast
169 {
170     W_ mv, z, x, y, r;
171     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
172
173     /* If x is the current contents of the MutVar#, then 
174        We want to make the new contents point to
175
176          (sel_0 (f x))
177  
178        and the return value is
179          
180          (sel_1 (f x))
181
182         obviously we can share (f x).
183
184          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
185          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
186          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
187     */
188
189 #if MIN_UPD_SIZE > 1
190 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
191 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
192 #else
193 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
194 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
195 #endif
196
197 #if MIN_UPD_SIZE > 2
198 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
199 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
200 #else
201 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
202 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
203 #endif
204
205 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
206
207    // one heap check up front covers all three thunk allocations
208    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
209
210 #if defined(THREADED_RTS)
211     // global lock: serialises all atomicModifyMutVar# calls
212     foreign "C" ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
213 #endif
214
215    x = StgMutVar_var(R1);
216
217    TICK_ALLOC_THUNK_2();
218    CCCS_ALLOC(THUNK_2_SIZE);
219    z = Hp - THUNK_2_SIZE + WDS(1);
220    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
221    LDV_RECORD_CREATE(z);
222    StgThunk_payload(z,0) = R2;
223    StgThunk_payload(z,1) = x;
224
225    TICK_ALLOC_THUNK_1();
226    CCCS_ALLOC(THUNK_1_SIZE);
227    y = z - THUNK_1_SIZE;
228    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
229    LDV_RECORD_CREATE(y);
230    StgThunk_payload(y,0) = z;
231
232    StgMutVar_var(R1) = y;
233    foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];
234
235    TICK_ALLOC_THUNK_1();
236    CCCS_ALLOC(THUNK_1_SIZE);
237    r = y - THUNK_1_SIZE;
238    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
239    LDV_RECORD_CREATE(r);
240    StgThunk_payload(r,0) = z;
241
242 #if defined(THREADED_RTS)
243     foreign "C" RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
244 #endif
245
246    RET_P(r);
247 }
246
247 /* -----------------------------------------------------------------------------
248    Weak Pointer Primitives
249    -------------------------------------------------------------------------- */
250
251 STRING(stg_weak_msg,"New weak pointer at %p\n")
252
/* mkWeak#: build a Weak# pointer with the given key, value and optional
   finalizer, and link it onto the global weak_ptr_list. */
253 mkWeakzh_fast
254 {
255   /* R1 = key
256      R2 = value
257      R3 = finalizer (or NULL)
258   */
259   W_ w;
260
261   if (R3 == NULL) {
262     R3 = stg_NO_FINALIZER_closure; // normalise "no finalizer" to the sentinel closure
263   }
264
265   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
266
267   w = Hp - SIZEOF_StgWeak + WDS(1);
268   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
269
270   StgWeak_key(w)       = R1;
271   StgWeak_value(w)     = R2;
272   StgWeak_finalizer(w) = R3;
273
274   // push onto the front of the global weak pointer list
275   StgWeak_link(w)       = W_[weak_ptr_list];
276   W_[weak_ptr_list]     = w;
277
278   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
279
280   RET_P(w);
281 }
281
282
/* finalizeWeak#: kill a Weak# pointer and return its finalizer.
   Returns (# 0, noFinalizer #) if already dead or no finalizer was set,
   otherwise (# 1, finalizer #). */
283 finalizzeWeakzh_fast
284 {
285   /* R1 = weak ptr
286    */
287   W_ w, f;
288
289   w = R1;
290
291   // already dead?
292   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
293       RET_NP(0,stg_NO_FINALIZER_closure);
294   }
295
296   // kill it
297 #ifdef PROFILING
298   // @LDV profiling
299   // A weak pointer is inherently used, so we do not need to call
300   // LDV_recordDead_FILL_SLOP_DYNAMIC():
301   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
302   // or, LDV_recordDead():
303   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
304   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
305   // large as weak pointers, so there is no need to fill the slop, either.
306   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
307 #endif
308
309   //
310   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
311   //
312   SET_INFO(w,stg_DEAD_WEAK_info);
313   LDV_RECORD_CREATE(w);
314
315   // read the finalizer before overlaying the DEAD_WEAK layout
316   f = StgWeak_finalizer(w);
317   StgDeadWeak_link(w) = StgWeak_link(w);
318
319   /* return the finalizer */
320   if (f == stg_NO_FINALIZER_closure) {
321       RET_NP(0,stg_NO_FINALIZER_closure);
322   } else {
323       RET_NP(1,f);
324   }
325 }
325
/* deRefWeak#: returns (# 1, value #) if the weak pointer is still alive,
   (# 0, w #) if it has died (the second component is then unusable). */
326 deRefWeakzh_fast
327 {
328   /* R1 = weak ptr */
329   W_ w, code, val;
330
331   w = R1;
332   if (GET_INFO(w) == stg_WEAK_info) {
333     code = 1;
334     val = StgWeak_value(w);
335   } else {
336     code = 0;
337     val = w;
338   }
339   RET_NP(code,val);
340 }
341
342 /* -----------------------------------------------------------------------------
343    Arbitrary-precision Integer operations.
344
345    There are some assumptions in this code that mp_limb_t == W_.  This is
346    the case for all the platforms that GHC supports, currently.
347    -------------------------------------------------------------------------- */
348
/* int2Integer#: convert an Int# to a GMP-style Integer, i.e. a
   (size, ByteArray#) pair where size carries the sign (-1/0/1 here,
   since a single limb always suffices). */
349 int2Integerzh_fast
350 {
351    /* arguments: R1 = Int# */
352
353    W_ val, s, p;        /* to avoid aliasing */
354
355    val = R1;
356    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
357
358    p = Hp - SIZEOF_StgArrWords;
359    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
360    StgArrWords_words(p) = 1;
361
362    /* mpz_set_si is inlined here, makes things simpler */
363    if (%lt(val,0)) { 
364         s  = -1;
365         Hp(0) = -val; // store the magnitude; sign lives in s
366    } else { 
367      if (%gt(val,0)) {
368         s = 1;
369         Hp(0) = val;
370      } else {
371         s = 0; // zero: no limbs used
372      }
373   }
374
375    /* returns (# size  :: Int#, 
376                  data  :: ByteArray# 
377                #)
378    */
379    RET_NP(s,p);
380 }
381
/* word2Integer#: convert a Word# to an Integer; size is 0 or 1 since the
   value is unsigned and fits in one limb. */
382 word2Integerzh_fast
383 {
384    /* arguments: R1 = Word# */
385
386    W_ val, s, p;        /* to avoid aliasing */
387
388    val = R1;
389
390    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
391
392    p = Hp - SIZEOF_StgArrWords;
393    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
394    StgArrWords_words(p) = 1;
395
396    if (val != 0) {
397         s = 1;
398         W_[Hp] = val;
399    } else {
400         s = 0;
401    }
402
403    /* returns (# size  :: Int#, 
404                  data  :: ByteArray# #)
405    */
406    RET_NP(s,p);
407 }
408
409
410 /*
411  * 'long long' primops for converting to/from Integers.
412  */
413
414 #ifdef SUPPORT_LONG_LONGS
415
/* int64ToInteger#: convert an Int64# to an Integer on a 32-bit platform
   (only compiled under SUPPORT_LONG_LONGS).  The 64-bit value is split
   into hi/lo 32-bit limbs; the result uses one limb when the value fits
   in 32 bits (hi is all zeros or all ones), two otherwise.  The sign is
   carried by the returned size (negated via `neg`). */
416 int64ToIntegerzh_fast
417 {
418    /* arguments: L1 = Int64# */
419
420    L_ val;
421    W_ hi, lo, s, neg, words_needed, p;
422
423    val = L1;
424    neg = 0;
425
426    hi = TO_W_(val >> 32);
427    lo = TO_W_(val);
428
429    if ( hi != 0 && hi != 0xFFFFFFFF )  { 
430        words_needed = 2;
431    } else { 
432        // minimum is one word
433        words_needed = 1;
434    }
435
436    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
437                NO_PTRS, int64ToIntegerzh_fast );
438
439    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
440    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
441    StgArrWords_words(p) = words_needed;
442
443    if ( %lt(hi,0) ) {
444      // negative: store the magnitude (two's-complement negation across limbs)
445      neg = 1;
446      lo = -lo;
447      if(lo == 0) {
448        hi = -hi;
449      } else {
450        hi = -hi - 1; // borrow from the low limb
451      }
452    }
453
454    if ( words_needed == 2 )  { 
455       s = 2;
456       Hp(-1) = lo;
457       Hp(0) = hi;
458    } else { 
459        if ( lo != 0 ) {
460            s = 1;
461            Hp(0) = lo;
462        } else /* val==0 */  {
463            s = 0;
464        }
465    }
466    if ( neg != 0 ) {
467         s = -s; // GMP convention: negative size means negative number
468    }
469
470    /* returns (# size  :: Int#, 
471                  data  :: ByteArray# #)
472    */
473    RET_NP(s,p);
474 }
/* word64ToInteger#: convert a Word64# to an Integer on a 32-bit platform.
   Unsigned, so the size is simply 0, 1 or 2 limbs. */
474 word64ToIntegerzh_fast
475 {
476    /* arguments: L1 = Word64# */
477
478    L_ val;
479    W_ hi, lo, s, words_needed, p;
480
481    val = L1;
482    hi = TO_W_(val >> 32);
483    lo = TO_W_(val);
484
485    if ( hi != 0 ) {
486       words_needed = 2;
487    } else {
488       words_needed = 1;
489    }
490
491    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
492                NO_PTRS, word64ToIntegerzh_fast );
493
494    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
495    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
496    StgArrWords_words(p) = words_needed;
497
498    if ( hi != 0 ) { 
499      s = 2;
500      Hp(-1) = lo;
501      Hp(0)  = hi;
502    } else {
503       if ( lo != 0 ) {
504         s = 1;
505         Hp(0) = lo;
506      } else /* val==0 */  {
507       s = 0;
508      }
509   }
510
511    /* returns (# size  :: Int#, 
512                  data  :: ByteArray# #)
513    */
514    RET_NP(s,p);
515 }
516
517
518
519 #endif /* SUPPORT_LONG_LONGS */
520
521 /* ToDo: this is shockingly inefficient */
522
/* Scratch MP_INT structures used by the GMP wrappers below.
   Non-threaded RTS: plain static bss storage.
   Threaded RTS: FETCH_MP_TEMP redirects each name to a per-Capability
   slot in the StgRegTable, so concurrent threads don't collide. */
523 #ifndef THREADED_RTS
524 section "bss" {
525   mp_tmp1:
526     bits8 [SIZEOF_MP_INT];
527 }
528
529 section "bss" {
530   mp_tmp2:
531     bits8 [SIZEOF_MP_INT];
532 }
533
534 section "bss" {
535   mp_result1:
536     bits8 [SIZEOF_MP_INT];
537 }
538
539 section "bss" {
540   mp_result2:
541     bits8 [SIZEOF_MP_INT];
542 }
543 #endif
544
545 #ifdef THREADED_RTS
546 #define FETCH_MP_TEMP(X) \
547 W_ X; \
548 X = BaseReg + (OFFSET_StgRegTable_r ## X);
549 #else
550 #define FETCH_MP_TEMP(X) /* Nothing */
551 #endif
552
/* GMP_TAKE2_RET1(name, mp_fun): define primop `name` taking two Integers
   (R1/R2 = size/data of the first, R3/R4 of the second), applying the
   binary GMP function mp_fun, and returning one Integer as a
   (size, ByteArray#) pair.  The returned data pointer is adjusted by
   SIZEOF_StgArrWords because mp_d points at the limbs, i.e. at the
   payload of an ARR_WORDS closure allocated by GHC's GMP allocator. */
553 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
554 name                                                                    \
555 {                                                                       \
556   CInt s1, s2;                                                          \
557   W_ d1, d2;                                                            \
558   FETCH_MP_TEMP(mp_tmp1);                                               \
559   FETCH_MP_TEMP(mp_tmp2);                                               \
560   FETCH_MP_TEMP(mp_result1)                                             \
561   FETCH_MP_TEMP(mp_result2);                                            \
562                                                                         \
563   /* call doYouWantToGC() */                                            \
564   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
565                                                                         \
566   s1 = W_TO_INT(R1);                                                    \
567   d1 = R2;                                                              \
568   s2 = W_TO_INT(R3);                                                    \
569   d2 = R4;                                                              \
570                                                                         \
571   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
572   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
573   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
574   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
575   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
576   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
577                                                                         \
578   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
579                                                                         \
580   /* Perform the operation */                                           \
581   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
582                                                                         \
583   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
584          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
585 }
586
/* GMP_TAKE1_RET1(name, mp_fun): as GMP_TAKE2_RET1, but for a unary GMP
   function; takes one Integer in R1/R2 and returns one Integer. */
587 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
588 name                                                                    \
589 {                                                                       \
590   CInt s1;                                                              \
591   W_ d1;                                                                \
592   FETCH_MP_TEMP(mp_tmp1);                                               \
593   FETCH_MP_TEMP(mp_result1)                                             \
594                                                                         \
595   /* call doYouWantToGC() */                                            \
596   MAYBE_GC(R2_PTR, name);                                               \
597                                                                         \
598   d1 = R2;                                                              \
599   s1 = W_TO_INT(R1);                                                    \
600                                                                         \
601   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
602   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
603   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
604                                                                         \
605   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
606                                                                         \
607   /* Perform the operation */                                           \
608   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
609                                                                         \
610   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
611          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
612 }
613
/* GMP_TAKE2_RET2(name, mp_fun): as GMP_TAKE2_RET1, but for GMP functions
   with two result operands (e.g. tdiv_qr); returns two Integers via
   RET_NPNP. */
614 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
615 name                                                                                    \
616 {                                                                                       \
617   CInt s1, s2;                                                                          \
618   W_ d1, d2;                                                                            \
619   FETCH_MP_TEMP(mp_tmp1);                                                               \
620   FETCH_MP_TEMP(mp_tmp2);                                                               \
621   FETCH_MP_TEMP(mp_result1)                                                             \
622   FETCH_MP_TEMP(mp_result2)                                                             \
623                                                                                         \
624   /* call doYouWantToGC() */                                                            \
625   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
626                                                                                         \
627   s1 = W_TO_INT(R1);                                                                    \
628   d1 = R2;                                                                              \
629   s2 = W_TO_INT(R3);                                                                    \
630   d2 = R4;                                                                              \
631                                                                                         \
632   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
633   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
634   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
635   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
636   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
637   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
638                                                                                         \
639   foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
640   foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
641                                                                                         \
642   /* Perform the operation */                                                           \
643   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
644                                                                                         \
645   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
646            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
647            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
648            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
649 }
650
/* Instantiate the wrappers above for each Integer primop, pairing each
   GHC primop name with the corresponding GMP entry point. */
651 GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
652 GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
653 GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
654 GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
655 GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
656 GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
657 GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
658 GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
659 GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
660 GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
661 GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)
662
663 GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
664 GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
665
666 #ifndef THREADED_RTS
667 section "bss" {
668   mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
669 }
670 #endif
671
/* gcdInt#: gcd of two Int#s via GMP's single-limb __gmpn_gcd_1.
   The first operand is staged through mp_tmp_w as a one-limb array. */
672 gcdIntzh_fast
673 {
674     /* R1 = the first Int#; R2 = the second Int# */
675     W_ r; 
676     FETCH_MP_TEMP(mp_tmp_w);
677
678     W_[mp_tmp_w] = R1;
679     r = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
680
681     R1 = r;
682     /* Result parked in R1, return via info-pointer at TOS */
683     jump %ENTRY_CODE(Sp(0));
684 }
685
686
/* gcdIntegerInt#: gcd of an Integer (size R1, limbs R2) and an Int# (R3),
   delegated directly to __gmpn_gcd_1 on the Integer's limb array. */
687 gcdIntegerIntzh_fast
688 {
689     /* R1 = s1; R2 = d1; R3 = the int */
690     R1 = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
691     
692     /* Result parked in R1, return via info-pointer at TOS */
693     jump %ENTRY_CODE(Sp(0));
694 }
695
696
/* cmpIntegerInt#: three-way compare of an Integer (R1/R2) against an
   Int# (R3).  Returns <0, 0 or >0 in R1.  Compares signed sizes first,
   then (when both are single-limb) the magnitudes. */
697 cmpIntegerIntzh_fast
698 {
699     /* R1 = s1; R2 = d1; R3 = the int */
700     W_ usize, vsize, v_digit, u_digit;
701
702     usize = R1;
703     vsize = 0;
704     v_digit = R3;
705
706     // paraphrased from __gmpz_cmp_si() in the GMP sources
707     if (%gt(v_digit,0)) {
708         vsize = 1;
709     } else { 
710         if (%lt(v_digit,0)) {
711             vsize = -1;
712             v_digit = -v_digit; // keep the magnitude for the limb compare below
713         }
714     }
715
716     if (usize != vsize) {
717         R1 = usize - vsize; // different signs/sizes decide the ordering outright
718         jump %ENTRY_CODE(Sp(0));
719     }
720
721     if (usize == 0) {
722         R1 = 0; // both zero
723         jump %ENTRY_CODE(Sp(0));
724     }
725
726     u_digit = W_[BYTE_ARR_CTS(R2)];
727
728     if (u_digit == v_digit) {
729         R1 = 0; 
730         jump %ENTRY_CODE(Sp(0));
731     }
732
733     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
734         R1 = usize; 
735     } else {
736         R1 = -usize; 
737     }
738
739     jump %ENTRY_CODE(Sp(0));
740 }
741
/* cmpInteger#: three-way compare of two Integers.  Sizes (which carry
   the sign) decide first; equal non-zero sizes fall through to a limb
   compare with __gmpn_cmp, whose result is flipped for negatives. */
742 cmpIntegerzh_fast
743 {
744     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
745     W_ usize, vsize, size, up, vp;
746     CInt cmp;
747
748     // paraphrased from __gmpz_cmp() in the GMP sources
749     usize = R1;
750     vsize = R3;
751
752     if (usize != vsize) {
753         R1 = usize - vsize; 
754         jump %ENTRY_CODE(Sp(0));
755     }
756
757     if (usize == 0) {
758         R1 = 0; 
759         jump %ENTRY_CODE(Sp(0));
760     }
761
762     if (%lt(usize,0)) { // NB. not <, which is unsigned
763         size = -usize;
764     } else {
765         size = usize;
766     }
767
768     up = BYTE_ARR_CTS(R2);
769     vp = BYTE_ARR_CTS(R4);
770
771     cmp = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];
772
773     if (cmp == 0 :: CInt) {
774         R1 = 0; 
775         jump %ENTRY_CODE(Sp(0));
776     }
777
778     // same sign: magnitude comparison agrees with value comparison for
779     // positives and is reversed for negatives
780     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
781         R1 = 1;
782     } else {
783         R1 = (-1); 
784     }
785     /* Result parked in R1, return via info-pointer at TOS */
786     jump %ENTRY_CODE(Sp(0));
787 }
786
/* integer2Int#: truncate an Integer to an Int# — take the low limb and
   negate it if the size (sign) is negative. */
787 integer2Intzh_fast
788 {
789     /* R1 = s; R2 = d */
790     W_ r, s;
791
792     s = R1;
793     if (s == 0) {
794         r = 0;
795     } else {
796         r = W_[R2 + SIZEOF_StgArrWords]; // first (least significant) limb
797         if (%lt(s,0)) {
798             r = -r;
799         }
800     }
801     /* Result parked in R1, return via info-pointer at TOS */
802     R1 = r;
803     jump %ENTRY_CODE(Sp(0));
804 }
805
/* integer2Word#: truncate an Integer to a Word#; identical logic to
   integer2Int# (the negation wraps modulo the word size). */
806 integer2Wordzh_fast
807 {
808   /* R1 = s; R2 = d */
809   W_ r, s;
810
811   s = R1;
812   if (s == 0) {
813     r = 0;
814   } else {
815     r = W_[R2 + SIZEOF_StgArrWords]; // first (least significant) limb
816     if (%lt(s,0)) {
817         r = -r;
818     }
819   }
820   /* Result parked in R1, return via info-pointer at TOS */
821   R1 = r;
822   jump %ENTRY_CODE(Sp(0));
823 }
824
/* decodeFloat#: split a Float# into (exponent, mantissa-sign/size,
   mantissa ByteArray#) via the C helper __decodeFloat.
   NOTE(review): the opening brace of this function was missing in this
   copy (the line carrying it was empty) — restored here; without it the
   file does not parse. */
825 decodeFloatzh_fast
826 {
827     W_ p;
828     F_ arg;
829     FETCH_MP_TEMP(mp_tmp1);
830     FETCH_MP_TEMP(mp_tmp_w);
831     
832     /* arguments: F1 = Float# */
833     arg = F1;
834     
835     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
836     
837     /* Be prepared to tell Lennart-coded __decodeFloat
838        where mantissa._mp_d can be put (it does not care about the rest) */
839     p = Hp - SIZEOF_StgArrWords;
840     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
841     StgArrWords_words(p) = 1;
842     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
843     
844     /* Perform the operation */
845     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
846     
847     /* returns: (Int# (expn), Int#, ByteArray#) */
848     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
849 }
850
851 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
852 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
853
/* decodeDouble#: split a Double# into (exponent, mantissa-sign/size,
   mantissa ByteArray#) via the C helper __decodeDouble.
   NOTE(review): the opening brace of this function was missing in this
   copy (the line carrying it was empty) — restored here; without it the
   file does not parse. */
854 decodeDoublezh_fast
855 {
856     D_ arg;
857     W_ p;
858     FETCH_MP_TEMP(mp_tmp1);
859     FETCH_MP_TEMP(mp_tmp_w);
860
861     /* arguments: D1 = Double# */
862     arg = D1;
863
864     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
865     
866     /* Be prepared to tell Lennart-coded __decodeDouble
867        where mantissa.d can be put (it does not care about the rest) */
868     p = Hp - ARR_SIZE + WDS(1);
869     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
870     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
871     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
872
873     /* Perform the operation */
874     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
875     
876     /* returns: (Int# (expn), Int#, ByteArray#) */
877     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
878 }
879
880 /* -----------------------------------------------------------------------------
881  * Concurrency primitives
882  * -------------------------------------------------------------------------- */
883
/* fork#: create a new IO thread running the closure in R1, schedule it,
   and request a context switch so it runs promptly. */
884 forkzh_fast
885 {
886   /* args: R1 = closure to spark */
887
888   MAYBE_GC(R1_PTR, forkzh_fast);
889
890   W_ closure;
891   W_ threadid;
892   closure = R1;
893
894   "ptr" threadid = foreign "C" createIOThread( MyCapability() "ptr", 
895                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
896                                 closure "ptr") [];
897   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
898
899   // switch at the earliest opportunity
900   CInt[context_switch] = 1 :: CInt;
901   
902   RET_P(threadid);
903 }
904
/* forkOn#: like fork#, but pins the new thread to the Capability for the
   cpu number given in R1. */
905 forkOnzh_fast
906 {
907   /* args: R1 = cpu, R2 = closure to spark */
908
909   MAYBE_GC(R2_PTR, forkOnzh_fast);
910
911   W_ cpu;
912   W_ closure;
913   W_ threadid;
914   cpu = R1;
915   closure = R2;
916
917   "ptr" threadid = foreign "C" createIOThread( MyCapability() "ptr", 
918                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
919                                 closure "ptr") [];
920   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
921
922   // switch at the earliest opportunity
923   CInt[context_switch] = 1 :: CInt;
924   
925   RET_P(threadid);
926 }
927
/* yield#: give up the Capability; implemented entirely by stg_yield_noregs. */
928 yieldzh_fast
929 {
930   jump stg_yield_noregs;
931 }
932
/* myThreadId#: return the current TSO as a ThreadId#. */
933 myThreadIdzh_fast
934 {
935   /* no args. */
936   RET_P(CurrentTSO);
937 }
938
/* labelThread#: attach a debug label (C string in R2) to a thread.
   A no-op unless the RTS was built with DEBUG. */
939 labelThreadzh_fast
940 {
941   /* args: 
942         R1 = ThreadId#
943         R2 = Addr# */
944 #ifdef DEBUG
945   foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
946 #endif
947   jump %ENTRY_CODE(Sp(0));
948 }
949
/* isCurrentThreadBound#: 1 if the current thread is bound to an OS
   thread, 0 otherwise (answered by the C scheduler). */
950 isCurrentThreadBoundzh_fast
951 {
952   /* no args */
953   W_ r;
954   r = foreign "C" isThreadBound(CurrentTSO) [];
955   RET_N(r);
956 }
957
958
959 /* -----------------------------------------------------------------------------
960  * TVar primitives
961  * -------------------------------------------------------------------------- */
962
/* In unregisterised builds there is no R1 register: the returned value
 * travels on the stack instead.  SP_OFF is the extra stack offset to
 * reach the return address, and IF_NOT_REG_R1(x) expands its argument
 * only in the unregisterised case (to shuffle that stack word around).
 */
#ifdef REG_R1
#define SP_OFF 0
#define IF_NOT_REG_R1(x) 
#else
#define SP_OFF 1
#define IF_NOT_REG_R1(x) x
#endif
970
971 // Catch retry frame ------------------------------------------------------------
972
/* A catch-retry frame should never be entered through one of its
 * vectored-return entry points, so all eight vectors just barf.
 */
#define CATCH_RETRY_FRAME_ERROR(label) \
  label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }

CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)

#if MAX_VECTORED_RTN > 8
#error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
#endif

/* Frame size and liveness bitmap differ in profiling builds (the frame
 * carries extra profiling words -- see StgCatchRetryFrame). */
#if defined(PROFILING)
#define CATCH_RETRY_FRAME_BITMAP 7
#define CATCH_RETRY_FRAME_WORDS  5
#else
#define CATCH_RETRY_FRAME_BITMAP 1
#define CATCH_RETRY_FRAME_WORDS  3
#endif
996
/* stg_catch_retry_frame: return frame pushed by catchRetryzh_fast.
 *
 * Control returns here when the branch currently running under a
 * catchRetry# finishes without retrying.  We then try to commit the
 * nested transaction: on success we pop the frame and return; on
 * failure we start a fresh nested transaction and re-execute whichever
 * branch (first or alternative) we were running.
 */
INFO_TABLE_RET(stg_catch_retry_frame,
               CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
               CATCH_RETRY_FRAME,
               stg_catch_retry_frame_0_ret,
               stg_catch_retry_frame_1_ret,
               stg_catch_retry_frame_2_ret,
               stg_catch_retry_frame_3_ret,
               stg_catch_retry_frame_4_ret,
               stg_catch_retry_frame_5_ret,
               stg_catch_retry_frame_6_ret,
               stg_catch_retry_frame_7_ret)
{
   W_ r, frame, trec, outer;
   /* Unregisterised: the result is on the stack, not in R1 */
   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
   r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
   if (r != 0) {
     /* Succeeded (either first branch or second branch) */
     StgTSO_trec(CurrentTSO) = outer;
     Sp = Sp + SIZEOF_StgCatchRetryFrame;
     IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
     jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
     /* Did not commit: re-execute */
     W_ new_trec;
     "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
     StgTSO_trec(CurrentTSO) = new_trec;
     /* running_alt_code tells us which branch to restart */
     if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
       R1 = StgCatchRetryFrame_alt_code(frame);
     } else {
       R1 = StgCatchRetryFrame_first_code(frame);
     }
     jump stg_ap_v_fast;
   }
}
1035
1036
1037 // Atomically frame -------------------------------------------------------------
1038
1039
/* As with catch-retry frames, an atomically frame must never be entered
 * through one of its vectored-return entry points: all eight barf.
 */
#define ATOMICALLY_FRAME_ERROR(label) \
  label { foreign "C" barf("atomically_frame incorrectly entered!"); }

ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)

#if MAX_VECTORED_RTN > 8
#error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
#endif

/* Frame size and bitmap differ in profiling builds (extra profiling
 * words in the frame -- see StgAtomicallyFrame). */
#if defined(PROFILING)
#define ATOMICALLY_FRAME_BITMAP 3
#define ATOMICALLY_FRAME_WORDS  4
#else
#define ATOMICALLY_FRAME_BITMAP 0
#define ATOMICALLY_FRAME_WORDS  2
#endif
1063
1064
/* stg_atomically_frame: return frame pushed by atomicallyzh_fast.
 *
 * Control returns here when the body of an atomically# block (or one of
 * its invariant-check transactions) finishes.  The flow is:
 *
 *   1. First return (outer == NO_TREC): collect the invariants that
 *      this transaction must re-check.
 *   2. Subsequent returns: the transaction on top of the stack was an
 *      invariant check -- abort it and step to the next invariant.
 *   3. If invariants remain, start a nested transaction and run the
 *      next invariant's code.
 *   4. Otherwise try to commit; on success pop the frame and return,
 *      on failure restart the whole transaction from scratch.
 */
INFO_TABLE_RET(stg_atomically_frame,
               ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
               ATOMICALLY_FRAME,
               stg_atomically_frame_0_ret,
               stg_atomically_frame_1_ret,
               stg_atomically_frame_2_ret,
               stg_atomically_frame_3_ret,
               stg_atomically_frame_4_ret,
               stg_atomically_frame_5_ret,
               stg_atomically_frame_6_ret,
               stg_atomically_frame_7_ret)
{
  W_ frame, trec, valid, next_invariant, q, outer;
  /* Unregisterised: the result is on the stack, not in R1 */
  IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (outer == NO_TREC) {
    /* First time back at the atomically frame -- pick up invariants */
    "ptr" q = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;

  } else {
    /* Second/subsequent time back at the atomically frame -- abort the
     * tx that's checking the invariant and move on to the next one */
    StgTSO_trec(CurrentTSO) = outer;
    q = StgAtomicallyFrame_next_invariant_to_check(frame);
    StgInvariantCheckQueue_my_execution(q) = trec;
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    /* Don't free trec -- it's linked from q and will be stashed in the
     * invariant if we eventually commit. */
    q = StgInvariantCheckQueue_next_queue_entry(q);
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    trec = outer;
  }

  q = StgAtomicallyFrame_next_invariant_to_check(frame);

  if (q != END_INVARIANT_CHECK_QUEUE) {
    /* We can't commit yet: another invariant to check */
    "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;

    next_invariant = StgInvariantCheckQueue_invariant(q);
    R1 = StgAtomicInvariant_code(next_invariant);
    jump stg_ap_v_fast;

  } else {

    /* We've got no more invariants to check, try to commit */
    valid = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
    if (valid != 0) {
      /* Transaction was valid: commit succeeded */
      StgTSO_trec(CurrentTSO) = NO_TREC;
      Sp = Sp + SIZEOF_StgAtomicallyFrame;
      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
      jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
      /* Transaction was not valid: try again */
      "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
      R1 = StgAtomicallyFrame_code(frame);
      jump stg_ap_v_fast;
    }
  }
}
1134
/* stg_atomically_waiting_frame: variant of the atomically frame used
 * while the TSO is blocked in retry# waiting for a TVar to change
 * (retryzh_fast overwrites the frame header with this info table).
 *
 * On wake-up, stmReWait decides whether the previous attempt is still
 * valid: if so we simply block again; if not we restart the transaction
 * from the beginning, restoring the normal atomically frame header.
 *
 * Note: it deliberately shares the stg_atomically_frame_*_ret barf
 * vectors and the same frame layout.
 */
INFO_TABLE_RET(stg_atomically_waiting_frame,
               ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
               ATOMICALLY_FRAME,
               stg_atomically_frame_0_ret,
               stg_atomically_frame_1_ret,
               stg_atomically_frame_2_ret,
               stg_atomically_frame_3_ret,
               stg_atomically_frame_4_ret,
               stg_atomically_frame_5_ret,
               stg_atomically_frame_6_ret,
               stg_atomically_frame_7_ret)
{
  W_ frame, trec, valid;
  IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

  frame = Sp;

  /* The TSO is currently waiting: should we stop waiting? */
  valid = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
  if (valid != 0) {
    /* Previous attempt is still valid: no point trying again yet */
          IF_NOT_REG_R1(Sp_adj(-2);
                        Sp(1) = stg_NO_FINALIZER_closure;
                        Sp(0) = stg_ut_1_0_unreg_info;)
    jump stg_block_noregs;
  } else {
    /* Previous attempt is no longer valid: try again */
    "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    /* Revert to the ordinary atomically frame before re-running */
    StgHeader_info(frame) = stg_atomically_frame_info;
    R1 = StgAtomicallyFrame_code(frame);
    jump stg_ap_v_fast;
  }
}
1169
1170 // STM catch frame --------------------------------------------------------------
1171
/* Template for the catch-STM frame's entry code, parameterised over the
 * label name and the return to perform after a successful commit.  The
 * body commits the nested transaction started by catchSTMzh_fast: on
 * success it pops the frame and returns via `ret'; on failure it starts
 * a fresh nested transaction and re-runs the protected code.
 */
#define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)                                               \
   label                                                                                        \
   {                                                                                            \
      IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )                                       \
      W_ r, frame, trec, outer;                                                                 \
      frame = Sp;                                                                               \
      trec = StgTSO_trec(CurrentTSO);                                                           \
      "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];                             \
      r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];          \
      if (r != 0) {                                                                             \
        /* Commit succeeded */                                                                  \
        StgTSO_trec(CurrentTSO) = outer;                                                        \
        Sp = Sp + SIZEOF_StgCatchSTMFrame;                                                      \
        IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)                                                \
        jump ret;                                                                               \
      } else {                                                                                  \
        /* Commit failed */                                                                     \
        W_ new_trec;                                                                            \
        "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") []; \
        StgTSO_trec(CurrentTSO) = new_trec;                                                     \
        R1 = StgCatchSTMFrame_code(frame);                                                      \
        jump stg_ap_v_fast;                                                                     \
      }                                                                                         \
   }

/* Re-establish SP_OFF for this section (same values as above). */
#ifdef REG_R1
#define SP_OFF 0
#else
#define SP_OFF 1
#endif

/* Vectored-return entry points: each performs the matching RET_VEC. */
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))

#if MAX_VECTORED_RTN > 8
#error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
#endif

/* Frame size and bitmap differ in profiling builds. */
#if defined(PROFILING)
#define CATCH_STM_FRAME_BITMAP 3
#define CATCH_STM_FRAME_WORDS  4
#else
#define CATCH_STM_FRAME_BITMAP 0
#define CATCH_STM_FRAME_WORDS  2
#endif
1223
1224 /* Catch frames are very similar to update frames, but when entering
1225  * one we just pop the frame off the stack and perform the correct
1226  * kind of return to the activation record underneath us on the stack.
1227  */
1228
/* stg_catch_stm_frame: return frame pushed by catchSTMzh_fast.  The
 * direct (non-vectored) entry is generated from the template with an
 * empty label and an ordinary ENTRY_CODE return. */
INFO_TABLE_RET(stg_catch_stm_frame,
               CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
               CATCH_STM_FRAME,
               stg_catch_stm_frame_0_ret,
               stg_catch_stm_frame_1_ret,
               stg_catch_stm_frame_2_ret,
               stg_catch_stm_frame_3_ret,
               stg_catch_stm_frame_4_ret,
               stg_catch_stm_frame_5_ret,
               stg_catch_stm_frame_6_ret,
               stg_catch_stm_frame_7_ret)
CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1241
1242
1243 // Primop definition ------------------------------------------------------------
1244
/* atomicallyzh_fast: implementation of the atomically# primop.
 *
 * Rejects nested atomically# (raising NestedAtomically), pushes an
 * atomically frame, starts a top-level transaction, and runs the STM
 * action.  Control comes back via stg_atomically_frame above.
 */
atomicallyzh_fast
{
  W_ frame;
  W_ old_trec;
  W_ new_trec;
  
  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR, atomicallyzh_fast); 

  /* Args: R1 = m :: STM a */
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);

  old_trec = StgTSO_trec(CurrentTSO);

  /* Nested transactions are not allowed; raise an exception */
  if (old_trec != NO_TREC) {
     R1 = base_GHCziIOBase_NestedAtomically_closure;
     jump raisezh_fast;
  }

  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
  StgAtomicallyFrame_code(frame) = R1;
  StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;

  /* Start the memory transaction */
  "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1280
1281
/* catchSTMzh_fast: implementation of the catchSTM# primop.
 *
 * Pushes a catch-STM frame recording both the protected code and the
 * handler, then starts a nested transaction and runs the body.  Commit
 * and re-execution are handled by stg_catch_stm_frame.
 */
catchSTMzh_fast
{
  W_ frame;
  
  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */
  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
  StgCatchSTMFrame_handler(frame) = R2;
  StgCatchSTMFrame_code(frame) = R1;

  /* Start a nested transaction to run the body of the try block in */
  W_ cur_trec;  
  W_ new_trec;
  cur_trec = StgTSO_trec(CurrentTSO);
  /* NOTE(review): this foreign call has no liveness annotation ([])
   * unlike its siblings elsewhere in this file -- confirm that the
   * default liveness is the intended (conservative) behaviour here. */
  "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1308
1309
/* catchRetryzh_fast: implementation of the catchRetry# (orElse) primop.
 *
 * Starts a nested transaction, pushes a catch-retry frame recording the
 * two alternative STM actions (running_alt_code initially false), and
 * runs the first one.  retryzh_fast consults this frame to switch to
 * the alternative if the first branch retries.
 */
catchRetryzh_fast
{
  W_ frame;
  W_ new_trec;
  W_ trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 

  /* Args: R1 :: STM a */
  /* Args: R2 :: STM a */
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
  "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;
  
  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1340
1341
/* retryzh_fast: implementation of the retry# primop.
 *
 * Walks the stack (via findRetryFrameHelper) to the nearest
 * CATCH_RETRY_FRAME or ATOMICALLY_FRAME.  At a catch-retry frame it
 * either switches to the alternative branch or propagates the retry
 * outwards; at the atomically frame it calls stmWait to sleep until one
 * of the TVars read by the transaction is written.
 */
retryzh_fast
{
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

  MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
  /* findRetryFrameHelper works on the TSO's saved Sp, so sync it */
  StgTSO_sp(CurrentTSO) = Sp;
  frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
  Sp = StgTSO_sp(CurrentTSO);
  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
    // Abort the transaction attempting the current branch
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    /* NOTE(review): `!x != 0::I32` reads oddly; presumably it means
     * "running_alt_code is false" -- confirm operator precedence in
     * the Cmm parser before restyling this condition. */
    if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
      // Retry in the first branch: try the alternative
      "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
      R1 = StgCatchRetryFrame_alt_code(frame);
      jump stg_ap_v_fast;
    } else {
      // Retry in the alternative code: propagate the retry
      StgTSO_trec(CurrentTSO) = outer;
      Sp = Sp + SIZEOF_StgCatchRetryFrame;
      goto retry_pop_stack;
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait 
  ASSERT(frame_type == ATOMICALLY_FRAME);
  if (outer != NO_TREC) {
    // We called retry while checking invariants, so abort the current
    // invariant check (merging its TVar accesses into the parent's read
    // set so we'll wait on them)
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    trec = outer;
     StgTSO_trec(CurrentTSO) = trec;
    "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
  }
  ASSERT(outer == NO_TREC);

  r = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
  if (r != 0) {
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
    StgHeader_info(frame) = stg_atomically_waiting_frame_info;
    Sp = frame;
    // Fix up the stack in the unregisterised case: the return convention is different.
    IF_NOT_REG_R1(Sp_adj(-2); 
                  Sp(1) = stg_NO_FINALIZER_closure;
                  Sp(0) = stg_ut_1_0_unreg_info;)
    R3 = trec; // passing to stmWaitUnblock()
    jump stg_block_stmwait;
  } else {
    // Transaction was not valid: retry immediately
    "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
    jump stg_ap_v_fast;
  }
}
1416
1417
/* checkzh_fast: implementation of the check# primop.
 *
 * Registers an STM invariant closure with the current transaction; the
 * invariant will be (re-)checked by stg_atomically_frame at commit.
 */
checkzh_fast
{
  W_ trec, closure;

  /* Args: R1 = invariant closure */
  MAYBE_GC (R1_PTR, checkzh_fast); 

  trec = StgTSO_trec(CurrentTSO);
  closure = R1;
  foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
                                     trec "ptr",
                                     closure "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1433
1434
/* newTVarzh_fast: implementation of the newTVar# primop.
 * Allocates a fresh TVar holding the given initial value. */
newTVarzh_fast
{
  W_ tv;
  W_ new_value;

  /* Args: R1 = initialisation value */

  /* stmNewTVar may allocate */
  MAYBE_GC (R1_PTR, newTVarzh_fast); 
  new_value = R1;
  "ptr" tv = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
  RET_P(tv);
}
1447
1448
/* readTVarzh_fast: implementation of the readTVar# primop.
 * Reads a TVar through the current transaction record, so the read is
 * logged in the transaction's read set. */
readTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ result;

  /* Args: R1 = TVar closure */

  MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  "ptr" result = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];

  RET_P(result);
}
1464
1465
/* writeTVarzh_fast: implementation of the writeTVar# primop.
 * Records the write in the current transaction record; the TVar itself
 * is only updated if/when the transaction commits. */
writeTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ new_value;
  
  /* Args: R1 = TVar closure */
  /*       R2 = New value    */

  MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  new_value = R2;
  foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1483
1484
1485 /* -----------------------------------------------------------------------------
1486  * MVar primitives
1487  *
1488  * take & putMVar work as follows.  Firstly, an important invariant:
1489  *
1490  *    If the MVar is full, then the blocking queue contains only
1491  *    threads blocked on putMVar, and if the MVar is empty then the
1492  *    blocking queue contains only threads blocked on takeMVar.
1493  *
 * takeMVar:
1495  *    MVar empty : then add ourselves to the blocking queue
1496  *    MVar full  : remove the value from the MVar, and
1497  *                 blocking queue empty     : return
1498  *                 blocking queue non-empty : perform the first blocked putMVar
1499  *                                            from the queue, and wake up the
1500  *                                            thread (MVar is now full again)
1501  *
1502  * putMVar is just the dual of the above algorithm.
1503  *
1504  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1505  * the stack of the thread waiting to do the putMVar.  See
1506  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1507  * the stack layout, and the PerformPut and PerformTake macros below.
1508  *
1509  * It is important that a blocked take or put is woken up with the
1510  * take/put already performed, because otherwise there would be a
1511  * small window of vulnerability where the thread could receive an
1512  * exception and never perform its take or put, and we'd end up with a
1513  * deadlock.
1514  *
1515  * -------------------------------------------------------------------------- */
1516
/* isEmptyMVarzh_fast: implementation of the isEmptyMVar# primop.
 * Non-blocking: inspects the MVar's info pointer and returns 1 if it is
 * the EMPTY_MVAR info table, 0 otherwise. */
isEmptyMVarzh_fast
{
    /* args: R1 = MVar closure */

    if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
        RET_N(1);
    } else {
        RET_N(0);
    }
}
1527
/* newMVarzh_fast: implementation of the newMVar# primop.
 * Allocates an empty MVar: head/tail of the blocking queue and the
 * value slot are all initialised to END_TSO_QUEUE. */
newMVarzh_fast
{
    /* args: none */
    W_ mvar;

    ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
  
    /* Hp points at the last word of the allocation; step back to the
     * first word of the new object */
    mvar = Hp - SIZEOF_StgMVar + WDS(1);
    SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
    StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
    RET_P(mvar);
}
1542
1543
/* If R1 isn't available, pass it on the stack */
/* PerformTake: complete a blocked takeMVar on behalf of another thread.
 * Writes the taken value and a return frame (which loads the value into
 * R1, or leaves it on the stack in the unregisterised case) onto the
 * blocked thread's stack. */
#ifdef REG_R1
#define PerformTake(tso, value)                         \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
#else
#define PerformTake(tso, value)                                 \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
#endif

/* PerformPut: complete a blocked putMVar on behalf of another thread.
 * Pops the 3-word blocked-put frame from the blocked thread's stack and
 * reads out the value it was trying to put (see stg_block_putmvar in
 * HeapStackCheck.cmm for the frame layout). */
#define PerformPut(tso,lval)                    \
    StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
    lval = W_[StgTSO_sp(tso) - WDS(1)];
1558
/* takeMVarzh_fast: implementation of the takeMVar# primop.
 *
 * Empty MVar: enqueue ourselves on the MVar's blocking queue and block.
 * Full MVar: take the value; if a putMVar is blocked on the queue,
 * perform its put on its behalf and wake it (MVar stays full),
 * otherwise mark the MVar empty.  In THREADED_RTS the MVar closure is
 * locked for the duration and the unlock also publishes the new info
 * pointer.
 */
takeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */
    mvar = R1;

#if defined(THREADED_RTS)
    "ptr" info = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    /* If the MVar is empty, put ourselves on its blocking queue,
     * and wait until we're woken up.
     */
    if (info == stg_EMPTY_MVAR_info) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        jump stg_block_takemvar;
  }

  /* we got the value... */
  val = StgMVar_value(mvar);

  if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
  {
      /* There are putMVar(s) waiting... 
       * wake up the first thread on the queue
       */
      ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

      /* actually perform the putMVar for the thread that we just woke up */
      tso = StgMVar_head(mvar);
      PerformPut(tso,StgMVar_value(mvar));
      /* we mutated the woken thread's stack, so mark it dirty */
      dirtyTSO(tso);

#if defined(GRAN) || defined(PAR)
      /* ToDo: check 2nd arg (mvar) is right */
      /* NOTE(review): unlike the GRAN/PAR branch in tryTakeMVarzh_fast,
       * this unblockOne call has no "ptr" annotations on its arguments
       * -- confirm this is intentional. */
      "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
      StgMVar_head(mvar) = tso;
#else
      "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", 
                                         StgMVar_head(mvar) "ptr") [];
      StgMVar_head(mvar) = tso;
#endif

      if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
          StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
      }

#if defined(THREADED_RTS)
      foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
#endif
      RET_P(val);
  } 
  else
  {
      /* No further putMVars, MVar is now empty */
      StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
 
#if defined(THREADED_RTS)
      foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
#else
      SET_INFO(mvar,stg_EMPTY_MVAR_info);
#endif

      RET_P(val);
  }
}
1637
1638
/* tryTakeMVarzh_fast: implementation of the tryTakeMVar# primop.
 *
 * Non-blocking variant of takeMVarzh_fast: returns (0, dummy) if the
 * MVar is empty instead of blocking, otherwise (1, value) with the
 * same wake-a-blocked-putMVar handling.
 */
tryTakeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */

    mvar = R1;

#if defined(THREADED_RTS)
    "ptr" info = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_EMPTY_MVAR_info) {
#if defined(THREADED_RTS)
        foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
#endif
        /* HACK: we need a pointer to pass back, 
         * so we abuse NO_FINALIZER_closure
         */
        RET_NP(0, stg_NO_FINALIZER_closure);
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are putMVar(s) waiting... 
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));
        /* we mutated the woken thread's stack, so mark it dirty */
        dirtyTSO(tso);

#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr",
                                           StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }
#if defined(THREADED_RTS)
        foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
#endif
    }
    else 
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
#if defined(THREADED_RTS)
        foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
#else
        SET_INFO(mvar,stg_EMPTY_MVAR_info);
#endif
    }
    
    RET_NP(1, val);
}
1708
1709
/* putMVarzh_fast: implementation of the putMVar# primop.
 *
 * Full MVar: enqueue ourselves on the blocking queue and block.
 * Empty MVar: if a takeMVar is blocked on the queue, perform its take
 * on its behalf (handing it R2 directly) and wake it, leaving the MVar
 * empty; otherwise store R2 and mark the MVar full.
 */
putMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    /* R2 is live across this call: keep it in the liveness annotation */
    "ptr" info = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_FULL_MVAR_info) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        jump stg_block_putmvar;
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        /* we mutated the woken thread's stack, so mark it dirty */
        dirtyTSO(tso);
      
#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
#else
        SET_INFO(mvar,stg_FULL_MVAR_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    
    /* ToDo: yield afterward for better communication performance? */
}
1781
1782
/* tryPutMVarzh_fast: implementation of the tryPutMVar# primop.
 *
 * Non-blocking variant of putMVarzh_fast: returns 0 if the MVar is
 * already full instead of blocking, otherwise performs the put (waking
 * a blocked takeMVar if there is one) and returns 1.
 */
tryPutMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    /* R2 is live across this call: keep it in the liveness annotation */
    "ptr" info = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_FULL_MVAR_info) {
#if defined(THREADED_RTS)
        foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
#endif
        RET_N(0);
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
        
        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        /* we mutated the woken thread's stack, so mark it dirty */
        dirtyTSO(tso);
      
#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
#endif
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
#else
        SET_INFO(mvar,stg_FULL_MVAR_info);
#endif
    }
    
    RET_N(1);
    /* ToDo: yield afterward for better communication performance? */
}
1846
1847
1848 /* -----------------------------------------------------------------------------
1849    Stable pointer primitives
1850    -------------------------------------------------------------------------  */
1851
/* makeStableName# primop.
 *
 * Args:    R1 = the heap object to name.
 * Returns: RET_P(sn_obj), a STABLE_NAME heap object wrapping the index
 *          of R1's slot in the RTS stable_ptr_table.
 *
 * The snEntry for an object caches the StableName object built for it,
 * so repeated calls on the same object return the same sn_obj.
 *
 * NOTE(review): the StgStableName is reserved with ALLOC_PRIM *before*
 * we know whether a cached sn_obj already exists; on the cached path the
 * freshly reserved words are simply left unused (slop).
 */
1852 makeStableNamezh_fast
1853 {
1854     W_ index, sn_obj;
1855
1856     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1857   
     /* get (or create) R1's slot index in the stable pointer table */
1858     index = foreign "C" lookupStableName(R1 "ptr") [];
1859
1860     /* Is there already a StableName for this heap object?
1861      *  stable_ptr_table is a pointer to an array of snEntry structs.
1862      */
1863     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
         /* no: build a new STABLE_NAME object and cache it in the entry */
1864         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1865         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1866         StgStableName_sn(sn_obj) = index;
1867         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1868     } else {
         /* yes: return the cached one */
1869         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1870     }
1871     
1872     RET_P(sn_obj);
1873 }
1874
1875
/* makeStablePtr# primop.
 *
 * Args:    R1 = the object to pin.
 * Returns: RET_N(sp) — the StablePtr# is returned as a non-pointer,
 *          since it is a table index/handle, not a GC-managed object.
 * getStablePtr may allocate (enlarging the stable pointer table), hence
 * the MAYBE_GC beforehand.
 */
1876 makeStablePtrzh_fast
1877 {
1878     /* Args: R1 = a */
1879     W_ sp;
1880     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1881     "ptr" sp = foreign "C" getStablePtr(R1 "ptr") [];
1882     RET_N(sp);
1883 }
1884
/* deRefStablePtr# primop.
 *
 * Args:    R1 = a StablePtr# (an index into stable_ptr_table).
 * Returns: RET_P(r), the heap object stored in that table entry's
 *          addr field.  No GC check needed: nothing is allocated.
 */
1885 deRefStablePtrzh_fast
1886 {
1887     /* Args: R1 = the stable ptr */
1888     W_ r, sp;
1889     sp = R1;
1890     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1891     RET_P(r);
1892 }
1893
1894 /* -----------------------------------------------------------------------------
1895    Bytecode object primitives
1896    -------------------------------------------------------------------------  */
1897
/* newBCO# primop: allocate and initialise a bytecode object (BCO).
 *
 * Args:    R1 = instrs, R2 = literals, R3 = ptrs, R4 = itbls,
 *          R5 = arity, R6 = bitmap array (an StgArrWords).
 * Returns: RET_P(bco), the freshly built BCO.
 *
 * The BCO is variable-sized: a fixed StgBCO header followed by a copy
 * of the bitmap words taken from R6.  arity and size are stored as
 * half-words (HALF_W_).
 */
1898 newBCOzh_fast
1899 {
1900     /* R1 = instrs
1901        R2 = literals
1902        R3 = ptrs
1903        R4 = itbls
1904        R5 = arity
1905        R6 = bitmap array
1906     */
1907     W_ bco, bitmap_arr, bytes, words;
1908     
1909     bitmap_arr = R6;
     /* total size = fixed BCO header + the bitmap payload words */
1910     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1911     bytes = WDS(words);
1912
     /* R5 (arity) is a non-pointer, so it is not listed in the liveness mask */
1913     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );
1914
1915     bco = Hp - bytes + WDS(1);
1916     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1917     
1918     StgBCO_instrs(bco)     = R1;
1919     StgBCO_literals(bco)   = R2;
1920     StgBCO_ptrs(bco)       = R3;
1921     StgBCO_itbls(bco)      = R4;
1922     StgBCO_arity(bco)      = HALF_W_(R5);
1923     StgBCO_size(bco)       = HALF_W_(words);
1924     
1925     // Copy the arity/bitmap info into the BCO
     // (C-- has no loop syntax; this is a hand-rolled while loop via goto)
1926     W_ i;
1927     i = 0;
1928 for:
1929     if (i < StgArrWords_words(bitmap_arr)) {
1930         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1931         i = i + 1;
1932         goto for;
1933     }
1934     
1935     RET_P(bco);
1936 }
1937
1938
/* mkApUpd0# primop: wrap a zero-arity BCO in an updatable AP thunk.
 *
 * Args:    R1 = the BCO.
 * Returns: RET_P(ap), an AP closure with zero args whose fun is R1.
 *
 * Used only by the bytecode linker (ByteCodeLink.lhs); the ASSERT
 * enforces the precondition that R1 really is a BCO of arity 0.
 */
1939 mkApUpd0zh_fast
1940 {
1941     // R1 = the BCO# for the AP
1942     //  
1943     W_ ap;
1944
1945     // This function is *only* used to wrap zero-arity BCOs in an
1946     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1947     // saturated and always points directly to a FUN or BCO.
1948     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1949            StgBCO_arity(R1) == HALF_W_(0));
1950
     // heap check + ticky-ticky/cost-centre bookkeeping for the allocation
1951     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1952     TICK_ALLOC_UP_THK(0, 0);
1953     CCCS_ALLOC(SIZEOF_StgAP);
1954
1955     ap = Hp - SIZEOF_StgAP + WDS(1);
1956     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1957     
1958     StgAP_n_args(ap) = HALF_W_(0);
1959     StgAP_fun(ap) = R1;
1960     
1961     RET_P(ap);
1962 }
1963
1964 /* -----------------------------------------------------------------------------
1965    Thread I/O blocking primitives
1966    -------------------------------------------------------------------------- */
1967
1968 /* Add a thread to the end of the blocked queue. (C-- version of the C
1969  * macro in Schedule.h).
1970  */
/* Invariant: blocked_queue_tl is only consulted when blocked_queue_hd is
 * non-empty, so it may be stale while the queue is empty; both are updated
 * here so the invariant is re-established after every append.  The tso
 * must not already be on a queue (its link must be END_TSO_QUEUE).
 */
1971 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1972     ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
1973     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1974       W_[blocked_queue_hd] = tso;                       \
1975     } else {                                            \
1976       StgTSO_link(W_[blocked_queue_tl]) = tso;          \
1977     }                                                   \
1978     W_[blocked_queue_tl] = tso;
1979
/* waitRead# primop: block the current thread until R1 is readable.
 * R1 is presumably a file descriptor (stored in block_info for the
 * scheduler's select/poll loop) — TODO confirm against the scheduler.
 * Non-threaded RTS only; the threaded RTS uses blocking foreign calls
 * instead, so this barfs there.
 */
1980 waitReadzh_fast
1981 {
1982     /* args: R1 */
1983 #ifdef THREADED_RTS
1984     foreign "C" barf("waitRead# on threaded RTS");
1985 #else
1986
1987     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1988     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1989     StgTSO_block_info(CurrentTSO) = R1;
1990     // No locking - we're not going to use this interface in the
1991     // threaded RTS anyway.
1992     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1993     jump stg_block_noregs;
1994 #endif
1995 }
1996
/* waitWrite# primop: block the current thread until R1 is writable.
 * Mirror image of waitReadzh_fast above (BlockedOnWrite instead of
 * BlockedOnRead); non-threaded RTS only.
 */
1997 waitWritezh_fast
1998 {
1999     /* args: R1 */
2000 #ifdef THREADED_RTS
2001     foreign "C" barf("waitWrite# on threaded RTS");
2002 #else
2003
2004     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2005     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2006     StgTSO_block_info(CurrentTSO) = R1;
2007     // No locking - we're not going to use this interface in the
2008     // threaded RTS anyway.
2009     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2010     jump stg_block_noregs;
2011 #endif
2012 }
2013
2014
2015 STRING(stg_delayzh_malloc_str, "delayzh_fast")
/* delay# primop: block the current thread for R1 microseconds.
 * Non-threaded RTS only (the threaded RTS implements threadDelay
 * differently and barfs here).
 *
 * On mingw32 the delay is issued to the async I/O manager as a delay
 * request; elsewhere the thread is inserted, ordered by wake-up time,
 * into the RTS sleeping_queue for the scheduler to wake.
 */
2016 delayzh_fast
2017 {
2018 #ifdef mingw32_HOST_OS
2019     W_ ares;
2020     CInt reqID;
2021 #else
2022     W_ t, prev, target;
2023 #endif
2024
2025 #ifdef THREADED_RTS
2026     foreign "C" barf("delay# on threaded RTS");
2027 #else
2028
2029     /* args: R1 (microsecond delay amount) */
2030     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2031     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
2032
2033 #ifdef mingw32_HOST_OS
2034
2035     /* could probably allocate this on the heap instead */
2036     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2037                                             stg_delayzh_malloc_str);
2038     reqID = foreign "C" addDelayRequest(R1);
2039     StgAsyncIOResult_reqID(ares)   = reqID;
2040     StgAsyncIOResult_len(ares)     = 0;
2041     StgAsyncIOResult_errCode(ares) = 0;
2042     StgTSO_block_info(CurrentTSO)  = ares;
2043
2044     /* Having all async-blocked threads reside on the blocked_queue
2045      * simplifies matters, so change the status to OnDoProc and put the
2046      * delayed thread on the blocked_queue.
2047      */
2048     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2049     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2050     jump stg_block_async_void;
2051
2052 #else
2053
     /* Convert the microsecond delay to scheduler ticks and add the
      * current time, giving the absolute wake-up time.  (tickInterval is
      * presumably in milliseconds, hence the *1000 — TODO confirm against
      * getourtimeofday's units.)
      */
2054     W_ time;
2055     time = foreign "C" getourtimeofday() [R1];
2056     target = (R1 / (TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags))*1000)) + time;
2057     StgTSO_block_info(CurrentTSO) = target;
2058
2059     /* Insert the new thread in the sleeping queue. */
     /* linear scan to keep sleeping_queue sorted by wake-up time */
2060     prev = NULL;
2061     t = W_[sleeping_queue];
2062 while:
2063     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
2064         prev = t;
2065         t = StgTSO_link(t);
2066         goto while;
2067     }
2068
2069     StgTSO_link(CurrentTSO) = t;
2070     if (prev == NULL) {
2071         W_[sleeping_queue] = CurrentTSO;
2072     } else {
2073         StgTSO_link(prev) = CurrentTSO;
2074     }
2075     jump stg_block_noregs;
2076 #endif
2077 #endif /* !THREADED_RTS */
2078 }
2079
2080
2081 #ifdef mingw32_HOST_OS
2082 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
/* asyncRead# primop (Windows-only async I/O manager path).
 * Issues a read request (addIORequest with isWrite=0) and blocks the
 * current thread until the I/O manager completes it; the malloc'd
 * StgAsyncIOResult in block_info carries the request id and, later,
 * the completion length/error code back to the thread.
 */
2083 asyncReadzh_fast
2084 {
2085     W_ ares;
2086     CInt reqID;
2087
2088 #ifdef THREADED_RTS
2089     foreign "C" barf("asyncRead# on threaded RTS");
2090 #else
2091
2092     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2093     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2094     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
2095
2096     /* could probably allocate this on the heap instead */
2097     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2098                                             stg_asyncReadzh_malloc_str)
2099                         [R1,R2,R3,R4];
2100     reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
2101     StgAsyncIOResult_reqID(ares)   = reqID;
2102     StgAsyncIOResult_len(ares)     = 0;
2103     StgAsyncIOResult_errCode(ares) = 0;
2104     StgTSO_block_info(CurrentTSO)  = ares;
2105     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2106     jump stg_block_async;
2107 #endif
2108 }
2109
2110 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
/* asyncWrite# primop (Windows-only async I/O manager path).
 * Identical in structure to asyncReadzh_fast above, but issues a write
 * request (addIORequest with isWrite=1) and blocks as BlockedOnWrite.
 */
2111 asyncWritezh_fast
2112 {
2113     W_ ares;
2114     CInt reqID;
2115
2116 #ifdef THREADED_RTS
2117     foreign "C" barf("asyncWrite# on threaded RTS");
2118 #else
2119
2120     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2121     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2122     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2123
     /* could probably allocate this on the heap instead */
2124     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2125                                             stg_asyncWritezh_malloc_str)
2126                         [R1,R2,R3,R4];
2127     reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
2128
2129     StgAsyncIOResult_reqID(ares)   = reqID;
2130     StgAsyncIOResult_len(ares)     = 0;
2131     StgAsyncIOResult_errCode(ares) = 0;
2132     StgTSO_block_info(CurrentTSO)  = ares;
2133     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2134     jump stg_block_async;
2135 #endif
2136 }
2137
2138 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
/* asyncDoProc# primop (Windows-only async I/O manager path).
 * Schedules an arbitrary procedure R1 (with parameter R2) to be run by
 * the I/O manager's worker (addDoProcRequest) and blocks the current
 * thread as BlockedOnDoProc until it completes, using the same
 * StgAsyncIOResult protocol as asyncRead#/asyncWrite#.
 */
2139 asyncDoProczh_fast
2140 {
2141     W_ ares;
2142     CInt reqID;
2143
2144 #ifdef THREADED_RTS
2145     foreign "C" barf("asyncDoProc# on threaded RTS");
2146 #else
2147
2148     /* args: R1 = proc, R2 = param */
2149     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2150     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2151
2152     /* could probably allocate this on the heap instead */
2153     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2154                                             stg_asyncDoProczh_malloc_str) 
2155                                 [R1,R2];
2156     reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
2157     StgAsyncIOResult_reqID(ares)   = reqID;
2158     StgAsyncIOResult_len(ares)     = 0;
2159     StgAsyncIOResult_errCode(ares) = 0;
2160     StgTSO_block_info(CurrentTSO) = ares;
2161     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2162     jump stg_block_async;
2163 #endif
2164 }
2165 #endif
2166
2167 /* -----------------------------------------------------------------------------
2168   ** temporary **
2169
2170    classes CCallable and CReturnable don't really exist, but the
2171    compiler insists on generating dictionaries containing references
2172    to GHC_ZCCCallable_static_info etc., so we provide dummy symbols
2173    for these.  Some C compilers can't cope with zero-length static arrays,
2174    so we have to make these one element long.
2175   --------------------------------------------------------------------------- */
2176
/* One-word dummy definitions for the CCallable/CReturnable dictionary
 * symbols the compiler references; see the comment block above. */
2177 section "rodata" {
2178   GHC_ZCCCallable_static_info:   W_ 0;
2179 }
2180
2181 section "rodata" {
2182   GHC_ZCCReturnable_static_info: W_ 0;
2183 }