annotate C-- calls that do not return
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 #ifdef __PIC__
31 import __gmpz_init;
32 import __gmpz_add;
33 import __gmpz_sub;
34 import __gmpz_mul;
35 import __gmpz_gcd;
36 import __gmpn_gcd_1;
37 import __gmpn_cmp;
38 import __gmpz_tdiv_q;
39 import __gmpz_tdiv_r;
40 import __gmpz_tdiv_qr;
41 import __gmpz_fdiv_qr;
42 import __gmpz_divexact;
43 import __gmpz_and;
44 import __gmpz_xor;
45 import __gmpz_ior;
46 import __gmpz_com;
47 import base_GHCziIOBase_NestedAtomically_closure;
48 import pthread_mutex_lock;
49 import pthread_mutex_unlock;
50 #endif
51
52 /*-----------------------------------------------------------------------------
53   Array Primitives
54
55   Basically just new*Array - the others are all inline macros.
56
57   The size arg is always passed in R1, and the result returned in R1.
58
59   The slow entry point is for returning from a heap check, the saved
60   size argument must be re-loaded from the stack.
61   -------------------------------------------------------------------------- */
62
63 /* for objects that are *less* than the size of a word, make sure we
64  * round up to the nearest word for the size of the array.
65  */
66
/* newByteArray# primop: allocate a fresh mutable byte array.
 * Arg: R1 = requested size in bytes (entry convention: args in Stg registers).
 * The byte count is rounded up to whole words for the payload. */
newByteArrayzh_fast
{
    W_ words, payload_words, n, p;
    MAYBE_GC(NO_PTRS,newByteArrayzh_fast);   // may GC first; no pointer args to protect
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    // Allocate from this capability's local allocation area.
    ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;    // length is stored in words, not bytes
    RET_P(p);
}
80
/* newPinnedByteArray# primop: like newByteArray#, but the object is
 * allocated with allocatePinned() so the GC will never move it, and the
 * payload is 8-byte aligned (see the alignment dance below).
 * Arg: R1 = requested size in bytes. */
newPinnedByteArrayzh_fast
{
    W_ words, payload_words, n, p;

    MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);

    // We want an 8-byte aligned array.  allocatePinned() gives us
    // 8-byte aligned memory by default, but we want to align the
    // *goods* inside the ArrWords object, so we have to check the
    // size of the ArrWords header and adjust our size accordingly.
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    if ((SIZEOF_StgArrWords & 7) != 0) {
        words = words + 1;           // one spare word for the shift below
    }

    ("ptr" p) = foreign "C" allocatePinned(words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    // Again, if the ArrWords header isn't a multiple of 8 bytes, we
    // have to push the object forward one word so that the goods
    // fall on an 8-byte boundary.
    if ((SIZEOF_StgArrWords & 7) != 0) {
        p = p + WDS(1);
    }

    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
112
/* newArray# primop: allocate a mutable array of pointers and initialise
 * every element with the given value. */
newArrayzh_fast
{
    W_ words, n, init, arr, p;
    /* Args: R1 = words, R2 = initialisation value */

    n = R1;
    MAYBE_GC(R2_PTR,newArrayzh_fast);   // R2 is a heap pointer: keep it alive across GC

    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
    ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);

    // New mutable arrays start DIRTY (they are on the mutable list).
    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(arr) = n;

    // Initialise all elements of the array with the value in R2
    init = R2;
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    // arr + WDS(words) is one-past-the-end of the whole object.
    if (p < arr + WDS(words)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    RET_P(arr);
}
140
/* unsafeThawArray# primop: turn a frozen array back into a mutable one
 * in place.  The subtlety is deciding whether it must (re)join the
 * mutable list — see the long note below. */
unsafeThawArrayzh_fast
{
  // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
  //
  // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
  // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
  // it on the mutable list for the GC to remove (removing something from
  // the mutable list is not easy, because the mut_list is only singly-linked).
  // 
  // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
  // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
  // to indicate that it is still on the mutable list.
  //
  // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
  // either it is on a mut_list, or it isn't.  We adopt the convention that
  // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
  // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
  // we put it on the mutable list more than once, but it would get scavenged
  // multiple times during GC, which would be unnecessarily slow.
  //
  if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        // Not on the mutable list: record it now.
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(R1, R1);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        RET_P(R1);
  } else {
        // Already on the mutable list: just flip the info pointer.
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        RET_P(R1);
  }
}
171
172 /* -----------------------------------------------------------------------------
173    MutVar primitives
174    -------------------------------------------------------------------------- */
175
/* newMutVar# primop: allocate a MutVar holding the given value.
 * New MutVars are born DIRTY (on the mutable list). */
newMutVarzh_fast
{
    W_ mv;
    /* Args: R1 = initialisation value */

    ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);

    // ALLOC_PRIM bumps Hp; the new object sits at the top of the nursery.
    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
    StgMutVar_var(mv) = R1;
    
    RET_P(mv);
}
189
/* atomicModifyMutVar# primop.
 * Builds three thunks (z = f x, y = fst z, r = snd z), installs y as the
 * new contents and returns r.  In the threaded RTS the whole sequence is
 * serialised with atomic_modify_mutvar_mutex.
 * NOTE(review): the lock is released before RET_P — presumably safe because
 * all heap writes are done by then; confirm against the RTS locking notes. */
atomicModifyMutVarzh_fast
{
    W_ mv, z, x, y, r;
    /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */

    /* If x is the current contents of the MutVar#, then 
       We want to make the new contents point to

         (sel_0 (f x))
 
       and the return value is
         
         (sel_1 (f x))

        obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */

/* Thunks must be at least MIN_UPD_SIZE words so they can be updated
 * in place; pad the sizes accordingly. */
#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

   // One heap check up front for all three thunks.
   HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);

#if defined(THREADED_RTS)
    foreign "C" ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
#endif

   x = StgMutVar_var(R1);

   // z = [stg_ap_2 f x]: application thunk for (f x)
   TICK_ALLOC_THUNK_2();
   CCCS_ALLOC(THUNK_2_SIZE);
   z = Hp - THUNK_2_SIZE + WDS(1);
   SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(z);
   StgThunk_payload(z,0) = R2;
   StgThunk_payload(z,1) = x;

   // y = [stg_sel_0 z]: selects the new MutVar contents
   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   y = z - THUNK_1_SIZE;
   SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(y);
   StgThunk_payload(y,0) = z;

   StgMutVar_var(R1) = y;
   foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];

   // r = [stg_sel_1 z]: the value returned to the caller
   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   r = y - THUNK_1_SIZE;
   SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(r);
   StgThunk_payload(r,0) = z;

#if defined(THREADED_RTS)
    foreign "C" RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
#endif

   RET_P(r);
}
268
269 /* -----------------------------------------------------------------------------
270    Weak Pointer Primitives
271    -------------------------------------------------------------------------- */
272
STRING(stg_weak_msg,"New weak pointer at %p\n")

/* mkWeak# primop: allocate a weak pointer and link it onto the global
 * weak_ptr_list so the GC can find it. */
mkWeakzh_fast
{
  /* R1 = key
     R2 = value
     R3 = finalizer (or NULL)
  */
  W_ w;

  // A NULL finalizer is normalised to the NO_FINALIZER closure.
  if (R3 == NULL) {
    R3 = stg_NO_FINALIZER_closure;
  }

  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, W_[CCCS]);

  StgWeak_key(w)       = R1;
  StgWeak_value(w)     = R2;
  StgWeak_finalizer(w) = R3;

  // Push onto the front of the global weak pointer list.
  StgWeak_link(w)       = W_[weak_ptr_list];
  W_[weak_ptr_list]     = w;

  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

  RET_P(w);
}
303
304
/* finalizeWeak# primop: kill a weak pointer and return its finalizer.
 * Returns (# 0, NO_FINALIZER #) if already dead or no finalizer was set,
 * (# 1, finalizer #) otherwise. */
finalizzeWeakzh_fast
{
  /* R1 = weak ptr
   */
  W_ w, f;

  w = R1;

  // already dead?
  if (GET_INFO(w) == stg_DEAD_WEAK_info) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  }

  // kill it
#ifdef PROFILING
  // @LDV profiling
  // A weak pointer is inherently used, so we do not need to call
  // LDV_recordDead_FILL_SLOP_DYNAMIC():
  //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
  // or, LDV_recordDead():
  //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
  // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
  // large as weak pointers, so there is no need to fill the slop, either.
  // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
#endif

  //
  // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
  //
  SET_INFO(w,stg_DEAD_WEAK_info);
  LDV_RECORD_CREATE(w);

  // Grab the finalizer before the link field overwrites the payload.
  f = StgWeak_finalizer(w);
  StgDeadWeak_link(w) = StgWeak_link(w);

  /* return the finalizer */
  if (f == stg_NO_FINALIZER_closure) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  } else {
      RET_NP(1,f);
  }
}
347
/* deRefWeak# primop: look through a weak pointer.
 * Returns (# 1, value #) while the weak pointer is alive,
 * (# 0, weak-ptr #) once it has died. */
deRefWeakzh_fast
{
  /* R1 = weak ptr */
  W_ w, code, val;

  w = R1;

  // Assume the dead case, then override if the info table says alive.
  code = 0;
  val = w;
  if (GET_INFO(w) == stg_WEAK_info) {
    code = 1;
    val = StgWeak_value(w);
  }

  RET_NP(code,val);
}
363
364 /* -----------------------------------------------------------------------------
365    Arbitrary-precision Integer operations.
366
367    There are some assumptions in this code that mp_limb_t == W_.  This is
368    the case for all the platforms that GHC supports, currently.
369    -------------------------------------------------------------------------- */
370
/* int2Integer# primop: convert an Int# to a GMP-style (size, limbs) pair.
 * The size is the sign-carrying limb count: -1, 0 or 1 here. */
int2Integerzh_fast
{
   /* arguments: R1 = Int# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;
   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   /* mpz_set_si is inlined here, makes things simpler */
   if (%lt(val,0)) { 
        s  = -1;
        Hp(0) = -val;    // store the magnitude; sign lives in s
   } else { 
     if (%gt(val,0)) {
        s = 1;
        Hp(0) = val;
     } else {
        s = 0;           // zero: no limb stored
     }
  }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# 
               #)
   */
   RET_NP(s,p);
}
403
/* word2Integer# primop: convert a Word# to a GMP-style (size, limbs) pair.
 * Like int2Integerzh_fast but the input is unsigned, so size is 0 or 1. */
word2Integerzh_fast
{
   /* arguments: R1 = Word# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   if (val != 0) {
        s = 1;
        Hp(0) = val;     // Hp(0) for consistency with int2Integerzh_fast
   } else {
        s = 0;           // zero: no limb stored
   }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
430
431
432 /*
433  * 'long long' primops for converting to/from Integers.
434  */
435
436 #ifdef SUPPORT_LONG_LONGS
437
/* int64ToInteger# primop (32-bit platforms only): convert an Int64# to a
 * GMP-style (size, limbs) pair, using one or two word-sized limbs. */
int64ToIntegerzh_fast
{
   /* arguments: L1 = Int64# */

   L_ val;
   W_ hi, lo, s, neg, words_needed, p;

   val = L1;
   neg = 0;

   hi = TO_W_(val >> 32);
   lo = TO_W_(val);

   // hi == 0 or all-ones means the value fits in one (sign-extended) word.
   if ( hi != 0 && hi != 0xFFFFFFFF )  { 
       words_needed = 2;
   } else { 
       // minimum is one word
       words_needed = 1;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, int64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   // Negative: remember the sign and negate the two-word magnitude,
   // with a borrow from lo into hi.
   if ( %lt(hi,0) ) {
     neg = 1;
     lo = -lo;
     if(lo == 0) {
       hi = -hi;
     } else {
       hi = -hi - 1;
     }
   }

   if ( words_needed == 2 )  { 
      s = 2;
      Hp(-1) = lo;      // limbs stored least-significant first
      Hp(0) = hi;
   } else { 
       if ( lo != 0 ) {
           s = 1;
           Hp(0) = lo;
       } else /* val==0 */  {
           s = 0;
       }
   }
   // Sign is carried by the size field, GMP-style.
   if ( neg != 0 ) {
        s = -s;
   }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
/* word64ToInteger# primop (32-bit platforms only): convert a Word64# to a
 * GMP-style (size, limbs) pair; unsigned, so size is 0, 1 or 2. */
word64ToIntegerzh_fast
{
   /* arguments: L1 = Word64# */

   L_ val;
   W_ hi, lo, s, words_needed, p;

   val = L1;
   hi = TO_W_(val >> 32);
   lo = TO_W_(val);

   if ( hi != 0 ) {
      words_needed = 2;
   } else {
      words_needed = 1;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, word64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   if ( hi != 0 ) { 
     s = 2;
     Hp(-1) = lo;       // limbs stored least-significant first
     Hp(0)  = hi;
   } else {
      if ( lo != 0 ) {
        s = 1;
        Hp(0) = lo;
     } else /* val==0 */  {
      s = 0;
     }
  }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
538
539
540
541 #endif /* SUPPORT_LONG_LONGS */
542
543 /* ToDo: this is shockingly inefficient */
544
545 #ifndef THREADED_RTS
546 section "bss" {
547   mp_tmp1:
548     bits8 [SIZEOF_MP_INT];
549 }
550
551 section "bss" {
552   mp_tmp2:
553     bits8 [SIZEOF_MP_INT];
554 }
555
556 section "bss" {
557   mp_result1:
558     bits8 [SIZEOF_MP_INT];
559 }
560
561 section "bss" {
562   mp_result2:
563     bits8 [SIZEOF_MP_INT];
564 }
565 #endif
566
567 #ifdef THREADED_RTS
568 #define FETCH_MP_TEMP(X) \
569 W_ X; \
570 X = BaseReg + (OFFSET_StgRegTable_r ## X);
571 #else
572 #define FETCH_MP_TEMP(X) /* Nothing */
573 #endif
574
/* GMP_TAKE2_RET1(name, mp_fun): defines primop `name` taking two
 * Integers as (R1,R2) and (R3,R4) = (size, ByteArray#) pairs, applying
 * the two-operand GMP function mp_fun, and returning one (size, data)
 * pair.  The mp_result temporaries are bss statics in the
 * single-threaded RTS, or per-capability register-table slots under
 * THREADED_RTS (see FETCH_MP_TEMP above). */
#define GMP_TAKE2_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1, s2;                                                          \
  W_ d1, d2;                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_tmp2);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
  FETCH_MP_TEMP(mp_result2);                                            \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
                                                                        \
  s1 = W_TO_INT(R1);                                                    \
  d1 = R2;                                                              \
  s2 = W_TO_INT(R3);                                                    \
  d2 = R4;                                                              \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
  MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
  MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
  MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
  MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
  MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}
608
/* GMP_TAKE1_RET1(name, mp_fun): like GMP_TAKE2_RET1 but for a
 * one-operand GMP function; input Integer in (R1,R2). */
#define GMP_TAKE1_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1;                                                              \
  W_ d1;                                                                \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR, name);                                               \
                                                                        \
  d1 = R2;                                                              \
  s1 = W_TO_INT(R1);                                                    \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}
635
/* GMP_TAKE2_RET2(name, mp_fun): like GMP_TAKE2_RET1 but for GMP
 * functions producing two results (e.g. tdiv_qr), returned as two
 * (size, data) pairs via RET_NPNP. */
#define GMP_TAKE2_RET2(name,mp_fun)                                                     \
name                                                                                    \
{                                                                                       \
  CInt s1, s2;                                                                          \
  W_ d1, d2;                                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                                               \
  FETCH_MP_TEMP(mp_tmp2);                                                               \
  FETCH_MP_TEMP(mp_result1)                                                             \
  FETCH_MP_TEMP(mp_result2)                                                             \
                                                                                        \
  /* call doYouWantToGC() */                                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
                                                                                        \
  s1 = W_TO_INT(R1);                                                                    \
  d1 = R2;                                                                              \
  s2 = W_TO_INT(R3);                                                                    \
  d2 = R4;                                                                              \
                                                                                        \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
  MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
  MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
  MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
                                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
  foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
                                                                                        \
  /* Perform the operation */                                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
                                                                                        \
  RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
           MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
           TO_W_(MP_INT__mp_size(mp_result2)),                                          \
           MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
}
672
673 GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
674 GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
675 GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
676 GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
677 GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
678 GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
679 GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
680 GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
681 GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
682 GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
683 GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)
684
685 GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
686 GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
687
688 #ifndef THREADED_RTS
689 section "bss" {
  mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
691 }
692 #endif
693
/* gcdInt# primop: gcd of two Int#s via GMP's mpn_gcd_1.
 * The first operand must be staged through the mp_tmp_w limb buffer
 * because mpn_gcd_1 takes a limb-array argument. */
gcdIntzh_fast
{
    /* R1 = the first Int#; R2 = the second Int# */
    W_ r; 
    FETCH_MP_TEMP(mp_tmp_w);

    W_[mp_tmp_w] = R1;
    // __gmpn_gcd_1(limbs, nlimbs, limb): here a 1-limb array and R2.
    (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];

    R1 = r;
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
707
708
/* gcdIntegerInt# primop: gcd of an Integer (size R1, limbs R2) and an
 * Int# (R3), computed directly on the Integer's limb array. */
gcdIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ s1;
    (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
    R1 = s1;
    
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
719
720
/* cmpIntegerInt# primop: three-way compare of an Integer (R1,R2)
 * against an Int# (R3).  Result in R1: negative / zero / positive. */
cmpIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ usize, vsize, v_digit, u_digit;

    usize = R1;
    vsize = 0;
    v_digit = R3;

    // paraphrased from __gmpz_cmp_si() in the GMP sources
    // First reduce the int to a (sign, magnitude) pair like the Integer's.
    if (%gt(v_digit,0)) {
        vsize = 1;
    } else { 
        if (%lt(v_digit,0)) {
            vsize = -1;
            v_digit = -v_digit;
        }
    }

    // Different signed sizes decide the comparison outright.
    if (usize != vsize) {
        R1 = usize - vsize; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (usize == 0) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    // Same nonzero size: compare the single limbs.
    u_digit = W_[BYTE_ARR_CTS(R2)];

    if (u_digit == v_digit) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
        R1 = usize; 
    } else {
        R1 = -usize; 
    }

    jump %ENTRY_CODE(Sp(0));
}
765
/* cmpInteger# primop: three-way compare of two Integers
 * (R1,R2) vs (R3,R4).  Result in R1: negative / zero / positive. */
cmpIntegerzh_fast
{
    /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
    W_ usize, vsize, size, up, vp;
    CInt cmp;

    // paraphrased from __gmpz_cmp() in the GMP sources
    usize = R1;
    vsize = R3;

    // Different signed sizes decide the comparison outright.
    if (usize != vsize) {
        R1 = usize - vsize; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (usize == 0) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (%lt(usize,0)) { // NB. not <, which is unsigned
        size = -usize;
    } else {
        size = usize;
    }

    up = BYTE_ARR_CTS(R2);
    vp = BYTE_ARR_CTS(R4);

    // Magnitude comparison of the limb arrays.
    (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];

    if (cmp == 0 :: CInt) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    // Flip the magnitude result for negative numbers.
    if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
        R1 = 1;
    } else {
        R1 = (-1); 
    }
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
810
/* integer2Int# primop: truncate an Integer (size R1, limbs R2) to an
 * Int#: low limb of the magnitude, negated when the size is negative. */
integer2Intzh_fast
{
    /* R1 = s; R2 = d */
    W_ result, sign;

    sign = R1;

    // Zero-size Integers carry no limb at all.
    result = 0;
    if (sign != 0) {
        result = W_[R2 + SIZEOF_StgArrWords];   // least-significant limb
        if (%lt(sign,0)) {
            result = -result;
        }
    }

    /* Result parked in R1, return via info-pointer at TOS */
    R1 = result;
    jump %ENTRY_CODE(Sp(0));
}
829
/* integer2Word# primop: truncate an Integer (size R1, limbs R2) to a
 * Word#.  The body is identical to integer2Intzh_fast: truncation to a
 * single word is the same bit pattern either way. */
integer2Wordzh_fast
{
  /* R1 = s; R2 = d */
  W_ r, s;

  s = R1;
  if (s == 0) {
    r = 0;                            // zero-size Integer carries no limb
  } else {
    r = W_[R2 + SIZEOF_StgArrWords];  // least-significant limb
    if (%lt(s,0)) {
        r = -r;
    }
  }
  /* Result parked in R1, return via info-pointer at TOS */
  R1 = r;
  jump %ENTRY_CODE(Sp(0));
}
848
/* decodeFloat# primop: split a Float# into mantissa and exponent via the
 * C helper __decodeFloat.  Mantissa limb lands in a fresh 1-word
 * ByteArray#, exponent in mp_tmp_w.
 * NOTE: the opening brace was lost in this copy of the file; restored. */
decodeFloatzh_fast
{
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);
    
    /* arguments: F1 = Float# */
    arg = F1;
    
    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
    
    /* Be prepared to tell Lennart-coded __decodeFloat
       where mantissa._mp_d can be put (it does not care about the rest) */
    p = Hp - SIZEOF_StgArrWords;
    SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
    StgArrWords_words(p) = 1;
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
    
    /* Perform the operation */
    foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
    
    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
874
#define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
#define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)

/* decodeDouble# primop: split a Double# into mantissa and exponent via
 * the C helper __decodeDouble.  Mantissa lands in a fresh ByteArray# of
 * DOUBLE_MANTISSA_SIZE bytes, exponent in mp_tmp_w.
 * NOTE: the opening brace was lost in this copy of the file; restored. */
decodeDoublezh_fast
{
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: D1 = Double# */
    arg = D1;

    ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
    
    /* Be prepared to tell Lennart-coded __decodeDouble
       where mantissa.d can be put (it does not care about the rest) */
    p = Hp - ARR_SIZE + WDS(1);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

    /* Perform the operation */
    foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
    
    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
903
904 /* -----------------------------------------------------------------------------
905  * Concurrency primitives
906  * -------------------------------------------------------------------------- */
907
/* fork# primop: create and schedule a new thread running the closure
 * in R1, and return the new thread's id. */
forkzh_fast
{
  /* args: R1 = closure to spark */

  MAYBE_GC(R1_PTR, forkzh_fast);

  W_ closure;
  W_ threadid;
  closure = R1;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
                                RtsFlags_GcFlags_initialStkSize(RtsFlags), 
                                closure "ptr") [];
  foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];

  // switch at the earliest opportunity
  CInt[context_switch] = 1 :: CInt;
  
  RET_P(threadid);
}
928
/* forkOn# primop: like fork#, but the new thread is pinned to the
 * capability (cpu) given in R1 via scheduleThreadOn. */
forkOnzh_fast
{
  /* args: R1 = cpu, R2 = closure to spark */

  MAYBE_GC(R2_PTR, forkOnzh_fast);

  W_ cpu;
  W_ closure;
  W_ threadid;
  cpu = R1;
  closure = R2;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
                                RtsFlags_GcFlags_initialStkSize(RtsFlags), 
                                closure "ptr") [];
  foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];

  // switch at the earliest opportunity
  CInt[context_switch] = 1 :: CInt;
  
  RET_P(threadid);
}
951
/* yield# primop: defer entirely to the generic yield code. */
yieldzh_fast
{
  jump stg_yield_noregs;
}
956
/* myThreadId# primop: the current TSO is the thread id. */
myThreadIdzh_fast
{
  /* no args. */
  RET_P(CurrentTSO);
}
962
/* labelThread# primop: attach a debug label (C string at R2) to the
 * thread R1.  Only does anything in DEBUG builds; otherwise it is a
 * no-op that returns straight to the continuation on the stack. */
963 labelThreadzh_fast
964 {
965   /* args: 
966         R1 = ThreadId#
967         R2 = Addr# */
968 #ifdef DEBUG
969   foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
970 #endif
971   jump %ENTRY_CODE(Sp(0));
972 }
973
/* isCurrentThreadBound# primop: ask the RTS whether the current TSO is
 * bound to an OS thread; returns the C result as an Int#. */
974 isCurrentThreadBoundzh_fast
975 {
976   /* no args */
977   W_ r;
978   (r) = foreign "C" isThreadBound(CurrentTSO) [];
979   RET_N(r);
980 }
981
982
983 /* -----------------------------------------------------------------------------
984  * TVar primitives
985  * -------------------------------------------------------------------------- */
986
/* In the unregisterised build R1 lives on the stack, so the return
 * address sits one word higher (SP_OFF = 1) and IF_NOT_REG_R1 expands
 * the extra stack fix-up code; in registerised builds SP_OFF is 0 and
 * IF_NOT_REG_R1 expands to nothing. */
987 #ifdef REG_R1
988 #define SP_OFF 0
989 #define IF_NOT_REG_R1(x) 
990 #else
991 #define SP_OFF 1
992 #define IF_NOT_REG_R1(x) x
993 #endif
994
995 // Catch retry frame ------------------------------------------------------------
996
/* Return frame pushed by catchRetry#.  Entered when the branch being run
 * finishes normally: try to commit the nested transaction into the
 * enclosing one.  On success, pop the frame and return; on failure
 * (validation conflict), start a fresh nested transaction and re-run
 * whichever branch was executing (first_code or alt_code). */
997 INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
998 #if defined(PROFILING)
999   W_ unused1, W_ unused2,
1000 #endif
1001   W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5)
1002 {
1003    W_ r, frame, trec, outer;
1004    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1005
1006    frame = Sp;
1007    trec = StgTSO_trec(CurrentTSO);
1008    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1009    (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
1010    if (r != 0) {
1011      /* Succeeded (either first branch or second branch) */
1012      StgTSO_trec(CurrentTSO) = outer;
1013      Sp = Sp + SIZEOF_StgCatchRetryFrame;
1014      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1015      jump %ENTRY_CODE(Sp(SP_OFF));
1016    } else {
1017      /* Did not commit: re-execute */
1018      W_ new_trec;
1019      ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1020      StgTSO_trec(CurrentTSO) = new_trec;
       /* running_alt_code records which branch we were in, so re-run
        * the same one. */
1021      if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1022        R1 = StgCatchRetryFrame_alt_code(frame);
1023      } else {
1024        R1 = StgCatchRetryFrame_first_code(frame);
1025      }
1026      jump stg_ap_v_fast;
1027    }
1028 }
1029
1030
1031 // Atomically frame ------------------------------------------------------------
1032
/* Return frame pushed by atomically#.  Entered when the transaction body
 * (or an invariant-check transaction) finishes.  First time back
 * (outer == NO_TREC) it collects the invariants to check; subsequent
 * entries abort the just-finished invariant check and advance the queue.
 * When no invariants remain it attempts the top-level commit, retrying
 * the whole body from scratch if validation fails. */
1033 INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
1034 #if defined(PROFILING)
1035   W_ unused1, W_ unused2,
1036 #endif
1037   "ptr" W_ unused3, "ptr" W_ unused4)
1038 {
1039   W_ frame, trec, valid, next_invariant, q, outer;
1040   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1041
1042   frame = Sp;
1043   trec = StgTSO_trec(CurrentTSO);
1044   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1045
1046   if (outer == NO_TREC) {
1047     /* First time back at the atomically frame -- pick up invariants */
1048     ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
1049     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
1050
1051   } else {
1052     /* Second/subsequent time back at the atomically frame -- abort the
1053      * tx that's checking the invariant and move on to the next one */
1054     StgTSO_trec(CurrentTSO) = outer;
1055     q = StgAtomicallyFrame_next_invariant_to_check(frame);
1056     StgInvariantCheckQueue_my_execution(q) = trec;
1057     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1058     /* Don't free trec -- it's linked from q and will be stashed in the
1059      * invariant if we eventually commit. */
1060     q = StgInvariantCheckQueue_next_queue_entry(q);
1061     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
1062     trec = outer;
1063   }
1064
1065   q = StgAtomicallyFrame_next_invariant_to_check(frame);
1066
1067   if (q != END_INVARIANT_CHECK_QUEUE) {
1068     /* We can't commit yet: another invariant to check */
     /* Run the invariant's code inside its own nested transaction. */
1069     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
1070     StgTSO_trec(CurrentTSO) = trec;
1071
1072     next_invariant = StgInvariantCheckQueue_invariant(q);
1073     R1 = StgAtomicInvariant_code(next_invariant);
1074     jump stg_ap_v_fast;
1075
1076   } else {
1077
1078     /* We've got no more invariants to check, try to commit */
1079     (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
1080     if (valid != 0) {
1081       /* Transaction was valid: commit succeeded */
1082       StgTSO_trec(CurrentTSO) = NO_TREC;
1083       Sp = Sp + SIZEOF_StgAtomicallyFrame;
1084       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1085       jump %ENTRY_CODE(Sp(SP_OFF));
1086     } else {
1087       /* Transaction was not valid: try again */
1088       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1089       StgTSO_trec(CurrentTSO) = trec;
1090       StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
1091       R1 = StgAtomicallyFrame_code(frame);
1092       jump stg_ap_v_fast;
1093     }
1094   }
1095 }
1096
/* Variant of the atomically frame used while the TSO is blocked in
 * retry#, waiting on its TVars.  On wake-up, stmReWait decides whether
 * the previous attempt is still valid: if so we go back to sleep,
 * otherwise we flip the frame back to stg_atomically_frame_info and
 * re-run the transaction body in a fresh trec. */
1097 INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
1098 #if defined(PROFILING)
1099   W_ unused1, W_ unused2,
1100 #endif
1101   "ptr" W_ unused3, "ptr" W_ unused4)
1102 {
1103   W_ frame, trec, valid;
1104   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1105
1106   frame = Sp;
1107
1108   /* The TSO is currently waiting: should we stop waiting? */
1109   (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
1110   if (valid != 0) {
1111     /* Previous attempt is still valid: no point trying again yet */
1112           IF_NOT_REG_R1(Sp_adj(-2);
1113                         Sp(1) = stg_NO_FINALIZER_closure;
1114                         Sp(0) = stg_ut_1_0_unreg_info;)
1115     jump stg_block_noregs;
1116   } else {
1117     /* Previous attempt is no longer valid: try again */
1118     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1119     StgTSO_trec(CurrentTSO) = trec;
1120     StgHeader_info(frame) = stg_atomically_frame_info;
1121     R1 = StgAtomicallyFrame_code(frame);
1122     jump stg_ap_v_fast;
1123   }
1124 }
1125
1126 // STM catch frame --------------------------------------------------------------
1127
/* Re-issue the SP_OFF definition (same meaning as above: offset of the
 * return address when R1 is not in a register). */
1128 #ifdef REG_R1
1129 #define SP_OFF 0
1130 #else
1131 #define SP_OFF 1
1132 #endif
1133
1134 /* Catch frames are very similar to update frames, but when entering
1135  * one we just pop the frame off the stack and perform the correct
1136  * kind of return to the activation record underneath us on the stack.
1137  */
1138
/* Return frame pushed by catchSTM#.  Entered when the protected code
 * completes without raising: commit the nested transaction into the
 * enclosing one.  On commit failure, restart the protected code in a
 * fresh nested transaction. */
1139 INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
1140 #if defined(PROFILING)
1141   W_ unused1, W_ unused2,
1142 #endif
1143   "ptr" W_ unused3, "ptr" W_ unused4)
1144    {
1145       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1146       W_ r, frame, trec, outer;
1147       frame = Sp;
1148       trec = StgTSO_trec(CurrentTSO);
1149       ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1150       (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
1151       if (r != 0) {
1152         /* Commit succeeded */
1153         StgTSO_trec(CurrentTSO) = outer;
1154         Sp = Sp + SIZEOF_StgCatchSTMFrame;
1155         IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1156         jump Sp(SP_OFF);
1157       } else {
1158         /* Commit failed */
1159         W_ new_trec;
1160         ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1161         StgTSO_trec(CurrentTSO) = new_trec;
1162         R1 = StgCatchSTMFrame_code(frame);
1163         jump stg_ap_v_fast;
1164       }
1165    }
1166
1167
1168 // Primop definition ------------------------------------------------------------
1169
/* atomically# primop: start a top-level STM transaction.  Pushes an
 * atomically frame, opens a new trec, and tail-calls the STM action in
 * R1.  Nested use (a non-NO_TREC current trec) raises
 * NestedAtomically instead. */
1170 atomicallyzh_fast
1171 {
1172   W_ frame;
1173   W_ old_trec;
1174   W_ new_trec;
1175   
1176   // stmStartTransaction may allocate
1177   MAYBE_GC (R1_PTR, atomicallyzh_fast); 
1178
1179   /* Args: R1 = m :: STM a */
1180   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1181
1182   old_trec = StgTSO_trec(CurrentTSO);
1183
1184   /* Nested transactions are not allowed; raise an exception */
1185   if (old_trec != NO_TREC) {
1186      R1 = base_GHCziIOBase_NestedAtomically_closure;
1187      jump raisezh_fast;
1188   }
1189
1190   /* Set up the atomically frame */
1191   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1192   frame = Sp;
1193
1194   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1195   StgAtomicallyFrame_code(frame) = R1;
1196   StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
1197
1198   /* Start the memory transaction */
1199   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
1200   StgTSO_trec(CurrentTSO) = new_trec;
1201
1202   /* Apply R1 to the realworld token */
1203   jump stg_ap_v_fast;
1204 }
1205
1206
/* catchSTM# primop: push a catch-stm frame recording the handler (R2)
 * and the protected code (R1), open a nested transaction, and run R1.
 * Exceptions raised inside will be caught by the frame. */
1207 catchSTMzh_fast
1208 {
1209   W_ frame;
1210   
1211   /* Args: R1 :: STM a */
1212   /* Args: R2 :: Exception -> STM a */
1213   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1214
1215   /* Set up the catch frame */
1216   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1217   frame = Sp;
1218
1219   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1220   StgCatchSTMFrame_handler(frame) = R2;
1221   StgCatchSTMFrame_code(frame) = R1;
1222
1223   /* Start a nested transaction to run the body of the try block in */
1224   W_ cur_trec;  
1225   W_ new_trec;
1226   cur_trec = StgTSO_trec(CurrentTSO);
  /* NOTE(review): this foreign call lacks the trailing "[]" live-register
   * annotation that the other stmStartTransaction calls carry -- confirm
   * whether that is intentional. */
1227   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
1228   StgTSO_trec(CurrentTSO) = new_trec;
1229
1230   /* Apply R1 to the realworld token */
1231   jump stg_ap_v_fast;
1232 }
1233
1234
/* catchRetry# primop: run R1 in a nested transaction under a
 * catch-retry frame; if R1 calls retry#, the frame switches to the
 * alternative R2 (see retryzh_fast / stg_catch_retry_frame). */
1235 catchRetryzh_fast
1236 {
1237   W_ frame;
1238   W_ new_trec;
1239   W_ trec;
1240
1241   // stmStartTransaction may allocate
1242   MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 
1243
1244   /* Args: R1 :: STM a */
1245   /* Args: R2 :: STM a */
1246   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1247
1248   /* Start a nested transaction within which to run the first code */
1249   trec = StgTSO_trec(CurrentTSO);
1250   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
1251   StgTSO_trec(CurrentTSO) = new_trec;
1252
1253   /* Set up the catch-retry frame */
1254   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1255   frame = Sp;
1256   
1257   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1258   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1259   StgCatchRetryFrame_first_code(frame) = R1;
1260   StgCatchRetryFrame_alt_code(frame) = R2;
1261
1262   /* Apply R1 to the realworld token */
1263   jump stg_ap_v_fast;
1264 }
1265
1266
/* retry# primop: walk the stack for the nearest CATCH_RETRY_FRAME or
 * ATOMICALLY_FRAME.  At a catch-retry frame, abort the current branch
 * and either run the alternative or propagate the retry further out.
 * At the atomically frame, wait on the transaction's TVars (blocking
 * the TSO) or, if the trec is already invalid, re-run immediately. */
1267 retryzh_fast
1268 {
1269   W_ frame_type;
1270   W_ frame;
1271   W_ trec;
1272   W_ outer;
1273   W_ r;
1274
1275   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1276
1277   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1278 retry_pop_stack:
1279   StgTSO_sp(CurrentTSO) = Sp;
1280   (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
1281   Sp = StgTSO_sp(CurrentTSO);
1282   frame = Sp;
1283   trec = StgTSO_trec(CurrentTSO);
1284   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1285
1286   if (frame_type == CATCH_RETRY_FRAME) {
1287     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1288     ASSERT(outer != NO_TREC);
1289     // Abort the transaction attempting the current branch
1290     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1291     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    // i.e. running_alt_code == 0: unary '!' binds tighter than '!=',
    // so this reads as (!running_alt_code) != 0
1292     if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1293       // Retry in the first branch: try the alternative
1294       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1295       StgTSO_trec(CurrentTSO) = trec;
1296       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1297       R1 = StgCatchRetryFrame_alt_code(frame);
1298       jump stg_ap_v_fast;
1299     } else {
1300       // Retry in the alternative code: propagate the retry
1301       StgTSO_trec(CurrentTSO) = outer;
1302       Sp = Sp + SIZEOF_StgCatchRetryFrame;
1303       goto retry_pop_stack;
1304     }
1305   }
1306
1307   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1308   ASSERT(frame_type == ATOMICALLY_FRAME);
1309   if (outer != NO_TREC) {
1310     // We called retry while checking invariants, so abort the current
1311     // invariant check (merging its TVar accesses into the parent's read
1312     // set so we'll wait on them)
1313     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1314     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
1315     trec = outer;
1316     StgTSO_trec(CurrentTSO) = trec;
1317     ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1318   }
1319   ASSERT(outer == NO_TREC);
1320
1321   (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
1322   if (r != 0) {
1323     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1324     StgHeader_info(frame) = stg_atomically_waiting_frame_info;
1325     Sp = frame;
1326     // Fix up the stack in the unregisterised case: the return convention is different.
1327     IF_NOT_REG_R1(Sp_adj(-2); 
1328                   Sp(1) = stg_NO_FINALIZER_closure;
1329                   Sp(0) = stg_ut_1_0_unreg_info;)
1330     R3 = trec; // passing to stmWaitUnblock()
1331     jump stg_block_stmwait;
1332   } else {
1333     // Transaction was not valid: retry immediately
1334     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1335     StgTSO_trec(CurrentTSO) = trec;
1336     R1 = StgAtomicallyFrame_code(frame);
1337     Sp = frame;
1338     jump stg_ap_v_fast;
1339   }
1340 }
1341
1342
/* check# primop: register the invariant closure in R1 with the current
 * transaction; it will be checked when the enclosing atomically frame
 * commits (see stg_atomically_frame). */
1343 checkzh_fast
1344 {
1345   W_ trec, closure;
1346
1347   /* Args: R1 = invariant closure */
1348   MAYBE_GC (R1_PTR, checkzh_fast); 
1349
1350   trec = StgTSO_trec(CurrentTSO);
1351   closure = R1;
1352   foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
1353                                      trec "ptr",
1354                                      closure "ptr") [];
1355
1356   jump %ENTRY_CODE(Sp(0));
1357 }
1358
1359
/* newTVar# primop: allocate a fresh TVar (via stmNewTVar, which may
 * allocate, hence MAYBE_GC) holding the value in R1. */
1360 newTVarzh_fast
1361 {
1362   W_ tv;
1363   W_ new_value;
1364
1365   /* Args: R1 = initialisation value */
1366
1367   MAYBE_GC (R1_PTR, newTVarzh_fast); 
1368   new_value = R1;
1369   ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
1370   RET_P(tv);
1371 }
1372
1373
/* readTVar# primop: read the TVar in R1 through the current trec so
 * that the access is recorded in the transaction's read set. */
1374 readTVarzh_fast
1375 {
1376   W_ trec;
1377   W_ tvar;
1378   W_ result;
1379
1380   /* Args: R1 = TVar closure */
1381
1382   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1383   trec = StgTSO_trec(CurrentTSO);
1384   tvar = R1;
1385   ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
1386
1387   RET_P(result);
1388 }
1389
1390
/* writeTVar# primop: record a tentative write of R2 into the TVar in R1
 * in the current trec; the TVar itself is updated only at commit. */
1391 writeTVarzh_fast
1392 {
1393   W_ trec;
1394   W_ tvar;
1395   W_ new_value;
1396   
1397   /* Args: R1 = TVar closure */
1398   /*       R2 = New value    */
1399
1400   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1401   trec = StgTSO_trec(CurrentTSO);
1402   tvar = R1;
1403   new_value = R2;
1404   foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1405
1406   jump %ENTRY_CODE(Sp(0));
1407 }
1408
1409
1410 /* -----------------------------------------------------------------------------
1411  * MVar primitives
1412  *
1413  * take & putMVar work as follows.  Firstly, an important invariant:
1414  *
1415  *    If the MVar is full, then the blocking queue contains only
1416  *    threads blocked on putMVar, and if the MVar is empty then the
1417  *    blocking queue contains only threads blocked on takeMVar.
1418  *
1419  * takeMVar:
1420  *    MVar empty : then add ourselves to the blocking queue
1421  *    MVar full  : remove the value from the MVar, and
1422  *                 blocking queue empty     : return
1423  *                 blocking queue non-empty : perform the first blocked putMVar
1424  *                                            from the queue, and wake up the
1425  *                                            thread (MVar is now full again)
1426  *
1427  * putMVar is just the dual of the above algorithm.
1428  *
1429  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1430  * the stack of the thread waiting to do the putMVar.  See
1431  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1432  * the stack layout, and the PerformPut and PerformTake macros below.
1433  *
1434  * It is important that a blocked take or put is woken up with the
1435  * take/put already performed, because otherwise there would be a
1436  * small window of vulnerability where the thread could receive an
1437  * exception and never perform its take or put, and we'd end up with a
1438  * deadlock.
1439  *
1440  * -------------------------------------------------------------------------- */
1441
/* isEmptyMVar# primop: emptiness is encoded in the MVar's info pointer,
 * so a single header comparison suffices.  Returns 1 if empty, else 0. */
1442 isEmptyMVarzh_fast
1443 {
1444     /* args: R1 = MVar closure */
1445
1446     if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
1447         RET_N(1);
1448     } else {
1449         RET_N(0);
1450     }
1451 }
1452
/* newMVar# primop: allocate an empty MVar with empty blocking queue.
 * stg_END_TSO_QUEUE_closure doubles as the "no value" sentinel. */
1453 newMVarzh_fast
1454 {
1455     /* args: none */
1456     W_ mvar;
1457
1458     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1459   
1460     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1461     SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
1462     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1463     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1464     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1465     RET_P(mvar);
1466 }
1467
1468
/* PerformTake: complete a blocked takeMVar on behalf of another thread by
 * writing the taken value and a return info pointer onto that thread's
 * stack.  PerformPut: complete a blocked putMVar by popping the put's
 * stack frame (3 words) and reading the stored value from it. */
1469 /* If R1 isn't available, pass it on the stack */
1470 #ifdef REG_R1
1471 #define PerformTake(tso, value)                         \
1472     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1473     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1474 #else
1475 #define PerformTake(tso, value)                                 \
1476     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1477     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1478 #endif
1479
1480 #define PerformPut(tso,lval)                    \
1481     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1482     lval = W_[StgTSO_sp(tso) - WDS(1)];
1483
/* takeMVar# primop.  If the MVar is empty, enqueue the current TSO on
 * its blocking queue and block.  If full, take the value; if a putMVar
 * is blocked on the MVar, perform that put on the blocked thread's
 * stack and wake it (leaving the MVar full), otherwise mark the MVar
 * empty.  In THREADED_RTS the MVar is locked for the whole operation. */
1484 takeMVarzh_fast
1485 {
1486     W_ mvar, val, info, tso;
1487
1488     /* args: R1 = MVar closure */
1489     mvar = R1;
1490
1491 #if defined(THREADED_RTS)
1492     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1493 #else
1494     info = GET_INFO(mvar);
1495 #endif
1496
1497     /* If the MVar is empty, put ourselves on its blocking queue,
1498      * and wait until we're woken up.
1499      */
1500     if (info == stg_EMPTY_MVAR_info) {
1501         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1502             StgMVar_head(mvar) = CurrentTSO;
1503         } else {
1504             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1505         }
1506         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1507         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1508         StgTSO_block_info(CurrentTSO)  = mvar;
1509         StgMVar_tail(mvar) = CurrentTSO;
1510         
1511         jump stg_block_takemvar;
1512   }
1513
1514   /* we got the value... */
1515   val = StgMVar_value(mvar);
1516
1517   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1518   {
1519       /* There are putMVar(s) waiting... 
1520        * wake up the first thread on the queue
1521        */
1522       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1523
1524       /* actually perform the putMVar for the thread that we just woke up */
1525       tso = StgMVar_head(mvar);
1526       PerformPut(tso,StgMVar_value(mvar));
      /* the woken TSO's stack was mutated above, so mark it dirty */
1527       dirtyTSO(tso);
1528
1529 #if defined(GRAN) || defined(PAR)
1530       /* ToDo: check 2nd arg (mvar) is right */
1531       ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
1532       StgMVar_head(mvar) = tso;
1533 #else
1534       ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", 
1535                                          StgMVar_head(mvar) "ptr") [];
1536       StgMVar_head(mvar) = tso;
1537 #endif
1538
1539       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1540           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1541       }
1542
1543 #if defined(THREADED_RTS)
1544       unlockClosure(mvar, stg_FULL_MVAR_info);
1545 #endif
1546       RET_P(val);
1547   } 
1548   else
1549   {
1550       /* No further putMVars, MVar is now empty */
1551       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1552  
1553 #if defined(THREADED_RTS)
1554       unlockClosure(mvar, stg_EMPTY_MVAR_info);
1555 #else
1556       SET_INFO(mvar,stg_EMPTY_MVAR_info);
1557 #endif
1558
1559       RET_P(val);
1560   }
1561 }
1562
1563
/* tryTakeMVar# primop: non-blocking takeMVar.  Returns (0, dummy) if
 * the MVar is empty, otherwise (1, value), performing a pending
 * blocked putMVar exactly as takeMVarzh_fast does. */
1564 tryTakeMVarzh_fast
1565 {
1566     W_ mvar, val, info, tso;
1567
1568     /* args: R1 = MVar closure */
1569
1570     mvar = R1;
1571
1572 #if defined(THREADED_RTS)
1573     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1574 #else
1575     info = GET_INFO(mvar);
1576 #endif
1577
1578     if (info == stg_EMPTY_MVAR_info) {
1579 #if defined(THREADED_RTS)
1580         unlockClosure(mvar, stg_EMPTY_MVAR_info);
1581 #endif
1582         /* HACK: we need a pointer to pass back, 
1583          * so we abuse NO_FINALIZER_closure
1584          */
1585         RET_NP(0, stg_NO_FINALIZER_closure);
1586     }
1587
1588     /* we got the value... */
1589     val = StgMVar_value(mvar);
1590
1591     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1592
1593         /* There are putMVar(s) waiting... 
1594          * wake up the first thread on the queue
1595          */
1596         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1597
1598         /* actually perform the putMVar for the thread that we just woke up */
1599         tso = StgMVar_head(mvar);
1600         PerformPut(tso,StgMVar_value(mvar));
1601         dirtyTSO(tso);
1602
1603 #if defined(GRAN) || defined(PAR)
1604         /* ToDo: check 2nd arg (mvar) is right */
1605         ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
1606         StgMVar_head(mvar) = tso;
1607 #else
1608         ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr",
1609                                            StgMVar_head(mvar) "ptr") [];
1610         StgMVar_head(mvar) = tso;
1611 #endif
1612
1613         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1614             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1615         }
1616 #if defined(THREADED_RTS)
1617         unlockClosure(mvar, stg_FULL_MVAR_info);
1618 #endif
1619     }
1620     else 
1621     {
1622         /* No further putMVars, MVar is now empty */
1623         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1624 #if defined(THREADED_RTS)
1625         unlockClosure(mvar, stg_EMPTY_MVAR_info);
1626 #else
1627         SET_INFO(mvar,stg_EMPTY_MVAR_info);
1628 #endif
1629     }
1630     
1631     RET_NP(1, val);
1632 }
1633
1634
/* putMVar# primop: dual of takeMVar#.  If the MVar is full, block the
 * current TSO on its queue.  Otherwise, if a takeMVar is blocked on the
 * MVar, perform that take directly on the blocked thread's stack and
 * wake it (MVar stays empty); else store R2 and mark the MVar full. */
1635 putMVarzh_fast
1636 {
1637     W_ mvar, info, tso;
1638
1639     /* args: R1 = MVar, R2 = value */
1640     mvar = R1;
1641
1642 #if defined(THREADED_RTS)
1643     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
1644 #else
1645     info = GET_INFO(mvar);
1646 #endif
1647
1648     if (info == stg_FULL_MVAR_info) {
1649         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1650             StgMVar_head(mvar) = CurrentTSO;
1651         } else {
1652             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1653         }
1654         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1655         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1656         StgTSO_block_info(CurrentTSO)  = mvar;
1657         StgMVar_tail(mvar) = CurrentTSO;
1658         
1659         jump stg_block_putmvar;
1660     }
1661   
1662     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1663
1664         /* There are takeMVar(s) waiting: wake up the first one
1665          */
1666         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1667
1668         /* actually perform the takeMVar */
1669         tso = StgMVar_head(mvar);
1670         PerformTake(tso, R2);
        /* the woken TSO's stack was mutated above, so mark it dirty */
1671         dirtyTSO(tso);
1672       
1673 #if defined(GRAN) || defined(PAR)
1674         /* ToDo: check 2nd arg (mvar) is right */
1675         ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
1676         StgMVar_head(mvar) = tso;
1677 #else
1678         ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
1679         StgMVar_head(mvar) = tso;
1680 #endif
1681
1682         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1683             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1684         }
1685
1686 #if defined(THREADED_RTS)
1687         unlockClosure(mvar, stg_EMPTY_MVAR_info);
1688 #endif
1689         jump %ENTRY_CODE(Sp(0));
1690     }
1691     else
1692     {
1693         /* No further takes, the MVar is now full. */
1694         StgMVar_value(mvar) = R2;
1695
1696 #if defined(THREADED_RTS)
1697         unlockClosure(mvar, stg_FULL_MVAR_info);
1698 #else
1699         SET_INFO(mvar,stg_FULL_MVAR_info);
1700 #endif
1701         jump %ENTRY_CODE(Sp(0));
1702     }
1703     
1704     /* ToDo: yield afterward for better communication performance? */
1705 }
1706
1707
/* tryPutMVar# primop: non-blocking putMVar.  Returns 0 if the MVar is
 * already full, otherwise performs the put (waking a blocked takeMVar
 * if there is one, exactly as putMVarzh_fast) and returns 1. */
1708 tryPutMVarzh_fast
1709 {
1710     W_ mvar, info, tso;
1711
1712     /* args: R1 = MVar, R2 = value */
1713     mvar = R1;
1714
1715 #if defined(THREADED_RTS)
1716     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
1717 #else
1718     info = GET_INFO(mvar);
1719 #endif
1720
1721     if (info == stg_FULL_MVAR_info) {
1722 #if defined(THREADED_RTS)
1723         unlockClosure(mvar, stg_FULL_MVAR_info);
1724 #endif
1725         RET_N(0);
1726     }
1727   
1728     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1729
1730         /* There are takeMVar(s) waiting: wake up the first one
1731          */
1732         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1733         
1734         /* actually perform the takeMVar */
1735         tso = StgMVar_head(mvar);
1736         PerformTake(tso, R2);
1737         dirtyTSO(tso);
1738       
1739 #if defined(GRAN) || defined(PAR)
1740         /* ToDo: check 2nd arg (mvar) is right */
1741         ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
1742         StgMVar_head(mvar) = tso;
1743 #else
1744         ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
1745         StgMVar_head(mvar) = tso;
1746 #endif
1747
1748         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1749             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1750         }
1751
1752 #if defined(THREADED_RTS)
1753         unlockClosure(mvar, stg_EMPTY_MVAR_info);
1754 #endif
1755     }
1756     else
1757     {
1758         /* No further takes, the MVar is now full. */
1759         StgMVar_value(mvar) = R2;
1760
1761 #if defined(THREADED_RTS)
1762         unlockClosure(mvar, stg_FULL_MVAR_info);
1763 #else
1764         SET_INFO(mvar,stg_FULL_MVAR_info);
1765 #endif
1766     }
1767     
1768     RET_N(1);
1769     /* ToDo: yield afterward for better communication performance? */
1770 }
1771
1772
1773 /* -----------------------------------------------------------------------------
1774    Stable pointer primitives
1775    -------------------------------------------------------------------------  */
1776
/* makeStableName# primop: look up (or create) the stable-name-table
 * entry for the closure in R1, and return the StgStableName object for
 * it, allocating one only if this is the first makeStableName# on this
 * heap object. */
1777 makeStableNamezh_fast
1778 {
1779     W_ index, sn_obj;
1780
1781     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1782   
1783     (index) = foreign "C" lookupStableName(R1 "ptr") [];
1784
1785     /* Is there already a StableName for this heap object?
1786      *  stable_ptr_table is a pointer to an array of snEntry structs.
1787      */
1788     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1789         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1790         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1791         StgStableName_sn(sn_obj) = index;
1792         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1793     } else {
1794         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1795     }
1796     
1797     RET_P(sn_obj);
1798 }
1799
1800
/* makeStablePtr# primop: register R1 in the stable pointer table (which
 * keeps it alive across GCs) and return the table index as the
 * StablePtr#. */
1801 makeStablePtrzh_fast
1802 {
1803     /* Args: R1 = a */
1804     W_ sp;
1805     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1806     ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
1807     RET_N(sp);
1808 }
1809
/* deRefStablePtr# primop: index the stable pointer table with R1 and
 * return the addr field of the corresponding snEntry. */
1810 deRefStablePtrzh_fast
1811 {
1812     /* Args: R1 = the stable ptr */
1813     W_ r, sp;
1814     sp = R1;
1815     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1816     RET_P(r);
1817 }
1818
1819 /* -----------------------------------------------------------------------------
1820    Bytecode object primitives
1821    -------------------------------------------------------------------------  */
1822
/* newBCO# primop: allocate a BCO closure from its components and copy
 * the arity/bitmap words (from the R5 ByteArray#) inline into it. */
1823 newBCOzh_fast
1824 {
1825     /* R1 = instrs
1826        R2 = literals
1827        R3 = ptrs
1828        R4 = arity
1829        R5 = bitmap array
1830     */
1831     W_ bco, bitmap_arr, bytes, words;
1832     
1833     bitmap_arr = R5;
1834
  /* total size = fixed BCO header/fields + one word per bitmap word */
1835     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1836     bytes = WDS(words);
1837
1838     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );
1839
1840     bco = Hp - bytes + WDS(1);
1841     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1842     
1843     StgBCO_instrs(bco)     = R1;
1844     StgBCO_literals(bco)   = R2;
1845     StgBCO_ptrs(bco)       = R3;
1846     StgBCO_arity(bco)      = HALF_W_(R4);
1847     StgBCO_size(bco)       = HALF_W_(words);
1848     
1849     // Copy the arity/bitmap info into the BCO
1850     W_ i;
1851     i = 0;
1852 for:
1853     if (i < StgArrWords_words(bitmap_arr)) {
1854         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1855         i = i + 1;
1856         goto for;
1857     }
1858     
1859     RET_P(bco);
1860 }
1861
1862
/* mkApUpd0# primop: wrap a zero-arity BCO (R1) in an updatable AP thunk
 * so that entering it evaluates the BCO and updates in place. */
1863 mkApUpd0zh_fast
1864 {
1865     // R1 = the BCO# for the AP
1866     //  
1867     W_ ap;
1868
1869     // This function is *only* used to wrap zero-arity BCOs in an
1870     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1871     // saturated and always points directly to a FUN or BCO.
1872     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1873            StgBCO_arity(R1) == HALF_W_(0));
1874
1875     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1876     TICK_ALLOC_UP_THK(0, 0);
1877     CCCS_ALLOC(SIZEOF_StgAP);
1878
1879     ap = Hp - SIZEOF_StgAP + WDS(1);
1880     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1881     
1882     StgAP_n_args(ap) = HALF_W_(0);
1883     StgAP_fun(ap) = R1;
1884     
1885     RET_P(ap);
1886 }
1887
/* unpackClosure# primop: decompose the closure in R1 into its info
 * pointer, an array of its pointer fields, and a byte array of its
 * non-pointer words.  Thunk-like closures report zero payload (their
 * layout is non-standard); THUNK_SELECTOR reports one pointer. */
1888 unpackClosurezh_fast
1889 {
1890 /* args: R1 = closure to analyze */
1891 // TODO: Consider the absence of ptrs or nonptrs as a special case ?
1892
1893     W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
1894     info  = %GET_STD_INFO(UNTAG(R1));
1895
1896     // Some closures have non-standard layout, so we omit those here.
1897     W_ type;
1898     type = TO_W_(%INFO_TYPE(info));
1899     switch [0 .. N_CLOSURE_TYPES] type {
1900     case THUNK_SELECTOR : {
1901         ptrs = 1;
1902         nptrs = 0;
1903         goto out;
1904     }
1905     case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
1906          THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
1907         ptrs = 0;
1908         nptrs = 0;
1909         goto out;
1910     }
1911     default: {
    /* Standard layout: counts come straight from the info table. */
1912         ptrs  = TO_W_(%INFO_PTRS(info)); 
1913         nptrs = TO_W_(%INFO_NPTRS(info));
1914         goto out;
1915     }}
1916 out:
1917
1918     W_ ptrs_arr_sz, nptrs_arr_sz;
1919     nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
1920     ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);
1921
1922     ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);
1923
1924     W_ clos;
1925     clos = UNTAG(R1);
1926
  /* The two result arrays sit adjacently at the top of the allocation. */
1927     ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
1928     nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
1929
1930     SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
1931     StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
1932     p = 0;
1933 for:
1934     if(p < ptrs) {
1935          W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
1936          p = p + 1;
1937          goto for;
1938     }
1939     
1940     SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
1941     StgArrWords_words(nptrs_arr) = nptrs;
1942     p = 0;
1943 for2:
1944     if(p < nptrs) {
1945          W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
1946          p = p + 1;
1947          goto for2;
1948     }
1949     RET_NPP(info, ptrs_arr, nptrs_arr);
1950 }
1951
1952 /* -----------------------------------------------------------------------------
1953    Thread I/O blocking primitives
1954    -------------------------------------------------------------------------- */
1955
1956 /* Add a thread to the end of the blocked queue. (C-- version of the C
1957  * macro in Schedule.h).
1958  */
#define APPEND_TO_BLOCKED_QUEUE(tso)                    \
    /* tso must not already be on any queue */          \
    ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
    if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
      /* queue is empty: tso becomes the head */        \
      W_[blocked_queue_hd] = tso;                       \
    } else {                                            \
      /* otherwise link it after the current tail */    \
      StgTSO_link(W_[blocked_queue_tl]) = tso;          \
    }                                                   \
    W_[blocked_queue_tl] = tso;
1967
/* waitRead# :: Int# -> State# RealWorld -> State# RealWorld
 *
 * Block the current thread until its file descriptor (R1) is readable.
 * Only supported on the non-threaded RTS; does not return otherwise.
 */
waitReadzh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitRead# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
    StgTSO_block_info(CurrentTSO) = R1;   // what we are blocked on
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
1984
/* waitWrite# :: Int# -> State# RealWorld -> State# RealWorld
 *
 * Block the current thread until its file descriptor (R1) is writable.
 * Only supported on the non-threaded RTS; does not return otherwise.
 */
waitWritezh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitWrite# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
    StgTSO_block_info(CurrentTSO) = R1;   // what we are blocked on
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2001
2002
STRING(stg_delayzh_malloc_str, "delayzh_fast")
/* delay# :: Int# -> State# s -> State# s
 *
 * Block the current thread for at least R1 microseconds.  On Windows
 * this goes via the async I/O manager; elsewhere the thread is put on
 * the (sorted) sleeping_queue.  Not supported on the threaded RTS.
 */
delayzh_fast
{
#ifdef mingw32_HOST_OS
    W_ ares;
    CInt reqID;
#else
    W_ t, prev, target;
#endif

#ifdef THREADED_RTS
    foreign "C" barf("delay# on threaded RTS") never returns;
#else

    /* args: R1 (microsecond delay amount) */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;

#ifdef mingw32_HOST_OS

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_delayzh_malloc_str);
    (reqID) = foreign "C" addDelayRequest(R1);
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;

    /* Having all async-blocked threads reside on the blocked_queue
     * simplifies matters, so change the status to OnDoProc and put the
     * delayed thread on the blocked_queue.
     */
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async_void;

#else

    /* convert the microsecond delay into a target wake-up tick */
    W_ time;
    W_ divisor;
    (time) = foreign "C" getourtimeofday() [R1];
    divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags))*1000;
    target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
           + time + 1; /* Add 1 as getourtimeofday rounds down */
    StgTSO_block_info(CurrentTSO) = target;

    /* Insert the new thread in the sleeping queue, which is kept
     * sorted by wake-up time (block_info). */
    prev = NULL;
    t = W_[sleeping_queue];
while:
    if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
        prev = t;
        t = StgTSO_link(t);
        goto while;
    }

    StgTSO_link(CurrentTSO) = t;
    if (prev == NULL) {
        W_[sleeping_queue] = CurrentTSO;
    } else {
        StgTSO_link(prev) = CurrentTSO;
    }
    jump stg_block_noregs;
#endif
#endif /* !THREADED_RTS */
}
2070
2071
#ifdef mingw32_HOST_OS
STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
/* asyncRead# (Windows only): queue an asynchronous read request with
 * the I/O manager and block the current thread until it completes.
 * The result record (ares) is malloc'd; the I/O manager fills in
 * len/errCode and it is picked up via stg_block_async.
 */
asyncReadzh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncRead# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncReadzh_malloc_str)
                        [R1,R2,R3,R4];
    /* second argument 0 (FALSE) marks this as a read request */
    (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2100
STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
/* asyncWrite# (Windows only): queue an asynchronous write request with
 * the I/O manager and block the current thread until it completes.
 * Mirrors asyncReadzh_fast, but blocks on write.
 */
asyncWritezh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncWrite# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncWritezh_malloc_str)
                        [R1,R2,R3,R4];
    /* second argument 1 (TRUE) marks this as a write request */
    (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];

    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2128
STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
/* asyncDoProc# (Windows only): hand a (proc, param) pair to the I/O
 * manager to run asynchronously, and block the current thread until
 * the call completes.
 */
asyncDoProczh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
#else

    /* args: R1 = proc, R2 = param */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncDoProczh_malloc_str) 
                                [R1,R2];
    (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO) = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
#endif
2157
// noDuplicate# tries to ensure that none of the thunks under
// evaluation by the current thread are also under evaluation by
// another thread.  It relies on *both* threads doing noDuplicate#;
// the second one will get blocked if they are duplicating some work.
noDuplicatezh_fast
{
    // Save the STG registers into the TSO before calling into C,
    // and restore them afterwards (LOAD_THREAD_STATE below).
    SAVE_THREAD_STATE();
    ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
    // threadPaused does the real work; it may change what_next
    // (e.g. to ThreadKilled), which is why we re-check it below.
    foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
    
    if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
        jump stg_threadFinished;
    } else {
        LOAD_THREAD_STATE();
        ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
        // resume with the continuation on top of the stack
        jump %ENTRY_CODE(Sp(0));
    }
}
2176
/* getApStackVal# :: a -> Int# -> (# Int#, b #)
 *
 * Fetch payload word `offset` (R2) from an AP_STACK closure (R1).
 * Returns ok=1 and the payload value on success; ok=0 and the
 * original closure if R1 is not an AP_STACK.
 */
getApStackValzh_fast
{
   W_ ap_stack, offset, val, ok;

   /* args: R1 = AP_STACK, R2 = offset */
   ap_stack = R1;
   offset   = R2;

   if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
        ok = 1;
        val = StgAP_STACK_payload(ap_stack,offset); 
   } else {
        // not an AP_STACK: signal failure and hand back the closure
        ok = 0;
        val = R1;
   }
   RET_NP(ok,val);
}