/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2004
 *
 * Out-of-line primitive operations
 *
 * This file contains the implementations of all the primitive
 * operations ("primops") which are not expanded inline.  See
 * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
 * this file contains code for most of those with the attribute
 * out_of_line=True.
 *
 * Entry convention: the entry convention for a primop is that all the
 * args are in Stg registers (R1, R2, etc.).  This is to make writing
 * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
 *
 * Return convention: results from a primop are generally returned
 * using the ordinary unboxed tuple return convention.  The C-- parser
 * implements the RET_xxxx() macros to perform unboxed-tuple returns
 * based on the prevailing return convention.
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"

#ifdef __PIC__
import __gmpz_init;
import __gmpz_add;
import __gmpz_sub;
import __gmpz_mul;
import __gmpz_gcd;
import __gmpn_gcd_1;
import __gmpn_cmp;
import __gmpz_tdiv_q;
import __gmpz_tdiv_r;
import __gmpz_tdiv_qr;
import __gmpz_fdiv_qr;
import __gmpz_divexact;
import __gmpz_and;
import __gmpz_xor;
import __gmpz_ior;
import __gmpz_com;
import base_GHCziIOBase_NestedAtomically_closure;
import pthread_mutex_lock;
import pthread_mutex_unlock;
#endif
import EnterCriticalSection;
import LeaveCriticalSection;

/*-----------------------------------------------------------------------------
  Array Primitives

  Basically just new*Array - the others are all inline macros.

  The size arg is always passed in R1, and the result returned in R1.

  The slow entry point is for returning from a heap check; the saved
  size argument must be re-loaded from the stack.
  -------------------------------------------------------------------------- */

/* for objects that are *less* than the size of a word, make sure we
 * round up to the nearest word for the size of the array.
 */

newByteArrayzh_fast
{
    W_ words, payload_words, n, p;
    MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}

newPinnedByteArrayzh_fast
{
    W_ words, payload_words, n, p;

    MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);

    // We want an 8-byte aligned array.  allocatePinned() gives us
    // 8-byte aligned memory by default, but we want to align the
    // *goods* inside the ArrWords object, so we have to check the
    // size of the ArrWords header and adjust our size accordingly.
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    if ((SIZEOF_StgArrWords & 7) != 0) {
        words = words + 1;
    }

    ("ptr" p) = foreign "C" allocatePinned(words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    // Again, if the ArrWords header isn't a multiple of 8 bytes, we
    // have to push the object forward one word so that the goods
    // fall on an 8-byte boundary.
    if ((SIZEOF_StgArrWords & 7) != 0) {
        p = p + WDS(1);
    }

    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
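
/* A worked example of the alignment adjustment above (illustrative
 * figures only, not a claim about any particular build): suppose
 * SIZEOF_StgArrWords were 12 on a 32-bit platform.  Then 12 & 7 != 0,
 * so we allocate one extra word and bump p forward by WDS(1); the
 * header then occupies bytes 4..15 of the (8-byte aligned) allocation
 * and the payload starts at byte 16, an 8-byte boundary.  When the
 * header size is already a multiple of 8, both adjustments are skipped.
 */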

newArrayzh_fast
{
    W_ words, n, init, arr, p;
    /* Args: R1 = words, R2 = initialisation value */

    n = R1;
    MAYBE_GC(R2_PTR,newArrayzh_fast);

    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
    ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(arr) = n;

    // Initialise all elements of the array with the value in R2
    init = R2;
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + WDS(words)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    RET_P(arr);
}

unsafeThawArrayzh_fast
{
  // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
  //
  // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
  // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
  // it on the mutable list for the GC to remove (removing something from
  // the mutable list is not easy, because the mut_list is only singly-linked).
  //
  // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
  // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
  // to indicate that it is still on the mutable list.
  //
  // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
  // either it is on a mut_list, or it isn't.  We adopt the convention that
  // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
  // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
  // we put it on the mutable list more than once, but it would get scavenged
  // multiple times during GC, which would be unnecessarily slow.
  //
  if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(R1, R1);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        RET_P(R1);
  } else {
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        RET_P(R1);
  }
}
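
/* At the source level this primop underlies unsafeThaw, which turns a
 * frozen (immutable) array back into a mutable one in place, with no
 * copying.  A minimal Haskell sketch (hypothetical demo code; module
 * locations vary between versions of the array package):
 *
 *   import Data.Array (Array, listArray)
 *   import Data.Array.IO (IOArray, readArray, writeArray)
 *   import Data.Array.Unsafe (unsafeThaw)
 *
 *   demo :: IO Int
 *   demo = do
 *     let frozen = listArray (0, 9) [0 ..] :: Array Int Int
 *     marr <- unsafeThaw frozen :: IO (IOArray Int Int)
 *     writeArray marr 0 42   -- mutates in place; no copy was made
 *     readArray marr 0
 *
 * This is only safe if the original immutable array is never used again.
 */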

/* -----------------------------------------------------------------------------
   MutVar primitives
   -------------------------------------------------------------------------- */

newMutVarzh_fast
{
    W_ mv;
    /* Args: R1 = initialisation value */

    ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);

    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
    StgMutVar_var(mv) = R1;

    RET_P(mv);
}

atomicModifyMutVarzh_fast
{
    W_ mv, z, x, y, r;
    /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */

    /* If x is the current contents of the MutVar#, then
       we want to make the new contents point to

         (sel_0 (f x))

       and the return value is

         (sel_1 (f x))

       Obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */

#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

   HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);

#if defined(THREADED_RTS)
    ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
#endif

   x = StgMutVar_var(R1);

   TICK_ALLOC_THUNK_2();
   CCCS_ALLOC(THUNK_2_SIZE);
   z = Hp - THUNK_2_SIZE + WDS(1);
   SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(z);
   StgThunk_payload(z,0) = R2;
   StgThunk_payload(z,1) = x;

   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   y = z - THUNK_1_SIZE;
   SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(y);
   StgThunk_payload(y,0) = z;

   StgMutVar_var(R1) = y;
   foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];

   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   r = y - THUNK_1_SIZE;
   SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(r);
   StgThunk_payload(r,0) = z;

#if defined(THREADED_RTS)
    RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
#endif

   RET_P(r);
}
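
/* The thunk structure built above corresponds to the following Haskell
 * model (a sketch of the semantics, not the real library code): the
 * application (f x) is shared, and only cheap selector thunks are
 * stored and returned.
 *
 *   import Data.IORef (IORef, readIORef, writeIORef)
 *
 *   -- Models the heap behaviour of atomicModifyIORef (ignoring the
 *   -- atomicity itself, which the primop provides):
 *   atomicModifyModel :: IORef a -> (a -> (a, b)) -> IO b
 *   atomicModifyModel ref f = do
 *     x <- readIORef ref
 *     let z = f x        -- z = [stg_ap_2 f x]
 *         y = fst z      -- y = [stg_sel_0 z]
 *         r = snd z      -- r = [stg_sel_1 z]
 *     writeIORef ref y   -- new contents: a lazy selector thunk
 *     return r
 */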

/* -----------------------------------------------------------------------------
   Weak Pointer Primitives
   -------------------------------------------------------------------------- */

STRING(stg_weak_msg,"New weak pointer at %p\n")

mkWeakzh_fast
{
  /* R1 = key
     R2 = value
     R3 = finalizer (or NULL)
  */
  W_ w;

  if (R3 == NULL) {
    R3 = stg_NO_FINALIZER_closure;
  }

  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, W_[CCCS]);

  StgWeak_key(w)       = R1;
  StgWeak_value(w)     = R2;
  StgWeak_finalizer(w) = R3;

  StgWeak_link(w)       = W_[weak_ptr_list];
  W_[weak_ptr_list]     = w;

  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

  RET_P(w);
}


finalizzeWeakzh_fast
{
  /* R1 = weak ptr
   */
  W_ w, f;

  w = R1;

  // already dead?
  if (GET_INFO(w) == stg_DEAD_WEAK_info) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  }

  // kill it
#ifdef PROFILING
  // @LDV profiling
  // A weak pointer is inherently used, so we do not need to call
  // LDV_recordDead_FILL_SLOP_DYNAMIC():
  //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
  // or, LDV_recordDead():
  //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
  // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
  // large as weak pointers, so there is no need to fill the slop, either.
  // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
#endif

  //
  // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
  //
  SET_INFO(w,stg_DEAD_WEAK_info);
  LDV_RECORD_CREATE(w);

  f = StgWeak_finalizer(w);
  StgDeadWeak_link(w) = StgWeak_link(w);

  /* return the finalizer */
  if (f == stg_NO_FINALIZER_closure) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  } else {
      RET_NP(1,f);
  }
}

deRefWeakzh_fast
{
  /* R1 = weak ptr */
  W_ w, code, val;

  w = R1;
  if (GET_INFO(w) == stg_WEAK_info) {
    code = 1;
    val = StgWeak_value(w);
  } else {
    code = 0;
    val = w;
  }
  RET_NP(code,val);
}
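
/* These three primops back the System.Mem.Weak API.  A small usage
 * sketch (hypothetical demo code):
 *
 *   import System.Mem (performGC)
 *   import System.Mem.Weak (mkWeak, deRefWeak)
 *
 *   demo :: IO ()
 *   demo = do
 *     let key = [1, 2, 3] :: [Int]
 *     w <- mkWeak key "payload" (Just (putStrLn "finalizer ran"))
 *     v <- deRefWeak w        -- Just "payload" while key is alive
 *     print (v, length key)   -- keep key alive past the deRefWeak
 *     performGC               -- finalizer may run once key is dead
 */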

/* -----------------------------------------------------------------------------
   Arbitrary-precision Integer operations.

   There are some assumptions in this code that mp_limb_t == W_.  This is
   the case for all the platforms that GHC supports, currently.
   -------------------------------------------------------------------------- */

int2Integerzh_fast
{
   /* arguments: R1 = Int# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;
   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   /* mpz_set_si is inlined here, makes things simpler */
   if (%lt(val,0)) {
        s  = -1;
        Hp(0) = -val;
   } else {
     if (%gt(val,0)) {
        s = 1;
        Hp(0) = val;
     } else {
        s = 0;
     }
   }

   /* returns (# size  :: Int#,
                 data  :: ByteArray#
               #)
   */
   RET_NP(s,p);
}

word2Integerzh_fast
{
   /* arguments: R1 = Word# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   if (val != 0) {
        s = 1;
        W_[Hp] = val;
   } else {
        s = 0;
   }

   /* returns (# size  :: Int#,
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
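
/* The (# size, data #) pairs returned above are used on the Haskell
 * side to build Integers.  In the GMP-based representation of this era
 * the type looks roughly like this (a sketch; the real definition
 * lives in the base library's GHC.Num module):
 *
 *   data Integer
 *     = S# Int#             -- small: fits in a machine Int#
 *     | J# Int# ByteArray#  -- big: GMP sign-and-size plus limb array
 *
 * so a primop returning (# s, d #) corresponds to J# s d, where s
 * carries the sign and the number of used limbs and d holds the limbs.
 */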


/*
 * 'long long' primops for converting to/from Integers.
 */

#ifdef SUPPORT_LONG_LONGS

int64ToIntegerzh_fast
{
   /* arguments: L1 = Int64# */

   L_ val;
   W_ hi, lo, s, neg, words_needed, p;

   val = L1;
   neg = 0;

   hi = TO_W_(val >> 32);
   lo = TO_W_(val);

   if ( hi != 0 && hi != 0xFFFFFFFF )  {
       words_needed = 2;
   } else {
       // minimum is one word
       words_needed = 1;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, int64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

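   /* Negate a two-limb magnitude.  -(hi:lo) is (-hi, 0) when lo == 0,
      and (-hi - 1, -lo) otherwise: the +1 of the two's complement only
      carries out of the low limb when lo == 0. */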
   if ( %lt(hi,0) ) {
     neg = 1;
     lo = -lo;
     if(lo == 0) {
       hi = -hi;
     } else {
       hi = -hi - 1;
     }
   }

   if ( words_needed == 2 )  {
      s = 2;
      Hp(-1) = lo;
      Hp(0) = hi;
   } else {
      if ( lo != 0 ) {
         s = 1;
         Hp(0) = lo;
      } else /* val==0 */  {
         s = 0;
      }
   }
   if ( neg != 0 ) {
        s = -s;
   }

   /* returns (# size  :: Int#,
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}

word64ToIntegerzh_fast
{
   /* arguments: L1 = Word64# */

   L_ val;
   W_ hi, lo, s, words_needed, p;

   val = L1;
   hi = TO_W_(val >> 32);
   lo = TO_W_(val);

   if ( hi != 0 ) {
      words_needed = 2;
   } else {
      words_needed = 1;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, word64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   if ( hi != 0 ) {
      s = 2;
      Hp(-1) = lo;
      Hp(0)  = hi;
   } else {
      if ( lo != 0 ) {
         s = 1;
         Hp(0) = lo;
      } else /* val==0 */  {
         s = 0;
      }
   }

   /* returns (# size  :: Int#,
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}



#endif /* SUPPORT_LONG_LONGS */

/* ToDo: this is shockingly inefficient */

#ifndef THREADED_RTS
section "bss" {
  mp_tmp1:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_tmp2:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_result1:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_result2:
    bits8 [SIZEOF_MP_INT];
}
#endif

#ifdef THREADED_RTS
#define FETCH_MP_TEMP(X) \
W_ X; \
X = BaseReg + (OFFSET_StgRegTable_r ## X);
#else
#define FETCH_MP_TEMP(X) /* Nothing */
#endif

#define GMP_TAKE2_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1, s2;                                                          \
  W_ d1, d2;                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_tmp2);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
  FETCH_MP_TEMP(mp_result2);                                            \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
                                                                        \
  s1 = W_TO_INT(R1);                                                    \
  d1 = R2;                                                              \
  s2 = W_TO_INT(R3);                                                    \
  d2 = R4;                                                              \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
  MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
  MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
  MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
  MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
  MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                         \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}

#define GMP_TAKE1_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1;                                                              \
  W_ d1;                                                                \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR, name);                                               \
                                                                        \
  d1 = R2;                                                              \
  s1 = W_TO_INT(R1);                                                    \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                         \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}

#define GMP_TAKE2_RET2(name,mp_fun)                                                     \
name                                                                                    \
{                                                                                       \
  CInt s1, s2;                                                                          \
  W_ d1, d2;                                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                                               \
  FETCH_MP_TEMP(mp_tmp2);                                                               \
  FETCH_MP_TEMP(mp_result1)                                                             \
  FETCH_MP_TEMP(mp_result2)                                                             \
                                                                                        \
  /* call doYouWantToGC() */                                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
                                                                                        \
  s1 = W_TO_INT(R1);                                                                    \
  d1 = R2;                                                                              \
  s2 = W_TO_INT(R3);                                                                    \
  d2 = R4;                                                                              \
                                                                                        \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
  MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
  MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
  MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
                                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                                         \
  foreign "C" __gmpz_init(mp_result2 "ptr") [];                                         \
                                                                                        \
  /* Perform the operation */                                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") []; \
                                                                                        \
  RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
           MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
           TO_W_(MP_INT__mp_size(mp_result2)),                                          \
           MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
}

GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)

GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)

#ifndef THREADED_RTS
section "bss" {
  mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
}
#endif

gcdIntzh_fast
{
    /* R1 = the first Int#; R2 = the second Int# */
    W_ r;
    FETCH_MP_TEMP(mp_tmp_w);

    W_[mp_tmp_w] = R1;
    (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];

    R1 = r;
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}


gcdIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ s1;
    (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
    R1 = s1;

    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}


cmpIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ usize, vsize, v_digit, u_digit;

    usize = R1;
    vsize = 0;
    v_digit = R3;

    // paraphrased from __gmpz_cmp_si() in the GMP sources
    if (%gt(v_digit,0)) {
        vsize = 1;
    } else {
        if (%lt(v_digit,0)) {
            vsize = -1;
            v_digit = -v_digit;
        }
    }

    if (usize != vsize) {
        R1 = usize - vsize;
        jump %ENTRY_CODE(Sp(0));
    }

    if (usize == 0) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    u_digit = W_[BYTE_ARR_CTS(R2)];

    if (u_digit == v_digit) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
        R1 = usize;
    } else {
        R1 = -usize;
    }

    jump %ENTRY_CODE(Sp(0));
}

cmpIntegerzh_fast
{
    /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
    W_ usize, vsize, size, up, vp;
    CInt cmp;

    // paraphrased from __gmpz_cmp() in the GMP sources
    usize = R1;
    vsize = R3;

    if (usize != vsize) {
        R1 = usize - vsize;
        jump %ENTRY_CODE(Sp(0));
    }

    if (usize == 0) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    if (%lt(usize,0)) { // NB. not <, which is unsigned
        size = -usize;
    } else {
        size = usize;
    }

    up = BYTE_ARR_CTS(R2);
    vp = BYTE_ARR_CTS(R4);

    (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];

    if (cmp == 0 :: CInt) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
        R1 = 1;
    } else {
        R1 = (-1);
    }
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}

integer2Intzh_fast
{
    /* R1 = s; R2 = d */
    W_ r, s;

    s = R1;
    if (s == 0) {
        r = 0;
    } else {
        r = W_[R2 + SIZEOF_StgArrWords];
        if (%lt(s,0)) {
            r = -r;
        }
    }
    /* Result parked in R1, return via info-pointer at TOS */
    R1 = r;
    jump %ENTRY_CODE(Sp(0));
}

integer2Wordzh_fast
{
  /* R1 = s; R2 = d */
  W_ r, s;

  s = R1;
  if (s == 0) {
    r = 0;
  } else {
    r = W_[R2 + SIZEOF_StgArrWords];
    if (%lt(s,0)) {
        r = -r;
    }
  }
  /* Result parked in R1, return via info-pointer at TOS */
  R1 = r;
  jump %ENTRY_CODE(Sp(0));
}

decodeFloatzh_fast
{
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: F1 = Float# */
    arg = F1;

    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );

    /* Be prepared to tell Lennart-coded __decodeFloat
       where mantissa._mp_d can be put (it does not care about the rest) */
    p = Hp - SIZEOF_StgArrWords;
    SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
    StgArrWords_words(p) = 1;
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

    /* Perform the operation */
    foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];

    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}

#define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
#define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)

decodeDoublezh_fast
{
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: D1 = Double# */
    arg = D1;

    ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );

    /* Be prepared to tell Lennart-coded __decodeDouble
       where mantissa.d can be put (it does not care about the rest) */
    p = Hp - ARR_SIZE + WDS(1);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

    /* Perform the operation */
    foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];

    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
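
/* These implement decodeFloat# / decodeDouble#, reached via the
 * RealFloat class.  For example, in GHCi:
 *
 *   > decodeFloat (0.5 :: Double)
 *   (4503599627370496,-53)
 *
 * i.e. 0.5 = 4503599627370496 * 2^(-53), with the mantissa normalised
 * to the full 53 bits of a Double.
 */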

/* -----------------------------------------------------------------------------
 * Concurrency primitives
 * -------------------------------------------------------------------------- */

forkzh_fast
{
  /* args: R1 = closure to spark */

  MAYBE_GC(R1_PTR, forkzh_fast);

  W_ closure;
  W_ threadid;
  closure = R1;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
                                RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                closure "ptr") [];

  /* start blocked if the current thread is blocked */
  StgTSO_flags(threadid) =
     StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) &
                                (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));

  foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];

  // switch at the earliest opportunity
  CInt[context_switch] = 1 :: CInt;

  RET_P(threadid);
}

forkOnzh_fast
{
  /* args: R1 = cpu, R2 = closure to spark */

  MAYBE_GC(R2_PTR, forkOnzh_fast);

  W_ cpu;
  W_ closure;
  W_ threadid;
  cpu = R1;
  closure = R2;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
                                RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                closure "ptr") [];

  /* start blocked if the current thread is blocked */
  StgTSO_flags(threadid) =
     StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) &
                                (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));

  foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];

  // switch at the earliest opportunity
  CInt[context_switch] = 1 :: CInt;

  RET_P(threadid);
}
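
/* fork# and forkOn# are the primops beneath forkIO and forkOnIO in the
 * concurrency libraries.  A usage sketch (hypothetical demo code):
 *
 *   import Control.Concurrent (forkIO, myThreadId, threadDelay)
 *
 *   demo :: IO ()
 *   demo = do
 *     _ <- forkIO (myThreadId >>= print)  -- child runs concurrently
 *     threadDelay 100000                  -- give the child time to run
 */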

yieldzh_fast
{
  jump stg_yield_noregs;
}

myThreadIdzh_fast
{
  /* no args. */
  RET_P(CurrentTSO);
}

labelThreadzh_fast
{
  /* args:
        R1 = ThreadId#
        R2 = Addr# */
#ifdef DEBUG
  foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
#endif
  jump %ENTRY_CODE(Sp(0));
}

isCurrentThreadBoundzh_fast
{
  /* no args */
  W_ r;
  (r) = foreign "C" isThreadBound(CurrentTSO) [];
  RET_N(r);
}


/* -----------------------------------------------------------------------------
 * TVar primitives
 * -------------------------------------------------------------------------- */

#ifdef REG_R1
#define SP_OFF 0
#define IF_NOT_REG_R1(x)
#else
#define SP_OFF 1
#define IF_NOT_REG_R1(x) x
#endif

// Catch retry frame ------------------------------------------------------------

INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5)
{
   W_ r, frame, trec, outer;
   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
   (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
   if (r != 0) {
     /* Succeeded (either first branch or second branch) */
     StgTSO_trec(CurrentTSO) = outer;
     Sp = Sp + SIZEOF_StgCatchRetryFrame;
     IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
     jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
     /* Did not commit: re-execute */
     W_ new_trec;
     ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
     StgTSO_trec(CurrentTSO) = new_trec;
     if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
       R1 = StgCatchRetryFrame_alt_code(frame);
     } else {
       R1 = StgCatchRetryFrame_first_code(frame);
     }
     jump stg_ap_v_fast;
   }
}


// Atomically frame ------------------------------------------------------------

INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
  W_ frame, trec, valid, next_invariant, q, outer;
  IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (outer == NO_TREC) {
    /* First time back at the atomically frame -- pick up invariants */
    ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;

  } else {
    /* Second/subsequent time back at the atomically frame -- abort the
     * tx that's checking the invariant and move on to the next one */
    StgTSO_trec(CurrentTSO) = outer;
    q = StgAtomicallyFrame_next_invariant_to_check(frame);
    StgInvariantCheckQueue_my_execution(q) = trec;
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    /* Don't free trec -- it's linked from q and will be stashed in the
     * invariant if we eventually commit. */
    q = StgInvariantCheckQueue_next_queue_entry(q);
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    trec = outer;
  }

  q = StgAtomicallyFrame_next_invariant_to_check(frame);

  if (q != END_INVARIANT_CHECK_QUEUE) {
    /* We can't commit yet: another invariant to check */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;

    next_invariant = StgInvariantCheckQueue_invariant(q);
    R1 = StgAtomicInvariant_code(next_invariant);
    jump stg_ap_v_fast;

  } else {

    /* We've got no more invariants to check, try to commit */
    (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
    if (valid != 0) {
      /* Transaction was valid: commit succeeded */
      StgTSO_trec(CurrentTSO) = NO_TREC;
      Sp = Sp + SIZEOF_StgAtomicallyFrame;
      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
      jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
      /* Transaction was not valid: try again */
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
      R1 = StgAtomicallyFrame_code(frame);
      jump stg_ap_v_fast;
    }
  }
}

INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
  W_ frame, trec, valid;
  IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

  frame = Sp;

  /* The TSO is currently waiting: should we stop waiting? */
  (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
  if (valid != 0) {
    /* Previous attempt is still valid: no point trying again yet */
    IF_NOT_REG_R1(Sp_adj(-2);
                  Sp(1) = stg_NO_FINALIZER_closure;
                  Sp(0) = stg_ut_1_0_unreg_info;)
    jump stg_block_noregs;
  } else {
    /* Previous attempt is no longer valid: try again */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    StgHeader_info(frame) = stg_atomically_frame_info;
    R1 = StgAtomicallyFrame_code(frame);
    jump stg_ap_v_fast;
  }
}

// STM catch frame --------------------------------------------------------------

#ifdef REG_R1
#define SP_OFF 0
#else
#define SP_OFF 1
#endif

/* Catch frames are very similar to update frames, but when entering
 * one we just pop the frame off the stack and perform the correct
 * kind of return to the activation record underneath us on the stack.
 */

INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
   {
      IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
      W_ r, frame, trec, outer;
      frame = Sp;
      trec = StgTSO_trec(CurrentTSO);
      ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
      (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
      if (r != 0) {
        /* Commit succeeded */
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchSTMFrame;
        IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
        jump Sp(SP_OFF);
      } else {
        /* Commit failed */
        W_ new_trec;
        ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
        StgTSO_trec(CurrentTSO) = new_trec;
        R1 = StgCatchSTMFrame_code(frame);
        jump stg_ap_v_fast;
      }
   }


// Primop definition ------------------------------------------------------------

atomicallyzh_fast
{
  W_ frame;
  W_ old_trec;
  W_ new_trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR, atomicallyzh_fast);

  /* Args: R1 = m :: STM a */
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);

  old_trec = StgTSO_trec(CurrentTSO);

  /* Nested transactions are not allowed; raise an exception */
  if (old_trec != NO_TREC) {
     R1 = base_GHCziIOBase_NestedAtomically_closure;
     jump raisezh_fast;
  }

  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
  StgAtomicallyFrame_code(frame) = R1;
  StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;

  /* Start the memory transaction */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
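
/* The nested-transaction check above can only be tripped from Haskell
 * by smuggling an atomically into STM via unsafePerformIO (a contrived
 * sketch, shown only to illustrate the error path):
 *
 *   import Control.Concurrent.STM (atomically, newTVar)
 *   import System.IO.Unsafe (unsafePerformIO)
 *
 *   bad :: Int
 *   bad = unsafePerformIO (atomically (return 42))
 *
 *   main :: IO ()
 *   main = atomically (bad `seq` newTVar ()) >> return ()
 *     -- forcing bad inside the transaction raises NestedAtomically
 */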


catchSTMzh_fast
{
  W_ frame;

  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */
  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
  StgCatchSTMFrame_handler(frame) = R2;
  StgCatchSTMFrame_code(frame) = R1;

  /* Start a nested transaction to run the body of the try block in */
  W_ cur_trec;
  W_ new_trec;
  cur_trec = StgTSO_trec(CurrentTSO);
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}


catchRetryzh_fast
{
  W_ frame;
  W_ new_trec;
  W_ trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Args: R1 :: STM a */
  /* Args: R2 :: STM a */
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
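
/* catchRetry# implements STM's orElse: run the first action in a
 * nested transaction and, if it calls retry#, roll that branch back
 * and run the alternative.  A usage sketch (hypothetical demo code):
 *
 *   import Control.Concurrent.STM
 *
 *   takeEither :: TVar (Maybe a) -> TVar (Maybe a) -> STM a
 *   takeEither v1 v2 = takeVar v1 `orElse` takeVar v2
 *     where
 *       takeVar v = do
 *         mx <- readTVar v
 *         case mx of
 *           Nothing -> retry   -- falls through to the other branch
 *           Just x  -> do writeTVar v Nothing; return x
 */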


retryzh_fast
{
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

  MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
  StgTSO_sp(CurrentTSO) = Sp;
  (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
  Sp = StgTSO_sp(CurrentTSO);
  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
    // Abort the transaction attempting the current branch
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    if (StgCatchRetryFrame_running_alt_code(frame) == 0::I32) {
      // Retry in the first branch: try the alternative
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
      R1 = StgCatchRetryFrame_alt_code(frame);
      jump stg_ap_v_fast;
    } else {
      // Retry in the alternative code: propagate the retry
      StgTSO_trec(CurrentTSO) = outer;
      Sp = Sp + SIZEOF_StgCatchRetryFrame;
      goto retry_pop_stack;
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait
  ASSERT(frame_type == ATOMICALLY_FRAME);
  if (outer != NO_TREC) {
    // We called retry while checking invariants, so abort the current
    // invariant check (merging its TVar accesses into the parent's read
    // set so we'll wait on them)
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    trec = outer;
    StgTSO_trec(CurrentTSO) = trec;
    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
  }
  ASSERT(outer == NO_TREC);

  (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
  if (r != 0) {
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
    StgHeader_info(frame) = stg_atomically_waiting_frame_info;
    Sp = frame;
    // Fix up the stack in the unregisterised case: the return convention is different.
    IF_NOT_REG_R1(Sp_adj(-2);
                  Sp(1) = stg_NO_FINALIZER_closure;
                  Sp(0) = stg_ut_1_0_unreg_info;)
    R3 = trec; // passing to stmWaitUnblock()
    jump stg_block_stmwait;
  } else {
    // Transaction was not valid: retry immediately
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
    jump stg_ap_v_fast;
  }
}


checkzh_fast
{
  W_ trec, closure;

  /* Args: R1 = invariant closure */
  MAYBE_GC (R1_PTR, checkzh_fast);

  trec = StgTSO_trec(CurrentTSO);
  closure = R1;
  foreign "C" stmAddInvariantToCheck(MyCapability() "ptr",
                                     trec "ptr",
                                     closure "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}


newTVarzh_fast
{
  W_ tv;
  W_ new_value;

  /* Args: R1 = initialisation value */

  MAYBE_GC (R1_PTR, newTVarzh_fast);
  new_value = R1;
  ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
  RET_P(tv);
}


readTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ result;

  /* Args: R1 = TVar closure */

  MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];

  RET_P(result);
}


writeTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ new_value;

  /* Args: R1 = TVar closure */
  /*       R2 = New value    */

  MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  new_value = R2;
  foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
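
/* newTVar#, readTVar# and writeTVar# are the primops beneath
 * Control.Concurrent.STM.  A usage sketch (hypothetical demo code):
 *
 *   import Control.Concurrent.STM
 *
 *   transfer :: TVar Int -> TVar Int -> Int -> STM ()
 *   transfer from to n = do
 *     b1 <- readTVar from
 *     b2 <- readTVar to
 *     writeTVar from (b1 - n)   -- both writes commit atomically,
 *     writeTVar to   (b2 + n)   -- or neither does
 *
 *   main :: IO ()
 *   main = do
 *     a <- atomically (newTVar 100)
 *     b <- atomically (newTVar 0)
 *     atomically (transfer a b 25)
 *     atomically (readTVar b) >>= print   -- prints 25
 */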


/* -----------------------------------------------------------------------------
 * MVar primitives
 *
 * take & putMVar work as follows.  Firstly, an important invariant:
 *
 *    If the MVar is full, then the blocking queue contains only
 *    threads blocked on putMVar, and if the MVar is empty then the
 *    blocking queue contains only threads blocked on takeMVar.
 *
 * takeMVar:
 *    MVar empty : then add ourselves to the blocking queue
 *    MVar full  : remove the value from the MVar, and
 *                 blocking queue empty     : return
 *                 blocking queue non-empty : perform the first blocked putMVar
 *                                            from the queue, and wake up the
 *                                            thread (MVar is now full again)
 *
 * putMVar is just the dual of the above algorithm.
 *
 * How do we "perform a putMVar"?  Well, we have to fiddle around with
 * the stack of the thread waiting to do the putMVar.  See
 * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
 * the stack layout, and the PerformPut and PerformTake macros below.
 *
 * It is important that a blocked take or put is woken up with the
 * take/put already performed, because otherwise there would be a
 * small window of vulnerability where the thread could receive an
 * exception and never perform its take or put, and we'd end up with a
 * deadlock.
 *
 * -------------------------------------------------------------------------- */
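
/* A Haskell-level sketch of the rendezvous described above
 * (hypothetical demo code):
 *
 *   import Control.Concurrent (forkIO)
 *   import Control.Concurrent.MVar (newEmptyMVar, putMVar, takeMVar)
 *
 *   demo :: IO ()
 *   demo = do
 *     box <- newEmptyMVar
 *     _ <- forkIO (putMVar box "hello")  -- would block if box were full
 *     msg <- takeMVar box                -- blocks until a value arrives
 *     putStrLn msg
 */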
1455
1456 isEmptyMVarzh_fast
1457 {
1458     /* args: R1 = MVar closure */
1459
1460     if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
1461         RET_N(1);
1462     } else {
1463         RET_N(0);
1464     }
1465 }
1466
1467 newMVarzh_fast
1468 {
1469     /* args: none */
1470     W_ mvar;
1471
1472     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1473   
1474     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1475     SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
1476         // MVARs start dirty: generation 0 has no mutable list
1477     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1478     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1479     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1480     RET_P(mvar);
1481 }
1482
1483
1484 /* If R1 isn't available, pass it on the stack */
1485 #ifdef REG_R1
1486 #define PerformTake(tso, value)                         \
1487     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1488     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1489 #else
1490 #define PerformTake(tso, value)                                 \
1491     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1492     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1493 #endif
1494
1495 #define PerformPut(tso,lval)                    \
1496     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1497     lval = W_[StgTSO_sp(tso) - WDS(1)];
1498
1499 takeMVarzh_fast
1500 {
1501     W_ mvar, val, info, tso;
1502
1503     /* args: R1 = MVar closure */
1504     mvar = R1;
1505
1506 #if defined(THREADED_RTS)
1507     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1508 #else
1509     info = GET_INFO(mvar);
1510 #endif
1511         
1512     if (info == stg_MVAR_CLEAN_info) {
1513         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1514     }
1515
1516     /* If the MVar is empty, put ourselves on its blocking queue,
1517      * and wait until we're woken up.
1518      */
1519     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1520         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1521             StgMVar_head(mvar) = CurrentTSO;
1522         } else {
1523             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1524         }
1525         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1526         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1527         StgTSO_block_info(CurrentTSO)  = mvar;
1528         StgMVar_tail(mvar) = CurrentTSO;
1529         
1530         jump stg_block_takemvar;
1531   }
1532
1533   /* we got the value... */
1534   val = StgMVar_value(mvar);
1535
1536   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1537   {
1538       /* There are putMVar(s) waiting... 
1539        * wake up the first thread on the queue
1540        */
1541       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1542
1543       /* actually perform the putMVar for the thread that we just woke up */
1544       tso = StgMVar_head(mvar);
1545       PerformPut(tso,StgMVar_value(mvar));
1546       dirtyTSO(tso);
1547
1548 #if defined(GRAN) || defined(PAR)
1549       /* ToDo: check 2nd arg (mvar) is right */
1550       ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
1551       StgMVar_head(mvar) = tso;
1552 #else
1553       ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", 
1554                                          StgMVar_head(mvar) "ptr") [];
1555       StgMVar_head(mvar) = tso;
1556 #endif
1557
1558       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1559           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1560       }
1561
1562 #if defined(THREADED_RTS)
1563       unlockClosure(mvar, stg_MVAR_DIRTY_info);
1564 #else
1565       SET_INFO(mvar,stg_MVAR_DIRTY_info);
1566 #endif
1567       RET_P(val);
1568   } 
1569   else
1570   {
1571       /* No further putMVars, MVar is now empty */
1572       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1573  
1574 #if defined(THREADED_RTS)
1575       unlockClosure(mvar, stg_MVAR_DIRTY_info);
1576 #else
1577       SET_INFO(mvar,stg_MVAR_DIRTY_info);
1578 #endif
1579
1580       RET_P(val);
1581   }
1582 }
1583
1584
1585 tryTakeMVarzh_fast
1586 {
1587     W_ mvar, val, info, tso;
1588
1589     /* args: R1 = MVar closure */
1590
1591     mvar = R1;
1592
1593 #if defined(THREADED_RTS)
1594     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1595 #else
1596     info = GET_INFO(mvar);
1597 #endif
1598
1599     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1600 #if defined(THREADED_RTS)
1601         unlockClosure(mvar, info);
1602 #endif
1603         /* HACK: we need a pointer to pass back, 
1604          * so we abuse NO_FINALIZER_closure
1605          */
1606         RET_NP(0, stg_NO_FINALIZER_closure);
1607     }
1608
1609     if (info == stg_MVAR_CLEAN_info) {
1610         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1611     }
1612
1613     /* we got the value... */
1614     val = StgMVar_value(mvar);
1615
1616     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1617
1618         /* There are putMVar(s) waiting... 
1619          * wake up the first thread on the queue
1620          */
1621         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1622
1623         /* actually perform the putMVar for the thread that we just woke up */
1624         tso = StgMVar_head(mvar);
1625         PerformPut(tso,StgMVar_value(mvar));
1626         dirtyTSO(tso);
1627
1628 #if defined(GRAN) || defined(PAR)
1629         /* ToDo: check 2nd arg (mvar) is right */
1630         ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
1631         StgMVar_head(mvar) = tso;
1632 #else
1633         ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr",
1634                                            StgMVar_head(mvar) "ptr") [];
1635         StgMVar_head(mvar) = tso;
1636 #endif
1637
1638         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1639             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1640         }
1641 #if defined(THREADED_RTS)
1642         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1643 #else
1644         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1645 #endif
1646     }
1647     else 
1648     {
1649         /* No further putMVars, MVar is now empty */
1650         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1651 #if defined(THREADED_RTS)
1652         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1653 #else
1654         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1655 #endif
1656     }
1657     
1658     RET_NP(1, val);
1659 }
1660
1661
putMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;

        jump stg_block_putmvar;
    }

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        dirtyTSO(tso);

#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }

    /* ToDo: yield afterward for better communication performance? */
}


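/* tryPutMVar# is the non-blocking variant of putMVar#: it returns 0
 * if the MVar is already full, 1 on success.  Haskell-level
 * correspondence (a sketch): tryPutMVar :: MVar a -> a -> IO Bool.
 */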
tryPutMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        unlockClosure(mvar, info);
#endif
        RET_N(0);
    }

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        dirtyTSO(tso);

#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }

    RET_N(1);
    /* ToDo: yield afterward for better communication performance? */
}


/* -----------------------------------------------------------------------------
   Stable name and stable pointer primitives
   -------------------------------------------------------------------------  */

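/* makeStableName# must return the *same* StableName object every time
 * it is applied to a given heap object; lookupStableName gives us a
 * stable index, and we only allocate a fresh StgStableName if the
 * table does not already record one for that index.
 */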
makeStableNamezh_fast
{
    W_ index, sn_obj;

    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );

    (index) = foreign "C" lookupStableName(R1 "ptr") [];

    /* Is there already a StableName for this heap object?
     *  stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }

    RET_P(sn_obj);
}


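/* Haskell-level correspondence for the next two primops (a sketch,
 * from Foreign.StablePtr):
 *
 *   newStablePtr   :: a -> IO (StablePtr a)
 *   deRefStablePtr :: StablePtr a -> IO a
 *
 * A StablePtr keeps its referent alive until freeStablePtr is called,
 * which is what makes it safe to hand to foreign code.
 */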
makeStablePtrzh_fast
{
    /* Args: R1 = a */
    W_ sp;
    MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
    ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
    RET_N(sp);
}

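/* Dereferencing is just an index into stable_ptr_table: the addr
 * field of the entry is the current address of the referent, which
 * the table keeps alive as a GC root until the StablePtr is freed.
 */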
deRefStablePtrzh_fast
{
    /* Args: R1 = the stable ptr */
    W_ r, sp;
    sp = R1;
    r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
    RET_P(r);
}

/* -----------------------------------------------------------------------------
   Bytecode object primitives
   -------------------------------------------------------------------------  */

newBCOzh_fast
{
    /* R1 = instrs
       R2 = literals
       R3 = ptrs
       R4 = arity
       R5 = bitmap array
    */
    W_ bco, bitmap_arr, bytes, words;

    bitmap_arr = R5;

    words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
    bytes = WDS(words);

    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );

    bco = Hp - bytes + WDS(1);
    SET_HDR(bco, stg_BCO_info, W_[CCCS]);

    StgBCO_instrs(bco)     = R1;
    StgBCO_literals(bco)   = R2;
    StgBCO_ptrs(bco)       = R3;
    StgBCO_arity(bco)      = HALF_W_(R4);
    StgBCO_size(bco)       = HALF_W_(words);

    // Copy the arity/bitmap info into the BCO
    W_ i;
    i = 0;
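    // This subset of C-- has no structured loop syntax, so the copy
    // loop is written with a label and an explicit goto (the same
    // idiom appears in unpackClosurezh_fast below).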
for:
    if (i < StgArrWords_words(bitmap_arr)) {
        StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
        i = i + 1;
        goto for;
    }

    RET_P(bco);
}


mkApUpd0zh_fast
{
    // R1 = the BCO# for the AP
    //
    W_ ap;

    // This function is *only* used to wrap zero-arity BCOs in an
    // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
    // saturated and always points directly to a FUN or BCO.
    ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
           StgBCO_arity(R1) == HALF_W_(0));

    HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
    TICK_ALLOC_UP_THK(0, 0);
    CCCS_ALLOC(SIZEOF_StgAP);

    ap = Hp - SIZEOF_StgAP + WDS(1);
    SET_HDR(ap, stg_AP_info, W_[CCCS]);

    StgAP_n_args(ap) = HALF_W_(0);
    StgAP_fun(ap) = R1;

    RET_P(ap);
}

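/* unpackClosure# lets Haskell code inspect an arbitrary closure: it
 * returns the closure's info pointer, a frozen array of its pointer
 * fields, and a byte array of its non-pointer words (see RET_NPP at
 * the end).  Closures with non-standard layout, such as most thunks
 * and (P)APs, report an empty payload rather than a misleading one.
 */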
unpackClosurezh_fast
{
/* args: R1 = closure to analyze */
// TODO: Consider the absence of ptrs or nonptrs as a special case?

    W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
    info  = %GET_STD_INFO(UNTAG(R1));

    // Some closures have non-standard layout, so we omit those here.
    W_ type;
    type = TO_W_(%INFO_TYPE(info));
    switch [0 .. N_CLOSURE_TYPES] type {
    case THUNK_SELECTOR : {
        ptrs = 1;
        nptrs = 0;
        goto out;
    }
    case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1,
         THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
        ptrs = 0;
        nptrs = 0;
        goto out;
    }
    default: {
        ptrs  = TO_W_(%INFO_PTRS(info));
        nptrs = TO_W_(%INFO_NPTRS(info));
        goto out;
    }}
out:

    W_ ptrs_arr_sz, nptrs_arr_sz;
    nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
    ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);

    ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);

    W_ clos;
    clos = UNTAG(R1);

    ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
    nptrs_arr = Hp - nptrs_arr_sz + WDS(1);

    SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
    p = 0;
for:
    if(p < ptrs) {
         W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
         p = p + 1;
         goto for;
    }

    SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(nptrs_arr) = nptrs;
    p = 0;
for2:
    if(p < nptrs) {
         W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
         p = p + 1;
         goto for2;
    }
    RET_NPP(info, ptrs_arr, nptrs_arr);
}

/* -----------------------------------------------------------------------------
   Thread I/O blocking primitives
   -------------------------------------------------------------------------- */

/* Add a thread to the end of the blocked queue. (C-- version of the C
 * macro in Schedule.h).
 */
#define APPEND_TO_BLOCKED_QUEUE(tso)                    \
    ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
    if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
      W_[blocked_queue_hd] = tso;                       \
    } else {                                            \
      StgTSO_link(W_[blocked_queue_tl]) = tso;          \
    }                                                   \
    W_[blocked_queue_tl] = tso;
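
/* Note: this is a multi-statement macro, so it must only be used in
 * statement position, as in the calls below; an unbraced if-branch
 * would guard only its first statement.
 */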

waitReadzh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitRead# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}

waitWritezh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitWrite# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}


STRING(stg_delayzh_malloc_str, "delayzh_fast")
delayzh_fast
{
#ifdef mingw32_HOST_OS
    W_ ares;
    CInt reqID;
#else
    W_ t, prev, target;
#endif

#ifdef THREADED_RTS
    foreign "C" barf("delay# on threaded RTS") never returns;
#else

    /* args: R1 (microsecond delay amount) */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;

#ifdef mingw32_HOST_OS

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_delayzh_malloc_str);
    (reqID) = foreign "C" addDelayRequest(R1);
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;

    /* Having all async-blocked threads reside on the blocked_queue
     * simplifies matters, so change the status to BlockedOnDoProc
     * and put the delayed thread on the blocked_queue.
     */
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async_void;

#else

    W_ time;
    W_ divisor;
    (time) = foreign "C" getourtimeofday() [R1];
    divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags))*1000;
    target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
           + time + 1; /* Add 1 as getourtimeofday rounds down */
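    /* A worked example (a sketch, assuming tickInterval is measured in
     * milliseconds): with tickInterval = 20 the divisor is 20000us per
     * tick, so delay# 50000 rounds up to ceil(50000/20000) = 3 ticks,
     * giving a wake-up target of time + 3 + 1. */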
    StgTSO_block_info(CurrentTSO) = target;

    /* Insert the new thread in the sleeping queue. */
    prev = NULL;
    t = W_[sleeping_queue];
while:
    if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
        prev = t;
        t = StgTSO_link(t);
        goto while;
    }

    StgTSO_link(CurrentTSO) = t;
    if (prev == NULL) {
        W_[sleeping_queue] = CurrentTSO;
    } else {
        StgTSO_link(prev) = CurrentTSO;
    }
    jump stg_block_noregs;
#endif
#endif /* !THREADED_RTS */
}


#ifdef mingw32_HOST_OS
STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
asyncReadzh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncRead# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncReadzh_malloc_str)
                        [R1,R2,R3,R4];
    (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}

STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
asyncWritezh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncWrite# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncWritezh_malloc_str)
                        [R1,R2,R3,R4];
    (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];

    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}

STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
asyncDoProczh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
#else

    /* args: R1 = proc, R2 = param */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncDoProczh_malloc_str)
                                [R1,R2];
    (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
#endif

// noDuplicate# tries to ensure that none of the thunks under
// evaluation by the current thread are also under evaluation by
// another thread.  It relies on *both* threads doing noDuplicate#;
// the second one will get blocked if they are duplicating some work.
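// At the Haskell level this backs unsafePerformIO: GHC's
// unsafePerformIO inserts a noDuplicate# so that a side-effecting
// thunk is not run twice by racing threads, while
// unsafeDupablePerformIO omits the check and accepts possible
// duplication.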
noDuplicatezh_fast
{
    SAVE_THREAD_STATE();
    ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
    foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];

    if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
        jump stg_threadFinished;
    } else {
        LOAD_THREAD_STATE();
        ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
        jump %ENTRY_CODE(Sp(0));
    }
}

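/* getApStackVal# extracts a payload word from an AP_STACK closure
 * (used when inspecting suspended computations, e.g. by the GHCi
 * debugger).  The ok flag below is 0 when the closure is no longer an
 * AP_STACK - for instance because it has been evaluated in the
 * meantime - in which case the closure itself is handed back instead.
 */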
getApStackValzh_fast
{
   W_ ap_stack, offset, val, ok;

   /* args: R1 = AP_STACK, R2 = offset */
   ap_stack = R1;
   offset   = R2;

   if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
        ok = 1;
        val = StgAP_STACK_payload(ap_stack,offset);
   } else {
        ok = 0;
        val = R1;
   }
   RET_NP(ok,val);
}