Add GMP_INCLUDE_DIRS in a couple of places
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 #ifdef __PIC__
31 #ifndef mingw32_HOST_OS
32 import __gmpz_init;
33 import __gmpz_add;
34 import __gmpz_sub;
35 import __gmpz_mul;
36 import __gmpz_gcd;
37 import __gmpn_gcd_1;
38 import __gmpn_cmp;
39 import __gmpz_tdiv_q;
40 import __gmpz_tdiv_r;
41 import __gmpz_tdiv_qr;
42 import __gmpz_fdiv_qr;
43 import __gmpz_divexact;
44 import __gmpz_and;
45 import __gmpz_xor;
46 import __gmpz_ior;
47 import __gmpz_com;
48 #endif
49 import base_GHCziIOBase_NestedAtomically_closure;
50 import pthread_mutex_lock;
51 import pthread_mutex_unlock;
52 #endif
53 import EnterCriticalSection;
54 import LeaveCriticalSection;
55
56 /*-----------------------------------------------------------------------------
57   Array Primitives
58
59   Basically just new*Array - the others are all inline macros.
60
61   The size arg is always passed in R1, and the result returned in R1.
62
63   The slow entry point is for returning from a heap check, the saved
64   size argument must be re-loaded from the stack.
65   -------------------------------------------------------------------------- */
66
67 /* for objects that are *less* than the size of a word, make sure we
68  * round up to the nearest word for the size of the array.
69  */
70
/* newByteArray#: allocate a mutable ByteArray# of R1 bytes on the heap.
 * Returns the new array in R1 (via RET_P). */
71 newByteArrayzh_fast
72 {
73     W_ words, payload_words, n, p;
74     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);  // no pointer args to save across GC
75     n = R1;
76     payload_words = ROUNDUP_BYTES_TO_WDS(n);  // round byte count up to whole words
77     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
78     ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
79     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
80     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
81     StgArrWords_words(p) = payload_words;
82     RET_P(p);
83 }
84
/* newPinnedByteArray#: like newByteArray#, but the array is allocated with
 * allocatePinned() so the GC will never move it, and the payload is 8-byte
 * aligned.  Returns the new array in R1. */
85 newPinnedByteArrayzh_fast
86 {
87     W_ words, payload_words, n, p;
88
89     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
90     n = R1;
91     payload_words = ROUNDUP_BYTES_TO_WDS(n);
92
93     // We want an 8-byte aligned array.  allocatePinned() gives us
94     // 8-byte aligned memory by default, but we want to align the
95     // *goods* inside the ArrWords object, so we have to check the
96     // size of the ArrWords header and adjust our size accordingly.
97     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
98     if ((SIZEOF_StgArrWords & 7) != 0) {
99         words = words + 1;
100     }
101
102     ("ptr" p) = foreign "C" allocatePinned(words) [];
103     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
104
105     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
106     // have to push the object forward one word so that the goods
107     // fall on an 8-byte boundary.
108     if ((SIZEOF_StgArrWords & 7) != 0) {
109         p = p + WDS(1);
110     }
111
112     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
113     StgArrWords_words(p) = payload_words;
114     RET_P(p);
115 }
116
/* newArray#: allocate a mutable array of R1 boxed elements, every slot
 * initialised to the closure in R2.  Returns the array in R1. */
117 newArrayzh_fast
118 {
119     W_ words, n, init, arr, p;
120     /* Args: R1 = words, R2 = initialisation value */
121
122     n = R1;
123     MAYBE_GC(R2_PTR,newArrayzh_fast);  // R2 is a pointer: must be saved over GC
124
125     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
126     ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
127     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
128
129     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
130     StgMutArrPtrs_ptrs(arr) = n;
131
132     // Initialise all elements of the array with the value in R2
133     init = R2;
134     p = arr + SIZEOF_StgMutArrPtrs;
135   for:
136     if (p < arr + WDS(words)) {
137         W_[p] = init;
138         p = p + WDS(1);
139         goto for;
140     }
141
142     RET_P(arr);
143 }
144
/* unsafeThawArray#: turn a frozen array back into a mutable one in place,
 * taking care over the old-generation mutable list (see below). */
145 unsafeThawArrayzh_fast
146 {
147   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
148   //
149   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
150   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
151   // it on the mutable list for the GC to remove (removing something from
152   // the mutable list is not easy, because the mut_list is only singly-linked).
153   // 
154   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
155   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
156   // to indicate that it is still on the mutable list.
157   //
158   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
159   // either it is on a mut_list, or it isn't.  We adopt the convention that
160   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
161   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
162   // we put it on the mutable list more than once, but it would get scavenged
163   // multiple times during GC, which would be unnecessarily slow.
164   //
165   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
166         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
167         recordMutable(R1, R1);
168         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
169         RET_P(R1);
170   } else {
171         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
172         RET_P(R1);
173   }
174 }
175
176 /* -----------------------------------------------------------------------------
177    MutVar primitives
178    -------------------------------------------------------------------------- */
179
/* newMutVar#: allocate a MutVar# containing the closure in R1.
 * Returns the new MutVar# in R1. */
180 newMutVarzh_fast
181 {
182     W_ mv;
183     /* Args: R1 = initialisation value */
184
185     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
186
187     mv = Hp - SIZEOF_StgMutVar + WDS(1);  // carve the MutVar out of the bumped Hp
188     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
189     StgMutVar_var(mv) = R1;
190     
191     RET_P(mv);
192 }
193
/* atomicModifyMutVar#: atomically apply R2 (a function returning a pair)
 * to the contents of the MutVar# in R1, store the lazily-selected first
 * component back, and return a thunk for the second component.
 * Three thunks are built: the application z, and selectors y (stored) and
 * r (returned); see the sizing comment below. */
194 atomicModifyMutVarzh_fast
195 {
196     W_ mv, z, x, y, r;
197     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
198
199     /* If x is the current contents of the MutVar#, then 
200        We want to make the new contents point to
201
202          (sel_0 (f x))
203  
204        and the return value is
205          
206          (sel_1 (f x))
207
208         obviously we can share (f x).
209
210          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
211          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
212          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
213     */
214
215 #if MIN_UPD_SIZE > 1
216 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
217 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
218 #else
219 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
220 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
221 #endif
222
223 #if MIN_UPD_SIZE > 2
224 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
225 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
226 #else
227 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
228 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
229 #endif
230
231 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
232
233    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
234
235 #if defined(THREADED_RTS)
236     ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];  // global lock makes the read-modify-write atomic
237 #endif
238
239    x = StgMutVar_var(R1);
240
241    TICK_ALLOC_THUNK_2();
242    CCCS_ALLOC(THUNK_2_SIZE);
243    z = Hp - THUNK_2_SIZE + WDS(1);  // z = thunk for (f x)
244    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
245    LDV_RECORD_CREATE(z);
246    StgThunk_payload(z,0) = R2;
247    StgThunk_payload(z,1) = x;
248
249    TICK_ALLOC_THUNK_1();
250    CCCS_ALLOC(THUNK_1_SIZE);
251    y = z - THUNK_1_SIZE;  // y = thunk for (sel_0 z), the new contents
252    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
253    LDV_RECORD_CREATE(y);
254    StgThunk_payload(y,0) = z;
255
256    StgMutVar_var(R1) = y;
257    foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];
258
259    TICK_ALLOC_THUNK_1();
260    CCCS_ALLOC(THUNK_1_SIZE);
261    r = y - THUNK_1_SIZE;  // r = thunk for (sel_1 z), the result
262    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
263    LDV_RECORD_CREATE(r);
264    StgThunk_payload(r,0) = z;
265
266 #if defined(THREADED_RTS)
267     RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
268 #endif
269
270    RET_P(r);
271 }
272
273 /* -----------------------------------------------------------------------------
274    Weak Pointer Primitives
275    -------------------------------------------------------------------------- */
276
277 STRING(stg_weak_msg,"New weak pointer at %p\n")

/* mkWeak#: allocate a weak pointer with key R1, value R2 and optional
 * finalizer R3, and link it onto the global weak_ptr_list.
 * Returns the new Weak# in R1. */
279 mkWeakzh_fast
280 {
281   /* R1 = key
282      R2 = value
283      R3 = finalizer (or NULL)
284   */
285   W_ w;
286
287   if (R3 == NULL) {
288     R3 = stg_NO_FINALIZER_closure;  // normalise "no finalizer" to the shared closure
289   }
290
291   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
292
293   w = Hp - SIZEOF_StgWeak + WDS(1);
294   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
295
296   StgWeak_key(w)       = R1;
297   StgWeak_value(w)     = R2;
298   StgWeak_finalizer(w) = R3;
299
300   StgWeak_link(w)       = W_[weak_ptr_list];
301   W_[weak_ptr_list]     = w;
302
303   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
304
305   RET_P(w);
306 }
307
308
/* finalizeWeak#: kill the weak pointer in R1 (marking it DEAD_WEAK) and
 * hand back its finalizer.  Returns (# 0, noFinalizer #) if the weak ptr
 * was already dead or had no finalizer, else (# 1, finalizer #). */
309 finalizzeWeakzh_fast
310 {
311   /* R1 = weak ptr
312    */
313   W_ w, f;
314
315   w = R1;
316
317   // already dead?
318   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
319       RET_NP(0,stg_NO_FINALIZER_closure);
320   }
321
322   // kill it
323 #ifdef PROFILING
324   // @LDV profiling
325   // A weak pointer is inherently used, so we do not need to call
326   // LDV_recordDead_FILL_SLOP_DYNAMIC():
327   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
328   // or, LDV_recordDead():
329   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
330   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
331   // large as weak pointers, so there is no need to fill the slop, either.
332   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
333 #endif
334
335   //
336   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
337   //
338   SET_INFO(w,stg_DEAD_WEAK_info);
339   LDV_RECORD_CREATE(w);
340
341   f = StgWeak_finalizer(w);
342   StgDeadWeak_link(w) = StgWeak_link(w);  // read finalizer before overwriting fields
343
344   /* return the finalizer */
345   if (f == stg_NO_FINALIZER_closure) {
346       RET_NP(0,stg_NO_FINALIZER_closure);
347   } else {
348       RET_NP(1,f);
349   }
350 }
351
/* deRefWeak#: dereference the weak pointer in R1.
 * Returns (# 1, value #) while the weak ptr is alive, else (# 0, w #). */
352 deRefWeakzh_fast
353 {
354   /* R1 = weak ptr */
355   W_ w, code, val;
356
357   w = R1;
358   if (GET_INFO(w) == stg_WEAK_info) {
359     code = 1;
360     val = StgWeak_value(w);
361   } else {
362     code = 0;
363     val = w;  // dead: second component is unusable, caller must check code
364   }
365   RET_NP(code,val);
366 }
367
368 /* -----------------------------------------------------------------------------
369    Arbitrary-precision Integer operations.
370
371    There are some assumptions in this code that mp_limb_t == W_.  This is
372    the case for all the platforms that GHC supports, currently.
373    -------------------------------------------------------------------------- */
374
/* int2Integer#: convert an Int# to the (size, limb-array) representation
 * used for Integer; size is the sign-carrying limb count. */
375 int2Integerzh_fast
376 {
377    /* arguments: R1 = Int# */
378
379    W_ val, s, p;        /* to avoid aliasing */
380
381    val = R1;
382    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
383
384    p = Hp - SIZEOF_StgArrWords;
385    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
386    StgArrWords_words(p) = 1;
387
388    /* mpz_set_si is inlined here, makes things simpler */
389    if (%lt(val,0)) { 
390         s  = -1;
391         Hp(0) = -val;
392    } else { 
393      if (%gt(val,0)) {
394         s = 1;
395         Hp(0) = val;
396      } else {
397         s = 0;
398      }
399   }
400
401    /* returns (# size  :: Int#, 
402                  data  :: ByteArray# 
403                #)
404    */
405    RET_NP(s,p);
406 }
407
/* word2Integer#: convert a Word# to Integer representation; the size is
 * 0 for zero and 1 otherwise (never negative, since a Word# is unsigned). */
408 word2Integerzh_fast
409 {
410    /* arguments: R1 = Word# */
411
412    W_ val, s, p;        /* to avoid aliasing */
413
414    val = R1;
415
416    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
417
418    p = Hp - SIZEOF_StgArrWords;
419    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
420    StgArrWords_words(p) = 1;
421
422    if (val != 0) {
423         s = 1;
424         W_[Hp] = val;
425    } else {
426         s = 0;
427    }
428
429    /* returns (# size  :: Int#, 
430                  data  :: ByteArray# #)
431    */
432    RET_NP(s,p);
433 }
434
435
436 /*
437  * 'long long' primops for converting to/from Integers.
438  */
439
440 #ifdef SUPPORT_LONG_LONGS
441
/* int64ToInteger#: convert an Int64# to Integer representation, using one
 * or two limbs as needed and negating the size for negative values.
 * NOTE(review): assumes a 32-bit word — the hi/lo split via ">> 32" only
 * makes sense when W_ is 32 bits (this code lives under SUPPORT_LONG_LONGS). */
442 int64ToIntegerzh_fast
443 {
444    /* arguments: L1 = Int64# */
445
446    L_ val;
447    W_ hi, lo, s, neg, words_needed, p;
448
449    val = L1;
450    neg = 0;
451
452    hi = TO_W_(val >> 32);
453    lo = TO_W_(val);
454
455    if ( hi != 0 && hi != 0xFFFFFFFF )  { 
456        words_needed = 2;
457    } else { 
458        // minimum is one word
459        words_needed = 1;
460    }
461
462    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
463                NO_PTRS, int64ToIntegerzh_fast );
464
465    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
466    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
467    StgArrWords_words(p) = words_needed;
468
469    if ( %lt(hi,0) ) {
470      neg = 1;
471      lo = -lo;
472      if(lo == 0) {
473        hi = -hi;  // two's-complement negate of the 64-bit pair
474      } else {
475        hi = -hi - 1;
476      }
477    }
478
479    if ( words_needed == 2 )  { 
480       s = 2;
481       Hp(-1) = lo;
482       Hp(0) = hi;
483    } else { 
484        if ( lo != 0 ) {
485            s = 1;
486            Hp(0) = lo;
487        } else /* val==0 */  {
488            s = 0;
489        }
490    }
491    if ( neg != 0 ) {
492         s = -s;  // GMP convention: negative size means negative number
493    }
494
495    /* returns (# size  :: Int#, 
496                  data  :: ByteArray# #)
497    */
498    RET_NP(s,p);
499 }
/* word64ToInteger#: convert a Word64# to Integer representation; size is
 * always non-negative (0, 1 or 2 limbs).  Same 32-bit word assumption as
 * int64ToIntegerzh_fast above. */
500 word64ToIntegerzh_fast
501 {
502    /* arguments: L1 = Word64# */
503
504    L_ val;
505    W_ hi, lo, s, words_needed, p;
506
507    val = L1;
508    hi = TO_W_(val >> 32);
509    lo = TO_W_(val);
510
511    if ( hi != 0 ) {
512       words_needed = 2;
513    } else {
514       words_needed = 1;
515    }
516
517    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
518                NO_PTRS, word64ToIntegerzh_fast );
519
520    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
521    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
522    StgArrWords_words(p) = words_needed;
523
524    if ( hi != 0 ) { 
525      s = 2;
526      Hp(-1) = lo;
527      Hp(0)  = hi;
528    } else {
529       if ( lo != 0 ) {
530         s = 1;
531         Hp(0) = lo;
532      } else /* val==0 */  {
533       s = 0;
534      }
535   }
536
537    /* returns (# size  :: Int#, 
538                  data  :: ByteArray# #)
539    */
540    RET_NP(s,p);
541 }
542
543
544
545 #endif /* SUPPORT_LONG_LONGS */
546
547 /* ToDo: this is shockingly inefficient */
548
/* Scratch mpz_t storage for the GMP primops below.  In the non-threaded
 * RTS these are global bss buffers; in the threaded RTS FETCH_MP_TEMP
 * instead points each name at per-Capability space in the StgRegTable,
 * so concurrent Capabilities don't clobber each other. */
549 #ifndef THREADED_RTS
550 section "bss" {
551   mp_tmp1:
552     bits8 [SIZEOF_MP_INT];
553 }
554
555 section "bss" {
556   mp_tmp2:
557     bits8 [SIZEOF_MP_INT];
558 }
559
560 section "bss" {
561   mp_result1:
562     bits8 [SIZEOF_MP_INT];
563 }
564
565 section "bss" {
566   mp_result2:
567     bits8 [SIZEOF_MP_INT];
568 }
569 #endif
570
571 #ifdef THREADED_RTS
572 #define FETCH_MP_TEMP(X) \
573 W_ X; \
574 X = BaseReg + (OFFSET_StgRegTable_r ## X);
575 #else
576 #define FETCH_MP_TEMP(X) /* Nothing */
577 #endif
578
/* GMP_TAKE2_RET1(name, mp_fun): define a primop `name` taking two Integers
 * (R1/R2 = size/data of the first, R3/R4 = size/data of the second),
 * applying the binary GMP function mp_fun, and returning one Integer as
 * (# size, data #).  The result ByteArray# is recovered from the mpz's
 * _mp_d pointer by stepping back over the ArrWords header. */
579 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
580 name                                                                    \
581 {                                                                       \
582   CInt s1, s2;                                                          \
583   W_ d1, d2;                                                            \
584   FETCH_MP_TEMP(mp_tmp1);                                               \
585   FETCH_MP_TEMP(mp_tmp2);                                               \
586   FETCH_MP_TEMP(mp_result1)                                             \
587   FETCH_MP_TEMP(mp_result2);                                            \
588                                                                         \
589   /* call doYouWantToGC() */                                            \
590   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
591                                                                         \
592   s1 = W_TO_INT(R1);                                                    \
593   d1 = R2;                                                              \
594   s2 = W_TO_INT(R3);                                                    \
595   d2 = R4;                                                              \
596                                                                         \
597   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
598   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
599   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
600   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
601   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
602   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
603                                                                         \
604   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
605                                                                         \
606   /* Perform the operation */                                           \
607   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
608                                                                         \
609   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
610          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
611 }
612
/* GMP_TAKE1_RET1(name, mp_fun): like GMP_TAKE2_RET1 but for a unary GMP
 * function — one Integer in (R1 = size, R2 = data), one Integer out. */
613 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
614 name                                                                    \
615 {                                                                       \
616   CInt s1;                                                              \
617   W_ d1;                                                                \
618   FETCH_MP_TEMP(mp_tmp1);                                               \
619   FETCH_MP_TEMP(mp_result1)                                             \
620                                                                         \
621   /* call doYouWantToGC() */                                            \
622   MAYBE_GC(R2_PTR, name);                                               \
623                                                                         \
624   d1 = R2;                                                              \
625   s1 = W_TO_INT(R1);                                                    \
626                                                                         \
627   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
628   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
629   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
630                                                                         \
631   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
632                                                                         \
633   /* Perform the operation */                                           \
634   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
635                                                                         \
636   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
637          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
638 }
639
/* GMP_TAKE2_RET2(name, mp_fun): like GMP_TAKE2_RET1 but for GMP functions
 * producing two results (e.g. tdiv_qr) — returns (# s1, d1, s2, d2 #). */
640 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
641 name                                                                                    \
642 {                                                                                       \
643   CInt s1, s2;                                                                          \
644   W_ d1, d2;                                                                            \
645   FETCH_MP_TEMP(mp_tmp1);                                                               \
646   FETCH_MP_TEMP(mp_tmp2);                                                               \
647   FETCH_MP_TEMP(mp_result1)                                                             \
648   FETCH_MP_TEMP(mp_result2)                                                             \
649                                                                                         \
650   /* call doYouWantToGC() */                                                            \
651   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
652                                                                                         \
653   s1 = W_TO_INT(R1);                                                                    \
654   d1 = R2;                                                                              \
655   s2 = W_TO_INT(R3);                                                                    \
656   d2 = R4;                                                                              \
657                                                                                         \
658   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
659   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
660   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
661   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
662   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
663   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
664                                                                                         \
665   foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
666   foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
667                                                                                         \
668   /* Perform the operation */                                                           \
669   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
670                                                                                         \
671   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
672            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
673            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
674            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
675 }
676
/* Instantiate the macros above: one out-of-line primop per GMP operation. */
677 GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
678 GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
679 GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
680 GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
681 GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
682 GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
683 GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
684 GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
685 GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
686 GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
687 GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)
688
689 GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
690 GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
691
/* One-limb scratch word for gcdInt# / decodeFloat# / decodeDouble#. */
692 #ifndef THREADED_RTS
693 section "bss" {
694   mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
695 }
696 #endif
697
/* gcdInt#: gcd of two Int#s via GMP's low-level mpn_gcd_1, which takes a
 * one-limb number (mp_tmp_w, length 1) and a single limb. */
698 gcdIntzh_fast
699 {
700     /* R1 = the first Int#; R2 = the second Int# */
701     W_ r; 
702     FETCH_MP_TEMP(mp_tmp_w);
703
704     W_[mp_tmp_w] = R1;
705     (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
706
707     R1 = r;
708     /* Result parked in R1, return via info-pointer at TOS */
709     jump %ENTRY_CODE(Sp(0));
710 }
711
712
/* gcdIntegerInt#: gcd of an Integer (size R1, limbs R2) and an Int# (R3),
 * delegated to GMP's mpn_gcd_1 over the Integer's limb array. */
713 gcdIntegerIntzh_fast
714 {
715     /* R1 = s1; R2 = d1; R3 = the int */
716     W_ s1;
717     (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
718     R1 = s1;
719     
720     /* Result parked in R1, return via info-pointer at TOS */
721     jump %ENTRY_CODE(Sp(0));
722 }
723
724
/* cmpIntegerInt#: compare an Integer (R1/R2) against an Int# (R3).
 * Returns in R1 a negative, zero or positive word (sign-of-comparison). */
725 cmpIntegerIntzh_fast
726 {
727     /* R1 = s1; R2 = d1; R3 = the int */
728     W_ usize, vsize, v_digit, u_digit;
729
730     usize = R1;
731     vsize = 0;
732     v_digit = R3;
733
734     // paraphrased from __gmpz_cmp_si() in the GMP sources
735     if (%gt(v_digit,0)) {
736         vsize = 1;
737     } else { 
738         if (%lt(v_digit,0)) {
739             vsize = -1;
740             v_digit = -v_digit;
741         }
742     }
743
744     if (usize != vsize) {
745         R1 = usize - vsize; 
746         jump %ENTRY_CODE(Sp(0));
747     }
748
749     if (usize == 0) {
750         R1 = 0; 
751         jump %ENTRY_CODE(Sp(0));
752     }
753
754     u_digit = W_[BYTE_ARR_CTS(R2)];  // same sign & one limb: compare magnitudes
755
756     if (u_digit == v_digit) {
757         R1 = 0; 
758         jump %ENTRY_CODE(Sp(0));
759     }
760
761     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
762         R1 = usize; 
763     } else {
764         R1 = -usize; 
765     }
766
767     jump %ENTRY_CODE(Sp(0));
768 }
769
/* cmpInteger#: compare two Integers (R1/R2 and R3/R4).
 * Returns in R1 a negative, zero or positive word. */
770 cmpIntegerzh_fast
771 {
772     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
773     W_ usize, vsize, size, up, vp;
774     CInt cmp;
775
776     // paraphrased from __gmpz_cmp() in the GMP sources
777     usize = R1;
778     vsize = R3;
779
780     if (usize != vsize) {
781         R1 = usize - vsize; 
782         jump %ENTRY_CODE(Sp(0));
783     }
784
785     if (usize == 0) {
786         R1 = 0; 
787         jump %ENTRY_CODE(Sp(0));
788     }
789
790     if (%lt(usize,0)) { // NB. not <, which is unsigned
791         size = -usize;
792     } else {
793         size = usize;
794     }
795
796     up = BYTE_ARR_CTS(R2);
797     vp = BYTE_ARR_CTS(R4);
798
799     (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];
800
801     if (cmp == 0 :: CInt) {
802         R1 = 0; 
803         jump %ENTRY_CODE(Sp(0));
804     }
805
806     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
807         R1 = 1;  // magnitudes differ the same way the sign points
808     } else {
809         R1 = (-1); 
810     }
811     /* Result parked in R1, return via info-pointer at TOS */
812     jump %ENTRY_CODE(Sp(0));
813 }
814
/* integer2Int#: truncate an Integer (size R1, limbs R2) to an Int# —
 * the low limb, negated when the size is negative. */
815 integer2Intzh_fast
816 {
817     /* R1 = s; R2 = d */
818     W_ r, s;
819
820     s = R1;
821     if (s == 0) {
822         r = 0;
823     } else {
824         r = W_[R2 + SIZEOF_StgArrWords];  // first (least-significant) limb
825         if (%lt(s,0)) {
826             r = -r;
827         }
828     }
829     /* Result parked in R1, return via info-pointer at TOS */
830     R1 = r;
831     jump %ENTRY_CODE(Sp(0));
832 }
833
/* integer2Word#: truncate an Integer (size R1, limbs R2) to a Word#;
 * identical to integer2Int# at the bit level. */
834 integer2Wordzh_fast
835 {
836   /* R1 = s; R2 = d */
837   W_ r, s;
838
839   s = R1;
840   if (s == 0) {
841     r = 0;
842   } else {
843     r = W_[R2 + SIZEOF_StgArrWords];  // first (least-significant) limb
844     if (%lt(s,0)) {
845         r = -r;
846     }
847   }
848   /* Result parked in R1, return via info-pointer at TOS */
849   R1 = r;
850   jump %ENTRY_CODE(Sp(0));
851 }
852
/* decodeFloat#: split the Float# in F1 into exponent and mantissa via the
 * RTS helper __decodeFloat.  The mantissa limbs are written straight into
 * a freshly allocated one-word ByteArray#; returns
 * (# exponent :: Int#, mantissa-size :: Int#, mantissa :: ByteArray# #).
 * NOTE(review): the opening brace of this body was missing in the source
 * as received — restored here; the matching closing brace was present. */
853 decodeFloatzh_fast
854 {
855     W_ p;
856     F_ arg;
857     FETCH_MP_TEMP(mp_tmp1);
858     FETCH_MP_TEMP(mp_tmp_w);
859     
860     /* arguments: F1 = Float# */
861     arg = F1;
862     
863     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
864     
865     /* Be prepared to tell Lennart-coded __decodeFloat
866        where mantissa._mp_d can be put (it does not care about the rest) */
867     p = Hp - SIZEOF_StgArrWords;
868     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
869     StgArrWords_words(p) = 1;
870     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
871     
872     /* Perform the operation */
873     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
874     
875     /* returns: (Int# (expn), Int#, ByteArray#) */
876     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
877 }
878
/* Size of the mantissa ByteArray# needed for decodeDouble#. */
879 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
880 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
881
/* decodeDouble#: split the Double# in D1 into exponent and mantissa via
 * __decodeDouble, analogous to decodeFloatzh_fast above.  Returns
 * (# exponent :: Int#, mantissa-size :: Int#, mantissa :: ByteArray# #).
 * NOTE(review): the opening brace of this body was missing in the source
 * as received — restored here; the matching closing brace was present. */
882 decodeDoublezh_fast
883 {
884     D_ arg;
885     W_ p;
886     FETCH_MP_TEMP(mp_tmp1);
887     FETCH_MP_TEMP(mp_tmp_w);
888
889     /* arguments: D1 = Double# */
890     arg = D1;
891
892     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
893     
894     /* Be prepared to tell Lennart-coded __decodeDouble
895        where mantissa.d can be put (it does not care about the rest) */
896     p = Hp - ARR_SIZE + WDS(1);
897     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
898     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
899     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
900
901     /* Perform the operation */
902     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
903     
904     /* returns: (Int# (expn), Int#, ByteArray#) */
905     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
906 }
907
908 /* -----------------------------------------------------------------------------
909  * Concurrency primitives
910  * -------------------------------------------------------------------------- */
911
/* fork#: create a new IO thread running the closure in R1, schedule it,
 * and request a context switch.  Returns the new ThreadId# in R1. */
912 forkzh_fast
913 {
914   /* args: R1 = closure to spark */
915
916   MAYBE_GC(R1_PTR, forkzh_fast);
917
918   W_ closure;
919   W_ threadid;
920   closure = R1;
921
922   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
923                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
924                                 closure "ptr") [];
925
926   /* start blocked if the current thread is blocked */
927   StgTSO_flags(threadid) = 
928      StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
929                                 (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));
930
931   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
932
933   // switch at the earliest opportunity
934   CInt[context_switch] = 1 :: CInt;
935   
936   RET_P(threadid);
937 }
938
/* forkOn#: like fork#, but pins the new thread to the Capability numbered
 * by R1 (via scheduleThreadOn).  R2 is the closure; returns ThreadId# in R1. */
939 forkOnzh_fast
940 {
941   /* args: R1 = cpu, R2 = closure to spark */
942
943   MAYBE_GC(R2_PTR, forkOnzh_fast);
944
945   W_ cpu;
946   W_ closure;
947   W_ threadid;
948   cpu = R1;
949   closure = R2;
950
951   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
952                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
953                                 closure "ptr") [];
954
955   /* start blocked if the current thread is blocked */
956   StgTSO_flags(threadid) = 
957      StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
958                                 (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));
959
960   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
961
962   // switch at the earliest opportunity
963   CInt[context_switch] = 1 :: CInt;
964   
965   RET_P(threadid);
966 }
967
yieldzh_fast
{
  /* yield# primop: give up the capability voluntarily; the
     stg_yield_noregs stub saves the thread state and re-enters the
     scheduler.  No args, no registers live. */
  jump stg_yield_noregs;
}
972
myThreadIdzh_fast
{
  /* myThreadId# primop: the current TSO closure *is* the ThreadId#. */
  /* no args. */
  RET_P(CurrentTSO);
}
978
labelThreadzh_fast
{
  /* labelThread# primop: attach a debugging label to a thread.
     Only DEBUG builds record the label; otherwise this is a no-op. */
  /* args: 
        R1 = ThreadId#
        R2 = Addr#  (the label -- presumably a NUL-terminated C string;
                     confirm against labelThread() in the RTS) */
#ifdef DEBUG
  foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
#endif
  jump %ENTRY_CODE(Sp(0));
}
989
isCurrentThreadBoundzh_fast
{
  /* isCurrentThreadBound# primop: ask the RTS whether the current
     thread is bound to an OS thread.  No args; returns an Int#. */
  W_ bound;
  (bound) = foreign "C" isThreadBound(CurrentTSO) [];
  RET_N(bound);
}
997
998
999 /* -----------------------------------------------------------------------------
1000  * TVar primitives
1001  * -------------------------------------------------------------------------- */
1002
1003 #ifdef REG_R1
1004 #define SP_OFF 0
1005 #define IF_NOT_REG_R1(x) 
1006 #else
1007 #define SP_OFF 1
1008 #define IF_NOT_REG_R1(x) x
1009 #endif
1010
1011 // Catch retry frame ------------------------------------------------------------
1012
INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5)
{
   /* Return into a catchRetry# frame: the branch we were running
      finished without retrying, so try to commit its nested
      transaction into the enclosing one. */
   W_ r, frame, trec, outer;
   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
   (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
   if (r != 0) {
     /* Succeeded (either first branch or second branch) */
     StgTSO_trec(CurrentTSO) = outer;
     Sp = Sp + SIZEOF_StgCatchRetryFrame;
     IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
     jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
     /* Did not commit: re-execute */
     W_ new_trec;
     ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
     StgTSO_trec(CurrentTSO) = new_trec;
     /* Re-run whichever branch was active when the conflict arose */
     if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
       R1 = StgCatchRetryFrame_alt_code(frame);
     } else {
       R1 = StgCatchRetryFrame_first_code(frame);
     }
     jump stg_ap_v_fast;
   }
}
1045
1046
1047 // Atomically frame ------------------------------------------------------------
1048
INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
  /* Return into an atomically# frame.  The first return picks up the
     queue of invariants to check; subsequent returns step through that
     queue, one nested transaction per invariant; once the queue is
     empty we attempt the top-level commit. */
  W_ frame, trec, valid, next_invariant, q, outer;
  IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (outer == NO_TREC) {
    /* First time back at the atomically frame -- pick up invariants */
    ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;

  } else {
    /* Second/subsequent time back at the atomically frame -- abort the
     * tx that's checking the invariant and move on to the next one */
    StgTSO_trec(CurrentTSO) = outer;
    q = StgAtomicallyFrame_next_invariant_to_check(frame);
    StgInvariantCheckQueue_my_execution(q) = trec;
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    /* Don't free trec -- it's linked from q and will be stashed in the
     * invariant if we eventually commit. */
    q = StgInvariantCheckQueue_next_queue_entry(q);
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    trec = outer;
  }

  q = StgAtomicallyFrame_next_invariant_to_check(frame);

  if (q != END_INVARIANT_CHECK_QUEUE) {
    /* We can't commit yet: another invariant to check */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;

    next_invariant = StgInvariantCheckQueue_invariant(q);
    R1 = StgAtomicInvariant_code(next_invariant);
    jump stg_ap_v_fast;

  } else {

    /* We've got no more invariants to check, try to commit */
    (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
    if (valid != 0) {
      /* Transaction was valid: commit succeeded */
      StgTSO_trec(CurrentTSO) = NO_TREC;
      Sp = Sp + SIZEOF_StgAtomicallyFrame;
      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
      jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
      /* Transaction was not valid: try again from scratch with a
         fresh top-level transaction */
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
      R1 = StgAtomicallyFrame_code(frame);
      jump stg_ap_v_fast;
    }
  }
}
1112
INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
  /* The thread is blocked in retry#, waiting for one of the TVars in
     its read set to change.  It has been woken: decide whether to keep
     waiting or to re-run the transaction. */
  W_ frame, trec, valid;
  IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

  frame = Sp;

  /* The TSO is currently waiting: should we stop waiting? */
  (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
  if (valid != 0) {
    /* Previous attempt is still valid: no point trying again yet */
          IF_NOT_REG_R1(Sp_adj(-2);
                        Sp(1) = stg_NO_FINALIZER_closure;
                        Sp(0) = stg_ut_1_0_unreg_info;)
    jump stg_block_noregs;
  } else {
    /* Previous attempt is no longer valid: try again with a fresh
       top-level transaction; revert the frame to the normal
       atomically frame. */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    StgHeader_info(frame) = stg_atomically_frame_info;
    R1 = StgAtomicallyFrame_code(frame);
    jump stg_ap_v_fast;
  }
}
1141
1142 // STM catch frame --------------------------------------------------------------
1143
1144 #ifdef REG_R1
1145 #define SP_OFF 0
1146 #else
1147 #define SP_OFF 1
1148 #endif
1149
1150 /* Catch frames are very similar to update frames, but when entering
1151  * one we just pop the frame off the stack and perform the correct
1152  * kind of return to the activation record underneath us on the stack.
1153  */
1154
INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
   {
      /* Return into a catchSTM# frame: the protected code finished
         without raising, so try to commit its nested transaction. */
      IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
      W_ r, frame, trec, outer;
      frame = Sp;
      trec = StgTSO_trec(CurrentTSO);
      ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
      (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
      if (r != 0) {
        /* Commit succeeded */
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchSTMFrame;
        IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
        /* NOTE(review): the other STM frames jump via
           %ENTRY_CODE(Sp(SP_OFF)); the bare Sp(SP_OFF) here looks
           inconsistent for unregisterised builds -- confirm. */
        jump Sp(SP_OFF);
      } else {
        /* Commit failed: re-run the protected code in a fresh nested
           transaction. */
        W_ new_trec;
        ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
        StgTSO_trec(CurrentTSO) = new_trec;
        R1 = StgCatchSTMFrame_code(frame);
        jump stg_ap_v_fast;
      }
   }
1182
1183
1184 // Primop definition ------------------------------------------------------------
1185
atomicallyzh_fast
{
  /* atomically# primop: run an STM computation inside a new top-level
     transaction, pushing an atomically frame to commit it on return. */
  W_ frame;
  W_ old_trec;
  W_ new_trec;
  
  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR, atomicallyzh_fast); 

  /* Args: R1 = m :: STM a */
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);

  old_trec = StgTSO_trec(CurrentTSO);

  /* Nested transactions are not allowed; raise an exception */
  if (old_trec != NO_TREC) {
     R1 = base_GHCziIOBase_NestedAtomically_closure;
     jump raisezh_fast;
  }

  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
  StgAtomicallyFrame_code(frame) = R1;
  StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;

  /* Start the memory transaction; R1 stays live for the jump below */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1221
1222
catchSTMzh_fast
{
  /* catchSTM# primop: run R1 inside a nested transaction, with R2 as
     the exception handler recorded in a CATCH_STM_FRAME. */
  W_ frame;
  
  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */

  // stmStartTransaction may allocate: check before pushing the frame,
  // consistent with atomicallyzh_fast and catchRetryzh_fast.
  MAYBE_GC (R1_PTR & R2_PTR, catchSTMzh_fast);

  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
  StgCatchSTMFrame_handler(frame) = R2;
  StgCatchSTMFrame_code(frame) = R1;

  /* Start a nested transaction to run the body of the try block in.
     [R1]: R1 is still live -- it is the closure entered below. */
  W_ cur_trec;  
  W_ new_trec;
  cur_trec = StgTSO_trec(CurrentTSO);
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr") [R1];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1249
1250
catchRetryzh_fast
{
  /* catchRetry# primop: run R1 in a nested transaction; if it calls
     retry#, the CATCH_RETRY_FRAME switches to the alternative R2. */
  W_ frame;
  W_ new_trec;
  W_ trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 

  /* Args: R1 :: STM a */
  /* Args: R2 :: STM a */
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;
  
  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1281
1282
retryzh_fast
{
  /* retry# primop: abandon the current transaction attempt.  Walk up
     the stack to the innermost CATCH_RETRY_FRAME (try the alternative
     branch) or ATOMICALLY_FRAME (wait for a TVar in the read set to
     change, then re-run). */
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

  MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
  StgTSO_sp(CurrentTSO) = Sp;
  (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
  Sp = StgTSO_sp(CurrentTSO);
  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
    // Abort the transaction attempting the current branch
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    // (was "!running_alt_code != 0"; rewritten without the double
    // negation -- the field is a 0/1-valued CInt flag)
    if (StgCatchRetryFrame_running_alt_code(frame) == 0::I32) {
      // Retry in the first branch: try the alternative
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
      R1 = StgCatchRetryFrame_alt_code(frame);
      jump stg_ap_v_fast;
    } else {
      // Retry in the alternative code: propagate the retry
      StgTSO_trec(CurrentTSO) = outer;
      Sp = Sp + SIZEOF_StgCatchRetryFrame;
      goto retry_pop_stack;
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait 
  ASSERT(frame_type == ATOMICALLY_FRAME);
  if (outer != NO_TREC) {
    // We called retry while checking invariants, so abort the current
    // invariant check (merging its TVar accesses into the parent's read
    // set so we'll wait on them)
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    trec = outer;
    StgTSO_trec(CurrentTSO) = trec;
    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
  }
  ASSERT(outer == NO_TREC);

  (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
  if (r != 0) {
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
    StgHeader_info(frame) = stg_atomically_waiting_frame_info;
    Sp = frame;
    // Fix up the stack in the unregisterised case: the return convention is different.
    IF_NOT_REG_R1(Sp_adj(-2); 
                  Sp(1) = stg_NO_FINALIZER_closure;
                  Sp(0) = stg_ut_1_0_unreg_info;)
    R3 = trec; // passing to stmWaitUnblock()
    jump stg_block_stmwait;
  } else {
    // Transaction was not valid: retry immediately
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
    jump stg_ap_v_fast;
  }
}
1357
1358
checkzh_fast
{
  /* check# primop: register an invariant (R1) with the current
     transaction so it is checked when the transaction commits. */
  W_ cur_trec, inv;

  /* Args: R1 = invariant closure */
  MAYBE_GC (R1_PTR, checkzh_fast); 

  inv      = R1;
  cur_trec = StgTSO_trec(CurrentTSO);
  foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
                                     cur_trec "ptr",
                                     inv "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1374
1375
newTVarzh_fast
{
  /* newTVar# primop: allocate a fresh TVar holding the given value. */
  W_ tvar;
  W_ init;

  /* Args: R1 = initialisation value */

  MAYBE_GC (R1_PTR, newTVarzh_fast);  // call to stmNewTVar may allocate
  init = R1;
  ("ptr" tvar) = foreign "C" stmNewTVar(MyCapability() "ptr", init "ptr") [];
  RET_P(tvar);
}
1388
1389
readTVarzh_fast
{
  /* readTVar# primop: read a TVar within the current transaction,
     recording the access in the transaction's read set. */
  W_ cur_trec;
  W_ tv;
  W_ val;

  /* Args: R1 = TVar closure */

  MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
  tv       = R1;
  cur_trec = StgTSO_trec(CurrentTSO);
  ("ptr" val) = foreign "C" stmReadTVar(MyCapability() "ptr", cur_trec "ptr", tv "ptr") [];

  RET_P(val);
}
1405
1406
writeTVarzh_fast
{
  /* writeTVar# primop: write a TVar within the current transaction,
     recording the update in the transaction's write set. */
  W_ cur_trec;
  W_ tv;
  W_ val;
  
  /* Args: R1 = TVar closure */
  /*       R2 = New value    */

  MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
  tv       = R1;
  val      = R2;
  cur_trec = StgTSO_trec(CurrentTSO);
  foreign "C" stmWriteTVar(MyCapability() "ptr", cur_trec "ptr", tv "ptr", val "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1424
1425
1426 /* -----------------------------------------------------------------------------
1427  * MVar primitives
1428  *
1429  * take & putMVar work as follows.  Firstly, an important invariant:
1430  *
1431  *    If the MVar is full, then the blocking queue contains only
1432  *    threads blocked on putMVar, and if the MVar is empty then the
1433  *    blocking queue contains only threads blocked on takeMVar.
1434  *
 * takeMVar:
1436  *    MVar empty : then add ourselves to the blocking queue
1437  *    MVar full  : remove the value from the MVar, and
1438  *                 blocking queue empty     : return
1439  *                 blocking queue non-empty : perform the first blocked putMVar
1440  *                                            from the queue, and wake up the
1441  *                                            thread (MVar is now full again)
1442  *
1443  * putMVar is just the dual of the above algorithm.
1444  *
1445  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1446  * the stack of the thread waiting to do the putMVar.  See
1447  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1448  * the stack layout, and the PerformPut and PerformTake macros below.
1449  *
1450  * It is important that a blocked take or put is woken up with the
1451  * take/put already performed, because otherwise there would be a
1452  * small window of vulnerability where the thread could receive an
1453  * exception and never perform its take or put, and we'd end up with a
1454  * deadlock.
1455  *
1456  * -------------------------------------------------------------------------- */
1457
isEmptyMVarzh_fast
{
    /* isEmptyMVar# primop: an MVar is empty iff its value field holds
       the END_TSO_QUEUE sentinel.  Returns 1 for empty, 0 for full. */
    /* args: R1 = MVar closure */

    if (StgMVar_value(R1) != stg_END_TSO_QUEUE_closure) {
        RET_N(0);
    } else {
        RET_N(1);
    }
}
1468
newMVarzh_fast
{
    /* newMVar# primop: allocate a fresh, empty MVar (value and both
       queue ends set to the END_TSO_QUEUE sentinel). */
    /* args: none */
    W_ mvar;

    ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
  
    mvar = Hp - SIZEOF_StgMVar + WDS(1);
    SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
        // MVARs start dirty: generation 0 has no mutable list
    StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
    RET_P(mvar);
}
1484
1485
/* If R1 isn't available, pass it on the stack */
/* PerformTake(tso, value): deposit the result of a takeMVar on a
   blocked thread's stack -- Sp[1] gets the value, Sp[0] an info table
   that returns it (via R1 when registerised, via the stack otherwise).
   See stg_block_takemvar in HeapStackCheck.c for the stack layout. */
#ifdef REG_R1
#define PerformTake(tso, value)                         \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
#else
#define PerformTake(tso, value)                                 \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
#endif

/* PerformPut(tso, lval): pop the 3-word frame left by stg_block_putmvar
   (see HeapStackCheck.c) off the blocked thread's stack and fetch the
   value it was trying to put into lval. */
#define PerformPut(tso,lval)                    \
    StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
    lval = W_[StgTSO_sp(tso) - WDS(1)];
1500
takeMVarzh_fast
{
    /* takeMVar# primop: take the value from an MVar, blocking if it is
       empty.  If a putMVar is queued, perform it so the MVar refills
       and its thread is woken (see the invariant in the header above). */
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */
    mvar = R1;

#if defined(THREADED_RTS)
    /* lock the closure against concurrent access by other capabilities */
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif
        
    if (info == stg_MVAR_CLEAN_info) {
        /* about to mutate: put the MVar on the mutable list */
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* If the MVar is empty, put ourselves on its blocking queue,
     * and wait until we're woken up.
     */
    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        /* stg_block_takemvar also unlocks the closure */
        jump stg_block_takemvar;
  }

  /* we got the value... */
  val = StgMVar_value(mvar);

  if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
  {
      /* There are putMVar(s) waiting... 
       * wake up the first thread on the queue
       */
      ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

      /* actually perform the putMVar for the thread that we just woke up */
      tso = StgMVar_head(mvar);
      PerformPut(tso,StgMVar_value(mvar));
      dirtyTSO(tso);

#if defined(GRAN) || defined(PAR)
      /* ToDo: check 2nd arg (mvar) is right */
      /* NOTE(review): missing "ptr" annotations here, unlike the
         corresponding GRAN/PAR call in tryTakeMVarzh_fast -- confirm */
      ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
      StgMVar_head(mvar) = tso;
#else
      ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", 
                                         StgMVar_head(mvar) "ptr") [];
      StgMVar_head(mvar) = tso;
#endif

      if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
          StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
      }

#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
      SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
      RET_P(val);
  } 
  else
  {
      /* No further putMVars, MVar is now empty */
      StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
 
#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
      SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif

      RET_P(val);
  }
}
1585
1586
tryTakeMVarzh_fast
{
    /* tryTakeMVar# primop: non-blocking takeMVar.  Returns (0, dummy)
       when the MVar is empty, (1, value) when it is full. */
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */

    mvar = R1;

#if defined(THREADED_RTS)
    /* lock the closure against concurrent access by other capabilities */
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        /* nothing was mutated: restore the original info pointer */
        unlockClosure(mvar, info);
#endif
        /* HACK: we need a pointer to pass back, 
         * so we abuse NO_FINALIZER_closure
         */
        RET_NP(0, stg_NO_FINALIZER_closure);
    }

    if (info == stg_MVAR_CLEAN_info) {
        /* about to mutate: put the MVar on the mutable list */
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are putMVar(s) waiting... 
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));
        dirtyTSO(tso);

#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr",
                                           StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else 
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    
    RET_NP(1, val);
}
1662
1663
putMVarzh_fast
{
    /* putMVar# primop: put a value into an MVar, blocking if it is
       full.  If a takeMVar is queued, perform it so the waiting thread
       receives the value directly (see the invariant in the header). */
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    /* lock the closure; R2 stays live across the call */
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_MVAR_CLEAN_info) {
        /* about to mutate: put the MVar on the mutable list */
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* MVar is full: queue ourselves on its blocking queue and block */
    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        /* stg_block_putmvar also unlocks the closure */
        jump stg_block_putmvar;
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        dirtyTSO(tso);
      
#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    
    /* ToDo: yield afterward for better communication performance? */
}
1741
1742
tryPutMVarzh_fast
{
    /* tryPutMVar# primop: non-blocking putMVar.  Returns 0 when the
       MVar is already full, 1 after a successful put. */
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    /* lock the closure; R2 stays live across the call */
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        /* nothing was mutated: restore the original info pointer */
        unlockClosure(mvar, info);
#endif
        RET_N(0);
    }
  
    if (info == stg_MVAR_CLEAN_info) {
        /* about to mutate: put the MVar on the mutable list */
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
        
        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        dirtyTSO(tso);
      
#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    
    RET_N(1);
    /* ToDo: yield afterward for better communication performance? */
}
1812
1813
1814 /* -----------------------------------------------------------------------------
1815    Stable pointer primitives
1816    -------------------------------------------------------------------------  */
1817
makeStableNamezh_fast
{
    /* makeStableName# primop: return the StableName# for the object in
       R1, allocating and registering one in the stable pointer table if
       none exists yet. */
    W_ index, sn_obj;

    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
  
    (index) = foreign "C" lookupStableName(R1 "ptr") [];

    /* Is there already a StableName for this heap object?
     *  stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        /* No: build a fresh StgStableName and cache it in the table */
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        /* Yes: reuse the cached StableName object */
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }
    
    RET_P(sn_obj);
}
1840
1841
makeStablePtrzh_fast
{
    /* makeStablePtr# primop: register R1 in the stable pointer table
       and return the resulting StablePtr#.  Args: R1 = a */
    W_ stable;
    MAYBE_GC(R1_PTR, makeStablePtrzh_fast);  // getStablePtr may trigger GC
    ("ptr" stable) = foreign "C" getStablePtr(R1 "ptr") [];
    RET_N(stable);
}
1850
deRefStablePtrzh_fast
{
    /* deRefStablePtr# primop: index the stable pointer table with the
       StablePtr# in R1 and return the referenced object. */
    W_ idx, obj;
    idx = R1;
    obj = snEntry_addr(W_[stable_ptr_table] + idx*SIZEOF_snEntry);
    RET_P(obj);
}
1859
1860 /* -----------------------------------------------------------------------------
1861    Bytecode object primitives
1862    -------------------------------------------------------------------------  */
1863
newBCOzh_fast
{
    /* newBCO# primop: allocate a bytecode object and copy the
       arity/bitmap words from the given array into its tail. */
    /* R1 = instrs
       R2 = literals
       R3 = ptrs
       R4 = arity
       R5 = bitmap array
    */
    W_ bco, bitmap_arr, bytes, words;
    
    bitmap_arr = R5;

    /* size = fixed BCO header/fields + the bitmap words */
    words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
    bytes = WDS(words);

    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );

    bco = Hp - bytes + WDS(1);
    SET_HDR(bco, stg_BCO_info, W_[CCCS]);
    
    StgBCO_instrs(bco)     = R1;
    StgBCO_literals(bco)   = R2;
    StgBCO_ptrs(bco)       = R3;
    StgBCO_arity(bco)      = HALF_W_(R4);
    StgBCO_size(bco)       = HALF_W_(words);
    
    // Copy the arity/bitmap info into the BCO
    W_ i;
    i = 0;
for:
    if (i < StgArrWords_words(bitmap_arr)) {
        StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
        i = i + 1;
        goto for;
    }
    
    RET_P(bco);
}
1902
1903
// Wrap a zero-arity BCO (R1) in an updatable AP thunk and return it.
mkApUpd0zh_fast
{
    // R1 = the BCO# for the AP
    //  
    W_ ap;

    // This function is *only* used to wrap zero-arity BCOs in an
    // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
    // saturated and always points directly to a FUN or BCO.
    ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
           StgBCO_arity(R1) == HALF_W_(0));

    // May GC; R1 is the only live pointer.  The AP has no payload beyond
    // the function field, so the allocation is just SIZEOF_StgAP.
    HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
    TICK_ALLOC_UP_THK(0, 0);    // ticky-ticky accounting: 0-arg thunk
    CCCS_ALLOC(SIZEOF_StgAP);   // profiling: charge the current cost centre

    ap = Hp - SIZEOF_StgAP + WDS(1);
    SET_HDR(ap, stg_AP_info, W_[CCCS]);
    
    StgAP_n_args(ap) = HALF_W_(0);  // saturated: zero pending arguments
    StgAP_fun(ap) = R1;
    
    RET_P(ap);
}
1928
// Decompose a closure into (info table ptr, array of pointer fields,
// array of non-pointer words) for heap-inspection tools such as GHCi's
// :print and vacuum-style heap walkers.
unpackClosurezh_fast
{
/* args: R1 = closure to analyze */
// TODO: Consider the absence of ptrs or nonptrs as a special case ?

    W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
    info  = %GET_STD_INFO(UNTAG(R1));

    // Some closures have non-standard layout, so we omit those here.
    W_ type;
    type = TO_W_(%INFO_TYPE(info));
    switch [0 .. N_CLOSURE_TYPES] type {
    case THUNK_SELECTOR : {
        // A selector has exactly one pointer field (the selectee).
        ptrs = 1;
        nptrs = 0;
        goto out;
    }
    case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
         THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
        // Thunk/AP/PAP/BCO payloads are deliberately not exposed:
        // both counts are forced to zero.
        ptrs = 0;
        nptrs = 0;
        goto out;
    }
    default: {
        // Standard layout: counts come straight from the info table.
        ptrs  = TO_W_(%INFO_PTRS(info)); 
        nptrs = TO_W_(%INFO_NPTRS(info));
        goto out;
    }}
out:

    W_ ptrs_arr_sz, nptrs_arr_sz;
    nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
    ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);

    // Allocate both result arrays in one go; may GC, R1 kept live.
    ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);

    // R1 may have changed across the GC point; re-untag it.
    W_ clos;
    clos = UNTAG(R1);

    // Layout in the allocated space: ptrs_arr first, nptrs_arr above it.
    ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
    nptrs_arr = Hp - nptrs_arr_sz + WDS(1);

    SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
    p = 0;
for:
    if(p < ptrs) {
         W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
         p = p + 1;
         goto for;
    }
    
    SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(nptrs_arr) = nptrs;
    p = 0;
for2:
    // Non-pointer words follow the pointer words in the closure payload.
    if(p < nptrs) {
         W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
         p = p + 1;
         goto for2;
    }
    RET_NPP(info, ptrs_arr, nptrs_arr);
}
1992
1993 /* -----------------------------------------------------------------------------
1994    Thread I/O blocking primitives
1995    -------------------------------------------------------------------------- */
1996
1997 /* Add a thread to the end of the blocked queue. (C-- version of the C
1998  * macro in Schedule.h).
1999  */
/* Appends 'tso' at the tail of the queue; when the queue is empty the
 * head is set as well.  The TSO must not already be on a queue (checked
 * by the ASSERT).  No locking -- non-threaded RTS only.
 */
#define APPEND_TO_BLOCKED_QUEUE(tso)                    \
    ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
    if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
      W_[blocked_queue_hd] = tso;                       \
    } else {                                            \
      StgTSO_link(W_[blocked_queue_tl]) = tso;          \
    }                                                   \
    W_[blocked_queue_tl] = tso;
2008
// Block the current thread until file descriptor R1 is readable.
// Non-threaded RTS only; the threaded RTS uses safe foreign calls instead.
waitReadzh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitRead# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
    // block_info records which fd the scheduler should select() on.
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2025
// Block the current thread until file descriptor R1 is writable.
// Non-threaded RTS only; mirrors waitReadzh_fast.
waitWritezh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitWrite# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
    // block_info records which fd the scheduler should select() on.
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2042
2043
STRING(stg_delayzh_malloc_str, "delayzh_fast")
// Block the current thread for R1 microseconds.  Non-threaded RTS only.
// On mingw32 the delay is issued as an async I/O request; elsewhere the
// thread is inserted into the time-ordered sleeping queue.
delayzh_fast
{
#ifdef mingw32_HOST_OS
    W_ ares;
    CInt reqID;
#else
    W_ t, prev, target;
#endif

#ifdef THREADED_RTS
    foreign "C" barf("delay# on threaded RTS") never returns;
#else

    /* args: R1 (microsecond delay amount) */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;

#ifdef mingw32_HOST_OS

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_delayzh_malloc_str);
    (reqID) = foreign "C" addDelayRequest(R1);
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;

    /* Having all async-blocked threads reside on the blocked_queue
     * simplifies matters, so change the status to OnDoProc put the
     * delayed thread on the blocked_queue.
     */
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async_void;

#else

    // Convert the microsecond delay into a wake-up time measured in
    // scheduler ticks, rounding the delay up so we never wake early.
    W_ time;
    W_ divisor;
    (time) = foreign "C" getourtimeofday() [R1];
    divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags))*1000;
    target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
           + time + 1; /* Add 1 as getourtimeofday rounds down */
    StgTSO_block_info(CurrentTSO) = target;

    /* Insert the new thread in the sleeping queue. */
    // The queue is kept sorted by wake-up time; walk until we find the
    // first entry whose target is >= ours.
    prev = NULL;
    t = W_[sleeping_queue];
while:
    if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
        prev = t;
        t = StgTSO_link(t);
        goto while;
    }

    StgTSO_link(CurrentTSO) = t;
    if (prev == NULL) {
        W_[sleeping_queue] = CurrentTSO;
    } else {
        StgTSO_link(prev) = CurrentTSO;
    }
    jump stg_block_noregs;
#endif
#endif /* !THREADED_RTS */
}
2111
2112
2113 #ifdef mingw32_HOST_OS
STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
// Issue an asynchronous read request (Windows I/O manager) and block the
// current thread until it completes.  Non-threaded RTS only.
asyncReadzh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncRead# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncReadzh_malloc_str)
                        [R1,R2,R3,R4];
    // 0/*FALSE*/ selects a read (as opposed to write) request.
    (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
    // The result record is filled in by the I/O manager on completion;
    // block_info lets the scheduler find it when the request finishes.
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2141
STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
// Issue an asynchronous write request (Windows I/O manager) and block the
// current thread until it completes.  Mirrors asyncReadzh_fast.
asyncWritezh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncWrite# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncWritezh_malloc_str)
                        [R1,R2,R3,R4];
    // 1/*TRUE*/ selects a write request.
    (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];

    // Completion record, filled in by the I/O manager when done.
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2169
STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
// Run a C procedure (R1) with one argument (R2) on a worker thread of the
// Windows I/O manager, blocking the Haskell thread until it returns.
asyncDoProczh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
#else

    /* args: R1 = proc, R2 = param */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncDoProczh_malloc_str) 
                                [R1,R2];
    (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
    // Completion record, filled in by the I/O manager when done.
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO) = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2197 #endif
2198
2199 // noDuplicate# tries to ensure that none of the thunks under
2200 // evaluation by the current thread are also under evaluation by
2201 // another thread.  It relies on *both* threads doing noDuplicate#;
2202 // the second one will get blocked if they are duplicating some work.
noDuplicatezh_fast
{
    // Suspend the thread's stack state so threadPaused can walk it and
    // claim (black-hole) the thunks currently under evaluation.
    SAVE_THREAD_STATE();
    ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
    foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
    
    // threadPaused may have decided to kill this thread (e.g. if another
    // thread already owns the computation); otherwise resume normally.
    if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
        jump stg_threadFinished;
    } else {
        LOAD_THREAD_STATE();
        ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
        // Return to the continuation on top of the stack.
        jump %ENTRY_CODE(Sp(0));
    }
}
2217
// Fetch a payload word from an AP_STACK closure.  Returns (1, value) on
// success, or (0, the original closure) when R1 is not an AP_STACK.
getApStackValzh_fast
{
   W_ stack, idx, result, valid;

   /* args: R1 = AP_STACK, R2 = offset */
   stack = R1;
   idx   = R2;

   if (%INFO_PTR(stack) != stg_AP_STACK_info) {
        // Not an AP_STACK (may have been updated): signal failure and
        // hand the closure back unchanged.
        valid  = 0;
        result = R1;
   } else {
        valid  = 1;
        result = StgAP_STACK_payload(stack,idx);
   }
   RET_NP(valid,result);
}