put the @N suffix on stdcall foreign calls in .cmm code
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 #ifdef __PIC__
31 import __gmpz_init;
32 import __gmpz_add;
33 import __gmpz_sub;
34 import __gmpz_mul;
35 import __gmpz_gcd;
36 import __gmpn_gcd_1;
37 import __gmpn_cmp;
38 import __gmpz_tdiv_q;
39 import __gmpz_tdiv_r;
40 import __gmpz_tdiv_qr;
41 import __gmpz_fdiv_qr;
42 import __gmpz_divexact;
43 import __gmpz_and;
44 import __gmpz_xor;
45 import __gmpz_ior;
46 import __gmpz_com;
47 import base_GHCziIOBase_NestedAtomically_closure;
48 import pthread_mutex_lock;
49 import pthread_mutex_unlock;
50 import EnterCriticalSection
51 import LeaveCriticalSection
52 #endif
53
54 /*-----------------------------------------------------------------------------
55   Array Primitives
56
57   Basically just new*Array - the others are all inline macros.
58
59   The size arg is always passed in R1, and the result returned in R1.
60
61   The slow entry point is for returning from a heap check, the saved
62   size argument must be re-loaded from the stack.
63   -------------------------------------------------------------------------- */
64
65 /* for objects that are *less* than the size of a word, make sure we
66  * round up to the nearest word for the size of the array.
67  */
68
69 newByteArrayzh_fast
70 {
71     W_ words, payload_words, n, p;
72     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
73     n = R1;
74     payload_words = ROUNDUP_BYTES_TO_WDS(n);
75     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
76     ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
79     StgArrWords_words(p) = payload_words;
80     RET_P(p);
81 }
82
83 newPinnedByteArrayzh_fast
84 {
85     W_ words, payload_words, n, p;
86
87     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
88     n = R1;
89     payload_words = ROUNDUP_BYTES_TO_WDS(n);
90
91     // We want an 8-byte aligned array.  allocatePinned() gives us
92     // 8-byte aligned memory by default, but we want to align the
93     // *goods* inside the ArrWords object, so we have to check the
94     // size of the ArrWords header and adjust our size accordingly.
95     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
96     if ((SIZEOF_StgArrWords & 7) != 0) {
97         words = words + 1;
98     }
99
100     ("ptr" p) = foreign "C" allocatePinned(words) [];
101     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
102
103     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
104     // have to push the object forward one word so that the goods
105     // fall on an 8-byte boundary.
106     if ((SIZEOF_StgArrWords & 7) != 0) {
107         p = p + WDS(1);
108     }
109
110     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
111     StgArrWords_words(p) = payload_words;
112     RET_P(p);
113 }
114
115 newArrayzh_fast
116 {
117     W_ words, n, init, arr, p;
118     /* Args: R1 = words, R2 = initialisation value */
119
120     n = R1;
121     MAYBE_GC(R2_PTR,newArrayzh_fast);
122
123     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
124     ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
125     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
126
127     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
128     StgMutArrPtrs_ptrs(arr) = n;
129
130     // Initialise all elements of the the array with the value in R2
131     init = R2;
132     p = arr + SIZEOF_StgMutArrPtrs;
133   for:
134     if (p < arr + WDS(words)) {
135         W_[p] = init;
136         p = p + WDS(1);
137         goto for;
138     }
139
140     RET_P(arr);
141 }
142
143 unsafeThawArrayzh_fast
144 {
145   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
146   //
147   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
148   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
149   // it on the mutable list for the GC to remove (removing something from
150   // the mutable list is not easy, because the mut_list is only singly-linked).
151   // 
152   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
153   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
154   // to indicate that it is still on the mutable list.
155   //
156   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
157   // either it is on a mut_list, or it isn't.  We adopt the convention that
158   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
159   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
160   // we put it on the mutable list more than once, but it would get scavenged
161   // multiple times during GC, which would be unnecessarily slow.
162   //
163   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
164         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
165         recordMutable(R1, R1);
166         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
167         RET_P(R1);
168   } else {
169         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
170         RET_P(R1);
171   }
172 }
173
174 /* -----------------------------------------------------------------------------
175    MutVar primitives
176    -------------------------------------------------------------------------- */
177
178 newMutVarzh_fast
179 {
180     W_ mv;
181     /* Args: R1 = initialisation value */
182
183     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
184
185     mv = Hp - SIZEOF_StgMutVar + WDS(1);
186     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
187     StgMutVar_var(mv) = R1;
188     
189     RET_P(mv);
190 }
191
192 atomicModifyMutVarzh_fast
193 {
194     W_ mv, z, x, y, r;
195     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
196
197     /* If x is the current contents of the MutVar#, then 
198        We want to make the new contents point to
199
200          (sel_0 (f x))
201  
202        and the return value is
203          
204          (sel_1 (f x))
205
206         obviously we can share (f x).
207
208          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
209          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
210          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
211     */
212
213 #if MIN_UPD_SIZE > 1
214 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
215 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
216 #else
217 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
218 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
219 #endif
220
221 #if MIN_UPD_SIZE > 2
222 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
223 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
224 #else
225 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
226 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
227 #endif
228
229 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
230
231    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
232
233 #if defined(THREADED_RTS)
234     ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
235 #endif
236
237    x = StgMutVar_var(R1);
238
239    TICK_ALLOC_THUNK_2();
240    CCCS_ALLOC(THUNK_2_SIZE);
241    z = Hp - THUNK_2_SIZE + WDS(1);
242    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
243    LDV_RECORD_CREATE(z);
244    StgThunk_payload(z,0) = R2;
245    StgThunk_payload(z,1) = x;
246
247    TICK_ALLOC_THUNK_1();
248    CCCS_ALLOC(THUNK_1_SIZE);
249    y = z - THUNK_1_SIZE;
250    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
251    LDV_RECORD_CREATE(y);
252    StgThunk_payload(y,0) = z;
253
254    StgMutVar_var(R1) = y;
255    foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];
256
257    TICK_ALLOC_THUNK_1();
258    CCCS_ALLOC(THUNK_1_SIZE);
259    r = y - THUNK_1_SIZE;
260    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
261    LDV_RECORD_CREATE(r);
262    StgThunk_payload(r,0) = z;
263
264 #if defined(THREADED_RTS)
265     RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
266 #endif
267
268    RET_P(r);
269 }
270
271 /* -----------------------------------------------------------------------------
272    Weak Pointer Primitives
273    -------------------------------------------------------------------------- */
274
275 STRING(stg_weak_msg,"New weak pointer at %p\n")
276
277 mkWeakzh_fast
278 {
279   /* R1 = key
280      R2 = value
281      R3 = finalizer (or NULL)
282   */
283   W_ w;
284
285   if (R3 == NULL) {
286     R3 = stg_NO_FINALIZER_closure;
287   }
288
289   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
290
291   w = Hp - SIZEOF_StgWeak + WDS(1);
292   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
293
294   StgWeak_key(w)       = R1;
295   StgWeak_value(w)     = R2;
296   StgWeak_finalizer(w) = R3;
297
298   StgWeak_link(w)       = W_[weak_ptr_list];
299   W_[weak_ptr_list]     = w;
300
301   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
302
303   RET_P(w);
304 }
305
306
307 finalizzeWeakzh_fast
308 {
309   /* R1 = weak ptr
310    */
311   W_ w, f;
312
313   w = R1;
314
315   // already dead?
316   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
317       RET_NP(0,stg_NO_FINALIZER_closure);
318   }
319
320   // kill it
321 #ifdef PROFILING
322   // @LDV profiling
323   // A weak pointer is inherently used, so we do not need to call
324   // LDV_recordDead_FILL_SLOP_DYNAMIC():
325   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
326   // or, LDV_recordDead():
327   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
328   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
329   // large as weak pointers, so there is no need to fill the slop, either.
330   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
331 #endif
332
333   //
334   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
335   //
336   SET_INFO(w,stg_DEAD_WEAK_info);
337   LDV_RECORD_CREATE(w);
338
339   f = StgWeak_finalizer(w);
340   StgDeadWeak_link(w) = StgWeak_link(w);
341
342   /* return the finalizer */
343   if (f == stg_NO_FINALIZER_closure) {
344       RET_NP(0,stg_NO_FINALIZER_closure);
345   } else {
346       RET_NP(1,f);
347   }
348 }
349
350 deRefWeakzh_fast
351 {
352   /* R1 = weak ptr */
353   W_ w, code, val;
354
355   w = R1;
356   if (GET_INFO(w) == stg_WEAK_info) {
357     code = 1;
358     val = StgWeak_value(w);
359   } else {
360     code = 0;
361     val = w;
362   }
363   RET_NP(code,val);
364 }
365
366 /* -----------------------------------------------------------------------------
367    Arbitrary-precision Integer operations.
368
369    There are some assumptions in this code that mp_limb_t == W_.  This is
370    the case for all the platforms that GHC supports, currently.
371    -------------------------------------------------------------------------- */
372
373 int2Integerzh_fast
374 {
375    /* arguments: R1 = Int# */
376
377    W_ val, s, p;        /* to avoid aliasing */
378
379    val = R1;
380    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
381
382    p = Hp - SIZEOF_StgArrWords;
383    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
384    StgArrWords_words(p) = 1;
385
386    /* mpz_set_si is inlined here, makes things simpler */
387    if (%lt(val,0)) { 
388         s  = -1;
389         Hp(0) = -val;
390    } else { 
391      if (%gt(val,0)) {
392         s = 1;
393         Hp(0) = val;
394      } else {
395         s = 0;
396      }
397   }
398
399    /* returns (# size  :: Int#, 
400                  data  :: ByteArray# 
401                #)
402    */
403    RET_NP(s,p);
404 }
405
406 word2Integerzh_fast
407 {
408    /* arguments: R1 = Word# */
409
410    W_ val, s, p;        /* to avoid aliasing */
411
412    val = R1;
413
414    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
415
416    p = Hp - SIZEOF_StgArrWords;
417    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
418    StgArrWords_words(p) = 1;
419
420    if (val != 0) {
421         s = 1;
422         W_[Hp] = val;
423    } else {
424         s = 0;
425    }
426
427    /* returns (# size  :: Int#, 
428                  data  :: ByteArray# #)
429    */
430    RET_NP(s,p);
431 }
432
433
434 /*
435  * 'long long' primops for converting to/from Integers.
436  */
437
438 #ifdef SUPPORT_LONG_LONGS
439
440 int64ToIntegerzh_fast
441 {
442    /* arguments: L1 = Int64# */
443
444    L_ val;
445    W_ hi, lo, s, neg, words_needed, p;
446
447    val = L1;
448    neg = 0;
449
450    hi = TO_W_(val >> 32);
451    lo = TO_W_(val);
452
453    if ( hi != 0 && hi != 0xFFFFFFFF )  { 
454        words_needed = 2;
455    } else { 
456        // minimum is one word
457        words_needed = 1;
458    }
459
460    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
461                NO_PTRS, int64ToIntegerzh_fast );
462
463    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
464    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
465    StgArrWords_words(p) = words_needed;
466
467    if ( %lt(hi,0) ) {
468      neg = 1;
469      lo = -lo;
470      if(lo == 0) {
471        hi = -hi;
472      } else {
473        hi = -hi - 1;
474      }
475    }
476
477    if ( words_needed == 2 )  { 
478       s = 2;
479       Hp(-1) = lo;
480       Hp(0) = hi;
481    } else { 
482        if ( lo != 0 ) {
483            s = 1;
484            Hp(0) = lo;
485        } else /* val==0 */  {
486            s = 0;
487        }
488    }
489    if ( neg != 0 ) {
490         s = -s;
491    }
492
493    /* returns (# size  :: Int#, 
494                  data  :: ByteArray# #)
495    */
496    RET_NP(s,p);
497 }
498 word64ToIntegerzh_fast
499 {
500    /* arguments: L1 = Word64# */
501
502    L_ val;
503    W_ hi, lo, s, words_needed, p;
504
505    val = L1;
506    hi = TO_W_(val >> 32);
507    lo = TO_W_(val);
508
509    if ( hi != 0 ) {
510       words_needed = 2;
511    } else {
512       words_needed = 1;
513    }
514
515    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
516                NO_PTRS, word64ToIntegerzh_fast );
517
518    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
519    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
520    StgArrWords_words(p) = words_needed;
521
522    if ( hi != 0 ) { 
523      s = 2;
524      Hp(-1) = lo;
525      Hp(0)  = hi;
526    } else {
527       if ( lo != 0 ) {
528         s = 1;
529         Hp(0) = lo;
530      } else /* val==0 */  {
531       s = 0;
532      }
533   }
534
535    /* returns (# size  :: Int#, 
536                  data  :: ByteArray# #)
537    */
538    RET_NP(s,p);
539 }
540
541
542
543 #endif /* SUPPORT_LONG_LONGS */
544
545 /* ToDo: this is shockingly inefficient */
546
547 #ifndef THREADED_RTS
548 section "bss" {
549   mp_tmp1:
550     bits8 [SIZEOF_MP_INT];
551 }
552
553 section "bss" {
554   mp_tmp2:
555     bits8 [SIZEOF_MP_INT];
556 }
557
558 section "bss" {
559   mp_result1:
560     bits8 [SIZEOF_MP_INT];
561 }
562
563 section "bss" {
564   mp_result2:
565     bits8 [SIZEOF_MP_INT];
566 }
567 #endif
568
569 #ifdef THREADED_RTS
570 #define FETCH_MP_TEMP(X) \
571 W_ X; \
572 X = BaseReg + (OFFSET_StgRegTable_r ## X);
573 #else
574 #define FETCH_MP_TEMP(X) /* Nothing */
575 #endif
576
577 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
578 name                                                                    \
579 {                                                                       \
580   CInt s1, s2;                                                          \
581   W_ d1, d2;                                                            \
582   FETCH_MP_TEMP(mp_tmp1);                                               \
583   FETCH_MP_TEMP(mp_tmp2);                                               \
584   FETCH_MP_TEMP(mp_result1)                                             \
585   FETCH_MP_TEMP(mp_result2);                                            \
586                                                                         \
587   /* call doYouWantToGC() */                                            \
588   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
589                                                                         \
590   s1 = W_TO_INT(R1);                                                    \
591   d1 = R2;                                                              \
592   s2 = W_TO_INT(R3);                                                    \
593   d2 = R4;                                                              \
594                                                                         \
595   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
596   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
597   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
598   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
599   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
600   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
601                                                                         \
602   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
603                                                                         \
604   /* Perform the operation */                                           \
605   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
606                                                                         \
607   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
608          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
609 }
610
611 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
612 name                                                                    \
613 {                                                                       \
614   CInt s1;                                                              \
615   W_ d1;                                                                \
616   FETCH_MP_TEMP(mp_tmp1);                                               \
617   FETCH_MP_TEMP(mp_result1)                                             \
618                                                                         \
619   /* call doYouWantToGC() */                                            \
620   MAYBE_GC(R2_PTR, name);                                               \
621                                                                         \
622   d1 = R2;                                                              \
623   s1 = W_TO_INT(R1);                                                    \
624                                                                         \
625   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
626   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
627   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
628                                                                         \
629   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
630                                                                         \
631   /* Perform the operation */                                           \
632   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
633                                                                         \
634   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
635          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
636 }
637
638 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
639 name                                                                                    \
640 {                                                                                       \
641   CInt s1, s2;                                                                          \
642   W_ d1, d2;                                                                            \
643   FETCH_MP_TEMP(mp_tmp1);                                                               \
644   FETCH_MP_TEMP(mp_tmp2);                                                               \
645   FETCH_MP_TEMP(mp_result1)                                                             \
646   FETCH_MP_TEMP(mp_result2)                                                             \
647                                                                                         \
648   /* call doYouWantToGC() */                                                            \
649   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
650                                                                                         \
651   s1 = W_TO_INT(R1);                                                                    \
652   d1 = R2;                                                                              \
653   s2 = W_TO_INT(R3);                                                                    \
654   d2 = R4;                                                                              \
655                                                                                         \
656   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
657   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
658   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
659   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
660   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
661   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
662                                                                                         \
663   foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
664   foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
665                                                                                         \
666   /* Perform the operation */                                                           \
667   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
668                                                                                         \
669   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
670            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
671            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
672            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
673 }
674
675 GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
676 GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
677 GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
678 GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
679 GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
680 GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
681 GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
682 GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
683 GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
684 GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
685 GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)
686
687 GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
688 GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
689
690 #ifndef THREADED_RTS
691 section "bss" {
692   mp_tmp_w:  W_; // NB. mp_tmp_w is really an here mp_limb_t
693 }
694 #endif
695
696 gcdIntzh_fast
697 {
698     /* R1 = the first Int#; R2 = the second Int# */
699     W_ r; 
700     FETCH_MP_TEMP(mp_tmp_w);
701
702     W_[mp_tmp_w] = R1;
703     (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
704
705     R1 = r;
706     /* Result parked in R1, return via info-pointer at TOS */
707     jump %ENTRY_CODE(Sp(0));
708 }
709
710
711 gcdIntegerIntzh_fast
712 {
713     /* R1 = s1; R2 = d1; R3 = the int */
714     W_ s1;
715     (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
716     R1 = s1;
717     
718     /* Result parked in R1, return via info-pointer at TOS */
719     jump %ENTRY_CODE(Sp(0));
720 }
721
722
723 cmpIntegerIntzh_fast
724 {
725     /* R1 = s1; R2 = d1; R3 = the int */
726     W_ usize, vsize, v_digit, u_digit;
727
728     usize = R1;
729     vsize = 0;
730     v_digit = R3;
731
732     // paraphrased from __gmpz_cmp_si() in the GMP sources
733     if (%gt(v_digit,0)) {
734         vsize = 1;
735     } else { 
736         if (%lt(v_digit,0)) {
737             vsize = -1;
738             v_digit = -v_digit;
739         }
740     }
741
742     if (usize != vsize) {
743         R1 = usize - vsize; 
744         jump %ENTRY_CODE(Sp(0));
745     }
746
747     if (usize == 0) {
748         R1 = 0; 
749         jump %ENTRY_CODE(Sp(0));
750     }
751
752     u_digit = W_[BYTE_ARR_CTS(R2)];
753
754     if (u_digit == v_digit) {
755         R1 = 0; 
756         jump %ENTRY_CODE(Sp(0));
757     }
758
759     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
760         R1 = usize; 
761     } else {
762         R1 = -usize; 
763     }
764
765     jump %ENTRY_CODE(Sp(0));
766 }
767
768 cmpIntegerzh_fast
769 {
770     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
771     W_ usize, vsize, size, up, vp;
772     CInt cmp;
773
774     // paraphrased from __gmpz_cmp() in the GMP sources
775     usize = R1;
776     vsize = R3;
777
778     if (usize != vsize) {
779         R1 = usize - vsize; 
780         jump %ENTRY_CODE(Sp(0));
781     }
782
783     if (usize == 0) {
784         R1 = 0; 
785         jump %ENTRY_CODE(Sp(0));
786     }
787
788     if (%lt(usize,0)) { // NB. not <, which is unsigned
789         size = -usize;
790     } else {
791         size = usize;
792     }
793
794     up = BYTE_ARR_CTS(R2);
795     vp = BYTE_ARR_CTS(R4);
796
797     (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];
798
799     if (cmp == 0 :: CInt) {
800         R1 = 0; 
801         jump %ENTRY_CODE(Sp(0));
802     }
803
804     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
805         R1 = 1;
806     } else {
807         R1 = (-1); 
808     }
809     /* Result parked in R1, return via info-pointer at TOS */
810     jump %ENTRY_CODE(Sp(0));
811 }
812
813 integer2Intzh_fast
814 {
815     /* R1 = s; R2 = d */
816     W_ r, s;
817
818     s = R1;
819     if (s == 0) {
820         r = 0;
821     } else {
822         r = W_[R2 + SIZEOF_StgArrWords];
823         if (%lt(s,0)) {
824             r = -r;
825         }
826     }
827     /* Result parked in R1, return via info-pointer at TOS */
828     R1 = r;
829     jump %ENTRY_CODE(Sp(0));
830 }
831
832 integer2Wordzh_fast
833 {
834   /* R1 = s; R2 = d */
835   W_ r, s;
836
837   s = R1;
838   if (s == 0) {
839     r = 0;
840   } else {
841     r = W_[R2 + SIZEOF_StgArrWords];
842     if (%lt(s,0)) {
843         r = -r;
844     }
845   }
846   /* Result parked in R1, return via info-pointer at TOS */
847   R1 = r;
848   jump %ENTRY_CODE(Sp(0));
849 }
850
851 decodeFloatzh_fast
852
853     W_ p;
854     F_ arg;
855     FETCH_MP_TEMP(mp_tmp1);
856     FETCH_MP_TEMP(mp_tmp_w);
857     
858     /* arguments: F1 = Float# */
859     arg = F1;
860     
861     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
862     
863     /* Be prepared to tell Lennart-coded __decodeFloat
864        where mantissa._mp_d can be put (it does not care about the rest) */
865     p = Hp - SIZEOF_StgArrWords;
866     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
867     StgArrWords_words(p) = 1;
868     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
869     
870     /* Perform the operation */
871     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
872     
873     /* returns: (Int# (expn), Int#, ByteArray#) */
874     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
875 }
876
877 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
878 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
879
880 decodeDoublezh_fast
881
882     D_ arg;
883     W_ p;
884     FETCH_MP_TEMP(mp_tmp1);
885     FETCH_MP_TEMP(mp_tmp_w);
886
887     /* arguments: D1 = Double# */
888     arg = D1;
889
890     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
891     
892     /* Be prepared to tell Lennart-coded __decodeDouble
893        where mantissa.d can be put (it does not care about the rest) */
894     p = Hp - ARR_SIZE + WDS(1);
895     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
896     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
897     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
898
899     /* Perform the operation */
900     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
901     
902     /* returns: (Int# (expn), Int#, ByteArray#) */
903     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
904 }
905
906 /* -----------------------------------------------------------------------------
907  * Concurrency primitives
908  * -------------------------------------------------------------------------- */
909
910 forkzh_fast
911 {
912   /* args: R1 = closure to spark */
913
914   MAYBE_GC(R1_PTR, forkzh_fast);
915
916   W_ closure;
917   W_ threadid;
918   closure = R1;
919
920   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
921                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
922                                 closure "ptr") [];
923   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
924
925   // switch at the earliest opportunity
926   CInt[context_switch] = 1 :: CInt;
927   
928   RET_P(threadid);
929 }
930
931 forkOnzh_fast
932 {
933   /* args: R1 = cpu, R2 = closure to spark */
934
935   MAYBE_GC(R2_PTR, forkOnzh_fast);
936
937   W_ cpu;
938   W_ closure;
939   W_ threadid;
940   cpu = R1;
941   closure = R2;
942
943   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
944                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
945                                 closure "ptr") [];
946   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
947
948   // switch at the earliest opportunity
949   CInt[context_switch] = 1 :: CInt;
950   
951   RET_P(threadid);
952 }
953
954 yieldzh_fast
955 {
956   jump stg_yield_noregs;
957 }
958
959 myThreadIdzh_fast
960 {
961   /* no args. */
962   RET_P(CurrentTSO);
963 }
964
965 labelThreadzh_fast
966 {
967   /* args: 
968         R1 = ThreadId#
969         R2 = Addr# */
970 #ifdef DEBUG
971   foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
972 #endif
973   jump %ENTRY_CODE(Sp(0));
974 }
975
976 isCurrentThreadBoundzh_fast
977 {
978   /* no args */
979   W_ r;
980   (r) = foreign "C" isThreadBound(CurrentTSO) [];
981   RET_N(r);
982 }
983
984
985 /* -----------------------------------------------------------------------------
986  * TVar primitives
987  * -------------------------------------------------------------------------- */
988
989 #ifdef REG_R1
990 #define SP_OFF 0
991 #define IF_NOT_REG_R1(x) 
992 #else
993 #define SP_OFF 1
994 #define IF_NOT_REG_R1(x) x
995 #endif
996
997 // Catch retry frame ------------------------------------------------------------
998
999 INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
1000 #if defined(PROFILING)
1001   W_ unused1, W_ unused2,
1002 #endif
1003   W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5)
1004 {
1005    W_ r, frame, trec, outer;
1006    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1007
1008    frame = Sp;
1009    trec = StgTSO_trec(CurrentTSO);
1010    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1011    (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
1012    if (r != 0) {
1013      /* Succeeded (either first branch or second branch) */
1014      StgTSO_trec(CurrentTSO) = outer;
1015      Sp = Sp + SIZEOF_StgCatchRetryFrame;
1016      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1017      jump %ENTRY_CODE(Sp(SP_OFF));
1018    } else {
1019      /* Did not commit: re-execute */
1020      W_ new_trec;
1021      ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1022      StgTSO_trec(CurrentTSO) = new_trec;
1023      if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1024        R1 = StgCatchRetryFrame_alt_code(frame);
1025      } else {
1026        R1 = StgCatchRetryFrame_first_code(frame);
1027      }
1028      jump stg_ap_v_fast;
1029    }
1030 }
1031
1032
1033 // Atomically frame ------------------------------------------------------------
1034
1035 INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
1036 #if defined(PROFILING)
1037   W_ unused1, W_ unused2,
1038 #endif
1039   "ptr" W_ unused3, "ptr" W_ unused4)
1040 {
1041   W_ frame, trec, valid, next_invariant, q, outer;
1042   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1043
1044   frame = Sp;
1045   trec = StgTSO_trec(CurrentTSO);
1046   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1047
1048   if (outer == NO_TREC) {
1049     /* First time back at the atomically frame -- pick up invariants */
1050     ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
1051     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
1052
1053   } else {
1054     /* Second/subsequent time back at the atomically frame -- abort the
1055      * tx that's checking the invariant and move on to the next one */
1056     StgTSO_trec(CurrentTSO) = outer;
1057     q = StgAtomicallyFrame_next_invariant_to_check(frame);
1058     StgInvariantCheckQueue_my_execution(q) = trec;
1059     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1060     /* Don't free trec -- it's linked from q and will be stashed in the
1061      * invariant if we eventually commit. */
1062     q = StgInvariantCheckQueue_next_queue_entry(q);
1063     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
1064     trec = outer;
1065   }
1066
1067   q = StgAtomicallyFrame_next_invariant_to_check(frame);
1068
1069   if (q != END_INVARIANT_CHECK_QUEUE) {
1070     /* We can't commit yet: another invariant to check */
1071     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
1072     StgTSO_trec(CurrentTSO) = trec;
1073
1074     next_invariant = StgInvariantCheckQueue_invariant(q);
1075     R1 = StgAtomicInvariant_code(next_invariant);
1076     jump stg_ap_v_fast;
1077
1078   } else {
1079
1080     /* We've got no more invariants to check, try to commit */
1081     (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
1082     if (valid != 0) {
1083       /* Transaction was valid: commit succeeded */
1084       StgTSO_trec(CurrentTSO) = NO_TREC;
1085       Sp = Sp + SIZEOF_StgAtomicallyFrame;
1086       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1087       jump %ENTRY_CODE(Sp(SP_OFF));
1088     } else {
1089       /* Transaction was not valid: try again */
1090       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1091       StgTSO_trec(CurrentTSO) = trec;
1092       StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
1093       R1 = StgAtomicallyFrame_code(frame);
1094       jump stg_ap_v_fast;
1095     }
1096   }
1097 }
1098
1099 INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
1100 #if defined(PROFILING)
1101   W_ unused1, W_ unused2,
1102 #endif
1103   "ptr" W_ unused3, "ptr" W_ unused4)
1104 {
1105   W_ frame, trec, valid;
1106   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1107
1108   frame = Sp;
1109
1110   /* The TSO is currently waiting: should we stop waiting? */
1111   (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
1112   if (valid != 0) {
1113     /* Previous attempt is still valid: no point trying again yet */
1114           IF_NOT_REG_R1(Sp_adj(-2);
1115                         Sp(1) = stg_NO_FINALIZER_closure;
1116                         Sp(0) = stg_ut_1_0_unreg_info;)
1117     jump stg_block_noregs;
1118   } else {
1119     /* Previous attempt is no longer valid: try again */
1120     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1121     StgTSO_trec(CurrentTSO) = trec;
1122     StgHeader_info(frame) = stg_atomically_frame_info;
1123     R1 = StgAtomicallyFrame_code(frame);
1124     jump stg_ap_v_fast;
1125   }
1126 }
1127
1128 // STM catch frame --------------------------------------------------------------
1129
1130 #ifdef REG_R1
1131 #define SP_OFF 0
1132 #else
1133 #define SP_OFF 1
1134 #endif
1135
1136 /* Catch frames are very similar to update frames, but when entering
1137  * one we just pop the frame off the stack and perform the correct
1138  * kind of return to the activation record underneath us on the stack.
1139  */
1140
1141 INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
1142 #if defined(PROFILING)
1143   W_ unused1, W_ unused2,
1144 #endif
1145   "ptr" W_ unused3, "ptr" W_ unused4)
1146    {
1147       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1148       W_ r, frame, trec, outer;
1149       frame = Sp;
1150       trec = StgTSO_trec(CurrentTSO);
1151       ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1152       (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
1153       if (r != 0) {
1154         /* Commit succeeded */
1155         StgTSO_trec(CurrentTSO) = outer;
1156         Sp = Sp + SIZEOF_StgCatchSTMFrame;
1157         IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1158         jump Sp(SP_OFF);
1159       } else {
1160         /* Commit failed */
1161         W_ new_trec;
1162         ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1163         StgTSO_trec(CurrentTSO) = new_trec;
1164         R1 = StgCatchSTMFrame_code(frame);
1165         jump stg_ap_v_fast;
1166       }
1167    }
1168
1169
1170 // Primop definition ------------------------------------------------------------
1171
1172 atomicallyzh_fast
1173 {
1174   W_ frame;
1175   W_ old_trec;
1176   W_ new_trec;
1177   
1178   // stmStartTransaction may allocate
1179   MAYBE_GC (R1_PTR, atomicallyzh_fast); 
1180
1181   /* Args: R1 = m :: STM a */
1182   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1183
1184   old_trec = StgTSO_trec(CurrentTSO);
1185
1186   /* Nested transactions are not allowed; raise an exception */
1187   if (old_trec != NO_TREC) {
1188      R1 = base_GHCziIOBase_NestedAtomically_closure;
1189      jump raisezh_fast;
1190   }
1191
1192   /* Set up the atomically frame */
1193   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1194   frame = Sp;
1195
1196   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1197   StgAtomicallyFrame_code(frame) = R1;
1198   StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
1199
1200   /* Start the memory transcation */
1201   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
1202   StgTSO_trec(CurrentTSO) = new_trec;
1203
1204   /* Apply R1 to the realworld token */
1205   jump stg_ap_v_fast;
1206 }
1207
1208
1209 catchSTMzh_fast
1210 {
1211   W_ frame;
1212   
1213   /* Args: R1 :: STM a */
1214   /* Args: R2 :: Exception -> STM a */
1215   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1216
1217   /* Set up the catch frame */
1218   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1219   frame = Sp;
1220
1221   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1222   StgCatchSTMFrame_handler(frame) = R2;
1223   StgCatchSTMFrame_code(frame) = R1;
1224
1225   /* Start a nested transaction to run the body of the try block in */
1226   W_ cur_trec;  
1227   W_ new_trec;
1228   cur_trec = StgTSO_trec(CurrentTSO);
1229   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
1230   StgTSO_trec(CurrentTSO) = new_trec;
1231
1232   /* Apply R1 to the realworld token */
1233   jump stg_ap_v_fast;
1234 }
1235
1236
1237 catchRetryzh_fast
1238 {
1239   W_ frame;
1240   W_ new_trec;
1241   W_ trec;
1242
1243   // stmStartTransaction may allocate
1244   MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 
1245
1246   /* Args: R1 :: STM a */
1247   /* Args: R2 :: STM a */
1248   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1249
1250   /* Start a nested transaction within which to run the first code */
1251   trec = StgTSO_trec(CurrentTSO);
1252   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
1253   StgTSO_trec(CurrentTSO) = new_trec;
1254
1255   /* Set up the catch-retry frame */
1256   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1257   frame = Sp;
1258   
1259   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1260   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1261   StgCatchRetryFrame_first_code(frame) = R1;
1262   StgCatchRetryFrame_alt_code(frame) = R2;
1263
1264   /* Apply R1 to the realworld token */
1265   jump stg_ap_v_fast;
1266 }
1267
1268
1269 retryzh_fast
1270 {
1271   W_ frame_type;
1272   W_ frame;
1273   W_ trec;
1274   W_ outer;
1275   W_ r;
1276
1277   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1278
1279   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1280 retry_pop_stack:
1281   StgTSO_sp(CurrentTSO) = Sp;
1282   (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
1283   Sp = StgTSO_sp(CurrentTSO);
1284   frame = Sp;
1285   trec = StgTSO_trec(CurrentTSO);
1286   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1287
1288   if (frame_type == CATCH_RETRY_FRAME) {
1289     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1290     ASSERT(outer != NO_TREC);
1291     // Abort the transaction attempting the current branch
1292     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1293     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
1294     if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1295       // Retry in the first branch: try the alternative
1296       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1297       StgTSO_trec(CurrentTSO) = trec;
1298       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1299       R1 = StgCatchRetryFrame_alt_code(frame);
1300       jump stg_ap_v_fast;
1301     } else {
1302       // Retry in the alternative code: propagate the retry
1303       StgTSO_trec(CurrentTSO) = outer;
1304       Sp = Sp + SIZEOF_StgCatchRetryFrame;
1305       goto retry_pop_stack;
1306     }
1307   }
1308
1309   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1310   ASSERT(frame_type == ATOMICALLY_FRAME);
1311   if (outer != NO_TREC) {
1312     // We called retry while checking invariants, so abort the current
1313     // invariant check (merging its TVar accesses into the parents read
1314     // set so we'll wait on them)
1315     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1316     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
1317     trec = outer;
1318     StgTSO_trec(CurrentTSO) = trec;
1319     ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1320   }
1321   ASSERT(outer == NO_TREC);
1322
1323   (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
1324   if (r != 0) {
1325     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1326     StgHeader_info(frame) = stg_atomically_waiting_frame_info;
1327     Sp = frame;
1328     // Fix up the stack in the unregisterised case: the return convention is different.
1329     IF_NOT_REG_R1(Sp_adj(-2); 
1330                   Sp(1) = stg_NO_FINALIZER_closure;
1331                   Sp(0) = stg_ut_1_0_unreg_info;)
1332     R3 = trec; // passing to stmWaitUnblock()
1333     jump stg_block_stmwait;
1334   } else {
1335     // Transaction was not valid: retry immediately
1336     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1337     StgTSO_trec(CurrentTSO) = trec;
1338     R1 = StgAtomicallyFrame_code(frame);
1339     Sp = frame;
1340     jump stg_ap_v_fast;
1341   }
1342 }
1343
1344
1345 checkzh_fast
1346 {
1347   W_ trec, closure;
1348
1349   /* Args: R1 = invariant closure */
1350   MAYBE_GC (R1_PTR, checkzh_fast); 
1351
1352   trec = StgTSO_trec(CurrentTSO);
1353   closure = R1;
1354   foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
1355                                      trec "ptr",
1356                                      closure "ptr") [];
1357
1358   jump %ENTRY_CODE(Sp(0));
1359 }
1360
1361
1362 newTVarzh_fast
1363 {
1364   W_ tv;
1365   W_ new_value;
1366
1367   /* Args: R1 = initialisation value */
1368
1369   MAYBE_GC (R1_PTR, newTVarzh_fast); 
1370   new_value = R1;
1371   ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
1372   RET_P(tv);
1373 }
1374
1375
1376 readTVarzh_fast
1377 {
1378   W_ trec;
1379   W_ tvar;
1380   W_ result;
1381
1382   /* Args: R1 = TVar closure */
1383
1384   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1385   trec = StgTSO_trec(CurrentTSO);
1386   tvar = R1;
1387   ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
1388
1389   RET_P(result);
1390 }
1391
1392
1393 writeTVarzh_fast
1394 {
1395   W_ trec;
1396   W_ tvar;
1397   W_ new_value;
1398   
1399   /* Args: R1 = TVar closure */
1400   /*       R2 = New value    */
1401
1402   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1403   trec = StgTSO_trec(CurrentTSO);
1404   tvar = R1;
1405   new_value = R2;
1406   foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1407
1408   jump %ENTRY_CODE(Sp(0));
1409 }
1410
1411
1412 /* -----------------------------------------------------------------------------
1413  * MVar primitives
1414  *
1415  * take & putMVar work as follows.  Firstly, an important invariant:
1416  *
1417  *    If the MVar is full, then the blocking queue contains only
1418  *    threads blocked on putMVar, and if the MVar is empty then the
1419  *    blocking queue contains only threads blocked on takeMVar.
1420  *
1421  * takeMvar:
1422  *    MVar empty : then add ourselves to the blocking queue
1423  *    MVar full  : remove the value from the MVar, and
1424  *                 blocking queue empty     : return
1425  *                 blocking queue non-empty : perform the first blocked putMVar
1426  *                                            from the queue, and wake up the
1427  *                                            thread (MVar is now full again)
1428  *
1429  * putMVar is just the dual of the above algorithm.
1430  *
1431  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1432  * the stack of the thread waiting to do the putMVar.  See
1433  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1434  * the stack layout, and the PerformPut and PerformTake macros below.
1435  *
1436  * It is important that a blocked take or put is woken up with the
1437  * take/put already performed, because otherwise there would be a
1438  * small window of vulnerability where the thread could receive an
1439  * exception and never perform its take or put, and we'd end up with a
1440  * deadlock.
1441  *
1442  * -------------------------------------------------------------------------- */
1443
1444 isEmptyMVarzh_fast
1445 {
1446     /* args: R1 = MVar closure */
1447
1448     if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
1449         RET_N(1);
1450     } else {
1451         RET_N(0);
1452     }
1453 }
1454
1455 newMVarzh_fast
1456 {
1457     /* args: none */
1458     W_ mvar;
1459
1460     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1461   
1462     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1463     SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
1464     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1465     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1466     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1467     RET_P(mvar);
1468 }
1469
1470
1471 /* If R1 isn't available, pass it on the stack */
1472 #ifdef REG_R1
1473 #define PerformTake(tso, value)                         \
1474     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1475     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1476 #else
1477 #define PerformTake(tso, value)                                 \
1478     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1479     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1480 #endif
1481
1482 #define PerformPut(tso,lval)                    \
1483     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1484     lval = W_[StgTSO_sp(tso) - WDS(1)];
1485
1486 takeMVarzh_fast
1487 {
1488     W_ mvar, val, info, tso;
1489
1490     /* args: R1 = MVar closure */
1491     mvar = R1;
1492
1493 #if defined(THREADED_RTS)
1494     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1495 #else
1496     info = GET_INFO(mvar);
1497 #endif
1498
1499     /* If the MVar is empty, put ourselves on its blocking queue,
1500      * and wait until we're woken up.
1501      */
1502     if (info == stg_EMPTY_MVAR_info) {
1503         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1504             StgMVar_head(mvar) = CurrentTSO;
1505         } else {
1506             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1507         }
1508         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1509         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1510         StgTSO_block_info(CurrentTSO)  = mvar;
1511         StgMVar_tail(mvar) = CurrentTSO;
1512         
1513         jump stg_block_takemvar;
1514   }
1515
1516   /* we got the value... */
1517   val = StgMVar_value(mvar);
1518
1519   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1520   {
1521       /* There are putMVar(s) waiting... 
1522        * wake up the first thread on the queue
1523        */
1524       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1525
1526       /* actually perform the putMVar for the thread that we just woke up */
1527       tso = StgMVar_head(mvar);
1528       PerformPut(tso,StgMVar_value(mvar));
1529       dirtyTSO(tso);
1530
1531 #if defined(GRAN) || defined(PAR)
1532       /* ToDo: check 2nd arg (mvar) is right */
1533       ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
1534       StgMVar_head(mvar) = tso;
1535 #else
1536       ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", 
1537                                          StgMVar_head(mvar) "ptr") [];
1538       StgMVar_head(mvar) = tso;
1539 #endif
1540
1541       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1542           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1543       }
1544
1545 #if defined(THREADED_RTS)
1546       unlockClosure(mvar, stg_FULL_MVAR_info);
1547 #endif
1548       RET_P(val);
1549   } 
1550   else
1551   {
1552       /* No further putMVars, MVar is now empty */
1553       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1554  
1555 #if defined(THREADED_RTS)
1556       unlockClosure(mvar, stg_EMPTY_MVAR_info);
1557 #else
1558       SET_INFO(mvar,stg_EMPTY_MVAR_info);
1559 #endif
1560
1561       RET_P(val);
1562   }
1563 }
1564
1565
1566 tryTakeMVarzh_fast
1567 {
1568     W_ mvar, val, info, tso;
1569
1570     /* args: R1 = MVar closure */
1571
1572     mvar = R1;
1573
1574 #if defined(THREADED_RTS)
1575     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1576 #else
1577     info = GET_INFO(mvar);
1578 #endif
1579
1580     if (info == stg_EMPTY_MVAR_info) {
1581 #if defined(THREADED_RTS)
1582         unlockClosure(mvar, stg_EMPTY_MVAR_info);
1583 #endif
1584         /* HACK: we need a pointer to pass back, 
1585          * so we abuse NO_FINALIZER_closure
1586          */
1587         RET_NP(0, stg_NO_FINALIZER_closure);
1588     }
1589
1590     /* we got the value... */
1591     val = StgMVar_value(mvar);
1592
1593     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1594
1595         /* There are putMVar(s) waiting... 
1596          * wake up the first thread on the queue
1597          */
1598         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1599
1600         /* actually perform the putMVar for the thread that we just woke up */
1601         tso = StgMVar_head(mvar);
1602         PerformPut(tso,StgMVar_value(mvar));
1603         dirtyTSO(tso);
1604
1605 #if defined(GRAN) || defined(PAR)
1606         /* ToDo: check 2nd arg (mvar) is right */
1607         ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
1608         StgMVar_head(mvar) = tso;
1609 #else
1610         ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr",
1611                                            StgMVar_head(mvar) "ptr") [];
1612         StgMVar_head(mvar) = tso;
1613 #endif
1614
1615         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1616             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1617         }
1618 #if defined(THREADED_RTS)
1619         unlockClosure(mvar, stg_FULL_MVAR_info);
1620 #endif
1621     }
1622     else 
1623     {
1624         /* No further putMVars, MVar is now empty */
1625         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1626 #if defined(THREADED_RTS)
1627         unlockClosure(mvar, stg_EMPTY_MVAR_info);
1628 #else
1629         SET_INFO(mvar,stg_EMPTY_MVAR_info);
1630 #endif
1631     }
1632     
1633     RET_NP(1, val);
1634 }
1635
1636
1637 putMVarzh_fast
1638 {
1639     W_ mvar, info, tso;
1640
1641     /* args: R1 = MVar, R2 = value */
1642     mvar = R1;
1643
1644 #if defined(THREADED_RTS)
1645     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
1646 #else
1647     info = GET_INFO(mvar);
1648 #endif
1649
1650     if (info == stg_FULL_MVAR_info) {
1651         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1652             StgMVar_head(mvar) = CurrentTSO;
1653         } else {
1654             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1655         }
1656         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1657         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1658         StgTSO_block_info(CurrentTSO)  = mvar;
1659         StgMVar_tail(mvar) = CurrentTSO;
1660         
1661         jump stg_block_putmvar;
1662     }
1663   
1664     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1665
1666         /* There are takeMVar(s) waiting: wake up the first one
1667          */
1668         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1669
1670         /* actually perform the takeMVar */
1671         tso = StgMVar_head(mvar);
1672         PerformTake(tso, R2);
1673         dirtyTSO(tso);
1674       
1675 #if defined(GRAN) || defined(PAR)
1676         /* ToDo: check 2nd arg (mvar) is right */
1677         ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
1678         StgMVar_head(mvar) = tso;
1679 #else
1680         ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
1681         StgMVar_head(mvar) = tso;
1682 #endif
1683
1684         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1685             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1686         }
1687
1688 #if defined(THREADED_RTS)
1689         unlockClosure(mvar, stg_EMPTY_MVAR_info);
1690 #endif
1691         jump %ENTRY_CODE(Sp(0));
1692     }
1693     else
1694     {
1695         /* No further takes, the MVar is now full. */
1696         StgMVar_value(mvar) = R2;
1697
1698 #if defined(THREADED_RTS)
1699         unlockClosure(mvar, stg_FULL_MVAR_info);
1700 #else
1701         SET_INFO(mvar,stg_FULL_MVAR_info);
1702 #endif
1703         jump %ENTRY_CODE(Sp(0));
1704     }
1705     
1706     /* ToDo: yield afterward for better communication performance? */
1707 }
1708
1709
1710 tryPutMVarzh_fast
1711 {
1712     W_ mvar, info, tso;
1713
1714     /* args: R1 = MVar, R2 = value */
1715     mvar = R1;
1716
1717 #if defined(THREADED_RTS)
1718     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
1719 #else
1720     info = GET_INFO(mvar);
1721 #endif
1722
1723     if (info == stg_FULL_MVAR_info) {
1724 #if defined(THREADED_RTS)
1725         unlockClosure(mvar, stg_FULL_MVAR_info);
1726 #endif
1727         RET_N(0);
1728     }
1729   
1730     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1731
1732         /* There are takeMVar(s) waiting: wake up the first one
1733          */
1734         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1735         
1736         /* actually perform the takeMVar */
1737         tso = StgMVar_head(mvar);
1738         PerformTake(tso, R2);
1739         dirtyTSO(tso);
1740       
1741 #if defined(GRAN) || defined(PAR)
1742         /* ToDo: check 2nd arg (mvar) is right */
1743         ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
1744         StgMVar_head(mvar) = tso;
1745 #else
1746         ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
1747         StgMVar_head(mvar) = tso;
1748 #endif
1749
1750         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1751             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1752         }
1753
1754 #if defined(THREADED_RTS)
1755         unlockClosure(mvar, stg_EMPTY_MVAR_info);
1756 #endif
1757     }
1758     else
1759     {
1760         /* No further takes, the MVar is now full. */
1761         StgMVar_value(mvar) = R2;
1762
1763 #if defined(THREADED_RTS)
1764         unlockClosure(mvar, stg_FULL_MVAR_info);
1765 #else
1766         SET_INFO(mvar,stg_FULL_MVAR_info);
1767 #endif
1768     }
1769     
1770     RET_N(1);
1771     /* ToDo: yield afterward for better communication performance? */
1772 }
1773
1774
1775 /* -----------------------------------------------------------------------------
1776    Stable pointer primitives
1777    -------------------------------------------------------------------------  */
1778
1779 makeStableNamezh_fast
1780 {
1781     W_ index, sn_obj;
1782
1783     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1784   
1785     (index) = foreign "C" lookupStableName(R1 "ptr") [];
1786
1787     /* Is there already a StableName for this heap object?
1788      *  stable_ptr_table is a pointer to an array of snEntry structs.
1789      */
1790     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1791         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1792         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1793         StgStableName_sn(sn_obj) = index;
1794         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1795     } else {
1796         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1797     }
1798     
1799     RET_P(sn_obj);
1800 }
1801
1802
1803 makeStablePtrzh_fast
1804 {
1805     /* Args: R1 = a */
1806     W_ sp;
1807     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1808     ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
1809     RET_N(sp);
1810 }
1811
1812 deRefStablePtrzh_fast
1813 {
1814     /* Args: R1 = the stable ptr */
1815     W_ r, sp;
1816     sp = R1;
1817     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1818     RET_P(r);
1819 }
1820
1821 /* -----------------------------------------------------------------------------
1822    Bytecode object primitives
1823    -------------------------------------------------------------------------  */
1824
1825 newBCOzh_fast
1826 {
1827     /* R1 = instrs
1828        R2 = literals
1829        R3 = ptrs
1830        R4 = arity
1831        R5 = bitmap array
1832     */
1833     W_ bco, bitmap_arr, bytes, words;
1834     
1835     bitmap_arr = R5;
1836
1837     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1838     bytes = WDS(words);
1839
1840     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );
1841
1842     bco = Hp - bytes + WDS(1);
1843     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1844     
1845     StgBCO_instrs(bco)     = R1;
1846     StgBCO_literals(bco)   = R2;
1847     StgBCO_ptrs(bco)       = R3;
1848     StgBCO_arity(bco)      = HALF_W_(R4);
1849     StgBCO_size(bco)       = HALF_W_(words);
1850     
1851     // Copy the arity/bitmap info into the BCO
1852     W_ i;
1853     i = 0;
1854 for:
1855     if (i < StgArrWords_words(bitmap_arr)) {
1856         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1857         i = i + 1;
1858         goto for;
1859     }
1860     
1861     RET_P(bco);
1862 }
1863
1864
1865 mkApUpd0zh_fast
1866 {
1867     // R1 = the BCO# for the AP
1868     //  
1869     W_ ap;
1870
1871     // This function is *only* used to wrap zero-arity BCOs in an
1872     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1873     // saturated and always points directly to a FUN or BCO.
1874     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1875            StgBCO_arity(R1) == HALF_W_(0));
1876
1877     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1878     TICK_ALLOC_UP_THK(0, 0);
1879     CCCS_ALLOC(SIZEOF_StgAP);
1880
1881     ap = Hp - SIZEOF_StgAP + WDS(1);
1882     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1883     
1884     StgAP_n_args(ap) = HALF_W_(0);
1885     StgAP_fun(ap) = R1;
1886     
1887     RET_P(ap);
1888 }
1889
1890 unpackClosurezh_fast
1891 {
1892 /* args: R1 = closure to analyze */
1893 // TODO: Consider the absence of ptrs or nonptrs as a special case ?
1894
1895     W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
1896     info  = %GET_STD_INFO(UNTAG(R1));
1897
1898     // Some closures have non-standard layout, so we omit those here.
1899     W_ type;
1900     type = TO_W_(%INFO_TYPE(info));
1901     switch [0 .. N_CLOSURE_TYPES] type {
1902     case THUNK_SELECTOR : {
1903         ptrs = 1;
1904         nptrs = 0;
1905         goto out;
1906     }
1907     case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
1908          THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
1909         ptrs = 0;
1910         nptrs = 0;
1911         goto out;
1912     }
1913     default: {
1914         ptrs  = TO_W_(%INFO_PTRS(info)); 
1915         nptrs = TO_W_(%INFO_NPTRS(info));
1916         goto out;
1917     }}
1918 out:
1919
1920     W_ ptrs_arr_sz, nptrs_arr_sz;
1921     nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
1922     ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);
1923
1924     ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);
1925
1926     W_ clos;
1927     clos = UNTAG(R1);
1928
1929     ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
1930     nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
1931
1932     SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
1933     StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
1934     p = 0;
1935 for:
1936     if(p < ptrs) {
1937          W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
1938          p = p + 1;
1939          goto for;
1940     }
1941     
1942     SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
1943     StgArrWords_words(nptrs_arr) = nptrs;
1944     p = 0;
1945 for2:
1946     if(p < nptrs) {
1947          W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
1948          p = p + 1;
1949          goto for2;
1950     }
1951     RET_NPP(info, ptrs_arr, nptrs_arr);
1952 }
1953
1954 /* -----------------------------------------------------------------------------
1955    Thread I/O blocking primitives
1956    -------------------------------------------------------------------------- */
1957
1958 /* Add a thread to the end of the blocked queue. (C-- version of the C
1959  * macro in Schedule.h).
1960  */
1961 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1962     ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
1963     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1964       W_[blocked_queue_hd] = tso;                       \
1965     } else {                                            \
1966       StgTSO_link(W_[blocked_queue_tl]) = tso;          \
1967     }                                                   \
1968     W_[blocked_queue_tl] = tso;
1969
1970 waitReadzh_fast
1971 {
1972     /* args: R1 */
1973 #ifdef THREADED_RTS
1974     foreign "C" barf("waitRead# on threaded RTS") never returns;
1975 #else
1976
1977     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1978     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1979     StgTSO_block_info(CurrentTSO) = R1;
1980     // No locking - we're not going to use this interface in the
1981     // threaded RTS anyway.
1982     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1983     jump stg_block_noregs;
1984 #endif
1985 }
1986
1987 waitWritezh_fast
1988 {
1989     /* args: R1 */
1990 #ifdef THREADED_RTS
1991     foreign "C" barf("waitWrite# on threaded RTS") never returns;
1992 #else
1993
1994     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1995     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1996     StgTSO_block_info(CurrentTSO) = R1;
1997     // No locking - we're not going to use this interface in the
1998     // threaded RTS anyway.
1999     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2000     jump stg_block_noregs;
2001 #endif
2002 }
2003
2004
2005 STRING(stg_delayzh_malloc_str, "delayzh_fast")
2006 delayzh_fast
2007 {
2008 #ifdef mingw32_HOST_OS
2009     W_ ares;
2010     CInt reqID;
2011 #else
2012     W_ t, prev, target;
2013 #endif
2014
2015 #ifdef THREADED_RTS
2016     foreign "C" barf("delay# on threaded RTS") never returns;
2017 #else
2018
2019     /* args: R1 (microsecond delay amount) */
2020     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2021     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
2022
2023 #ifdef mingw32_HOST_OS
2024
2025     /* could probably allocate this on the heap instead */
2026     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2027                                             stg_delayzh_malloc_str);
2028     (reqID) = foreign "C" addDelayRequest(R1);
2029     StgAsyncIOResult_reqID(ares)   = reqID;
2030     StgAsyncIOResult_len(ares)     = 0;
2031     StgAsyncIOResult_errCode(ares) = 0;
2032     StgTSO_block_info(CurrentTSO)  = ares;
2033
2034     /* Having all async-blocked threads reside on the blocked_queue
2035      * simplifies matters, so change the status to OnDoProc put the
2036      * delayed thread on the blocked_queue.
2037      */
2038     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2039     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2040     jump stg_block_async_void;
2041
2042 #else
2043
2044     W_ time;
2045     W_ divisor;
2046     (time) = foreign "C" getourtimeofday() [R1];
2047     divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags))*1000;
2048     target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
2049            + time + 1; /* Add 1 as getourtimeofday rounds down */
2050     StgTSO_block_info(CurrentTSO) = target;
2051
2052     /* Insert the new thread in the sleeping queue. */
2053     prev = NULL;
2054     t = W_[sleeping_queue];
2055 while:
2056     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
2057         prev = t;
2058         t = StgTSO_link(t);
2059         goto while;
2060     }
2061
2062     StgTSO_link(CurrentTSO) = t;
2063     if (prev == NULL) {
2064         W_[sleeping_queue] = CurrentTSO;
2065     } else {
2066         StgTSO_link(prev) = CurrentTSO;
2067     }
2068     jump stg_block_noregs;
2069 #endif
2070 #endif /* !THREADED_RTS */
2071 }
2072
2073
2074 #ifdef mingw32_HOST_OS
2075 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
2076 asyncReadzh_fast
2077 {
2078     W_ ares;
2079     CInt reqID;
2080
2081 #ifdef THREADED_RTS
2082     foreign "C" barf("asyncRead# on threaded RTS") never returns;
2083 #else
2084
2085     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2086     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2087     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
2088
2089     /* could probably allocate this on the heap instead */
2090     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2091                                             stg_asyncReadzh_malloc_str)
2092                         [R1,R2,R3,R4];
2093     (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
2094     StgAsyncIOResult_reqID(ares)   = reqID;
2095     StgAsyncIOResult_len(ares)     = 0;
2096     StgAsyncIOResult_errCode(ares) = 0;
2097     StgTSO_block_info(CurrentTSO)  = ares;
2098     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2099     jump stg_block_async;
2100 #endif
2101 }
2102
2103 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
2104 asyncWritezh_fast
2105 {
2106     W_ ares;
2107     CInt reqID;
2108
2109 #ifdef THREADED_RTS
2110     foreign "C" barf("asyncWrite# on threaded RTS") never returns;
2111 #else
2112
2113     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2114     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2115     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2116
2117     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2118                                             stg_asyncWritezh_malloc_str)
2119                         [R1,R2,R3,R4];
2120     (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
2121
2122     StgAsyncIOResult_reqID(ares)   = reqID;
2123     StgAsyncIOResult_len(ares)     = 0;
2124     StgAsyncIOResult_errCode(ares) = 0;
2125     StgTSO_block_info(CurrentTSO)  = ares;
2126     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2127     jump stg_block_async;
2128 #endif
2129 }
2130
2131 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
2132 asyncDoProczh_fast
2133 {
2134     W_ ares;
2135     CInt reqID;
2136
2137 #ifdef THREADED_RTS
2138     foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
2139 #else
2140
2141     /* args: R1 = proc, R2 = param */
2142     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2143     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2144
2145     /* could probably allocate this on the heap instead */
2146     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2147                                             stg_asyncDoProczh_malloc_str) 
2148                                 [R1,R2];
2149     (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
2150     StgAsyncIOResult_reqID(ares)   = reqID;
2151     StgAsyncIOResult_len(ares)     = 0;
2152     StgAsyncIOResult_errCode(ares) = 0;
2153     StgTSO_block_info(CurrentTSO) = ares;
2154     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2155     jump stg_block_async;
2156 #endif
2157 }
2158 #endif
2159
2160 // noDuplicate# tries to ensure that none of the thunks under
2161 // evaluation by the current thread are also under evaluation by
2162 // another thread.  It relies on *both* threads doing noDuplicate#;
2163 // the second one will get blocked if they are duplicating some work.
2164 noDuplicatezh_fast
2165 {
2166     SAVE_THREAD_STATE();
2167     ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
2168     foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
2169     
2170     if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
2171         jump stg_threadFinished;
2172     } else {
2173         LOAD_THREAD_STATE();
2174         ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
2175         jump %ENTRY_CODE(Sp(0));
2176     }
2177 }
2178
2179 getApStackValzh_fast
2180 {
2181    W_ ap_stack, offset, val, ok;
2182
2183    /* args: R1 = AP_STACK, R2 = offset */
2184    ap_stack = R1;
2185    offset   = R2;
2186
2187    if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
2188         ok = 1;
2189         val = StgAP_STACK_payload(ap_stack,offset); 
2190    } else {
2191         ok = 0;
2192         val = R1;
2193    }
2194    RET_NP(ok,val);
2195 }