newPinnedByteArray#: align the result to 16 bytes (part of #2917)
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 #ifdef __PIC__
31 #ifndef mingw32_HOST_OS
32 import __gmpz_init;
33 import __gmpz_add;
34 import __gmpz_sub;
35 import __gmpz_mul;
36 import __gmpz_gcd;
37 import __gmpn_gcd_1;
38 import __gmpn_cmp;
39 import __gmpz_tdiv_q;
40 import __gmpz_tdiv_r;
41 import __gmpz_tdiv_qr;
42 import __gmpz_fdiv_qr;
43 import __gmpz_divexact;
44 import __gmpz_and;
45 import __gmpz_xor;
46 import __gmpz_ior;
47 import __gmpz_com;
48 #endif
49 import pthread_mutex_lock;
50 import pthread_mutex_unlock;
51 #endif
52 import base_ControlziExceptionziBase_nestedAtomically_closure;
53 import EnterCriticalSection;
54 import LeaveCriticalSection;
55 import ghczmprim_GHCziBool_False_closure;
56
57 /*-----------------------------------------------------------------------------
58   Array Primitives
59
60   Basically just new*Array - the others are all inline macros.
61
62   The size arg is always passed in R1, and the result returned in R1.
63
64   The slow entry point is for returning from a heap check, the saved
65   size argument must be re-loaded from the stack.
66   -------------------------------------------------------------------------- */
67
68 /* for objects that are *less* than the size of a word, make sure we
69  * round up to the nearest word for the size of the array.
70  */
71
/* newByteArray# : R1 = size in bytes.
 * Allocates an ARR_WORDS object with the payload rounded up to whole
 * words (see the note above), and returns it in R1.
 */
newByteArrayzh_fast
{
    W_ words, payload_words, n, p;
    /* May GC; there are no live pointer arguments to protect. */
    MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    /* Total allocation = header + payload, both counted in words. */
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    /* NB. the words field records the payload size only, not the header. */
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
85
86 newPinnedByteArrayzh_fast
87 {
88     W_ words, payload_words, n, p;
89
90     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
91     n = R1;
92     payload_words = ROUNDUP_BYTES_TO_WDS(n);
93
94     // We want a 16-byte aligned array.  allocatePinned() gives us
95     // 8-byte aligned memory by default, but we want to align the
96     // *goods* inside the ArrWords object, so we have to check the
97     // size of the ArrWords header and adjust our size accordingly.
98     words = payload_words + ((SIZEOF_StgArrWords + 15) & ~15);
99
100     ("ptr" p) = foreign "C" allocatePinned(words) [];
101     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
102
103     // Push the pointer forward so that the goods fall on a 16-byte boundary.
104     p = p + ((p + SIZEOF_StgArrWords) & 15);
105
106     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
107     StgArrWords_words(p) = payload_words;
108     RET_P(p);
109 }
110
/* newArray# : R1 = number of elements, R2 = initialisation value.
 * Allocates a MUT_ARR_PTRS (marked dirty) and fills every slot with R2.
 */
newArrayzh_fast
{
    W_ words, n, init, arr, p;
    /* Args: R1 = words, R2 = initialisation value */

    n = R1;
    /* R2 is a live pointer and must survive a possible GC. */
    MAYBE_GC(R2_PTR,newArrayzh_fast);

    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
    ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(arr) = n;

    // Initialise all elements of the array with the value in R2.
    // (words counts the header too, so arr + WDS(words) is the object end.)
    init = R2;
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + WDS(words)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    RET_P(arr);
}
138
/* unsafeThawArray# : R1 = frozen array.
 * Flips a (frozen) array back to MUT_ARR_PTRS_DIRTY in place.
 */
unsafeThawArrayzh_fast
{
  // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
  //
  // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
  // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
  // it on the mutable list for the GC to remove (removing something from
  // the mutable list is not easy, because the mut_list is only singly-linked).
  //
  // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
  // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
  // to indicate that it is still on the mutable list.
  //
  // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
  // either it is on a mut_list, or it isn't.  We adopt the convention that
  // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
  // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
  // we put it on the mutable list more than once, but it would get scavenged
  // multiple times during GC, which would be unnecessarily slow.
  //
  if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        // Not on the mutable list: put it back on.
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(R1, R1);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        RET_P(R1);
  } else {
        // Already on the mutable list: just flip the info pointer.
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        RET_P(R1);
  }
}
169
170 /* -----------------------------------------------------------------------------
171    MutVar primitives
172    -------------------------------------------------------------------------- */
173
/* newMutVar# : R1 = initialisation value.
 * Allocates a MUT_VAR (dirty) holding R1 and returns it.
 */
newMutVarzh_fast
{
    W_ mv;
    /* Args: R1 = initialisation value */

    /* Heap-check; R1 is a live pointer across the check. */
    ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);

    /* Carve the object out of the space just reserved (the +WDS(1)
       compensates for Hp pointing at the last allocated word). */
    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
    StgMutVar_var(mv) = R1;

    RET_P(mv);
}
187
/* atomicModifyMutVar# : R1 = MutVar#, R2 = f :: a -> (a,b).
 * Atomically replaces the contents with a thunk selecting the first
 * component of (f x), and returns a thunk selecting the second.
 */
atomicModifyMutVarzh_fast
{
    W_ mv, f, z, x, y, r, h;
    /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */

    /* If x is the current contents of the MutVar#, then
       We want to make the new contents point to

         (sel_0 (f x))

       and the return value is

         (sel_1 (f x))

        obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */

/* Pad each thunk up to MIN_UPD_SIZE payload words when necessary,
   and keep the ticky accounting in step with the padding. */
#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

   /* One heap check up front covers all three thunks. */
   HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);

   mv = R1;
   f = R2;

   /* z = [stg_ap_2 f x]; x is filled in inside the retry loop below. */
   TICK_ALLOC_THUNK_2();
   CCCS_ALLOC(THUNK_2_SIZE);
   z = Hp - THUNK_2_SIZE + WDS(1);
   SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(z);
   StgThunk_payload(z,0) = f;

   /* y = [stg_sel_0 z]: the new contents of the MutVar. */
   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   y = z - THUNK_1_SIZE;
   SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(y);
   StgThunk_payload(y,0) = z;

   /* r = [stg_sel_1 z]: the value returned to the caller. */
   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   r = y - THUNK_1_SIZE;
   SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(r);
   StgThunk_payload(r,0) = z;

 retry:
   /* Snapshot the current contents, plug it into z, then try to swing
      the MutVar from x to y with a CAS; loop if we lost the race. */
   x = StgMutVar_var(mv);
   StgThunk_payload(z,1) = x;
#ifdef THREADED_RTS
   (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) [];
   if (h != x) { goto retry; }
#else
   /* Single-threaded: a plain store is already atomic. */
   StgMutVar_var(mv) = y;
#endif

   /* If the MutVar was clean, mark it dirty now that it was written. */
   if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
     foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
   }

   RET_P(r);
}
269
270 /* -----------------------------------------------------------------------------
271    Weak Pointer Primitives
272    -------------------------------------------------------------------------- */
273
STRING(stg_weak_msg,"New weak pointer at %p\n")

/* mkWeak# : R1 = key, R2 = value, R3 = finalizer (or NULL).
 * Builds a WEAK object and links it onto the global weak_ptr_list.
 */
mkWeakzh_fast
{
  /* R1 = key
     R2 = value
     R3 = finalizer (or NULL)
  */
  W_ w;

  /* A NULL finalizer is normalised to stg_NO_FINALIZER_closure. */
  if (R3 == NULL) {
    R3 = stg_NO_FINALIZER_closure;
  }

  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, W_[CCCS]);

  // We don't care about cfinalizer here.
  // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
  // something else?

  StgWeak_key(w)        = R1;
  StgWeak_value(w)      = R2;
  StgWeak_finalizer(w)  = R3;
  StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure;

  /* Cons the new weak pointer onto the global list. */
  StgWeak_link(w)       = W_[weak_ptr_list];
  W_[weak_ptr_list]     = w;

  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

  RET_P(w);
}
309
/* mkWeakForeignEnv# : like mkWeak#, but the finalizer is a C function
 * pointer (with optional environment) packed into a 4-word ArrWords
 * stored in the cfinalizer field.
 */
mkWeakForeignEnvzh_fast
{
  /* R1 = key
     R2 = value
     R3 = finalizer
     R4 = pointer
     R5 = has environment (0 or 1)
     R6 = environment
  */
  W_ w, payload_words, words, p;

  W_ key, val, fptr, ptr, flag, eptr;

  /* Copy the args out of the R registers before any allocation. */
  key  = R1;
  val  = R2;
  fptr = R3;
  ptr  = R4;
  flag = R5;
  eptr = R6;

  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakForeignEnvzh_fast );

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, W_[CCCS]);

  /* Pack (fptr, ptr, eptr, flag) into a 4-word ArrWords; the same
     layout is unpacked by finalizzeWeakzh_fast below. */
  payload_words = 4;
  words         = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
  ("ptr" p)     = foreign "C" allocateLocal(MyCapability() "ptr", words) [];

  TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
  SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);

  StgArrWords_words(p)     = payload_words;
  StgArrWords_payload(p,0) = fptr;
  StgArrWords_payload(p,1) = ptr;
  StgArrWords_payload(p,2) = eptr;
  StgArrWords_payload(p,3) = flag;

  // We don't care about the value here.
  // Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else?

  StgWeak_key(w)        = key;
  StgWeak_value(w)      = val;
  StgWeak_finalizer(w)  = stg_NO_FINALIZER_closure;
  StgWeak_cfinalizer(w) = p;

  /* Cons the new weak pointer onto the global list. */
  StgWeak_link(w)   = W_[weak_ptr_list];
  W_[weak_ptr_list] = w;

  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

  RET_P(w);
}
363
/* finalizeWeak# : R1 = weak ptr.
 * Kills the weak pointer, runs any C finalizer immediately, and
 * returns (# 0, noFinalizer #) or (# 1, haskellFinalizer #).
 */
finalizzeWeakzh_fast
{
  /* R1 = weak ptr
   */
  W_ w, f, arr;

  w = R1;

  // already dead?
  if (GET_INFO(w) == stg_DEAD_WEAK_info) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  }

  // kill it
#ifdef PROFILING
  // @LDV profiling
  // A weak pointer is inherently used, so we do not need to call
  // LDV_recordDead_FILL_SLOP_DYNAMIC():
  //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
  // or, LDV_recordDead():
  //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
  // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
  // large as weak pointers, so there is no need to fill the slop, either.
  // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
#endif

  //
  // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
  //
  SET_INFO(w,stg_DEAD_WEAK_info);
  LDV_RECORD_CREATE(w);

  /* Grab the fields we still need before the object is reused. */
  f   = StgWeak_finalizer(w);
  arr = StgWeak_cfinalizer(w);

  StgDeadWeak_link(w) = StgWeak_link(w);

  /* Run the C finalizer (if any) right now; the payload layout is the
     one written by mkWeakForeignEnvzh_fast: (fptr, ptr, eptr, flag). */
  if (arr != stg_NO_FINALIZER_closure) {
    foreign "C" runCFinalizer(StgArrWords_payload(arr,0),
                              StgArrWords_payload(arr,1),
                              StgArrWords_payload(arr,2),
                              StgArrWords_payload(arr,3)) [];
  }

  /* return the finalizer */
  if (f == stg_NO_FINALIZER_closure) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  } else {
      RET_NP(1,f);
  }
}
415
416 deRefWeakzh_fast
417 {
418   /* R1 = weak ptr */
419   W_ w, code, val;
420
421   w = R1;
422   if (GET_INFO(w) == stg_WEAK_info) {
423     code = 1;
424     val = StgWeak_value(w);
425   } else {
426     code = 0;
427     val = w;
428   }
429   RET_NP(code,val);
430 }
431
432 /* -----------------------------------------------------------------------------
433    Arbitrary-precision Integer operations.
434
435    There are some assumptions in this code that mp_limb_t == W_.  This is
436    the case for all the platforms that GHC supports, currently.
437    -------------------------------------------------------------------------- */
438
/* int2Integer# : R1 = Int#.
 * Returns (# s, d #): s is the GMP-style signed size (-1, 0 or 1)
 * and d is a 1-word ByteArray# holding the magnitude.
 */
int2Integerzh_fast
{
   /* arguments: R1 = Int# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;
   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   /* mpz_set_si is inlined here, makes things simpler */
   if (%lt(val,0)) {
        s  = -1;
        Hp(0) = -val;   /* store the magnitude; the sign lives in s */
   } else {
     if (%gt(val,0)) {
        s = 1;
        Hp(0) = val;
     } else {
        s = 0;          /* zero: size 0, limb left unwritten */
     }
  }

   /* returns (# size  :: Int#,
                 data  :: ByteArray#
               #)
   */
   RET_NP(s,p);
}
471
/* word2Integer# : R1 = Word#.
 * Returns (# s, d #): s is 0 or 1 (a Word# is never negative) and d
 * is a 1-word ByteArray# holding the value.
 */
word2Integerzh_fast
{
   /* arguments: R1 = Word# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   if (val != 0) {
        s = 1;
        W_[Hp] = val;   /* same slot as Hp(0) in int2Integerzh_fast */
   } else {
        s = 0;          /* zero: size 0, limb left unwritten */
   }

   /* returns (# size  :: Int#,
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
498
499
500 /*
501  * 'long long' primops for converting to/from Integers.
502  */
503
504 #ifdef SUPPORT_LONG_LONGS
505
/* int64ToInteger# : L1 = Int64#.
 * Splits the 64-bit value into two word-sized limbs and returns
 * (# s, d #) with s the signed limb count and d the magnitude.
 */
int64ToIntegerzh_fast
{
   /* arguments: L1 = Int64# */

   L_ val;
   W_ hi, lo, s, neg, words_needed, p;

   val = L1;
   neg = 0;

   /* Split into high and low 32-bit halves. */
   hi = TO_W_(val >> 32);
   lo = TO_W_(val);

   /* One limb suffices when the high half is just sign extension:
      all zeros, or all ones with a non-zero low half.  (hi == -1 &&
      lo == 0 is exactly -2^32, whose magnitude needs two limbs.) */
   if ( hi == 0 || (hi == 0xFFFFFFFF && lo != 0) )  {
       // minimum is one word
       words_needed = 1;
   } else {
       words_needed = 2;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, int64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   /* Negate a two's-complement value spread across two limbs:
      negate lo, and borrow into hi unless lo was zero. */
   if ( %lt(hi,0) ) {
     neg = 1;
     lo = -lo;
     if(lo == 0) {
       hi = -hi;
     } else {
       hi = -hi - 1;
     }
   }

   if ( words_needed == 2 )  {
      s = 2;
      Hp(-1) = lo;      /* least-significant limb first */
      Hp(0) = hi;
   } else {
       if ( lo != 0 ) {
           s = 1;
           Hp(0) = lo;
       } else /* val==0 */  {
           s = 0;
       }
   }
   /* GMP convention: a negative size denotes a negative number. */
   if ( neg != 0 ) {
        s = -s;
   }

   /* returns (# size  :: Int#,
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
/* word64ToInteger# : L1 = Word64#.
 * Like int64ToInteger# but unsigned, so no negation is needed:
 * returns (# s, d #) with s in {0,1,2}.
 */
word64ToIntegerzh_fast
{
   /* arguments: L1 = Word64# */

   L_ val;
   W_ hi, lo, s, words_needed, p;

   val = L1;
   /* Split into high and low 32-bit halves. */
   hi = TO_W_(val >> 32);
   lo = TO_W_(val);

   /* Two limbs only when the high half is non-zero. */
   if ( hi != 0 ) {
      words_needed = 2;
   } else {
      words_needed = 1;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, word64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   if ( hi != 0 ) {
     s = 2;
     Hp(-1) = lo;       /* least-significant limb first */
     Hp(0)  = hi;
   } else {
      if ( lo != 0 ) {
        s = 1;
        Hp(0) = lo;
      } else /* val==0 */  {
        s = 0;
      }
   }

   /* returns (# size  :: Int#,
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
606
607
608
609 #endif /* SUPPORT_LONG_LONGS */
610
/* ToDo: this is shockingly inefficient */

/* Static scratch mpz_t structures used by the GMP wrapper macros
 * below.  In the non-threaded RTS one global set suffices; in the
 * threaded RTS FETCH_MP_TEMP instead locates a per-capability copy
 * inside the register table via BaseReg (see below).
 */
#ifndef THREADED_RTS
section "bss" {
  mp_tmp1:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_tmp2:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_result1:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_result2:
    bits8 [SIZEOF_MP_INT];
}
#endif

/* FETCH_MP_TEMP(X): in the threaded RTS, declare X locally and point
 * it at the capability's own scratch slot (field r##X of the register
 * table); in the non-threaded RTS the static sections above are used
 * directly, so the macro expands to nothing.
 */
#ifdef THREADED_RTS
#define FETCH_MP_TEMP(X) \
W_ X; \
X = BaseReg + (OFFSET_StgRegTable_r ## X);
#else
#define FETCH_MP_TEMP(X) /* Nothing */
#endif
642
/* GMP_TAKE2_RET1(name, mp_fun): define primop `name` taking two
 * Integers as (size, data) pairs in R1/R2 and R3/R4, wrapping them in
 * scratch mpz_t structures (no copy: _mp_d points into the byte
 * arrays), applying mp_fun, and returning one Integer as
 * (# size, data #).
 * NB. the result is returned as (_mp_d - SIZEOF_StgArrWords), i.e.
 * the limb buffer is assumed to sit just past an StgArrWords header
 * (the RTS redirects GMP's allocator) -- confirm against the RTS GMP
 * memory hooks.
 */
#define GMP_TAKE2_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1, s2;                                                          \
  W_ d1, d2;                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_tmp2);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
  FETCH_MP_TEMP(mp_result2);                                            \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
                                                                        \
  s1 = W_TO_INT(R1);                                                    \
  d1 = R2;                                                              \
  s2 = W_TO_INT(R3);                                                    \
  d2 = R4;                                                              \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
  MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
  MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
  MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
  MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
  MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}
676
/* GMP_TAKE1_RET1(name, mp_fun): unary variant of GMP_TAKE2_RET1 --
 * one Integer in (R1 = size, R2 = data), one Integer result.  See
 * the note on GMP_TAKE2_RET1 about the returned data pointer.
 */
#define GMP_TAKE1_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1;                                                              \
  W_ d1;                                                                \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR, name);                                               \
                                                                        \
  d1 = R2;                                                              \
  s1 = W_TO_INT(R1);                                                    \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}
703
/* GMP_TAKE2_RET2(name, mp_fun): like GMP_TAKE2_RET1 but for GMP
 * functions producing two results (e.g. tdiv_qr), returned as
 * (# size1, data1, size2, data2 #).  See the note on GMP_TAKE2_RET1
 * about the returned data pointers.
 */
#define GMP_TAKE2_RET2(name,mp_fun)                                                     \
name                                                                                    \
{                                                                                       \
  CInt s1, s2;                                                                          \
  W_ d1, d2;                                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                                               \
  FETCH_MP_TEMP(mp_tmp2);                                                               \
  FETCH_MP_TEMP(mp_result1)                                                             \
  FETCH_MP_TEMP(mp_result2)                                                             \
                                                                                        \
  /* call doYouWantToGC() */                                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
                                                                                        \
  s1 = W_TO_INT(R1);                                                                    \
  d1 = R2;                                                                              \
  s2 = W_TO_INT(R3);                                                                    \
  d2 = R4;                                                                              \
                                                                                        \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
  MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
  MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
  MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
                                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
  foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
                                                                                        \
  /* Perform the operation */                                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
                                                                                        \
  RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
           MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
           TO_W_(MP_INT__mp_size(mp_result2)),                                          \
           MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
}
740
/* Instantiate the wrapper macros for each Integer primop. */
GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)

GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)

/* Scratch limb for gcdIntzh_fast in the non-threaded RTS. */
#ifndef THREADED_RTS
section "bss" {
  mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
}
#endif
761
/* gcdInt# : R1, R2 = Int#.
 * Computes gcd via GMP's mpn_gcd_1 on a 1-limb operand.
 */
gcdIntzh_fast
{
    /* R1 = the first Int#; R2 = the second Int# */
    W_ r;
    FETCH_MP_TEMP(mp_tmp_w);

    /* mpn_gcd_1 wants a limb array: stage R1 in the scratch limb. */
    W_[mp_tmp_w] = R1;
    (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];

    R1 = r;
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
775
776
/* gcdIntegerInt# : R1 = size, R2 = Integer data, R3 = Int#.
 * gcd of a multi-limb Integer and a single limb via mpn_gcd_1.
 */
gcdIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ s1;
    (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
    R1 = s1;

    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
787
788
/* cmpIntegerInt# : R1 = size, R2 = Integer data, R3 = Int#.
 * Three-way comparison of an Integer against a machine Int, returning
 * <0, 0 or >0 in R1.
 */
cmpIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ usize, vsize, v_digit, u_digit;

    usize = R1;
    vsize = 0;
    v_digit = R3;

    // paraphrased from __gmpz_cmp_si() in the GMP sources
    /* Normalise the Int to (sign, magnitude) form. */
    if (%gt(v_digit,0)) {
        vsize = 1;
    } else {
        if (%lt(v_digit,0)) {
            vsize = -1;
            v_digit = -v_digit;
        }
    }

    /* Different signed sizes decide the comparison outright. */
    if (usize != vsize) {
        R1 = usize - vsize;
        jump %ENTRY_CODE(Sp(0));
    }

    /* Both zero. */
    if (usize == 0) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    /* Same sign and one limb each: compare the magnitudes. */
    u_digit = W_[BYTE_ARR_CTS(R2)];

    if (u_digit == v_digit) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
        R1 = usize;     /* sign of the result follows the operands' sign */
    } else {
        R1 = -usize;
    }

    jump %ENTRY_CODE(Sp(0));
}
833
/* cmpInteger# : R1 = s1, R2 = d1, R3 = s2, R4 = d2.
 * Three-way comparison of two Integers, returning <0, 0 or >0 in R1.
 */
cmpIntegerzh_fast
{
    /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
    W_ usize, vsize, size, up, vp;
    CInt cmp;

    // paraphrased from __gmpz_cmp() in the GMP sources
    usize = R1;
    vsize = R3;

    /* Different signed sizes decide the comparison outright. */
    if (usize != vsize) {
        R1 = usize - vsize;
        jump %ENTRY_CODE(Sp(0));
    }

    /* Both zero. */
    if (usize == 0) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    /* Same sign: compare |usize| limbs of magnitude. */
    if (%lt(usize,0)) { // NB. not <, which is unsigned
        size = -usize;
    } else {
        size = usize;
    }

    up = BYTE_ARR_CTS(R2);
    vp = BYTE_ARR_CTS(R4);

    (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];

    if (cmp == 0 :: CInt) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    /* Larger magnitude wins for positive operands, loses for negative. */
    if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
        R1 = 1;
    } else {
        R1 = (-1);
    }
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
878
/* integer2Int# : R1 = size, R2 = data.
 * Truncates an Integer to its least-significant limb, negated when
 * the size is negative.  (Same code as integer2Wordzh_fast below.)
 */
integer2Intzh_fast
{
    /* R1 = s; R2 = d */
    W_ r, s;

    s = R1;
    if (s == 0) {
        r = 0;
    } else {
        /* First (least-significant) limb of the magnitude. */
        r = W_[R2 + SIZEOF_StgArrWords];
        if (%lt(s,0)) {
            r = -r;
        }
    }
    /* Result parked in R1, return via info-pointer at TOS */
    R1 = r;
    jump %ENTRY_CODE(Sp(0));
}
897
/* integer2Word# : R1 = size, R2 = data.
 * Identical to integer2Intzh_fast: truncation to one limb is the same
 * operation whether the result is read as Int# or Word#.
 */
integer2Wordzh_fast
{
  /* R1 = s; R2 = d */
  W_ r, s;

  s = R1;
  if (s == 0) {
    r = 0;
  } else {
    /* First (least-significant) limb of the magnitude. */
    r = W_[R2 + SIZEOF_StgArrWords];
    if (%lt(s,0)) {
        r = -r;
    }
  }
  /* Result parked in R1, return via info-pointer at TOS */
  R1 = r;
  jump %ENTRY_CODE(Sp(0));
}
916
/* decodeFloat# : F1 = Float#.
 * Decodes into (# exponent, mantissa-size, mantissa ByteArray# #) via
 * the C helper __decodeFloat.
 */
decodeFloatzh_fast
{
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: F1 = Float# */
    arg = F1;

    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );

    /* Be prepared to tell Lennart-coded __decodeFloat
       where mantissa._mp_d can be put (it does not care about the rest) */
    p = Hp - SIZEOF_StgArrWords;
    SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
    StgArrWords_words(p) = 1;
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

    /* Perform the operation */
    foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];

    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
942
/* decodeFloat_Int# : F1 = Float#.
 * Int-only decode: no allocation, the C helper writes mantissa and
 * exponent straight into the two scratch words.
 */
decodeFloatzuIntzh_fast
{
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: F1 = Float# */
    arg = F1;

    /* Perform the operation */
    foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];

    /* returns: (Int# (mantissa), Int# (exponent)) */
    RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
}
959
#define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
#define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)

/* decodeDouble#: split a Double# into (exponent, mantissa size, mantissa
 * ByteArray#) via the C helper __decodeDouble.
 *
 * Fix: the opening brace of the function body was missing, which is a
 * syntax error in C-- (every other primop here opens with '{').
 */
decodeDoublezh_fast
{
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: D1 = Double# */
    arg = D1;

    /* Allocate a ByteArray# big enough for a double's mantissa. */
    ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
    
    /* Be prepared to tell Lennart-coded __decodeDouble
       where mantissa.d can be put (it does not care about the rest) */
    p = Hp - ARR_SIZE + WDS(1);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

    /* Perform the operation */
    foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
    
    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
988
/* decodeDouble_2Int#: split a Double# into
 * (Int# mantissa sign, Word# mantissa high, Word# mantissa low, Int# exponent)
 * via the C helper __decodeDouble_2Int.  No heap allocation needed.
 *
 * Fix: the opening brace of the function body was missing (syntax error).
 */
decodeDoublezu2Intzh_fast
{
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp2);
    FETCH_MP_TEMP(mp_result1);
    FETCH_MP_TEMP(mp_result2);

    /* arguments: D1 = Double# */
    arg = D1;

    /* Perform the operation */
    foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
                                    mp_result1 "ptr", mp_result2 "ptr",
                                    arg) [];

    /* returns:
       (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
    RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
}
1010
1011 /* -----------------------------------------------------------------------------
1012  * Concurrency primitives
1013  * -------------------------------------------------------------------------- */
1014
/* fork#: create a new thread running the given closure and hand it to
 * the scheduler.  Returns the new ThreadId#.
 */
forkzh_fast
{
  /* args: R1 = closure to spark */

  MAYBE_GC(R1_PTR, forkzh_fast);

  W_ closure;
  W_ threadid;
  closure = R1;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
                                RtsFlags_GcFlags_initialStkSize(RtsFlags), 
                                closure "ptr") [];

  /* start blocked if the current thread is blocked: copy the parent's
     TSO_BLOCKEX / TSO_INTERRUPTIBLE flag bits into the child */
  StgTSO_flags(threadid) = 
     StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
                                (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));

  foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];

  // switch at the earliest opportunity
  Capability_context_switch(MyCapability()) = 1 :: CInt;
  
  RET_P(threadid);
}
1041
/* forkOn#: like fork#, but schedules the new thread on a specific
 * capability (cpu number in R1) via scheduleThreadOn.
 */
forkOnzh_fast
{
  /* args: R1 = cpu, R2 = closure to spark */

  MAYBE_GC(R2_PTR, forkOnzh_fast);

  W_ cpu;
  W_ closure;
  W_ threadid;
  cpu = R1;
  closure = R2;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
                                RtsFlags_GcFlags_initialStkSize(RtsFlags), 
                                closure "ptr") [];

  /* start blocked if the current thread is blocked: copy the parent's
     TSO_BLOCKEX / TSO_INTERRUPTIBLE flag bits into the child */
  StgTSO_flags(threadid) = 
     StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
                                (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));

  foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];

  // switch at the earliest opportunity
  Capability_context_switch(MyCapability()) = 1 :: CInt;
  
  RET_P(threadid);
}
1070
/* yield#: no arguments; defer entirely to stg_yield_noregs, which
 * returns control to the scheduler with no registers to save. */
yieldzh_fast
{
  jump stg_yield_noregs;
}
1075
/* myThreadId#: return the current TSO as a ThreadId#. */
myThreadIdzh_fast
{
  /* no args. */
  RET_P(CurrentTSO);
}
1081
/* labelThread#: attach a C-string label to a thread.  Only does anything
 * in DEBUG builds; otherwise it is a no-op that just returns. */
labelThreadzh_fast
{
  /* args: 
        R1 = ThreadId#
        R2 = Addr# */
#ifdef DEBUG
  foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
#endif
  jump %ENTRY_CODE(Sp(0));
}
1092
/* isCurrentThreadBound#: ask the RTS whether the current thread is bound
 * to an OS thread; returns the C result as an Int#. */
isCurrentThreadBoundzh_fast
{
  /* no args */
  W_ r;
  (r) = foreign "C" isThreadBound(CurrentTSO) [];
  RET_N(r);
}
1100
/* threadStatus#: return a status code for a thread.  The code is either
 * 16 (complete), 17 (killed), or the thread's why_blocked value; the
 * magic numbers must stay in sync with GHC.Conc.threadStatus. */
threadStatuszh_fast
{
    /* args: R1 :: ThreadId# */
    W_ tso;
    W_ why_blocked;
    W_ what_next;
    W_ ret;

    tso = R1;
    loop:
      // chase ThreadRelocated indirections to the thread's current TSO
      if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
          tso = StgTSO__link(tso);
          goto loop;
      }

    what_next   = TO_W_(StgTSO_what_next(tso));
    why_blocked = TO_W_(StgTSO_why_blocked(tso));
    // Note: these two reads are not atomic, so they might end up
    // being inconsistent.  It doesn't matter, since we
    // only return one or the other.  If we wanted to return the
    // contents of block_info too, then we'd have to do some synchronisation.

    if (what_next == ThreadComplete) {
        ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
    } else {
        if (what_next == ThreadKilled) {
            ret = 17;
        } else {
            ret = why_blocked;
        }
    }
    RET_N(ret);
}
1134
1135 /* -----------------------------------------------------------------------------
1136  * TVar primitives
1137  * -------------------------------------------------------------------------- */
1138
1139 #define SP_OFF 0
1140
1141 // Catch retry frame ------------------------------------------------------------
1142
/* Return into a CATCH_RETRY_FRAME: the branch being run finished without
 * retrying.  Try to commit its nested transaction; on success pop the
 * frame, otherwise restart whichever branch was running. */
INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  W_ unused3, P_ unused4, P_ unused5)
{
   W_ r, frame, trec, outer;

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
   (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
   if (r != 0) {
     /* Succeeded (either first branch or second branch) */
     StgTSO_trec(CurrentTSO) = outer;
     Sp = Sp + SIZEOF_StgCatchRetryFrame;
     jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
     /* Did not commit: re-execute the branch in a fresh nested trec */
     W_ new_trec;
     ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
     StgTSO_trec(CurrentTSO) = new_trec;
     if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
       R1 = StgCatchRetryFrame_alt_code(frame);
     } else {
       R1 = StgCatchRetryFrame_first_code(frame);
     }
     jump stg_ap_v_fast;
   }
}
1173
1174
1175 // Atomically frame ------------------------------------------------------------
1176
/* Return into an ATOMICALLY_FRAME: the transaction body (or an invariant
 * check) has finished.  First gather/advance the queue of invariants to
 * check; run each in its own nested transaction; finally try to commit.
 * A failed commit restarts the whole transaction from scratch. */
INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ unused3, P_ unused4)
{
  W_ frame, trec, valid, next_invariant, q, outer;

  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (outer == NO_TREC) {
    /* First time back at the atomically frame -- pick up invariants */
    ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;

  } else {
    /* Second/subsequent time back at the atomically frame -- abort the
     * tx that's checking the invariant and move on to the next one */
    StgTSO_trec(CurrentTSO) = outer;
    q = StgAtomicallyFrame_next_invariant_to_check(frame);
    StgInvariantCheckQueue_my_execution(q) = trec;
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    /* Don't free trec -- it's linked from q and will be stashed in the
     * invariant if we eventually commit. */
    q = StgInvariantCheckQueue_next_queue_entry(q);
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    trec = outer;
  }

  q = StgAtomicallyFrame_next_invariant_to_check(frame);

  if (q != END_INVARIANT_CHECK_QUEUE) {
    /* We can't commit yet: another invariant to check */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;

    next_invariant = StgInvariantCheckQueue_invariant(q);
    R1 = StgAtomicInvariant_code(next_invariant);
    jump stg_ap_v_fast;

  } else {

    /* We've got no more invariants to check, try to commit */
    (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
    if (valid != 0) {
      /* Transaction was valid: commit succeeded */
      StgTSO_trec(CurrentTSO) = NO_TREC;
      Sp = Sp + SIZEOF_StgAtomicallyFrame;
      jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
      /* Transaction was not valid: try again from a fresh top-level trec */
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
      R1 = StgAtomicallyFrame_code(frame);
      jump stg_ap_v_fast;
    }
  }
}
1238
/* Return into an ATOMICALLY_FRAME while the thread is blocked in retry:
 * re-check whether the wait is still justified; if not, rerun the
 * transaction body from scratch. */
INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ unused3, P_ unused4)
{
  W_ frame, trec, valid;

  frame = Sp;

  /* The TSO is currently waiting: should we stop waiting? */
  (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
  if (valid != 0) {
    /* Previous attempt is still valid: no point trying again yet */
    jump stg_block_noregs;
  } else {
    /* Previous attempt is no longer valid: try again */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    /* revert the frame to the normal (non-waiting) atomically frame */
    StgHeader_info(frame) = stg_atomically_frame_info;
    R1 = StgAtomicallyFrame_code(frame);
    jump stg_ap_v_fast;
  }
}
1263
1264 // STM catch frame --------------------------------------------------------------
1265
1266 #define SP_OFF 0
1267
1268 /* Catch frames are very similar to update frames, but when entering
1269  * one we just pop the frame off the stack and perform the correct
1270  * kind of return to the activation record underneath us on the stack.
1271  */
1272
/* Return into a CATCH_STM_FRAME: the protected body finished without an
 * exception.  Commit the nested transaction; on failure rerun the body. */
INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ unused3, P_ unused4)
   {
      W_ r, frame, trec, outer;
      frame = Sp;
      trec = StgTSO_trec(CurrentTSO);
      ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
      (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
      if (r != 0) {
        /* Commit succeeded */
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchSTMFrame;
        /* NOTE(review): other frames in this file return with
         * %ENTRY_CODE(Sp(SP_OFF)); plain Sp(SP_OFF) here looks
         * inconsistent and may matter for unregisterised builds --
         * confirm against the return-convention macros. */
        jump Sp(SP_OFF);
      } else {
        /* Commit failed: rerun the protected body in a fresh nested trec */
        W_ new_trec;
        ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
        StgTSO_trec(CurrentTSO) = new_trec;
        R1 = StgCatchSTMFrame_code(frame);
        jump stg_ap_v_fast;
      }
   }
1298
1299
1300 // Primop definition ------------------------------------------------------------
1301
/* atomically#: run an STM computation in a new top-level transaction.
 * Pushes an atomically frame and starts a trec; nesting is rejected by
 * raising nestedAtomically. */
atomicallyzh_fast
{
  W_ frame;
  W_ old_trec;
  W_ new_trec;
  
  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR, atomicallyzh_fast); 

  /* Args: R1 = m :: STM a */
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);

  old_trec = StgTSO_trec(CurrentTSO);

  /* Nested transactions are not allowed; raise an exception */
  if (old_trec != NO_TREC) {
     R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
     jump raisezh_fast;
  }

  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
  StgAtomicallyFrame_code(frame) = R1;
  StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;

  /* Start the memory transaction */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1337
1338
/* catchSTM#: run an STM action with an exception handler.  Pushes a
 * catch-stm frame and starts a nested transaction for the body. */
catchSTMzh_fast
{
  W_ frame;
  
  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */
  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
  StgCatchSTMFrame_handler(frame) = R2;
  StgCatchSTMFrame_code(frame) = R1;

  /* Start a nested transaction to run the body of the try block in */
  W_ cur_trec;  
  W_ new_trec;
  cur_trec = StgTSO_trec(CurrentTSO);
  /* NOTE(review): this foreign call lacks the trailing "[]" live-register
   * annotation that the other stmStartTransaction calls carry -- confirm
   * whether that is intentional. */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1365
1366
/* catchRetry#: run the first STM action; if it retries, run the second.
 * Pushes a catch-retry frame and starts a nested transaction for the
 * first branch. */
catchRetryzh_fast
{
  W_ frame;
  W_ new_trec;
  W_ trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 

  /* Args: R1 :: STM a */
  /* Args: R2 :: STM a */
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;
  
  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1397
1398
/* retry#: abandon the current transaction attempt.  Walk the stack to
 * the nearest CATCH_RETRY_FRAME (switch to its alternative, or propagate)
 * or ATOMICALLY_FRAME (wait on the TVars read so far via stmWait). */
retryzh_fast
{
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

  MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
  StgTSO_sp(CurrentTSO) = Sp;
  (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
  Sp = StgTSO_sp(CurrentTSO);
  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
    // Abort the transaction attempting the current branch
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    /* NOTE(review): the `!... != 0::I32` condition is intended to mean
     * "not running the alternative code"; confirm its parse against the
     * C-- grammar's precedence for `!` and `!=`. */
    if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
      // Retry in the first branch: try the alternative
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
      R1 = StgCatchRetryFrame_alt_code(frame);
      jump stg_ap_v_fast;
    } else {
      // Retry in the alternative code: propagate the retry
      StgTSO_trec(CurrentTSO) = outer;
      Sp = Sp + SIZEOF_StgCatchRetryFrame;
      goto retry_pop_stack;
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait 
  ASSERT(frame_type == ATOMICALLY_FRAME);
  if (outer != NO_TREC) {
    // We called retry while checking invariants, so abort the current
    // invariant check (merging its TVar accesses into the parent's read
    // set so we'll wait on them)
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    trec = outer;
    StgTSO_trec(CurrentTSO) = trec;
    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
  }
  ASSERT(outer == NO_TREC);

  (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
  if (r != 0) {
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
    StgHeader_info(frame) = stg_atomically_waiting_frame_info;
    Sp = frame;
    // Fix up the stack in the unregisterised case: the return convention is different.
    R3 = trec; // passing to stmWaitUnblock()
    jump stg_block_stmwait;
  } else {
    // Transaction was not valid: retry immediately
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
    jump stg_ap_v_fast;
  }
}
1470
1471
/* check#: register an invariant closure to be checked when the current
 * transaction commits. */
checkzh_fast
{
  W_ trec, closure;

  /* Args: R1 = invariant closure */
  MAYBE_GC (R1_PTR, checkzh_fast); 

  trec = StgTSO_trec(CurrentTSO);
  closure = R1;
  foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
                                     trec "ptr",
                                     closure "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1487
1488
/* newTVar#: allocate a fresh TVar holding the given initial value. */
newTVarzh_fast
{
  W_ tv;
  W_ new_value;

  /* Args: R1 = initialisation value */

  MAYBE_GC (R1_PTR, newTVarzh_fast); 
  new_value = R1;
  ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
  RET_P(tv);
}
1501
1502
/* readTVar#: read a TVar inside a transaction, recording the read in the
 * current trec via stmReadTVar. */
readTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ result;

  /* Args: R1 = TVar closure */

  MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];

  RET_P(result);
}
1518
/* readTVarIO#: read a TVar outside any transaction (R1 = TVar closure). */
readTVarIOzh_fast
{
    W_ result;

again:
    result = StgTVar_current_value(R1);
    /* Re-read while current_value points at a TREC header -- presumably
       a transaction is mid-commit on this TVar; TODO confirm. */
    if (%INFO_PTR(result) == stg_TREC_HEADER_info) {
        goto again;
    }
    RET_P(result);
}
1530
/* writeTVar#: record a write to a TVar in the current trec via
 * stmWriteTVar. */
writeTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ new_value;
  
  /* Args: R1 = TVar closure */
  /*       R2 = New value    */

  MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  new_value = R2;
  foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1548
1549
1550 /* -----------------------------------------------------------------------------
1551  * MVar primitives
1552  *
1553  * take & putMVar work as follows.  Firstly, an important invariant:
1554  *
1555  *    If the MVar is full, then the blocking queue contains only
1556  *    threads blocked on putMVar, and if the MVar is empty then the
1557  *    blocking queue contains only threads blocked on takeMVar.
1558  *
1559  * takeMvar:
1560  *    MVar empty : then add ourselves to the blocking queue
1561  *    MVar full  : remove the value from the MVar, and
1562  *                 blocking queue empty     : return
1563  *                 blocking queue non-empty : perform the first blocked putMVar
1564  *                                            from the queue, and wake up the
1565  *                                            thread (MVar is now full again)
1566  *
1567  * putMVar is just the dual of the above algorithm.
1568  *
1569  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1570  * the stack of the thread waiting to do the putMVar.  See
1571  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1572  * the stack layout, and the PerformPut and PerformTake macros below.
1573  *
1574  * It is important that a blocked take or put is woken up with the
1575  * take/put already performed, because otherwise there would be a
1576  * small window of vulnerability where the thread could receive an
1577  * exception and never perform its take or put, and we'd end up with a
1578  * deadlock.
1579  *
1580  * -------------------------------------------------------------------------- */
1581
isEmptyMVarzh_fast
{
    /* isEmptyMVar#: return 1 when the MVar holds no value, 0 otherwise.
       args: R1 = MVar closure.  An empty MVar stores the end-of-queue
       marker in its value slot. */
    if (StgMVar_value(R1) != stg_END_TSO_QUEUE_closure) {
        RET_N(0);
    } else {
        RET_N(1);
    }
}
1592
/* newMVar#: allocate an empty MVar (head, tail and value all set to the
 * end-of-queue marker). */
newMVarzh_fast
{
    /* args: none */
    W_ mvar;

    ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
  
    mvar = Hp - SIZEOF_StgMVar + WDS(1);
    SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
        // MVARs start dirty: generation 0 has no mutable list
    StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
    RET_P(mvar);
}
1608
1609
/* Complete a takeMVar on a blocked thread's behalf: overwrite the two
 * words at its Sp so it resumes returning `value` (stack layout set up by
 * stg_block_takemvar; see the MVar comment above / HeapStackCheck.c). */
#define PerformTake(tso, value)                         \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;

/* Complete a putMVar on a blocked thread's behalf: pop its 3-word
 * stg_block_putmvar frame and read out the value it was putting. */
#define PerformPut(tso,lval)                    \
    StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
    lval = W_[StgTSO_sp(tso) - WDS(1)];
1617
/* takeMVar#: take the value from an MVar, blocking if it is empty.
 * If putMVar waiters exist, complete the first one's put so the MVar
 * refills immediately (see the invariant in the comment above). */
takeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif
        
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
    }

    /* If the MVar is empty, put ourselves on its blocking queue,
     * and wait until we're woken up.
     */
    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            foreign "C" setTSOLink(MyCapability() "ptr", 
                                   StgMVar_tail(mvar) "ptr",
                                   CurrentTSO) [];
        }
        StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
        StgTSO_block_info(CurrentTSO)  = mvar;
        // write barrier for throwTo(), which looks at block_info
        // if why_blocked==BlockedOnMVar.
        prim %write_barrier() [];
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgMVar_tail(mvar) = CurrentTSO;
        
        R1 = mvar;
        jump stg_block_takemvar;
  }

  /* we got the value... */
  val = StgMVar_value(mvar);

  if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
  {
      /* There are putMVar(s) waiting... 
       * wake up the first thread on the queue
       */
      ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

      /* actually perform the putMVar for the thread that we just woke up */
      tso = StgMVar_head(mvar);
      PerformPut(tso,StgMVar_value(mvar));

      /* NOTE(review): `flags & TSO_DIRTY == 0` is intended as
       * `(flags & TSO_DIRTY) == 0`; this is correct only if C--'s `&`
       * binds tighter than `==` (unlike C) -- confirm against CmmParse.y
       * or add explicit parentheses. */
      if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
          foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
      }

      ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                            StgMVar_head(mvar) "ptr", 1) [];
      StgMVar_head(mvar) = tso;

      if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
          StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
      }

#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
      SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
      RET_P(val);
  } 
  else
  {
      /* No further putMVars, MVar is now empty */
      StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
 
#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
      SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif

      RET_P(val);
  }
}
1705
1706
/* tryTakeMVar#: non-blocking takeMVar.  Returns (0, NO_FINALIZER_closure)
 * when the MVar is empty, otherwise (1, value). */
tryTakeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */

    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        unlockClosure(mvar, info);
#endif
        /* HACK: we need a pointer to pass back, 
         * so we abuse NO_FINALIZER_closure
         */
        RET_NP(0, stg_NO_FINALIZER_closure);
    }

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are putMVar(s) waiting... 
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));
        /* NOTE(review): intended as `(flags & TSO_DIRTY) == 0`; correct only
         * if C--'s `&` binds tighter than `==` -- confirm or parenthesize. */
        if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }

        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else 
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    
    RET_NP(1, val);
}
1778
1779
/* putMVar#: put a value into an MVar, blocking if it is full.  If
 * takeMVar waiters exist, complete the first one's take directly so the
 * MVar stays empty (dual of takeMVar#). */
putMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;
    val  = R2;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* MVar full: join the blocking queue and wait to be woken. */
    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            foreign "C" setTSOLink(MyCapability() "ptr", 
                                   StgMVar_tail(mvar) "ptr",
                                   CurrentTSO) [];
        }
        StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
        StgTSO_block_info(CurrentTSO)  = mvar;
        // write barrier for throwTo(), which looks at block_info
        // if why_blocked==BlockedOnMVar.
        prim %write_barrier() [];
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgMVar_tail(mvar) = CurrentTSO;
        
        R1 = mvar;
        R2 = val;
        jump stg_block_putmvar;
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, val);
        /* NOTE(review): intended as `(flags & TSO_DIRTY) == 0`; correct only
         * if C--'s `&` binds tighter than `==` -- confirm or parenthesize. */
        if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }
      
        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = val;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    
    /* ToDo: yield afterward for better communication performance? */
}
1862
1863
/* tryPutMVar#: non-blocking putMVar.  Returns 0 when the MVar is already
 * full, otherwise performs the put and returns 1. */
tryPutMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        unlockClosure(mvar, info);
#endif
        RET_N(0);
    }
  
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
        
        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        /* NOTE(review): intended as `(flags & TSO_DIRTY) == 0`; correct only
         * if C--'s `&` binds tighter than `==` -- confirm or parenthesize. */
        if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }
      
        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    
    RET_N(1);
    /* ToDo: yield afterward for better communication performance? */
}
1930
1931
1932 /* -----------------------------------------------------------------------------
1933    Stable pointer primitives
1934    -------------------------------------------------------------------------  */
1935
/* makeStableName# : return the (interned) StableName object for the
 * closure in R1, allocating a fresh StgStableName only on the first
 * request for this heap object.
 */
makeStableNamezh_fast
{
    W_ index, sn_obj;

    // Reserve space for a StgStableName up front; R1 is a pointer and
    // must be declared live in case this triggers a GC.
    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
  
    (index) = foreign "C" lookupStableName(R1 "ptr") [];

    /* Is there already a StableName for this heap object?
     *  stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        // No: build the StgStableName in the space just allocated and
        // cache it in the table entry so later calls return the same object.
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        // Yes: return the existing StableName object (the fresh
        // allocation is simply left unused).
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }
    
    RET_P(sn_obj);
}
1958
1959
/* makeStablePtr# : register the closure in R1 with the stable-pointer
 * table and return the resulting stable pointer.  May GC, hence the
 * MAYBE_GC with R1 declared live.
 */
makeStablePtrzh_fast
{
    /* Args: R1 = a */
    W_ sp;
    MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
    ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
    RET_N(sp);
}
1968
/* deRefStablePtr# : look up the closure for a stable pointer.  A plain
 * indexed read of stable_ptr_table -- no allocation, no GC check needed.
 */
deRefStablePtrzh_fast
{
    /* Args: R1 = the stable ptr */
    W_ r, sp;
    sp = R1;
    r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
    RET_P(r);
}
1977
1978 /* -----------------------------------------------------------------------------
1979    Bytecode object primitives
1980    -------------------------------------------------------------------------  */
1981
/* newBCO# : allocate a byte-code object.  The bitmap array (R5, an
 * ArrWords) is copied inline into the tail of the BCO, so the BCO's
 * total size is SIZEOF_StgBCO plus the bitmap's word count.
 */
newBCOzh_fast
{
    /* R1 = instrs
       R2 = literals
       R3 = ptrs
       R4 = arity
       R5 = bitmap array
    */
    W_ bco, bitmap_arr, bytes, words;
    
    bitmap_arr = R5;

    words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
    bytes = WDS(words);

    // All four pointer arguments must be kept live across a possible GC.
    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );

    bco = Hp - bytes + WDS(1);
    SET_HDR(bco, stg_BCO_info, W_[CCCS]);
    
    StgBCO_instrs(bco)     = R1;
    StgBCO_literals(bco)   = R2;
    StgBCO_ptrs(bco)       = R3;
    StgBCO_arity(bco)      = HALF_W_(R4);
    StgBCO_size(bco)       = HALF_W_(words);
    
    // Copy the arity/bitmap info into the BCO
    // (goto-based loop: C-- has no structured loop syntax).
    W_ i;
    i = 0;
for:
    if (i < StgArrWords_words(bitmap_arr)) {
        StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
        i = i + 1;
        goto for;
    }
    
    RET_P(bco);
}
2020
2021
/* mkApUpd0# : wrap a zero-arity BCO in an updatable AP thunk. */
mkApUpd0zh_fast
{
    // R1 = the BCO# for the AP
    //  
    W_ ap;

    // This function is *only* used to wrap zero-arity BCOs in an
    // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
    // saturated and always points directly to a FUN or BCO.
    ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
           StgBCO_arity(R1) == HALF_W_(0));

    // Heap check + ticky/profiling accounting for the new AP.
    HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
    TICK_ALLOC_UP_THK(0, 0);
    CCCS_ALLOC(SIZEOF_StgAP);

    ap = Hp - SIZEOF_StgAP + WDS(1);
    SET_HDR(ap, stg_AP_info, W_[CCCS]);
    
    // Zero arguments; the AP's function field is the BCO itself.
    StgAP_n_args(ap) = HALF_W_(0);
    StgAP_fun(ap) = R1;
    
    RET_P(ap);
}
2046
/* unpackClosure# : decompose a closure into (info pointer, array of its
 * pointer fields, byte array of its non-pointer words).  Returned via
 * RET_NPP(info, ptrs_arr, nptrs_arr).
 */
unpackClosurezh_fast
{
/* args: R1 = closure to analyze */
// TODO: Consider the absence of ptrs or nonptrs as a special case ?

    W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
    info  = %GET_STD_INFO(UNTAG(R1));

    // Some closures have non-standard layout, so we omit those here.
    W_ type;
    type = TO_W_(%INFO_TYPE(info));
    switch [0 .. N_CLOSURE_TYPES] type {
    case THUNK_SELECTOR : {
        // Selectors are reported as exactly one pointer, no non-pointers.
        ptrs = 1;
        nptrs = 0;
        goto out;
    }
    case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
         THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
        // Non-standard layouts: report an empty payload.
        ptrs = 0;
        nptrs = 0;
        goto out;
    }
    default: {
        // Standard layout: field counts come straight from the info table.
        ptrs  = TO_W_(%INFO_PTRS(info)); 
        nptrs = TO_W_(%INFO_NPTRS(info));
        goto out;
    }}
out:

    W_ ptrs_arr_sz, nptrs_arr_sz;
    nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
    ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);

    // Allocate both result arrays in one request; R1 live across GC.
    ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);

    W_ clos;
    clos = UNTAG(R1);

    // Layout at the top of the nursery: [ptrs_arr][nptrs_arr] <- Hp.
    ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
    nptrs_arr = Hp - nptrs_arr_sz + WDS(1);

    // Copy the pointer fields (payload words 0 .. ptrs-1).
    SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
    p = 0;
for:
    if(p < ptrs) {
         W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
         p = p + 1;
         goto for;
    }
    
    // Copy the non-pointer words (payload words ptrs .. ptrs+nptrs-1).
    SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(nptrs_arr) = nptrs;
    p = 0;
for2:
    if(p < nptrs) {
         W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
         p = p + 1;
         goto for2;
    }
    RET_NPP(info, ptrs_arr, nptrs_arr);
}
2110
2111 /* -----------------------------------------------------------------------------
2112    Thread I/O blocking primitives
2113    -------------------------------------------------------------------------- */
2114
/* Add a thread to the end of the blocked queue. (C-- version of the C
 * macro in Schedule.h).
 *
 * The queue is a singly-linked list through StgTSO__link, with its head
 * and tail kept in blocked_queue_hd / blocked_queue_tl.  The tail link
 * is written via setTSOLink rather than a direct store -- presumably so
 * the GC write barrier is applied; confirm against Schedule.h.  Every
 * use of this macro in this file is on a non-THREADED_RTS code path.
 */
#define APPEND_TO_BLOCKED_QUEUE(tso)                    \
    ASSERT(StgTSO__link(tso) == END_TSO_QUEUE);         \
    if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
      W_[blocked_queue_hd] = tso;                       \
    } else {                                            \
      foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \
    }                                                   \
    W_[blocked_queue_tl] = tso;
2126
/* waitRead# : block the current thread until the file descriptor in R1
 * is ready for reading.  Not supported on the threaded RTS (barfs).
 */
waitReadzh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitRead# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
    // block_info records which fd the thread is waiting on.
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2143
/* waitWrite# : block the current thread until the file descriptor in R1
 * is ready for writing.  Not supported on the threaded RTS (barfs).
 */
waitWritezh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitWrite# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
    // block_info records which fd the thread is waiting on.
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2160
2161
/* delay# : block the current thread for (at least) R1 microseconds.
 * Not supported on the threaded RTS (barfs).  On Windows the delay is
 * turned into an async request for the I/O manager; elsewhere the
 * thread is inserted into the time-ordered sleeping_queue.
 */
STRING(stg_delayzh_malloc_str, "delayzh_fast")
delayzh_fast
{
#ifdef mingw32_HOST_OS
    W_ ares;
    CInt reqID;
#else
    W_ t, prev, target;
#endif

#ifdef THREADED_RTS
    foreign "C" barf("delay# on threaded RTS") never returns;
#else

    /* args: R1 (microsecond delay amount) */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;

#ifdef mingw32_HOST_OS

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_delayzh_malloc_str);
    (reqID) = foreign "C" addDelayRequest(R1);
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;

    /* Having all async-blocked threads reside on the blocked_queue
     * simplifies matters, so change the status to BlockedOnDoProc and
     * put the delayed thread on the blocked_queue.
     */
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async_void;

#else

    W_ time;
    W_ divisor;
    (time) = foreign "C" getourtimeofday() [R1];
    // divisor converts the microsecond delay into timer ticks;
    // tickInterval is presumably in milliseconds (falls back to 50
    // when unset) -- TODO confirm units against RtsFlags.
    divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
    if (divisor == 0) {
        divisor = 50;
    }
    divisor = divisor * 1000;
    target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
           + time + 1; /* Add 1 as getourtimeofday rounds down */
    StgTSO_block_info(CurrentTSO) = target;

    /* Insert the new thread in the sleeping queue, which is kept
     * sorted by wake-up time (stored in block_info). */
    prev = NULL;
    t = W_[sleeping_queue];
while:
    if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
        prev = t;
        t = StgTSO__link(t);
        goto while;
    }

    StgTSO__link(CurrentTSO) = t;
    if (prev == NULL) {
        W_[sleeping_queue] = CurrentTSO;
    } else {
        // setTSOLink rather than a direct store -- presumably for the
        // GC write barrier; see APPEND_TO_BLOCKED_QUEUE above.
        foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) [];
    }
    jump stg_block_noregs;
#endif
#endif /* !THREADED_RTS */
}
2233
2234
2235 #ifdef mingw32_HOST_OS
/* asyncRead# : issue an asynchronous read request (Windows only) and
 * block the current thread on it.  Not supported on the threaded RTS.
 */
STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
asyncReadzh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncRead# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncReadzh_malloc_str)
                        [R1,R2,R3,R4];
    (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
    // The result record is stashed in block_info for the I/O manager to
    // fill in when the request completes.
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2263
/* asyncWrite# : issue an asynchronous write request (Windows only) and
 * block the current thread on it.  Not supported on the threaded RTS.
 */
STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
asyncWritezh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncWrite# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncWritezh_malloc_str)
                        [R1,R2,R3,R4];
    (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];

    // The result record is stashed in block_info for the I/O manager to
    // fill in when the request completes.
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2291
/* asyncDoProc# : run a procedure asynchronously via the Windows I/O
 * manager and block the current thread until it completes.  Not
 * supported on the threaded RTS.
 */
STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
asyncDoProczh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
#else

    /* args: R1 = proc, R2 = param */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncDoProczh_malloc_str) 
                                [R1,R2];
    (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
    // The result record is stashed in block_info for the I/O manager to
    // fill in when the request completes.
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO) = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2319 #endif
2320
2321 // noDuplicate# tries to ensure that none of the thunks under
2322 // evaluation by the current thread are also under evaluation by
2323 // another thread.  It relies on *both* threads doing noDuplicate#;
2324 // the second one will get blocked if they are duplicating some work.
noDuplicatezh_fast
{
    // threadPaused walks the thread's stack, so the TSO must first be
    // brought into a consistent state.
    SAVE_THREAD_STATE();
    ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
    foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
    
    if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
        // threadPaused can leave the thread marked killed; finish it
        // instead of resuming.
        jump stg_threadFinished;
    } else {
        LOAD_THREAD_STATE();
        ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
        jump %ENTRY_CODE(Sp(0));
    }
}
2339
/* getApStackVal# : fetch payload word `offset` from an AP_STACK
 * closure.  Returns (1, value) on success, or (0, the original
 * closure) when R1 is not an AP_STACK.
 */
getApStackValzh_fast
{
   W_ ap_stack, offset, val, ok;

   /* args: R1 = AP_STACK, R2 = offset */
   ap_stack = R1;
   offset   = R2;

   if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
        ok = 1;
        val = StgAP_STACK_payload(ap_stack,offset); 
   } else {
        // Not an AP_STACK: signal failure and hand the closure back.
        ok = 0;
        val = R1;
   }
   RET_NP(ok,val);
}
2357
2358 /* -----------------------------------------------------------------------------
2359    Misc. primitives
2360    -------------------------------------------------------------------------- */
2361
2362 // Write the cost center stack of the first argument on stderr; return
2363 // the second.  Possibly only makes sense for already evaluated
2364 // things?
traceCcszh_fast
{
    W_ ccs;

#ifdef PROFILING
    // Print the cost-centre stack from R1's header to stderr.
    // R2 must stay live across the foreign call.
    ccs = StgHeader_ccs(UNTAG(R1));
    foreign "C" fprintCCS_stderr(ccs "ptr") [R2];
#endif

    // Return the second argument by entering it.
    R1 = R2;
    ENTER();
}
2377
/* getSpark# : try to take a spark from the local spark pool.  Returns
 * (1, spark) on success; otherwise (0, False) where the False closure
 * is only a placeholder payload.
 */
getSparkzh_fast
{
   W_ spark;

#ifndef THREADED_RTS
   // Sparks are only meaningful on the threaded RTS.
   RET_NP(0,ghczmprim_GHCziBool_False_closure);
#else
   (spark) = foreign "C" findSpark(MyCapability());
   if (spark != 0) {
      RET_NP(1,spark);
   } else {
      RET_NP(0,ghczmprim_GHCziBool_False_closure);
   }
#endif
}