FIX #1364: added support for C finalizers that run as soon as the value is no longer...
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 #ifdef __PIC__
31 #ifndef mingw32_HOST_OS
32 import __gmpz_init;
33 import __gmpz_add;
34 import __gmpz_sub;
35 import __gmpz_mul;
36 import __gmpz_gcd;
37 import __gmpn_gcd_1;
38 import __gmpn_cmp;
39 import __gmpz_tdiv_q;
40 import __gmpz_tdiv_r;
41 import __gmpz_tdiv_qr;
42 import __gmpz_fdiv_qr;
43 import __gmpz_divexact;
44 import __gmpz_and;
45 import __gmpz_xor;
46 import __gmpz_ior;
47 import __gmpz_com;
48 #endif
49 import pthread_mutex_lock;
50 import pthread_mutex_unlock;
51 #endif
52 import base_ControlziExceptionziBase_nestedAtomically_closure;
53 import EnterCriticalSection;
54 import LeaveCriticalSection;
55 import ghczmprim_GHCziBool_False_closure;
56
57 /*-----------------------------------------------------------------------------
58   Array Primitives
59
60   Basically just new*Array - the others are all inline macros.
61
62   The size arg is always passed in R1, and the result returned in R1.
63
64   The slow entry point is for returning from a heap check, the saved
65   size argument must be re-loaded from the stack.
66   -------------------------------------------------------------------------- */
67
68 /* for objects that are *less* than the size of a word, make sure we
69  * round up to the nearest word for the size of the array.
70  */
71
/* Allocate a fresh mutable byte array (ARR_WORDS closure).
 * Arg: R1 = requested size in bytes (rounded up to whole words).
 * Returns: pointer to the new array.
 */
newByteArrayzh_fast
{
    W_ words, payload_words, n, p;
    MAYBE_GC(NO_PTRS,newByteArrayzh_fast);   // may GC first; no pointer args to save
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    // allocate from this capability's local nursery
    ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
85
/* Allocate a pinned (non-moving) mutable byte array whose *payload* is
 * 8-byte aligned.
 * Arg: R1 = requested size in bytes.
 * Returns: pointer to the new ARR_WORDS closure.
 */
newPinnedByteArrayzh_fast
{
    W_ words, payload_words, n, p;

    MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);

    // We want an 8-byte aligned array.  allocatePinned() gives us
    // 8-byte aligned memory by default, but we want to align the
    // *goods* inside the ArrWords object, so we have to check the
    // size of the ArrWords header and adjust our size accordingly.
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    if ((SIZEOF_StgArrWords & 7) != 0) {
        words = words + 1;    // one extra word of slack for re-alignment below
    }

    ("ptr" p) = foreign "C" allocatePinned(words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    // Again, if the ArrWords header isn't a multiple of 8 bytes, we
    // have to push the object forward one word so that the goods
    // fall on an 8-byte boundary.
    if ((SIZEOF_StgArrWords & 7) != 0) {
        p = p + WDS(1);
    }

    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
117
/* Allocate a new mutable array of pointers, every slot filled with the
 * same initial value.
 * Args: R1 = number of elements, R2 = initialisation value (a pointer).
 * Returns: pointer to the new MUT_ARR_PTRS closure (born DIRTY).
 */
newArrayzh_fast
{
    W_ words, n, init, arr, p;
    /* Args: R1 = words, R2 = initialisation value */

    n = R1;
    MAYBE_GC(R2_PTR,newArrayzh_fast);   // R2 is a pointer and must survive a GC

    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
    ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(arr) = n;

    // Initialise all elements of the array with the value in R2
    init = R2;
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + WDS(words)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    RET_P(arr);
}
145
/* Turn a frozen array back into a mutable one in place.
 * Arg: R1 = the array.  Returns R1, now tagged MUT_ARR_PTRS_DIRTY.
 */
unsafeThawArrayzh_fast
{
  // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
  //
  // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
  // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
  // it on the mutable list for the GC to remove (removing something from
  // the mutable list is not easy, because the mut_list is only singly-linked).
  // 
  // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
  // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
  // to indicate that it is still on the mutable list.
  //
  // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
  // either it is on a mut_list, or it isn't.  We adopt the convention that
  // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
  // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
  // we put it on the mutable list more than once, but it would get scavenged
  // multiple times during GC, which would be unnecessarily slow.
  //
  if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        // not currently on a mut_list: add it
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(R1, R1);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        RET_P(R1);
  } else {
        // already on a mut_list: just flip the info pointer
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        RET_P(R1);
  }
}
176
177 /* -----------------------------------------------------------------------------
178    MutVar primitives
179    -------------------------------------------------------------------------- */
180
/* Allocate a new MutVar# holding the given initial value.
 * Arg: R1 = initialisation value (a pointer).
 * Returns: pointer to the new MUT_VAR closure (born DIRTY).
 */
newMutVarzh_fast
{
    W_ mv;
    /* Args: R1 = initialisation value */

    ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);

    // ALLOC_PRIM bumps Hp; carve the closure out of the space just allocated
    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
    StgMutVar_var(mv) = R1;
    
    RET_P(mv);
}
194
/* atomicModifyMutVar#: atomically replace the contents x of a MutVar#
 * with a thunk for (sel_0 (f x)) and return a thunk for (sel_1 (f x)).
 * Args: R1 = the MutVar#, R2 = the function f.
 * Under THREADED_RTS the swap is done with a CAS retry loop, so no lock
 * is taken and f is not evaluated here -- only thunks are built.
 */
atomicModifyMutVarzh_fast
{
    W_ mv, f, z, x, y, r, h;
    /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */

    /* If x is the current contents of the MutVar#, then 
       We want to make the new contents point to

         (sel_0 (f x))
 
       and the return value is
         
         (sel_1 (f x))

        obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */

// Thunk sizes are padded up to MIN_UPD_SIZE payload words where needed.
#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

   // one heap check covers all three thunks
   HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);

   mv = R1;
   f = R2;

   // z = f `ap` x  (x filled in just before the CAS, below)
   TICK_ALLOC_THUNK_2();
   CCCS_ALLOC(THUNK_2_SIZE);
   z = Hp - THUNK_2_SIZE + WDS(1);
   SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(z);
   StgThunk_payload(z,0) = f;

   // y = sel_0 z  (the new MutVar contents)
   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   y = z - THUNK_1_SIZE;
   SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(y);
   StgThunk_payload(y,0) = z;

   // r = sel_1 z  (the value returned to the caller)
   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   r = y - THUNK_1_SIZE;
   SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(r);
   StgThunk_payload(r,0) = z;

 retry:
   // snapshot the current contents and bake it into z before swapping
   x = StgMutVar_var(mv);
   StgThunk_payload(z,1) = x;
#ifdef THREADED_RTS
   // CAS the new thunk in; if another thread raced us, start over
   (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) [];
   if (h != x) { goto retry; }
#else
   StgMutVar_var(mv) = y;
#endif

   // tell the GC the MutVar now points at a new (possibly younger) object
   if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
     foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
   }

   RET_P(r);
}
276
277 /* -----------------------------------------------------------------------------
278    Weak Pointer Primitives
279    -------------------------------------------------------------------------- */
280
// debug-trace format used when the RTS is run with +RTS -Dw
STRING(stg_weak_msg,"New weak pointer at %p\n")

/* Create a new weak pointer with an (optional) Haskell finalizer.
 * Args: R1 = key, R2 = value, R3 = finalizer closure or NULL.
 * Returns: the new Weak# object, linked onto the global weak_ptr_list.
 */
mkWeakzh_fast
{
  /* R1 = key
     R2 = value
     R3 = finalizer (or NULL)
  */
  W_ w;

  if (R3 == NULL) {
    R3 = stg_NO_FINALIZER_closure;
  }

  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, W_[CCCS]);

  // We don't care about cfinalizer here.
  // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
  // something else?

  StgWeak_key(w)        = R1;
  StgWeak_value(w)      = R2;
  StgWeak_finalizer(w)  = R3;
  StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure;

  // push onto the global list of weak pointers, which the GC walks
  StgWeak_link(w)       = W_[weak_ptr_list];
  W_[weak_ptr_list]     = w;

  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

  RET_P(w);
}
316
/* Create a new weak pointer carrying a C finalizer (see FIX #1364).
 * Args: R1 = key, R2 = value, R3 = C finalizer function pointer,
 *       R4 = argument pointer, R5 = has-environment flag (0 or 1),
 *       R6 = environment pointer.
 * The four C-side words are packed into a fresh ArrWords and stored in
 * the weak object's cfinalizer field; runCFinalizer unpacks them in the
 * same order when the key dies (see finalizzeWeakzh_fast).
 * Returns: the new Weak# object, linked onto weak_ptr_list.
 */
mkWeakForeignEnvzh_fast
{
  /* R1 = key
     R2 = value
     R3 = finalizer
     R4 = pointer
     R5 = has environment (0 or 1)
     R6 = environment
  */
  W_ w, payload_words, words, p;

  W_ key, val, fptr, ptr, flag, eptr;

  // stash the args in locals: registers may be clobbered by the calls below
  key  = R1;
  val  = R2;
  fptr = R3;
  ptr  = R4;
  flag = R5;
  eptr = R6;

  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakForeignEnvzh_fast );

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, W_[CCCS]);

  // a 4-word ArrWords holding (fn ptr, arg, env, flag) for the C finalizer
  payload_words = 4;
  words         = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
  ("ptr" p)     = foreign "C" allocateLocal(MyCapability() "ptr", words) [];

  TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
  SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);

  StgArrWords_words(p)     = payload_words;
  StgArrWords_payload(p,0) = fptr;
  StgArrWords_payload(p,1) = ptr;
  StgArrWords_payload(p,2) = eptr;
  StgArrWords_payload(p,3) = flag;

  // We don't care about the value here.
  // Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else?

  StgWeak_key(w)        = key;
  StgWeak_value(w)      = val;
  StgWeak_finalizer(w)  = stg_NO_FINALIZER_closure;
  StgWeak_cfinalizer(w) = p;

  StgWeak_link(w)   = W_[weak_ptr_list];
  W_[weak_ptr_list] = w;

  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

  RET_P(w);
}
370
/* Kill a weak pointer and retrieve its finalizer (finalizeWeak#).
 * Arg: R1 = the Weak# object.
 * Effects: marks the object DEAD_WEAK and immediately runs the C
 * finalizer, if any.
 * Returns (# flag, finalizer #): flag==1 with the Haskell finalizer if
 * one was attached, otherwise flag==0 and stg_NO_FINALIZER_closure.
 */
finalizzeWeakzh_fast
{
  /* R1 = weak ptr
   */
  W_ w, f, arr;

  w = R1;

  // already dead?
  if (GET_INFO(w) == stg_DEAD_WEAK_info) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  }

  // kill it
#ifdef PROFILING
  // @LDV profiling
  // A weak pointer is inherently used, so we do not need to call
  // LDV_recordDead_FILL_SLOP_DYNAMIC():
  //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
  // or, LDV_recordDead():
  //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
  // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
  // large as weak pointers, so there is no need to fill the slop, either.
  // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
#endif

  //
  // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
  //
  SET_INFO(w,stg_DEAD_WEAK_info);
  LDV_RECORD_CREATE(w);

  // grab the finalizers before overwriting the payload below
  f   = StgWeak_finalizer(w);
  arr = StgWeak_cfinalizer(w);

  StgDeadWeak_link(w) = StgWeak_link(w);

  // run the C finalizer right now, with the (fn, arg, env, flag) words
  // packed by mkWeakForeignEnvzh_fast
  if (arr != stg_NO_FINALIZER_closure) {
    foreign "C" runCFinalizer(StgArrWords_payload(arr,0),
                              StgArrWords_payload(arr,1),
                              StgArrWords_payload(arr,2),
                              StgArrWords_payload(arr,3)) [];
  }

  /* return the finalizer */
  if (f == stg_NO_FINALIZER_closure) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  } else {
      RET_NP(1,f);
  }
}
422
/* Dereference a weak pointer (deRefWeak#).
 * Arg: R1 = the Weak# object.
 * Returns (# flag, value #): flag==1 and the stored value if the weak
 * pointer is still alive; otherwise flag==0 and the weak object itself.
 */
deRefWeakzh_fast
{
  W_ weak, alive, result;

  weak = R1;
  if (GET_INFO(weak) != stg_WEAK_info) {
    // dead (or dying): report failure, handing back the weak object
    alive  = 0;
    result = weak;
  } else {
    alive  = 1;
    result = StgWeak_value(weak);
  }
  RET_NP(alive,result);
}
438
439 /* -----------------------------------------------------------------------------
440    Arbitrary-precision Integer operations.
441
442    There are some assumptions in this code that mp_limb_t == W_.  This is
443    the case for all the platforms that GHC supports, currently.
444    -------------------------------------------------------------------------- */
445
/* Convert an Int# into an Integer (sign word + one-limb ByteArray#).
 * Arg: R1 = the Int#.
 * Returns (# s, d #): s is -1/0/1 and d holds the magnitude.
 */
int2Integerzh_fast
{
   /* arguments: R1 = Int# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;
   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   /* mpz_set_si is inlined here, makes things simpler */
   if (%lt(val,0)) { 
        s  = -1;
        Hp(0) = -val;    // store the magnitude, sign carried separately
   } else { 
     if (%gt(val,0)) {
        s = 1;
        Hp(0) = val;
     } else {
        s = 0;           // zero: limb array contents are irrelevant
     }
  }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# 
               #)
   */
   RET_NP(s,p);
}
478
/* Convert a Word# into an Integer (sign word + one-limb ByteArray#).
 * Arg: R1 = the Word#.
 * Returns (# s, d #): s is 0 or 1 (never negative) and d the magnitude.
 */
word2Integerzh_fast
{
   /* arguments: R1 = Word# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   if (val != 0) {
        s = 1;
        W_[Hp] = val;   // the single limb lives in the word at Hp
   } else {
        s = 0;
   }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
505
506
507 /*
508  * 'long long' primops for converting to/from Integers.
509  */
510
511 #ifdef SUPPORT_LONG_LONGS
512
/* Convert an Int64# into an Integer.
 * Arg: L1 = the Int64#.
 * Returns (# s, d #): |s| is the limb count (0, 1 or 2), negative s
 * means a negative number; d holds the magnitude, least significant
 * limb first.  NB. this code assumes 32-bit words (hi/lo split at 32).
 */
int64ToIntegerzh_fast
{
   /* arguments: L1 = Int64# */

   L_ val;
   W_ hi, lo, s, neg, words_needed, p;

   val = L1;
   neg = 0;

   hi = TO_W_(val >> 32);
   lo = TO_W_(val);

   // one limb suffices when the high word is just sign extension
   if ( hi == 0 || (hi == 0xFFFFFFFF && lo != 0) )  {
       // minimum is one word
       words_needed = 1;
   } else { 
       words_needed = 2;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, int64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   // negate a two's-complement pair into sign-magnitude form
   if ( %lt(hi,0) ) {
     neg = 1;
     lo = -lo;
     if(lo == 0) {
       hi = -hi;
     } else {
       hi = -hi - 1;    // borrow from the high word
     }
   }

   if ( words_needed == 2 )  { 
      s = 2;
      Hp(-1) = lo;      // least significant limb first
      Hp(0) = hi;
   } else { 
       if ( lo != 0 ) {
           s = 1;
           Hp(0) = lo;
       } else /* val==0 */  {
           s = 0;
       }
   }
   if ( neg != 0 ) {
        s = -s;         // sign is carried in the size word
   }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
/* Convert a Word64# into an Integer.
 * Arg: L1 = the Word64#.
 * Returns (# s, d #): s is the limb count (0, 1 or 2, never negative);
 * d holds the value, least significant limb first.
 * NB. like int64ToIntegerzh_fast, assumes 32-bit words.
 */
word64ToIntegerzh_fast
{
   /* arguments: L1 = Word64# */

   L_ val;
   W_ hi, lo, s, words_needed, p;

   val = L1;
   hi = TO_W_(val >> 32);
   lo = TO_W_(val);

   if ( hi != 0 ) {
      words_needed = 2;
   } else {
      words_needed = 1;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, word64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   if ( hi != 0 ) { 
     s = 2;
     Hp(-1) = lo;       // least significant limb first
     Hp(0)  = hi;
   } else {
     if ( lo != 0 ) {
        s = 1;
        Hp(0) = lo;
     } else /* val==0 */  {
        s = 0;
     }
   }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
613
614
615
616 #endif /* SUPPORT_LONG_LONGS */
617
/* ToDo: this is shockingly inefficient */

#ifndef THREADED_RTS
// Single-threaded RTS: scratch mpz_t structures for the GMP wrappers
// below live in static storage (only one thread can be in here at once).
section "bss" {
  mp_tmp1:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_tmp2:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_result1:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_result2:
    bits8 [SIZEOF_MP_INT];
}
#endif

#ifdef THREADED_RTS
// Threaded RTS: declare X and point it at the corresponding rX scratch
// field in this capability's StgRegTable, so each capability gets its
// own private mpz_t workspace.
#define FETCH_MP_TEMP(X) \
W_ X; \
X = BaseReg + (OFFSET_StgRegTable_r ## X);
#else
#define FETCH_MP_TEMP(X) /* Nothing */
#endif
649
/* GMP_TAKE2_RET1(name, mp_fun): define an out-of-line primop `name`
 * taking two Integers -- R1/R2 = size and limb-array of the first,
 * R3/R4 = size and limb-array of the second -- and returning one
 * Integer as (# size, ByteArray# #).  The result ByteArray# is
 * recovered by stepping back over the ArrWords header from the limb
 * pointer GMP filled in.  NOTE(review): this relies on GMP's memory
 * functions having been redirected to allocate on the GHC heap (set up
 * elsewhere in the RTS) -- confirm against sm/Storage.
 */
#define GMP_TAKE2_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1, s2;                                                          \
  W_ d1, d2;                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_tmp2);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
  FETCH_MP_TEMP(mp_result2);                                            \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
                                                                        \
  s1 = W_TO_INT(R1);                                                    \
  d1 = R2;                                                              \
  s2 = W_TO_INT(R3);                                                    \
  d2 = R4;                                                              \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
  MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
  MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
  MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
  MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
  MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}
683
/* GMP_TAKE1_RET1(name, mp_fun): like GMP_TAKE2_RET1 but for unary
 * operations -- one Integer in (R1 = size, R2 = limb array), one
 * Integer out as (# size, ByteArray# #).
 */
#define GMP_TAKE1_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1;                                                              \
  W_ d1;                                                                \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR, name);                                               \
                                                                        \
  d1 = R2;                                                              \
  s1 = W_TO_INT(R1);                                                    \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}
710
/* GMP_TAKE2_RET2(name, mp_fun): like GMP_TAKE2_RET1 but for operations
 * producing TWO Integers (e.g. quotRem) -- two Integers in (R1..R4),
 * two Integers out as (# size1, d1, size2, d2 #).
 */
#define GMP_TAKE2_RET2(name,mp_fun)                                                     \
name                                                                                    \
{                                                                                       \
  CInt s1, s2;                                                                          \
  W_ d1, d2;                                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                                               \
  FETCH_MP_TEMP(mp_tmp2);                                                               \
  FETCH_MP_TEMP(mp_result1)                                                             \
  FETCH_MP_TEMP(mp_result2)                                                             \
                                                                                        \
  /* call doYouWantToGC() */                                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
                                                                                        \
  s1 = W_TO_INT(R1);                                                                    \
  d1 = R2;                                                                              \
  s2 = W_TO_INT(R3);                                                                    \
  d2 = R4;                                                                              \
                                                                                        \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
  MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
  MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
  MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
                                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
  foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
                                                                                        \
  /* Perform the operation */                                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
                                                                                        \
  RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
           MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
           TO_W_(MP_INT__mp_size(mp_result2)),                                          \
           MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
}
747
/* Instantiate the wrapper macros above: one out-of-line Integer primop
 * per line, each delegating to the matching GMP mpz function. */
GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)

// two-result variants: truncating quot/rem and flooring div/mod
GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
762
#ifndef THREADED_RTS
// single-threaded scratch word for the mpn-level primops below
section "bss" {
  mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
}
#endif
768
/* Greatest common divisor of two Int#s via GMP's low-level mpn layer.
 * Args: R1, R2 = the two Int#s.  Result parked in R1.
 */
gcdIntzh_fast
{
    /* R1 = the first Int#; R2 = the second Int# */
    W_ r; 
    FETCH_MP_TEMP(mp_tmp_w);

    // __gmpn_gcd_1 wants a limb array, so park R1 in a one-limb buffer
    W_[mp_tmp_w] = R1;
    (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];

    R1 = r;
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
782
783
/* Greatest common divisor of an Integer's magnitude and an Int#.
 * Args: R1 = limb count s1, R2 = limb array d1, R3 = the Int#.
 * Result parked in R1.
 */
gcdIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ s1;
    (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
    R1 = s1;
    
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
794
795
/* Compare an Integer (R1 = size, R2 = limbs) against an Int# (R3).
 * Result in R1: negative, zero or positive as the Integer is <, == or >
 * the Int#.
 */
cmpIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ usize, vsize, v_digit, u_digit;

    usize = R1;
    vsize = 0;
    v_digit = R3;

    // paraphrased from __gmpz_cmp_si() in the GMP sources
    // derive the int's sign/size and magnitude
    if (%gt(v_digit,0)) {
        vsize = 1;
    } else { 
        if (%lt(v_digit,0)) {
            vsize = -1;
            v_digit = -v_digit;
        }
    }

    // different signed sizes decide the comparison outright
    if (usize != vsize) {
        R1 = usize - vsize; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (usize == 0) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    // same sign, one limb each: compare the magnitudes
    u_digit = W_[BYTE_ARR_CTS(R2)];

    if (u_digit == v_digit) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
        R1 = usize; 
    } else {
        R1 = -usize; 
    }

    jump %ENTRY_CODE(Sp(0));
}
840
/* Compare two Integers (R1/R2 and R3/R4 = size/limbs pairs).
 * Result in R1: negative, zero or positive as the first is <, == or >
 * the second.
 */
cmpIntegerzh_fast
{
    /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
    W_ usize, vsize, size, up, vp;
    CInt cmp;

    // paraphrased from __gmpz_cmp() in the GMP sources
    usize = R1;
    vsize = R3;

    // different signed sizes decide the comparison outright
    if (usize != vsize) {
        R1 = usize - vsize; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (usize == 0) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (%lt(usize,0)) { // NB. not <, which is unsigned
        size = -usize;
    } else {
        size = usize;
    }

    up = BYTE_ARR_CTS(R2);
    vp = BYTE_ARR_CTS(R4);

    // same sign and length: compare limb arrays of `size` limbs
    (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];

    if (cmp == 0 :: CInt) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    // flip the magnitude comparison when both numbers are negative
    if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
        R1 = 1;
    } else {
        R1 = (-1); 
    }
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
885
/* Truncate an Integer to an Int#.
 * Args: R1 = signed size s, R2 = limb array d.
 * Result parked in R1: the least-significant limb, negated when s < 0,
 * or 0 when s == 0.
 */
integer2Intzh_fast
{
    W_ sign, result;

    sign = R1;
    if (sign == 0) {
        result = 0;
    } else {
        // low limb of the magnitude sits just past the ArrWords header
        result = W_[R2 + SIZEOF_StgArrWords];
        if (%lt(sign,0)) {
            result = -result;
        }
    }
    /* Result parked in R1, return via info-pointer at TOS */
    R1 = result;
    jump %ENTRY_CODE(Sp(0));
}
904
/* Truncate an Integer to a Word#.
 * Args: R1 = signed size s, R2 = limb array d.
 * Result parked in R1: the least-significant limb, two's-complement
 * negated when s < 0, or 0 when s == 0.
 */
integer2Wordzh_fast
{
  W_ sign, result;

  sign = R1;
  if (sign == 0) {
      result = 0;
  } else {
      // low limb of the magnitude sits just past the ArrWords header
      result = W_[R2 + SIZEOF_StgArrWords];
      if (%lt(sign,0)) {
          result = -result;
      }
  }
  /* Result parked in R1, return via info-pointer at TOS */
  R1 = result;
  jump %ENTRY_CODE(Sp(0));
}
923
/* Decode a Float# into sign/size, mantissa bytes and exponent.
 * Arg: F1 = the Float#.
 * Returns (# exponent :: Int#, mantissa size :: Int#, ByteArray# #).
 * Fix: the function body's opening brace was missing (dropped in
 * transit); restored so the file parses.
 */
decodeFloatzh_fast
{
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);
    
    /* arguments: F1 = Float# */
    arg = F1;
    
    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
    
    /* Be prepared to tell Lennart-coded __decodeFloat
       where mantissa._mp_d can be put (it does not care about the rest) */
    p = Hp - SIZEOF_StgArrWords;
    SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
    StgArrWords_words(p) = 1;
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
    
    /* Perform the operation */
    foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
    
    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
949
/* Decode a Float# directly into two Int#s, with no heap allocation.
 * Arg: F1 = the Float#.
 * Returns (# mantissa :: Int#, exponent :: Int# #).
 * Fixes: restored the missing opening brace (dropped in transit) and
 * removed the unused local `p`.
 */
decodeFloatzuIntzh_fast
{
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);
    
    /* arguments: F1 = Float# */
    arg = F1;
    
    /* Perform the operation */
    foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
    
    /* returns: (Int# (mantissa), Int# (exponent)) */
    RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
}
966
/* Size of a Double's mantissa in bytes, and of the ArrWords cell
 * (header + mantissa) allocated to hold it. */
#define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
#define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)

/* decodeDouble# :: Double# -> (# Int#, Int#, ByteArray# #)
 * Split a Double# into exponent, mantissa sign/size and mantissa limbs.
 *
 * FIX: the opening brace of the body was missing after the label,
 * leaving the definition syntactically unclosed; restored here.
 */
decodeDoublezh_fast
{
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: D1 = Double# */
    arg = D1;

    ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
    
    /* Be prepared to tell Lennart-coded __decodeDouble
       where mantissa.d can be put (it does not care about the rest) */
    p = Hp - ARR_SIZE + WDS(1);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

    /* Perform the operation */
    foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
    
    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
995
/* decodeDouble_2Int# :: Double# -> (# Int#, Word#, Word#, Int# #)
 * Split a Double# into mantissa sign, high/low mantissa words and
 * exponent; no allocation.
 *
 * FIX: the opening brace of the body was missing after the label,
 * leaving the definition syntactically unclosed; restored here.
 */
decodeDoublezu2Intzh_fast
{
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp2);
    FETCH_MP_TEMP(mp_result1);
    FETCH_MP_TEMP(mp_result2);

    /* arguments: D1 = Double# */
    arg = D1;

    /* Perform the operation */
    foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
                                    mp_result1 "ptr", mp_result2 "ptr",
                                    arg) [];

    /* returns:
       (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
    RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
}
1017
1018 /* -----------------------------------------------------------------------------
1019  * Concurrency primitives
1020  * -------------------------------------------------------------------------- */
1021
/* fork# :: a -> State# RealWorld -> (# State# RealWorld, ThreadId# #)
 * Create a new IO thread running the given closure, schedule it on the
 * current capability, and return its ThreadId#.
 */
forkzh_fast
{
  /* args: R1 = closure to spark */

  MAYBE_GC(R1_PTR, forkzh_fast);

  W_ closure;
  W_ threadid;
  closure = R1;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
                                RtsFlags_GcFlags_initialStkSize(RtsFlags), 
                                closure "ptr") [];

  /* start blocked if the current thread is blocked */
  /* (the child inherits the parent's TSO_BLOCKEX/TSO_INTERRUPTIBLE
     async-exception masking flags) */
  StgTSO_flags(threadid) = 
     StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
                                (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));

  foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];

  // switch at the earliest opportunity
  Capability_context_switch(MyCapability()) = 1 :: CInt;
  
  RET_P(threadid);
}
1048
/* forkOn# :: Int# -> a -> State# RealWorld -> (# State# RealWorld, ThreadId# #)
 * Like fork#, but pin the new thread to the given capability number.
 */
forkOnzh_fast
{
  /* args: R1 = cpu, R2 = closure to spark */

  MAYBE_GC(R2_PTR, forkOnzh_fast);

  W_ cpu;
  W_ closure;
  W_ threadid;
  cpu = R1;
  closure = R2;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
                                RtsFlags_GcFlags_initialStkSize(RtsFlags), 
                                closure "ptr") [];

  /* start blocked if the current thread is blocked */
  /* (the child inherits the parent's async-exception masking flags) */
  StgTSO_flags(threadid) = 
     StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
                                (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));

  /* pin the new thread to capability 'cpu' */
  foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];

  // switch at the earliest opportunity
  Capability_context_switch(MyCapability()) = 1 :: CInt;
  
  RET_P(threadid);
}
1077
/* yield# :: State# RealWorld -> State# RealWorld
 * Yield the current thread; all the work happens in stg_yield_noregs. */
yieldzh_fast
{
  jump stg_yield_noregs;
}
1082
/* myThreadId# :: State# RealWorld -> (# State# RealWorld, ThreadId# #)
 * The ThreadId# is just a pointer to the current TSO. */
myThreadIdzh_fast
{
  /* no args. */
  RET_P(CurrentTSO);
}
1088
/* labelThread# :: ThreadId# -> Addr# -> State# RealWorld -> State# RealWorld
 * Attach a debug label (C string) to a thread.  A no-op unless the RTS
 * was built with DEBUG. */
labelThreadzh_fast
{
  /* args: 
        R1 = ThreadId#
        R2 = Addr# */
#ifdef DEBUG
  foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
#endif
  jump %ENTRY_CODE(Sp(0));
}
1099
/* isCurrentThreadBound# :: State# RealWorld -> (# State# RealWorld, Int# #)
 * 1 if the current thread is bound to an OS thread, else 0. */
isCurrentThreadBoundzh_fast
{
  /* no args */
  W_ r;
  (r) = foreign "C" isThreadBound(CurrentTSO) [];
  RET_N(r);
}
1107
/* threadStatus# :: ThreadId# -> State# RealWorld -> (# State# RealWorld, Int# #)
 * Status code for a thread: 16 = completed, 17 = killed, otherwise the
 * TSO's why_blocked value.  Codes must match GHC.Conc.threadStatus.
 */
threadStatuszh_fast
{
    /* args: R1 :: ThreadId# */
    W_ tso;
    W_ why_blocked;
    W_ what_next;
    W_ ret;

    tso = R1;
    loop:
      /* follow forwarding left behind when a TSO was relocated */
      if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
          tso = StgTSO__link(tso);
          goto loop;
      }

    what_next   = TO_W_(StgTSO_what_next(tso));
    why_blocked = TO_W_(StgTSO_why_blocked(tso));
    // Note: these two reads are not atomic, so they might end up
    // being inconsistent.  It doesn't matter, since we
    // only return one or the other.  If we wanted to return the
    // contents of block_info too, then we'd have to do some synchronisation.

    if (what_next == ThreadComplete) {
        ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
    } else {
        if (what_next == ThreadKilled) {
            ret = 17;
        } else {
            ret = why_blocked;
        }
    }
    RET_N(ret);
}
1141
1142 /* -----------------------------------------------------------------------------
1143  * TVar primitives
1144  * -------------------------------------------------------------------------- */
1145
#define SP_OFF 0

// Catch retry frame ------------------------------------------------------------

/* Return frame pushed by catchRetry#.  Entered when the code running
 * inside it finishes without retrying: commit the nested transaction;
 * if the commit fails because of a conflict, open a fresh nested
 * transaction and re-run whichever branch was executing.
 */
INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  W_ unused3, P_ unused4, P_ unused5)
{
   W_ r, frame, trec, outer;

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
   (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
   if (r != 0) {
     /* Succeeded (either first branch or second branch) */
     StgTSO_trec(CurrentTSO) = outer;
     Sp = Sp + SIZEOF_StgCatchRetryFrame;
     jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
     /* Did not commit: re-execute */
     W_ new_trec;
     ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
     StgTSO_trec(CurrentTSO) = new_trec;
     if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
       R1 = StgCatchRetryFrame_alt_code(frame);
     } else {
       R1 = StgCatchRetryFrame_first_code(frame);
     }
     jump stg_ap_v_fast;
   }
}
1180
1181
1182 // Atomically frame ------------------------------------------------------------
1183
/* Return frame pushed by atomically#.  Entered when the transaction body
 * (or an invariant check) finishes.  First visit: collect the invariants
 * to check.  Subsequent visits: abort the just-finished invariant check
 * and move to the next.  When no invariants remain, try to commit;
 * on conflict, restart the whole transaction from scratch.
 */
INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ unused3, P_ unused4)
{
  W_ frame, trec, valid, next_invariant, q, outer;

  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (outer == NO_TREC) {
    /* First time back at the atomically frame -- pick up invariants */
    ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;

  } else {
    /* Second/subsequent time back at the atomically frame -- abort the
     * tx that's checking the invariant and move on to the next one */
    StgTSO_trec(CurrentTSO) = outer;
    q = StgAtomicallyFrame_next_invariant_to_check(frame);
    StgInvariantCheckQueue_my_execution(q) = trec;
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    /* Don't free trec -- it's linked from q and will be stashed in the
     * invariant if we eventually commit. */
    q = StgInvariantCheckQueue_next_queue_entry(q);
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    trec = outer;
  }

  q = StgAtomicallyFrame_next_invariant_to_check(frame);

  if (q != END_INVARIANT_CHECK_QUEUE) {
    /* We can't commit yet: another invariant to check */
    /* run the invariant's code in its own nested transaction */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;

    next_invariant = StgInvariantCheckQueue_invariant(q);
    R1 = StgAtomicInvariant_code(next_invariant);
    jump stg_ap_v_fast;

  } else {

    /* We've got no more invariants to check, try to commit */
    (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
    if (valid != 0) {
      /* Transaction was valid: commit succeeded */
      StgTSO_trec(CurrentTSO) = NO_TREC;
      Sp = Sp + SIZEOF_StgAtomicallyFrame;
      jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
      /* Transaction was not valid: try again */
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
      R1 = StgAtomicallyFrame_code(frame);
      jump stg_ap_v_fast;
    }
  }
}
1245
/* Variant of the atomically frame used while the TSO is blocked waiting
 * on TVars after a retry.  On wake-up, decide whether the previous
 * attempt is still valid (keep waiting) or re-run the transaction.
 */
INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ unused3, P_ unused4)
{
  W_ frame, trec, valid;

  frame = Sp;

  /* The TSO is currently waiting: should we stop waiting? */
  (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
  if (valid != 0) {
    /* Previous attempt is still valid: no point trying again yet */
    jump stg_block_noregs;
  } else {
    /* Previous attempt is no longer valid: try again */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    /* flip the frame back to the normal atomically frame before re-running */
    StgHeader_info(frame) = stg_atomically_frame_info;
    R1 = StgAtomicallyFrame_code(frame);
    jump stg_ap_v_fast;
  }
}
1270
// STM catch frame --------------------------------------------------------------

#define SP_OFF 0

/* Catch frames are very similar to update frames, but when entering
 * one we just pop the frame off the stack and perform the correct
 * kind of return to the activation record underneath us on the stack.
 */

/* FIX: return via %ENTRY_CODE(Sp(SP_OFF)) as the other STM frames
 * (stg_catch_retry_frame, stg_atomically_frame) do.  Sp(SP_OFF) holds an
 * info pointer, which is only directly jumpable when tables are next to
 * code; %ENTRY_CODE performs the conversion on all builds. */
INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ unused3, P_ unused4)
   {
      W_ r, frame, trec, outer;
      frame = Sp;
      trec = StgTSO_trec(CurrentTSO);
      ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
      (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
      if (r != 0) {
        /* Commit succeeded */
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchSTMFrame;
        jump %ENTRY_CODE(Sp(SP_OFF));
      } else {
        /* Commit failed: re-run the protected code in a fresh
         * nested transaction */
        W_ new_trec;
        ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
        StgTSO_trec(CurrentTSO) = new_trec;
        R1 = StgCatchSTMFrame_code(frame);
        jump stg_ap_v_fast;
      }
   }
1305
1306
1307 // Primop definition ------------------------------------------------------------
1308
/* atomically# :: STM a -> State# RealWorld -> (# State# RealWorld, a #)
 * Push an atomically frame, open a top-level transaction, and run the
 * STM action.  Nesting atomically is an error and raises an exception.
 */
atomicallyzh_fast
{
  W_ frame;
  W_ old_trec;
  W_ new_trec;
  
  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR, atomicallyzh_fast); 

  /* Args: R1 = m :: STM a */
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);

  old_trec = StgTSO_trec(CurrentTSO);

  /* Nested transactions are not allowed; raise an exception */
  if (old_trec != NO_TREC) {
     R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
     jump raisezh_fast;
  }

  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
  StgAtomicallyFrame_code(frame) = R1;
  StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;

  /* Start the memory transaction */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1344
1345
/* catchSTM# :: STM a -> (Exception -> STM a) -> State# RealWorld -> (# State# RealWorld, a #)
 * Push a catch-stm frame and run the protected STM action inside a
 * nested transaction so its effects can be rolled back if it throws.
 */
catchSTMzh_fast
{
  W_ frame;
  
  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */
  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
  StgCatchSTMFrame_handler(frame) = R2;
  StgCatchSTMFrame_code(frame) = R1;

  /* Start a nested transaction to run the body of the try block in */
  W_ cur_trec;  
  W_ new_trec;
  cur_trec = StgTSO_trec(CurrentTSO);
  /* NOTE(review): this is the only stmStartTransaction call in this file
   * with no register-liveness annotation (cf. the "[R1]" on the call in
   * atomicallyzh_fast); R1 is still needed by stg_ap_v_fast below --
   * confirm the omitted list means "save all registers". */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1372
1373
/* catchRetry# :: STM a -> STM a -> State# RealWorld -> (# State# RealWorld, a #)
 * Push a catch-retry frame and run the first alternative in a nested
 * transaction; if it calls retry#, the frame switches to the second.
 */
catchRetryzh_fast
{
  W_ frame;
  W_ new_trec;
  W_ trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 

  /* Args: R1 :: STM a */
  /* Args: R2 :: STM a */
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;
  
  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1404
1405
/* retry# :: State# RealWorld -> (# State# RealWorld, a #)
 * Abandon the current transaction attempt.  Walk the stack to the
 * nearest CATCH_RETRY_FRAME (switch to its alternative, or propagate)
 * or ATOMICALLY_FRAME (wait on the TVars read so far).
 */
retryzh_fast
{
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

  MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
  StgTSO_sp(CurrentTSO) = Sp;
  (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
  Sp = StgTSO_sp(CurrentTSO);
  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
    // Abort the transaction attempting the current branch
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    // unary '!' binds first, so this reads (!x) != 0, i.e. x == 0
    if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
      // Retry in the first branch: try the alternative
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
      R1 = StgCatchRetryFrame_alt_code(frame);
      jump stg_ap_v_fast;
    } else {
      // Retry in the alternative code: propagate the retry
      StgTSO_trec(CurrentTSO) = outer;
      Sp = Sp + SIZEOF_StgCatchRetryFrame;
      goto retry_pop_stack;
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait 
  ASSERT(frame_type == ATOMICALLY_FRAME);
  if (outer != NO_TREC) {
    // We called retry while checking invariants, so abort the current
    // invariant check (merging its TVar accesses into the parents read
    // set so we'll wait on them)
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    trec = outer;
    StgTSO_trec(CurrentTSO) = trec;
    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
  }
  ASSERT(outer == NO_TREC);

  (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
  if (r != 0) {
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
    StgHeader_info(frame) = stg_atomically_waiting_frame_info;
    Sp = frame;
    // Fix up the stack in the unregisterised case: the return convention is different.
    R3 = trec; // passing to stmWaitUnblock()
    jump stg_block_stmwait;
  } else {
    // Transaction was not valid: retry immediately
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
    jump stg_ap_v_fast;
  }
}
1477
1478
/* check# :: STM a -> State# RealWorld -> State# RealWorld
 * Register an invariant closure to be checked when the enclosing
 * transaction commits.
 */
checkzh_fast
{
  W_ trec, closure;

  /* Args: R1 = invariant closure */
  MAYBE_GC (R1_PTR, checkzh_fast); 

  trec = StgTSO_trec(CurrentTSO);
  closure = R1;
  foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
                                     trec "ptr",
                                     closure "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1494
1495
/* newTVar# :: a -> State# s -> (# State# s, TVar# s a #)
 * Allocate a fresh TVar holding the given value. */
newTVarzh_fast
{
  W_ tv;
  W_ new_value;

  /* Args: R1 = initialisation value */

  MAYBE_GC (R1_PTR, newTVarzh_fast); 
  new_value = R1;
  ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
  RET_P(tv);
}
1508
1509
/* readTVar# :: TVar# s a -> State# s -> (# State# s, a #)
 * Read a TVar inside the current transaction, recording the read in
 * the transaction record. */
readTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ result;

  /* Args: R1 = TVar closure */

  MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];

  RET_P(result);
}
1525
/* readTVarIO# :: TVar# s a -> State# s -> (# State# s, a #)
 * Read a TVar outside any transaction.  While a commit is in progress
 * the current_value field temporarily holds a TREC_HEADER; spin until
 * a real value is visible. */
readTVarIOzh_fast
{
    W_ result;

again:
    result = StgTVar_current_value(R1);
    if (%INFO_PTR(result) == stg_TREC_HEADER_info) {
        goto again;
    }
    RET_P(result);
}
1537
/* writeTVar# :: TVar# s a -> a -> State# s -> State# s
 * Write a TVar inside the current transaction, recording the write in
 * the transaction record. */
writeTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ new_value;
  
  /* Args: R1 = TVar closure */
  /*       R2 = New value    */

  MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  new_value = R2;
  foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1555
1556
1557 /* -----------------------------------------------------------------------------
1558  * MVar primitives
1559  *
1560  * take & putMVar work as follows.  Firstly, an important invariant:
1561  *
1562  *    If the MVar is full, then the blocking queue contains only
1563  *    threads blocked on putMVar, and if the MVar is empty then the
1564  *    blocking queue contains only threads blocked on takeMVar.
1565  *
 * takeMVar:
1567  *    MVar empty : then add ourselves to the blocking queue
1568  *    MVar full  : remove the value from the MVar, and
1569  *                 blocking queue empty     : return
1570  *                 blocking queue non-empty : perform the first blocked putMVar
1571  *                                            from the queue, and wake up the
1572  *                                            thread (MVar is now full again)
1573  *
1574  * putMVar is just the dual of the above algorithm.
1575  *
1576  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1577  * the stack of the thread waiting to do the putMVar.  See
1578  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1579  * the stack layout, and the PerformPut and PerformTake macros below.
1580  *
1581  * It is important that a blocked take or put is woken up with the
1582  * take/put already performed, because otherwise there would be a
1583  * small window of vulnerability where the thread could receive an
1584  * exception and never perform its take or put, and we'd end up with a
1585  * deadlock.
1586  *
1587  * -------------------------------------------------------------------------- */
1588
/* isEmptyMVar# :: MVar# s a -> State# s -> (# State# s, Int# #)
 * 1 if the MVar currently holds no value, 0 otherwise.  An empty MVar's
 * value slot holds the END_TSO_QUEUE sentinel. */
isEmptyMVarzh_fast
{
    /* args: R1 = MVar closure */

    if (StgMVar_value(R1) != stg_END_TSO_QUEUE_closure) {
        RET_N(0);   /* full */
    } else {
        RET_N(1);   /* empty */
    }
}
1599
/* newMVar# :: State# s -> (# State# s, MVar# s a #)
 * Allocate a fresh, empty MVar (head/tail/value all END_TSO_QUEUE). */
newMVarzh_fast
{
    /* args: none */
    W_ mvar;

    ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
  
    mvar = Hp - SIZEOF_StgMVar + WDS(1);
    SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
        // MVARs start dirty: generation 0 has no mutable list
    StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
    RET_P(mvar);
}
1615
1616
/* Complete a takeMVar on behalf of a blocked thread: write 'value' into
 * its top-of-stack frame so it returns the value when resumed. */
#define PerformTake(tso, value)                         \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;

/* Complete a putMVar on behalf of a blocked thread: pop its 3-word
 * putMVar frame and read out the value it was trying to put. */
#define PerformPut(tso,lval)                    \
    StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
    lval = W_[StgTSO_sp(tso) - WDS(1)];
1624
/* takeMVar# :: MVar# s a -> State# s -> (# State# s, a #)
 * If the MVar is empty, block on its queue.  Otherwise take the value;
 * if a putMVar is blocked on the MVar, perform its put on its behalf
 * (see PerformPut above) and wake it up.
 *
 * BUGFIX: the dirty-TSO test read 'flags & TSO_DIRTY == 0'.  '=='
 * binds tighter than '&', so that parsed as 'flags & (TSO_DIRTY == 0)',
 * which is always 0 -- dirty_TSO was never called.  Parenthesised.
 */
takeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif
        
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
    }

    /* If the MVar is empty, put ourselves on its blocking queue,
     * and wait until we're woken up.
     */
    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            foreign "C" setTSOLink(MyCapability() "ptr", 
                                   StgMVar_tail(mvar) "ptr",
                                   CurrentTSO) [];
        }
        StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        R1 = mvar;
        jump stg_block_takemvar;
  }

  /* we got the value... */
  val = StgMVar_value(mvar);

  if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
  {
      /* There are putMVar(s) waiting... 
       * wake up the first thread on the queue
       */
      ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

      /* actually perform the putMVar for the thread that we just woke up */
      tso = StgMVar_head(mvar);
      PerformPut(tso,StgMVar_value(mvar));

      /* we mutated the blocked TSO's stack: mark it dirty if clean */
      if ((TO_W_(StgTSO_flags(tso)) & TSO_DIRTY) == 0) {
          foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
      }

      ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                            StgMVar_head(mvar) "ptr", 1) [];
      StgMVar_head(mvar) = tso;

      if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
          StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
      }

#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
      SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
      RET_P(val);
  } 
  else
  {
      /* No further putMVars, MVar is now empty */
      StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
 
#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
      SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif

      RET_P(val);
  }
}
1709
1710
/* tryTakeMVar# :: MVar# s a -> State# s -> (# State# s, Int#, a #)
 * Non-blocking takeMVar: returns (0, dummy) if the MVar is empty,
 * otherwise (1, value), performing a pending putMVar if one is queued.
 *
 * BUGFIX: as in takeMVarzh_fast, 'flags & TSO_DIRTY == 0' parsed as
 * 'flags & (TSO_DIRTY == 0)' (always 0), so dirty_TSO was never
 * called.  Parenthesised.
 */
tryTakeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */

    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        unlockClosure(mvar, info);
#endif
        /* HACK: we need a pointer to pass back, 
         * so we abuse NO_FINALIZER_closure
         */
        RET_NP(0, stg_NO_FINALIZER_closure);
    }

    if (info == stg_MVAR_CLEAN_info) {
        /* NOTE(review): no register-liveness annotation here, unlike the
         * matching "[]" in takeMVarzh_fast -- confirm intent. */
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are putMVar(s) waiting... 
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));
        /* we mutated the blocked TSO's stack: mark it dirty if clean */
        if ((TO_W_(StgTSO_flags(tso)) & TSO_DIRTY) == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }

        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else 
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    
    RET_NP(1, val);
}
1782
1783
/* putMVar# :: MVar# s a -> a -> State# s -> State# s
 * If the MVar is full, block on its queue.  Otherwise store the value;
 * if a takeMVar is blocked on the MVar, hand the value straight to it
 * (see PerformTake above) and wake it up.
 *
 * BUGFIX: as in takeMVarzh_fast, 'flags & TSO_DIRTY == 0' parsed as
 * 'flags & (TSO_DIRTY == 0)' (always 0), so dirty_TSO was never
 * called.  Parenthesised.
 */
putMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;
    val  = R2;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* MVar full: queue ourselves and block */
    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            foreign "C" setTSOLink(MyCapability() "ptr", 
                                   StgMVar_tail(mvar) "ptr",
                                   CurrentTSO) [];
        }
        StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        R1 = mvar;
        R2 = val;
        jump stg_block_putmvar;
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, val);
        /* we mutated the blocked TSO's stack: mark it dirty if clean */
        if ((TO_W_(StgTSO_flags(tso)) & TSO_DIRTY) == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }
      
        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = val;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    
    /* ToDo: yield afterward for better communication performance? */
}
1863
1864
/* tryPutMVar# :: MVar# s a -> a -> State# s -> (# State# s, Int# #)
 * Non-blocking putMVar: returns 0 if the MVar is already full,
 * otherwise stores the value (handing it to a blocked takeMVar if one
 * is queued) and returns 1.
 *
 * BUGFIX: as in takeMVarzh_fast, 'flags & TSO_DIRTY == 0' parsed as
 * 'flags & (TSO_DIRTY == 0)' (always 0), so dirty_TSO was never
 * called.  Parenthesised.
 */
tryPutMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        unlockClosure(mvar, info);
#endif
        RET_N(0);
    }
  
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
        
        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        /* we mutated the blocked TSO's stack: mark it dirty if clean */
        if ((TO_W_(StgTSO_flags(tso)) & TSO_DIRTY) == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }
      
        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    
    RET_N(1);
    /* ToDo: yield afterward for better communication performance? */
}
1931
1932
1933 /* -----------------------------------------------------------------------------
1934    Stable pointer primitives
1935    -------------------------------------------------------------------------  */
1936
/* makeStableName# :: a -> State# RealWorld -> (# State# RealWorld, StableName# a #)
 *
 * Returns a StableName closure for the object in R1, reusing the
 * existing one if this object has been named before.
 */
makeStableNamezh_fast
{
    W_ index, sn_obj;

    /* Reserve heap for a StgStableName up front; R1 is a pointer the GC
     * must follow if a collection is triggered here. */
    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
  
    (index) = foreign "C" lookupStableName(R1 "ptr") [];

    /* Is there already a StableName for this heap object?
     *  stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        /* No existing StableName: build one in the space reserved above
         * and record it in the table entry for this index. */
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        /* Reuse the previously allocated StableName closure. */
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }
    
    RET_P(sn_obj);
}
1959
1960
/* makeStablePtr# :: a -> State# RealWorld -> (# State# RealWorld, StablePtr# a #)
 *
 * Registers R1 in the stable-pointer table (keeping it alive across
 * GCs) and returns the table index as an unboxed StablePtr#.
 */
makeStablePtrzh_fast
{
    /* Args: R1 = a */
    W_ sp;
    MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
    ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
    RET_N(sp);
}
1969
/* deRefStablePtr# :: StablePtr# a -> State# RealWorld -> (# State# RealWorld, a #)
 *
 * Indexes straight into the stable-pointer table; no validity check is
 * performed, so dereferencing a freed StablePtr is undefined.
 */
deRefStablePtrzh_fast
{
    /* Args: R1 = the stable ptr */
    W_ r, sp;
    sp = R1;
    r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
    RET_P(r);
}
1978
1979 /* -----------------------------------------------------------------------------
1980    Bytecode object primitives
1981    -------------------------------------------------------------------------  */
1982
/* newBCO# : allocate a bytecode object (BCO) on the heap.
 *
 * The BCO is variable-sized: a fixed StgBCO header followed by a copy
 * of the arity/bitmap words taken from the bitmap array in R5.
 */
newBCOzh_fast
{
    /* R1 = instrs
       R2 = literals
       R3 = ptrs
       R4 = arity
       R5 = bitmap array
    */
    W_ bco, bitmap_arr, bytes, words;
    
    bitmap_arr = R5;

    /* total size = header words + bitmap payload words */
    words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
    bytes = WDS(words);

    /* All four heap-pointer args must survive a GC triggered here. */
    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );

    bco = Hp - bytes + WDS(1);
    SET_HDR(bco, stg_BCO_info, W_[CCCS]);
    
    StgBCO_instrs(bco)     = R1;
    StgBCO_literals(bco)   = R2;
    StgBCO_ptrs(bco)       = R3;
    StgBCO_arity(bco)      = HALF_W_(R4);
    StgBCO_size(bco)       = HALF_W_(words);
    
    // Copy the arity/bitmap info into the BCO
    // (Cmm has no loop syntax; this is a label-and-goto while loop.)
    W_ i;
    i = 0;
for:
    if (i < StgArrWords_words(bitmap_arr)) {
        StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
        i = i + 1;
        goto for;
    }
    
    RET_P(bco);
}
2021
2022
/* mkApUpd0# : wrap a zero-arity BCO in an updatable AP thunk. */
mkApUpd0zh_fast
{
    // R1 = the BCO# for the AP
    //  
    W_ ap;

    // This function is *only* used to wrap zero-arity BCOs in an
    // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
    // saturated and always points directly to a FUN or BCO.
    ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
           StgBCO_arity(R1) == HALF_W_(0));

    HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
    TICK_ALLOC_UP_THK(0, 0);
    CCCS_ALLOC(SIZEOF_StgAP);

    // Fill in the AP: zero pending arguments, fun = the BCO.
    ap = Hp - SIZEOF_StgAP + WDS(1);
    SET_HDR(ap, stg_AP_info, W_[CCCS]);
    
    StgAP_n_args(ap) = HALF_W_(0);
    StgAP_fun(ap) = R1;
    
    RET_P(ap);
}
2047
/* unpackClosure# : return (info ptr, array of pointer fields, array of
 * non-pointer words) for an arbitrary closure — used by GHCi / vacuum-
 * style heap inspection.
 */
unpackClosurezh_fast
{
/* args: R1 = closure to analyze */
// TODO: Consider the absence of ptrs or nonptrs as a special case ?

    W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
    info  = %GET_STD_INFO(UNTAG(R1));

    // Some closures have non-standard layout, so we omit those here.
    W_ type;
    type = TO_W_(%INFO_TYPE(info));
    switch [0 .. N_CLOSURE_TYPES] type {
    case THUNK_SELECTOR : {
        // selector thunks have exactly one pointer (the selectee)
        ptrs = 1;
        nptrs = 0;
        goto out;
    }
    case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
         THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
        // non-standard layouts: report no payload at all
        ptrs = 0;
        nptrs = 0;
        goto out;
    }
    default: {
        // standard layout: counts come straight from the info table
        ptrs  = TO_W_(%INFO_PTRS(info)); 
        nptrs = TO_W_(%INFO_NPTRS(info));
        goto out;
    }}
out:

    // Allocate both result arrays in one heap reservation, laid out
    // [ptrs_arr][nptrs_arr] just below Hp.
    W_ ptrs_arr_sz, nptrs_arr_sz;
    nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
    ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);

    ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);

    // Re-untag after ALLOC_PRIM, which may have moved R1 during GC.
    W_ clos;
    clos = UNTAG(R1);

    ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
    nptrs_arr = Hp - nptrs_arr_sz + WDS(1);

    // Copy the pointer fields into a frozen mutable-array closure.
    SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
    p = 0;
for:
    if(p < ptrs) {
         W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
         p = p + 1;
         goto for;
    }
    
    // Copy the non-pointer words (they follow the pointers in the
    // closure payload) into a byte array.
    SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(nptrs_arr) = nptrs;
    p = 0;
for2:
    if(p < nptrs) {
         W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
         p = p + 1;
         goto for2;
    }
    RET_NPP(info, ptrs_arr, nptrs_arr);
}
2111
2112 /* -----------------------------------------------------------------------------
2113    Thread I/O blocking primitives
2114    -------------------------------------------------------------------------- */
2115
2116 /* Add a thread to the end of the blocked queue. (C-- version of the C
2117  * macro in Schedule.h).
2118  */
#define APPEND_TO_BLOCKED_QUEUE(tso)                    \
    ASSERT(StgTSO__link(tso) == END_TSO_QUEUE);         \
    if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
      /* queue was empty: tso becomes the head */       \
      W_[blocked_queue_hd] = tso;                       \
    } else {                                            \
      /* link tso after the current tail (write barrier via setTSOLink) */ \
      foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \
    }                                                   \
    W_[blocked_queue_tl] = tso;
2127
/* waitRead# : block the current thread until the file descriptor in R1
 * is readable.  Only supported on the non-threaded RTS.
 */
waitReadzh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitRead# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
    /* block_info holds the fd the scheduler will select() on */
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2144
/* waitWrite# : block the current thread until the file descriptor in
 * R1 is writable.  Only supported on the non-threaded RTS.
 */
waitWritezh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitWrite# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
    /* block_info holds the fd the scheduler will select() on */
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2161
2162
STRING(stg_delayzh_malloc_str, "delayzh_fast")
/* delay# : block the current thread for (at least) R1 microseconds.
 * Only supported on the non-threaded RTS; on mingw32 the delay is an
 * async I/O request, elsewhere the thread is inserted into the
 * tick-ordered sleeping queue.
 */
delayzh_fast
{
#ifdef mingw32_HOST_OS
    W_ ares;
    CInt reqID;
#else
    W_ t, prev, target;
#endif

#ifdef THREADED_RTS
    foreign "C" barf("delay# on threaded RTS") never returns;
#else

    /* args: R1 (microsecond delay amount) */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;

#ifdef mingw32_HOST_OS

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_delayzh_malloc_str);
    (reqID) = foreign "C" addDelayRequest(R1);
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;

    /* Having all async-blocked threads reside on the blocked_queue
     * simplifies matters, so change the status to OnDoProc put the
     * delayed thread on the blocked_queue.
     */
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async_void;

#else

    /* Convert the microsecond delay into a wake-up time measured in
     * scheduler ticks. */
    W_ time;
    W_ divisor;
    (time) = foreign "C" getourtimeofday() [R1];
    divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
    if (divisor == 0) {
        /* tickInterval of 0 means "ticks disabled"; fall back to the
         * default 50ms tick for the conversion. */
        divisor = 50;
    }
    divisor = divisor * 1000;
    target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
           + time + 1; /* Add 1 as getourtimeofday rounds down */
    StgTSO_block_info(CurrentTSO) = target;

    /* Insert the new thread in the sleeping queue. */
    /* Linear scan: the queue is kept sorted by wake-up time. */
    prev = NULL;
    t = W_[sleeping_queue];
while:
    if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
        prev = t;
        t = StgTSO__link(t);
        goto while;
    }

    StgTSO__link(CurrentTSO) = t;
    if (prev == NULL) {
        W_[sleeping_queue] = CurrentTSO;
    } else {
        foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) [];
    }
    jump stg_block_noregs;
#endif
#endif /* !THREADED_RTS */
}
2234
2235
#ifdef mingw32_HOST_OS
STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
/* asyncRead# : (Windows only) issue an asynchronous read request and
 * block the current thread until the I/O manager completes it.
 */
asyncReadzh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncRead# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncReadzh_malloc_str)
                        [R1,R2,R3,R4];
    /* 0 = read request (addIORequest's 'forWriting' flag is FALSE) */
    (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2264
STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
/* asyncWrite# : (Windows only) issue an asynchronous write request and
 * block the current thread until the I/O manager completes it.
 */
asyncWritezh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncWrite# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncWritezh_malloc_str)
                        [R1,R2,R3,R4];
    /* 1 = write request (addIORequest's 'forWriting' flag is TRUE) */
    (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];

    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2292
STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
/* asyncDoProc# : (Windows only) run a C procedure on an I/O-manager
 * worker thread and block the current Haskell thread until it finishes.
 */
asyncDoProczh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
#else

    /* args: R1 = proc, R2 = param */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncDoProczh_malloc_str) 
                                [R1,R2];
    (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO) = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2320 #endif
2321
2322 // noDuplicate# tries to ensure that none of the thunks under
2323 // evaluation by the current thread are also under evaluation by
2324 // another thread.  It relies on *both* threads doing noDuplicate#;
2325 // the second one will get blocked if they are duplicating some work.
noDuplicatezh_fast
{
    /* Suspend the thread's state and let threadPaused claim (blackhole)
     * the thunks currently under evaluation on our stack. */
    SAVE_THREAD_STATE();
    ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
    foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
    
    if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
        /* threadPaused may have killed us (e.g. a pending exception) */
        jump stg_threadFinished;
    } else {
        LOAD_THREAD_STATE();
        ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
        jump %ENTRY_CODE(Sp(0));
    }
}
2340
/* getApStackVal# : fetch payload word R2 from an AP_STACK closure.
 * Returns (1, value) on success, or (0, the original closure) when R1
 * is not (or no longer) an AP_STACK.
 */
getApStackValzh_fast
{
   W_ ap_stack, offset, val, ok;

   /* args: R1 = AP_STACK, R2 = offset */
   ap_stack = R1;
   offset   = R2;

   if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
        ok = 1;
        val = StgAP_STACK_payload(ap_stack,offset); 
   } else {
        /* closure was evaluated/updated out from under us */
        ok = 0;
        val = R1;
   }
   RET_NP(ok,val);
}
2358
/* getSpark# : pop a spark from this capability's spark pool.
 * Returns (1, spark) if one was found, otherwise (0, False).
 * On the non-threaded RTS there are no spark pools, so it always fails.
 */
getSparkzh_fast
{
   W_ spark;

#ifndef THREADED_RTS
   RET_NP(0,ghczmprim_GHCziBool_False_closure);
#else
   (spark) = foreign "C" findSpark(MyCapability());
   if (spark != 0) {
      RET_NP(1,spark);
   } else {
      /* pool empty: second component is an arbitrary boxed value */
      RET_NP(0,ghczmprim_GHCziBool_False_closure);
   }
#endif
}