/* rts/PrimOps.cmm — from ghc-hetmet.git, STM-invariants branch (gitweb page
   header converted to a comment so the file remains valid C--). */
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check, the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *less* than the size of a word, make sure we
42  * round up to the nearest word for the size of the array.
43  */
44
/* newByteArray#: R1 = requested size in BYTES.
 * Rounds the payload up to a whole number of words and allocates an
 * ARR_WORDS closure in this capability's nursery.  Returns the new
 * (mutable) byte array in R1 via RET_P. */
newByteArrayzh_fast
{
    W_ words, payload_words, n, p;
    MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    "ptr" p = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
58
/* newPinnedByteArray#: R1 = requested size in BYTES.
 * Like newByteArray# but the object is allocated with allocatePinned()
 * so the GC will never move it, and the PAYLOAD is 8-byte aligned. */
newPinnedByteArrayzh_fast
{
    W_ words, payload_words, n, p;

    MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);

    // We want an 8-byte aligned array.  allocatePinned() gives us
    // 8-byte aligned memory by default, but we want to align the
    // *goods* inside the ArrWords object, so we have to check the
    // size of the ArrWords header and adjust our size accordingly.
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    if ((SIZEOF_StgArrWords & 7) != 0) {
        words = words + 1;
    }

    "ptr" p = foreign "C" allocatePinned(words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    // Again, if the ArrWords header isn't a multiple of 8 bytes, we
    // have to push the object forward one word so that the goods
    // fall on an 8-byte boundary.
    if ((SIZEOF_StgArrWords & 7) != 0) {
        p = p + WDS(1);
    }

    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
90
/* newArray#: R1 = number of elements (words), R2 = initialisation value.
 * Allocates a MUT_ARR_PTRS_DIRTY closure and fills every slot with R2. */
newArrayzh_fast
{
    W_ words, n, init, arr, p;
    /* Args: R1 = words, R2 = initialisation value */

    n = R1;
    MAYBE_GC(R2_PTR,newArrayzh_fast);

    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
    "ptr" arr = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(arr) = n;

    // Initialise all elements of the array with the value in R2
    init = R2;
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + WDS(words)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    RET_P(arr);
}
118
/* unsafeThawArray#: R1 = a MUT_ARR_PTRS_FROZEN(0) array.
 * Flips the info pointer back to MUT_ARR_PTRS_DIRTY, adding the array
 * to the mutable list only when it is not already on it. */
unsafeThawArrayzh_fast
{
  // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
  //
  // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
  // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
  // it on the mutable list for the GC to remove (removing something from
  // the mutable list is not easy, because the mut_list is only singly-linked).
  //
  // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
  // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
  // to indicate that it is still on the mutable list.
  //
  // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
  // either it is on a mut_list, or it isn't.  We adopt the convention that
  // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
  // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
  // we put it on the mutable list more than once, but it would get scavenged
  // multiple times during GC, which would be unnecessarily slow.
  //
  if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(R1, R1);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        RET_P(R1);
  } else {
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        RET_P(R1);
  }
}
149
150 /* -----------------------------------------------------------------------------
151    MutVar primitives
152    -------------------------------------------------------------------------- */
153
/* newMutVar#: R1 = initialisation value.
 * Allocates a MUT_VAR_DIRTY closure holding R1 and returns it. */
newMutVarzh_fast
{
    W_ mv;
    /* Args: R1 = initialisation value */

    ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);

    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
    StgMutVar_var(mv) = R1;

    RET_P(mv);
}
167
/* atomicModifyMutVar#: R1 = MutVar#, R2 = a function (a -> (a,b)).
 * Builds three thunks sharing (f x): the new contents (sel_0 (f x)) and
 * the result (sel_1 (f x)).  In the threaded RTS the whole update is
 * guarded by atomic_modify_mutvar_mutex. */
atomicModifyMutVarzh_fast
{
    W_ mv, z, x, y, r;
    /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */

    /* If x is the current contents of the MutVar#, then
       We want to make the new contents point to

         (sel_0 (f x))

       and the return value is

         (sel_1 (f x))

        obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */

#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

   HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);

#if defined(THREADED_RTS)
    foreign "C" ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
#endif

   x = StgMutVar_var(R1);

   /* z = [stg_ap_2 f x] — the shared application thunk (f x) */
   TICK_ALLOC_THUNK_2();
   CCCS_ALLOC(THUNK_2_SIZE);
   z = Hp - THUNK_2_SIZE + WDS(1);
   SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(z);
   StgThunk_payload(z,0) = R2;
   StgThunk_payload(z,1) = x;

   /* y = [stg_sel_0 z] — the new contents of the MutVar */
   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   y = z - THUNK_1_SIZE;
   SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(y);
   StgThunk_payload(y,0) = z;

   StgMutVar_var(R1) = y;
   foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];

   /* r = [stg_sel_1 z] — the value returned to the caller */
   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   r = y - THUNK_1_SIZE;
   SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(r);
   StgThunk_payload(r,0) = z;

#if defined(THREADED_RTS)
    foreign "C" RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
#endif

   RET_P(r);
}
246
247 /* -----------------------------------------------------------------------------
248    Weak Pointer Primitives
249    -------------------------------------------------------------------------- */
250
STRING(stg_weak_msg,"New weak pointer at %p\n")

/* mkWeak#: R1 = key, R2 = value, R3 = finalizer (or NULL).
 * Allocates a WEAK closure and links it onto the global weak_ptr_list. */
mkWeakzh_fast
{
  /* R1 = key
     R2 = value
     R3 = finalizer (or NULL)
  */
  W_ w;

  if (R3 == NULL) {
    R3 = stg_NO_FINALIZER_closure;
  }

  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, W_[CCCS]);

  StgWeak_key(w)       = R1;
  StgWeak_value(w)     = R2;
  StgWeak_finalizer(w) = R3;

  StgWeak_link(w)       = W_[weak_ptr_list];
  W_[weak_ptr_list]     = w;

  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

  RET_P(w);
}
281
282
/* finalizeWeak#: R1 = weak ptr.
 * Kills the weak pointer (info ptr becomes DEAD_WEAK) and returns
 * (# 1, finalizer #) if a finalizer was attached, (# 0, no-finalizer #)
 * otherwise, including when the weak pointer was already dead. */
finalizzeWeakzh_fast
{
  /* R1 = weak ptr
   */
  W_ w, f;

  w = R1;

  // already dead?
  if (GET_INFO(w) == stg_DEAD_WEAK_info) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  }

  // kill it
#ifdef PROFILING
  // @LDV profiling
  // A weak pointer is inherently used, so we do not need to call
  // LDV_recordDead_FILL_SLOP_DYNAMIC():
  //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
  // or, LDV_recordDead():
  //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
  // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
  // large as weak pointers, so there is no need to fill the slop, either.
  // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
#endif

  //
  // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
  //
  SET_INFO(w,stg_DEAD_WEAK_info);
  LDV_RECORD_CREATE(w);

  f = StgWeak_finalizer(w);
  StgDeadWeak_link(w) = StgWeak_link(w);

  /* return the finalizer */
  if (f == stg_NO_FINALIZER_closure) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  } else {
      RET_NP(1,f);
  }
}
325
/* deRefWeak#: R1 = weak ptr.
 * Returns (# 1, value #) while the weak pointer is alive (info == WEAK),
 * otherwise (# 0, w #) — the second component is unusable in that case. */
deRefWeakzh_fast
{
  /* R1 = weak ptr */
  W_ w, code, val;

  w = R1;
  if (GET_INFO(w) == stg_WEAK_info) {
    code = 1;
    val = StgWeak_value(w);
  } else {
    code = 0;
    val = w;
  }
  RET_NP(code,val);
}
341
342 /* -----------------------------------------------------------------------------
343    Arbitrary-precision Integer operations.
344
345    There are some assumptions in this code that mp_limb_t == W_.  This is
346    the case for all the platforms that GHC supports, currently.
347    -------------------------------------------------------------------------- */
348
/* int2Integer#: R1 = Int#.
 * Builds a one-limb GMP-style Integer: returns (# sign/size, ByteArray# #)
 * where the sign/size is -1, 0 or 1 (inlined mpz_set_si). */
int2Integerzh_fast
{
   /* arguments: R1 = Int# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;
   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   /* mpz_set_si is inlined here, makes things simpler */
   if (%lt(val,0)) {
        s  = -1;
        Hp(0) = -val;
   } else {
     if (%gt(val,0)) {
        s = 1;
        Hp(0) = val;
     } else {
        s = 0;
     }
  }

   /* returns (# size  :: Int#,
                 data  :: ByteArray#
               #)
   */
   RET_NP(s,p);
}
381
/* word2Integer#: R1 = Word#.
 * Builds a one-limb Integer: size is 1 for non-zero input, 0 otherwise. */
word2Integerzh_fast
{
   /* arguments: R1 = Word# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   if (val != 0) {
        s = 1;
        W_[Hp] = val;
   } else {
        s = 0;
   }

   /* returns (# size  :: Int#,
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
408
409
410 /*
411  * 'long long' primops for converting to/from Integers.
412  */
413
414 #ifdef SUPPORT_LONG_LONGS
415
/* int64ToInteger#: L1 = Int64#.
 * Converts a 64-bit int to an Integer, using one limb when the value
 * fits in 32 bits and two limbs otherwise; the returned size is negated
 * for negative inputs.  Only built when SUPPORT_LONG_LONGS is defined,
 * i.e. on platforms where a word is 32 bits. */
int64ToIntegerzh_fast
{
   /* arguments: L1 = Int64# */

   L_ val;
   W_ hi, s, neg, words_needed, p;

   val = L1;
   neg = 0;

   if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) )  {
       words_needed = 2;
   } else {
       // minimum is one word
       words_needed = 1;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, int64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   if ( %lt(val,0::L_) ) {
     neg = 1;
     val = -val;
   }

   hi = TO_W_(val >> 32);

   if ( words_needed == 2 )  {
      s = 2;
      Hp(-1) = TO_W_(val);
      Hp(0) = hi;
   } else {
       if ( val != 0::L_ ) {
           s = 1;
           Hp(0) = TO_W_(val);
       } else /* val==0 */  {
           s = 0;
       }
   }
   if ( neg != 0 ) {
        s = -s;
   }

   /* returns (# size  :: Int#,
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
468
/* word64ToInteger#: L1 = Word64#.
 * Converts a 64-bit word to an Integer: two limbs when the value is
 * >= 2^32, one limb when non-zero, zero limbs when zero. */
word64ToIntegerzh_fast
{
   /* arguments: L1 = Word64# */

   L_ val;
   W_ hi, s, words_needed, p;

   val = L1;
   if ( val >= 0x100000000::L_ ) {
      words_needed = 2;
   } else {
      words_needed = 1;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, word64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   hi = TO_W_(val >> 32);
   if ( val >= 0x100000000::L_ ) {
     s = 2;
     Hp(-1) = TO_W_(val);
     Hp(0)  = hi;
   } else {
      if ( val != 0::L_ ) {
        s = 1;
        Hp(0) = TO_W_(val);
     } else /* val==0 */  {
      s = 0;
     }
  }

   /* returns (# size  :: Int#,
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
509
510
511 #endif /* SUPPORT_LONG_LONGS */
512
/* ToDo: this is shockingly inefficient */

/* Scratch MP_INT structures for the GMP wrappers below.  In the
 * non-threaded RTS they are plain static bss storage; in the threaded
 * RTS each capability has its own copies in its StgRegTable, fetched
 * via FETCH_MP_TEMP to avoid races. */
#ifndef THREADED_RTS
section "bss" {
  mp_tmp1:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_tmp2:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_result1:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_result2:
    bits8 [SIZEOF_MP_INT];
}
#endif

#ifdef THREADED_RTS
#define FETCH_MP_TEMP(X) \
W_ X; \
X = BaseReg + (OFFSET_StgRegTable_r ## X);
#else
#define FETCH_MP_TEMP(X) /* Nothing */
#endif
544
/* GMP_TAKE2_RET1(name, mp_fun): defines primop `name` taking two
 * Integers ((R1,R2) and (R3,R4) as size/data pairs) and returning one,
 * by marshalling them into MP_INT structs and calling the 2-arg,
 * 1-result GMP function mp_fun. */
#define GMP_TAKE2_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1, s2;                                                          \
  W_ d1, d2;                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_tmp2);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
  FETCH_MP_TEMP(mp_result2);                                            \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
                                                                        \
  s1 = W_TO_INT(R1);                                                    \
  d1 = R2;                                                              \
  s2 = W_TO_INT(R3);                                                    \
  d2 = R4;                                                              \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
  MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
  MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
  MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
  MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
  MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}
578
/* GMP_TAKE1_RET1(name, mp_fun): defines primop `name` taking one
 * Integer (R1 = size, R2 = data) and returning one, via the 1-arg,
 * 1-result GMP function mp_fun. */
#define GMP_TAKE1_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1;                                                              \
  W_ d1;                                                                \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR, name);                                               \
                                                                        \
  d1 = R2;                                                              \
  s1 = W_TO_INT(R1);                                                    \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}
605
/* GMP_TAKE2_RET2(name, mp_fun): defines primop `name` taking two
 * Integers and returning two (e.g. quotRem/divMod), via the 2-arg,
 * 2-result GMP function mp_fun. */
#define GMP_TAKE2_RET2(name,mp_fun)                                                     \
name                                                                                    \
{                                                                                       \
  CInt s1, s2;                                                                          \
  W_ d1, d2;                                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                                               \
  FETCH_MP_TEMP(mp_tmp2);                                                               \
  FETCH_MP_TEMP(mp_result1)                                                             \
  FETCH_MP_TEMP(mp_result2)                                                             \
                                                                                        \
  /* call doYouWantToGC() */                                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
                                                                                        \
  s1 = W_TO_INT(R1);                                                                    \
  d1 = R2;                                                                              \
  s2 = W_TO_INT(R3);                                                                    \
  d2 = R4;                                                                              \
                                                                                        \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
  MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
  MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
  MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
                                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
  foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
                                                                                        \
  /* Perform the operation */                                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
                                                                                        \
  RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
           MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
           TO_W_(MP_INT__mp_size(mp_result2)),                                          \
           MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
}
642
/* Instantiate the GMP wrapper macros for all the Integer primops. */
GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)

GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
657
#ifndef THREADED_RTS
section "bss" {
  mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
}
#endif

/* gcdInt#: R1, R2 = the two Int#s.
 * Computes gcd via GMP's __gmpn_gcd_1, treating R1 as a one-limb number
 * staged in mp_tmp_w.  Result parked in R1. */
gcdIntzh_fast
{
    /* R1 = the first Int#; R2 = the second Int# */
    W_ r;
    FETCH_MP_TEMP(mp_tmp_w);

    W_[mp_tmp_w] = R1;
    r = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];

    R1 = r;
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
677
678
/* gcdIntegerInt#: R1 = size, R2 = limb data of the Integer, R3 = Int#.
 * Delegates to __gmpn_gcd_1 over the Integer's limbs. */
gcdIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    R1 = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];

    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
687
688
/* cmpIntegerInt#: R1 = size, R2 = limb data of the Integer, R3 = Int#.
 * Three-way comparison of an Integer against a machine int; returns a
 * negative, zero or positive word in R1 (paraphrased __gmpz_cmp_si). */
cmpIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ usize, vsize, v_digit, u_digit;

    usize = R1;
    vsize = 0;
    v_digit = R3;

    // paraphrased from __gmpz_cmp_si() in the GMP sources
    if (%gt(v_digit,0)) {
        vsize = 1;
    } else {
        if (%lt(v_digit,0)) {
            vsize = -1;
            v_digit = -v_digit;
        }
    }

    // signs (sizes) differ: the comparison is decided already
    if (usize != vsize) {
        R1 = usize - vsize;
        jump %ENTRY_CODE(Sp(0));
    }

    // both zero
    if (usize == 0) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    // same sign, one limb each: compare the limbs
    u_digit = W_[BYTE_ARR_CTS(R2)];

    if (u_digit == v_digit) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
        R1 = usize;
    } else {
        R1 = -usize;
    }

    jump %ENTRY_CODE(Sp(0));
}
733
/* cmpInteger#: R1 = s1, R2 = d1, R3 = s2, R4 = d2.
 * Three-way comparison of two Integers; returns a negative, zero or
 * positive word in R1 (paraphrased __gmpz_cmp, limb comparison done by
 * __gmpn_cmp). */
cmpIntegerzh_fast
{
    /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
    W_ usize, vsize, size, up, vp;
    CInt cmp;

    // paraphrased from __gmpz_cmp() in the GMP sources
    usize = R1;
    vsize = R3;

    if (usize != vsize) {
        R1 = usize - vsize;
        jump %ENTRY_CODE(Sp(0));
    }

    if (usize == 0) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    if (%lt(usize,0)) { // NB. not <, which is unsigned
        size = -usize;
    } else {
        size = usize;
    }

    up = BYTE_ARR_CTS(R2);
    vp = BYTE_ARR_CTS(R4);

    cmp = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];

    if (cmp == 0 :: CInt) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
        R1 = 1;
    } else {
        R1 = (-1);
    }
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
778
/* integer2Int#: R1 = size, R2 = limb data.
 * Takes the low limb of the Integer, negated if the size is negative;
 * zero-size Integers yield 0.  Result parked in R1. */
integer2Intzh_fast
{
    /* R1 = s; R2 = d */
    W_ r, s;

    s = R1;
    if (s == 0) {
        r = 0;
    } else {
        r = W_[R2 + SIZEOF_StgArrWords];
        if (%lt(s,0)) {
            r = -r;
        }
    }
    /* Result parked in R1, return via info-pointer at TOS */
    R1 = r;
    jump %ENTRY_CODE(Sp(0));
}
797
/* integer2Word#: R1 = size, R2 = limb data.
 * Same extraction as integer2Int# — low limb, negated when size < 0.
 * Result parked in R1. */
integer2Wordzh_fast
{
  /* R1 = s; R2 = d */
  W_ r, s;

  s = R1;
  if (s == 0) {
    r = 0;
  } else {
    r = W_[R2 + SIZEOF_StgArrWords];
    if (%lt(s,0)) {
        r = -r;
    }
  }
  /* Result parked in R1, return via info-pointer at TOS */
  R1 = r;
  jump %ENTRY_CODE(Sp(0));
}
816
/* decodeFloat#: F1 = Float#.
 * Decodes a float into (# exponent, mantissa-sign/size, ByteArray# #)
 * via the C helper __decodeFloat.
 * NOTE(review): the opening `{` was missing in the damaged source and
 * has been restored here. */
decodeFloatzh_fast
{
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: F1 = Float# */
    arg = F1;

    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );

    /* Be prepared to tell Lennart-coded __decodeFloat
       where mantissa._mp_d can be put (it does not care about the rest) */
    p = Hp - SIZEOF_StgArrWords;
    SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
    StgArrWords_words(p) = 1;
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

    /* Perform the operation */
    foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];

    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
842
#define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
#define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)

/* decodeDouble#: D1 = Double#.
 * Decodes a double into (# exponent, mantissa-sign/size, ByteArray# #)
 * via the C helper __decodeDouble.
 * NOTE(review): the opening `{` was missing in the damaged source and
 * has been restored here. */
decodeDoublezh_fast
{
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: D1 = Double# */
    arg = D1;

    ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );

    /* Be prepared to tell Lennart-coded __decodeDouble
       where mantissa.d can be put (it does not care about the rest) */
    p = Hp - ARR_SIZE + WDS(1);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

    /* Perform the operation */
    foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];

    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
871
872 /* -----------------------------------------------------------------------------
873  * Concurrency primitives
874  * -------------------------------------------------------------------------- */
875
/* fork#: R1 = the IO closure to run in a new thread.
 * Creates an IO thread with the default initial stack size, schedules
 * it on this capability, and requests a context switch ASAP. */
forkzh_fast
{
  /* args: R1 = closure to spark */

  MAYBE_GC(R1_PTR, forkzh_fast);

  W_ closure;
  W_ threadid;
  closure = R1;

  "ptr" threadid = foreign "C" createIOThread( MyCapability() "ptr",
                                RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                closure "ptr") [];
  foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];

  // switch at the earliest opportunity
  CInt[context_switch] = 1 :: CInt;

  RET_P(threadid);
}
896
/* forkOn#: R1 = target cpu, R2 = the IO closure to run.
 * Like fork#, but pins the new thread to the given capability via
 * scheduleThreadOn. */
forkOnzh_fast
{
  /* args: R1 = cpu, R2 = closure to spark */

  MAYBE_GC(R2_PTR, forkOnzh_fast);

  W_ cpu;
  W_ closure;
  W_ threadid;
  cpu = R1;
  closure = R2;

  "ptr" threadid = foreign "C" createIOThread( MyCapability() "ptr",
                                RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                closure "ptr") [];
  foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];

  // switch at the earliest opportunity
  CInt[context_switch] = 1 :: CInt;

  RET_P(threadid);
}
919
/* yield#: no args — just transfer to the generic yield entry point. */
yieldzh_fast
{
  jump stg_yield_noregs;
}
924
/* myThreadId#: no args — returns the current TSO as a ThreadId#. */
myThreadIdzh_fast
{
  /* no args. */
  RET_P(CurrentTSO);
}
930
/* labelThread#: R1 = ThreadId#, R2 = Addr# of a label string.
 * Only has an effect in DEBUG builds; otherwise a no-op. */
labelThreadzh_fast
{
  /* args:
        R1 = ThreadId#
        R2 = Addr# */
#ifdef DEBUG
  foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
#endif
  jump %ENTRY_CODE(Sp(0));
}
941
/* isCurrentThreadBound#: no args — asks the RTS whether the current
 * TSO is bound to an OS thread; returns the C result as an Int#. */
isCurrentThreadBoundzh_fast
{
  /* no args */
  W_ r;
  r = foreign "C" isThreadBound(CurrentTSO) [];
  RET_N(r);
}
949
950
951 /* -----------------------------------------------------------------------------
952  * TVar primitives
953  * -------------------------------------------------------------------------- */
954
955 #ifdef REG_R1
956 #define SP_OFF 0
957 #define IF_NOT_REG_R1(x) 
958 #else
959 #define SP_OFF 1
960 #define IF_NOT_REG_R1(x) x
961 #endif
962
963 // Catch retry frame ------------------------------------------------------------
964
/* Vectored returns into a catch-retry frame should never happen (the
 * frame's direct entry code below does all the work), so each of the
 * MAX_VECTORED_RTN vector entries simply aborts the program. */
#define CATCH_RETRY_FRAME_ERROR(label) \
  label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }

CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)

#if MAX_VECTORED_RTN > 8
#error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
#endif
980
981 #if defined(PROFILING)
982 #define CATCH_RETRY_FRAME_BITMAP 7
983 #define CATCH_RETRY_FRAME_WORDS  5
984 #else
985 #define CATCH_RETRY_FRAME_BITMAP 1
986 #define CATCH_RETRY_FRAME_WORDS  3
987 #endif
988
/* Entered when the code protected by a catchRetry# returns normally:
 * try to commit the nested transaction; on failure restart whichever
 * branch (first or alternative) the frame says was running. */
INFO_TABLE_RET(stg_catch_retry_frame,
               CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
               CATCH_RETRY_FRAME,
               stg_catch_retry_frame_0_ret,
               stg_catch_retry_frame_1_ret,
               stg_catch_retry_frame_2_ret,
               stg_catch_retry_frame_3_ret,
               stg_catch_retry_frame_4_ret,
               stg_catch_retry_frame_5_ret,
               stg_catch_retry_frame_6_ret,
               stg_catch_retry_frame_7_ret)
{
   W_ r, frame, trec, outer;
   /* Unregisterised: the returned value sits on the stack, not in R1 */
   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
   r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
   if (r != 0) {
     /* Succeeded (either first branch or second branch): pop the frame
      * and return to the code underneath it */
     StgTSO_trec(CurrentTSO) = outer;
     Sp = Sp + SIZEOF_StgCatchRetryFrame;
     IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
     jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
     /* Did not commit: start a fresh nested transaction and re-execute
      * the branch that was running when we got here */
     W_ new_trec;
     "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
     StgTSO_trec(CurrentTSO) = new_trec;
     if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
       R1 = StgCatchRetryFrame_alt_code(frame);
     } else {
       R1 = StgCatchRetryFrame_first_code(frame);
     }
     jump stg_ap_v_fast;
   }
}
1027
1028
1029 // Atomically frame -------------------------------------------------------------
1030
1031
/* As with catch-retry frames: vectored returns into an atomically frame
 * are never expected, so every vector entry aborts. */
#define ATOMICALLY_FRAME_ERROR(label) \
  label { foreign "C" barf("atomically_frame incorrectly entered!"); }

ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)

#if MAX_VECTORED_RTN > 8
#error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
#endif
1047
1048 #if defined(PROFILING)
1049 #define ATOMICALLY_FRAME_BITMAP 3
1050 #define ATOMICALLY_FRAME_WORDS  4
1051 #else
1052 #define ATOMICALLY_FRAME_BITMAP 0
1053 #define ATOMICALLY_FRAME_WORDS  2
1054 #endif
1055
1056
/* Entered when the body of an atomically# block (or one of its
 * invariant checks) returns.  Runs each pending invariant in its own
 * nested transaction, then attempts the top-level commit; if the
 * commit fails the whole transaction is restarted from scratch. */
INFO_TABLE_RET(stg_atomically_frame,
               ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
               ATOMICALLY_FRAME,
               stg_atomically_frame_0_ret,
               stg_atomically_frame_1_ret,
               stg_atomically_frame_2_ret,
               stg_atomically_frame_3_ret,
               stg_atomically_frame_4_ret,
               stg_atomically_frame_5_ret,
               stg_atomically_frame_6_ret,
               stg_atomically_frame_7_ret)
{
  W_ frame, trec, valid, next_invariant, q, outer;
  /* Unregisterised: the returned value sits on the stack, not in R1 */
  IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (outer == NO_TREC) {
    /* First time back at the atomically frame -- pick up invariants */
    "ptr" q = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;

  } else {
    /* Second/subsequent time back at the atomically frame -- abort the
     * tx that's checking the invariant and move on to the next one */
    StgTSO_trec(CurrentTSO) = outer;
    q = StgAtomicallyFrame_next_invariant_to_check(frame);
    StgInvariantCheckQueue_my_execution(q) = trec;
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    /* Don't free trec -- it's linked from q and will be stashed in the
     * invariant if we eventually commit. */
    q = StgInvariantCheckQueue_next_queue_entry(q);
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    trec = outer;
  }

  q = StgAtomicallyFrame_next_invariant_to_check(frame);

  if (q != END_INVARIANT_CHECK_QUEUE) {
    /* We can't commit yet: another invariant to check.  Run its code in
     * a fresh nested transaction; we come back through this frame. */
    "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;

    next_invariant = StgInvariantCheckQueue_invariant(q);
    R1 = StgAtomicInvariant_code(next_invariant);
    jump stg_ap_v_fast;

  } else {

    /* We've got no more invariants to check, try to commit */
    valid = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
    if (valid != 0) {
      /* Transaction was valid: commit succeeded; pop the frame and
       * return to the continuation underneath it */
      StgTSO_trec(CurrentTSO) = NO_TREC;
      Sp = Sp + SIZEOF_StgAtomicallyFrame;
      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
      jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
      /* Transaction was not valid: try again from the top with a brand
       * new top-level transaction */
      "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
      R1 = StgAtomicallyFrame_code(frame);
      jump stg_ap_v_fast;
    }
  }
}
1126
/* Variant of the atomically frame used while the TSO is blocked in
 * retry#: entered when the thread is woken, it decides whether to keep
 * waiting or to restart the transaction.  Shares layout and return
 * vectors with stg_atomically_frame. */
INFO_TABLE_RET(stg_atomically_waiting_frame,
               ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
               ATOMICALLY_FRAME,
               stg_atomically_frame_0_ret,
               stg_atomically_frame_1_ret,
               stg_atomically_frame_2_ret,
               stg_atomically_frame_3_ret,
               stg_atomically_frame_4_ret,
               stg_atomically_frame_5_ret,
               stg_atomically_frame_6_ret,
               stg_atomically_frame_7_ret)
{
  W_ frame, trec, valid;
  IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

  frame = Sp;

  /* The TSO is currently waiting: should we stop waiting? */
  valid = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
  if (valid != 0) {
    /* Previous attempt is still valid: no point trying again yet */
          IF_NOT_REG_R1(Sp_adj(-2);
                        Sp(1) = stg_NO_FINALIZER_closure;
                        Sp(0) = stg_ut_1_0_unreg_info;)
    jump stg_block_noregs;
  } else {
    /* Previous attempt is no longer valid: flip the frame back to a
     * normal atomically frame and re-run the transaction's code */
    "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    StgHeader_info(frame) = stg_atomically_frame_info;
    R1 = StgAtomicallyFrame_code(frame);
    jump stg_ap_v_fast;
  }
}
1161
1162 // STM catch frame --------------------------------------------------------------
1163
/* Template for the entry/return code of a catch-stm frame: commit the
 * nested transaction started by catchSTM#; if the commit succeeds, pop
 * the frame and return via 'ret'; if it fails, start a fresh nested
 * transaction and re-run the protected code.  Instantiated once per
 * return vector below and once (unlabelled) as the frame's direct
 * entry code. */
#define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)                                               \
   label                                                                                        \
   {                                                                                            \
      IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )                                       \
      W_ r, frame, trec, outer;                                                                 \
      frame = Sp;                                                                               \
      trec = StgTSO_trec(CurrentTSO);                                                           \
      "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];                             \
      r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];          \
      if (r != 0) {                                                                             \
        /* Commit succeeded */                                                                  \
        StgTSO_trec(CurrentTSO) = outer;                                                        \
        Sp = Sp + SIZEOF_StgCatchSTMFrame;                                                      \
        IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)                                                \
        jump ret;                                                                               \
      } else {                                                                                  \
        /* Commit failed */                                                                     \
        W_ new_trec;                                                                            \
        "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") []; \
        StgTSO_trec(CurrentTSO) = new_trec;                                                     \
        R1 = StgCatchSTMFrame_code(frame);                                                      \
        jump stg_ap_v_fast;                                                                     \
      }                                                                                         \
   }

/* Re-establish SP_OFF for this section (mirrors the definition above). */
#ifdef REG_R1
#define SP_OFF 0
#else
#define SP_OFF 1
#endif

CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
1203
1204 #if MAX_VECTORED_RTN > 8
1205 #error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
1206 #endif
1207
1208 #if defined(PROFILING)
1209 #define CATCH_STM_FRAME_BITMAP 3
1210 #define CATCH_STM_FRAME_WORDS  4
1211 #else
1212 #define CATCH_STM_FRAME_BITMAP 0
1213 #define CATCH_STM_FRAME_WORDS  2
1214 #endif
1215
/* The catch-stm frame itself: its direct entry code is the unlabelled
 * instantiation of the template above, which commits the nested
 * transaction and then performs an ordinary return to the activation
 * record underneath us on the stack.
 */

INFO_TABLE_RET(stg_catch_stm_frame,
               CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
               CATCH_STM_FRAME,
               stg_catch_stm_frame_0_ret,
               stg_catch_stm_frame_1_ret,
               stg_catch_stm_frame_2_ret,
               stg_catch_stm_frame_3_ret,
               stg_catch_stm_frame_4_ret,
               stg_catch_stm_frame_5_ret,
               stg_catch_stm_frame_6_ret,
               stg_catch_stm_frame_7_ret)
CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1233
1234
1235 // Primop definition ------------------------------------------------------------
1236
/* atomically# : push an atomically frame, start a top-level
 * transaction, and enter the STM action. */
atomicallyzh_fast
{
  W_ frame;
  W_ old_trec;
  W_ new_trec;
  
  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR, atomicallyzh_fast); 

  /* Args: R1 = m :: STM a */
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);

  old_trec = StgTSO_trec(CurrentTSO);

  /* Nested transactions are not allowed; raise an exception */
  if (old_trec != NO_TREC) {
     R1 = base_GHCziIOBase_NestedAtomically_closure;
     jump raisezh_fast;
  }

  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
  StgAtomicallyFrame_code(frame) = R1;
  StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;

  /* Start the memory transaction */
  "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1272
1273
/* catchSTM# : push a catch-stm frame recording the handler, start a
 * nested transaction for the protected code, and enter it. */
catchSTMzh_fast
{
  W_ frame;
  
  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */
  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
  StgCatchSTMFrame_handler(frame) = R2;
  StgCatchSTMFrame_code(frame) = R1;

  /* Start a nested transaction to run the body of the try block in */
  /* NOTE(review): this call lacks the trailing "[]" live-variable
   * annotation that every sibling stmStartTransaction call carries --
   * confirm whether that is intentional. */
  W_ cur_trec;  
  W_ new_trec;
  cur_trec = StgTSO_trec(CurrentTSO);
  "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1300
1301
/* catchRetry# : start a nested transaction and push a catch-retry
 * frame remembering both branches; enter the first branch. */
catchRetryzh_fast
{
  W_ frame;
  W_ new_trec;
  W_ trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 

  /* Args: R1 :: STM a  (first code)       */
  /* Args: R2 :: STM a  (alternative code) */
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
  "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;
  
  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1332
1333
/* retry# : unwind the stack to the nearest CATCH_RETRY_FRAME or
 * ATOMICALLY_FRAME.  At a catch-retry frame, switch to the alternative
 * branch (or propagate if both branches retried).  At the atomically
 * frame, wait on the TVars read so far. */
retryzh_fast
{
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

  MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
  StgTSO_sp(CurrentTSO) = Sp;
  frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
  Sp = StgTSO_sp(CurrentTSO);
  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
    // Abort the transaction attempting the current branch
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    // Reads as "(!running_alt_code) != 0", i.e. true when the first
    // branch (not the alternative) was running.
    if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
      // Retry in the first branch: try the alternative
      "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
      R1 = StgCatchRetryFrame_alt_code(frame);
      jump stg_ap_v_fast;
    } else {
      // Retry in the alternative code: propagate the retry
      StgTSO_trec(CurrentTSO) = outer;
      Sp = Sp + SIZEOF_StgCatchRetryFrame;
      goto retry_pop_stack;
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait 
  ASSERT(frame_type == ATOMICALLY_FRAME);
  if (outer != NO_TREC) {
    // We called retry while checking invariants, so abort the current
    // invariant check (merging its TVar accesses into the parents read
    // set so we'll wait on them)
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    trec = outer;
     StgTSO_trec(CurrentTSO) = trec;
    "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
  }
  ASSERT(outer == NO_TREC);

  r = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
  if (r != 0) {
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
    StgHeader_info(frame) = stg_atomically_waiting_frame_info;
    Sp = frame;
    // Fix up the stack in the unregisterised case: the return convention is different.
    IF_NOT_REG_R1(Sp_adj(-2); 
                  Sp(1) = stg_NO_FINALIZER_closure;
                  Sp(0) = stg_ut_1_0_unreg_info;)
    R3 = trec; // passing to stmWaitUnblock()
    jump stg_block_stmwait;
  } else {
    // Transaction was not valid: retry immediately
    "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
    jump stg_ap_v_fast;
  }
}
1408
1409
/* check# : queue an invariant closure on the current transaction; it
 * will be run when the enclosing atomically frame commits. */
checkzh_fast
{
  W_ trec, closure;

  /* Args: R1 = invariant closure */
  MAYBE_GC (R1_PTR, checkzh_fast); 

  trec = StgTSO_trec(CurrentTSO);
  closure = R1;
  foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
                                     trec "ptr",
                                     closure "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1425
1426
/* newTVar# : allocate a fresh TVar holding the given initial value. */
newTVarzh_fast
{
  W_ tv;
  W_ new_value;

  /* Args: R1 = initialisation value */

  MAYBE_GC (R1_PTR, newTVarzh_fast); // stmNewTVar allocates
  new_value = R1;
  "ptr" tv = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
  RET_P(tv);
}
1439
1440
/* readTVar# : read a TVar through the current transaction record. */
readTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ result;

  /* Args: R1 = TVar closure */

  MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  "ptr" result = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];

  RET_P(result);
}
1456
1457
/* writeTVar# : record a TVar write in the current transaction record. */
writeTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ new_value;
  
  /* Args: R1 = TVar closure */
  /*       R2 = New value    */

  MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  new_value = R2;
  foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1475
1476
1477 /* -----------------------------------------------------------------------------
1478  * MVar primitives
1479  *
1480  * take & putMVar work as follows.  Firstly, an important invariant:
1481  *
1482  *    If the MVar is full, then the blocking queue contains only
1483  *    threads blocked on putMVar, and if the MVar is empty then the
1484  *    blocking queue contains only threads blocked on takeMVar.
1485  *
1486  * takeMvar:
1487  *    MVar empty : then add ourselves to the blocking queue
1488  *    MVar full  : remove the value from the MVar, and
1489  *                 blocking queue empty     : return
1490  *                 blocking queue non-empty : perform the first blocked putMVar
1491  *                                            from the queue, and wake up the
1492  *                                            thread (MVar is now full again)
1493  *
1494  * putMVar is just the dual of the above algorithm.
1495  *
1496  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1497  * the stack of the thread waiting to do the putMVar.  See
1498  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1499  * the stack layout, and the PerformPut and PerformTake macros below.
1500  *
1501  * It is important that a blocked take or put is woken up with the
1502  * take/put already performed, because otherwise there would be a
1503  * small window of vulnerability where the thread could receive an
1504  * exception and never perform its take or put, and we'd end up with a
1505  * deadlock.
1506  *
1507  * -------------------------------------------------------------------------- */
1508
/* isEmptyMVar# : emptiness is encoded in the MVar's info pointer. */
isEmptyMVarzh_fast
{
    /* args: R1 = MVar closure */

    if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
        RET_N(1);
    } else {
        RET_N(0);
    }
}
1519
/* newMVar# : allocate an empty MVar (no value, empty blocking queue). */
newMVarzh_fast
{
    /* args: none */
    W_ mvar;

    ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
  
    mvar = Hp - SIZEOF_StgMVar + WDS(1);
    SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
    /* END_TSO_QUEUE doubles as "no value" in the value slot */
    StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
    RET_P(mvar);
}
1534
1535
1536 /* If R1 isn't available, pass it on the stack */
/* PerformTake: complete a blocked takeMVar on another TSO's behalf by
 * writing the taken value and a return frame onto that TSO's stack
 * (see stg_block_takemvar in HeapStackCheck.cmm for the layout).
 * If R1 isn't available, pass it on the stack. */
#ifdef REG_R1
#define PerformTake(tso, value)                         \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
#else
#define PerformTake(tso, value)                                 \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
#endif

/* PerformPut: complete a blocked putMVar -- pop the blocked thread's
 * put frame (3 words) and fetch the value it was trying to put. */
#define PerformPut(tso,lval)                    \
    StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
    lval = W_[StgTSO_sp(tso) - WDS(1)];
1550
/* takeMVar# : take the value from an MVar, blocking if it is empty.
 * If a putMVar is queued, perform it on the waiting thread's behalf so
 * the MVar is refilled atomically (see the invariant comment above).
 *
 * Fix: the GRAN/PAR unblockOne call now carries the same "ptr"
 * argument annotations as the identical call in tryTakeMVarzh_fast. */
takeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */
    mvar = R1;

#if defined(THREADED_RTS)
    /* lockClosure returns the info pointer and leaves the MVar locked */
    "ptr" info = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    /* If the MVar is empty, put ourselves on its blocking queue,
     * and wait until we're woken up.
     */
    if (info == stg_EMPTY_MVAR_info) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        /* stg_block_takemvar unlocks the MVar and blocks the thread */
        jump stg_block_takemvar;
  }

  /* we got the value... */
  val = StgMVar_value(mvar);

  if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
  {
      /* There are putMVar(s) waiting... 
       * wake up the first thread on the queue
       */
      ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

      /* actually perform the putMVar for the thread that we just woke up */
      tso = StgMVar_head(mvar);
      PerformPut(tso,StgMVar_value(mvar));
      dirtyTSO(tso);

#if defined(GRAN) || defined(PAR)
      /* ToDo: check 2nd arg (mvar) is right */
      "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
      StgMVar_head(mvar) = tso;
#else
      "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", 
                                         StgMVar_head(mvar) "ptr") [];
      StgMVar_head(mvar) = tso;
#endif

      if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
          StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
      }

#if defined(THREADED_RTS)
      /* a queued put refilled the MVar: unlock it as FULL */
      foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
#endif
      RET_P(val);
  } 
  else
  {
      /* No further putMVars, MVar is now empty */
      StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
 
#if defined(THREADED_RTS)
      foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
#else
      SET_INFO(mvar,stg_EMPTY_MVAR_info);
#endif

      RET_P(val);
  }
}
1629
1630
/* tryTakeMVar# : non-blocking takeMVar.  Returns (0, dummy) if the
 * MVar is empty, otherwise (1, value), performing a queued putMVar
 * just as takeMVarzh_fast does. */
tryTakeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */

    mvar = R1;

#if defined(THREADED_RTS)
    "ptr" info = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_EMPTY_MVAR_info) {
#if defined(THREADED_RTS)
        foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
#endif
        /* HACK: we need a pointer to pass back, 
         * so we abuse NO_FINALIZER_closure
         */
        RET_NP(0, stg_NO_FINALIZER_closure);
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are putMVar(s) waiting... 
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));
        dirtyTSO(tso);

#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr",
                                           StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }
#if defined(THREADED_RTS)
        /* a queued put refilled the MVar: unlock it as FULL */
        foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
#endif
    }
    else 
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
#if defined(THREADED_RTS)
        foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
#else
        SET_INFO(mvar,stg_EMPTY_MVAR_info);
#endif
    }
    
    RET_NP(1, val);
}
1700
1701
/* putMVar# : put a value into an MVar, blocking if it is full.  If a
 * takeMVar is queued, perform the take on the waiting thread's behalf
 * so the MVar stays empty (see the invariant comment above). */
putMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    /* R2 must stay live across the lock call */
    "ptr" info = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    /* MVar full: join the blocking queue and wait to be woken */
    if (info == stg_FULL_MVAR_info) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        jump stg_block_putmvar;
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        dirtyTSO(tso);
      
#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        /* NOTE(review): this GRAN/PAR call passes MyCapability() while
         * takeMVar's GRAN/PAR call does not -- confirm which is right. */
        "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        /* a queued take consumed the value: unlock as EMPTY */
        foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
#else
        SET_INFO(mvar,stg_FULL_MVAR_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    
    /* ToDo: yield afterward for better communication performance? */
}
1773
1774
/* tryPutMVar# : non-blocking putMVar.  Returns 0 if the MVar is
 * already full, otherwise 1, performing a queued takeMVar just as
 * putMVarzh_fast does. */
tryPutMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    /* R2 must stay live across the lock call */
    "ptr" info = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_FULL_MVAR_info) {
#if defined(THREADED_RTS)
        foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
#endif
        RET_N(0);
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
        
        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        dirtyTSO(tso);
      
#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        /* a queued take consumed the value: unlock as EMPTY */
        foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
#endif
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
#else
        SET_INFO(mvar,stg_FULL_MVAR_info);
#endif
    }
    
    RET_N(1);
    /* ToDo: yield afterward for better communication performance? */
}
1838
1839
1840 /* -----------------------------------------------------------------------------
1841    Stable pointer primitives
1842    -------------------------------------------------------------------------  */
1843
/* makeStableName# : return a StableName closure for the object in R1.
 * StableNames are interned: repeated calls on the same object return
 * the same closure, cached in the snEntry table.
 */
makeStableNamezh_fast
{
    W_ index, sn_obj;

    /* allocate the StableName closure up front; R1 is a live pointer
     * across the potential GC */
    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
  
    index = foreign "C" lookupStableName(R1 "ptr") [];

    /* Is there already a StableName for this heap object?
     *  stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        /* no: initialise the freshly allocated closure and cache it in
         * the table entry */
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        /* yes: return the cached closure (the fresh allocation is unused) */
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }
    
    RET_P(sn_obj);
}
1866
1867
/* makeStablePtr# : register the closure in R1 with the RTS
 * stable-pointer table and return the resulting StablePtr, which is
 * an unboxed word (hence RET_N). */
makeStablePtrzh_fast
{
    /* Args: R1 = a */
    W_ stable_ptr;

    /* allow a GC before calling out; R1 is a live heap pointer */
    MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
    "ptr" stable_ptr = foreign "C" getStablePtr(R1 "ptr") [];
    RET_N(stable_ptr);
}
1876
/* deRefStablePtr# : recover the closure a StablePtr refers to by
 * indexing the snEntry array rooted at stable_ptr_table.  The
 * StablePtr itself is the index. */
deRefStablePtrzh_fast
{
    /* Args: R1 = the stable ptr */
    W_ closure, index;

    index   = R1;
    closure = snEntry_addr(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    RET_P(closure);
}
1885
1886 /* -----------------------------------------------------------------------------
1887    Bytecode object primitives
1888    -------------------------------------------------------------------------  */
1889
/* newBCO# : allocate and initialise a byte-code object.  The bitmap
 * array's words are copied into the tail of the BCO itself, so the BCO
 * is self-contained once built.
 */
newBCOzh_fast
{
    /* R1 = instrs
       R2 = literals
       R3 = ptrs
       R4 = itbls
       R5 = arity
       R6 = bitmap array
    */
    W_ bco, bitmap_arr, bytes, words;
    
    bitmap_arr = R6;
    /* size = fixed BCO header/fields plus the inlined bitmap words */
    words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
    bytes = WDS(words);

    /* all five pointer arguments must survive a possible GC */
    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );

    bco = Hp - bytes + WDS(1);
    SET_HDR(bco, stg_BCO_info, W_[CCCS]);
    
    StgBCO_instrs(bco)     = R1;
    StgBCO_literals(bco)   = R2;
    StgBCO_ptrs(bco)       = R3;
    StgBCO_itbls(bco)      = R4;
    StgBCO_arity(bco)      = HALF_W_(R5);
    StgBCO_size(bco)       = HALF_W_(words);
    
    // Copy the arity/bitmap info into the BCO
    // (C-- has no loop construct; this is a label + goto loop)
    W_ i;
    i = 0;
for:
    if (i < StgArrWords_words(bitmap_arr)) {
        StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
        i = i + 1;
        goto for;
    }
    
    RET_P(bco);
}
1929
1930
/* mkApUpd0# : wrap a zero-arity BCO in an updatable AP thunk. */
mkApUpd0zh_fast
{
    // R1 = the BCO# for the AP
    //  
    W_ ap;

    // This function is *only* used to wrap zero-arity BCOs in an
    // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
    // saturated and always points directly to a FUN or BCO.
    ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
           StgBCO_arity(R1) == HALF_W_(0));

    HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
    // ticky-ticky / cost-centre accounting for the thunk allocation
    TICK_ALLOC_UP_THK(0, 0);
    CCCS_ALLOC(SIZEOF_StgAP);

    ap = Hp - SIZEOF_StgAP + WDS(1);
    SET_HDR(ap, stg_AP_info, W_[CCCS]);
    
    // zero arguments: the AP payload is just the function (the BCO)
    StgAP_n_args(ap) = HALF_W_(0);
    StgAP_fun(ap) = R1;
    
    RET_P(ap);
}
1955
1956 /* -----------------------------------------------------------------------------
1957    Thread I/O blocking primitives
1958    -------------------------------------------------------------------------- */
1959
1960 /* Add a thread to the end of the blocked queue. (C-- version of the C
1961  * macro in Schedule.h).
1962  */
/* No locking: every use site in this file is only reachable in the
 * non-threaded RTS (the THREADED_RTS paths barf first). */
#define APPEND_TO_BLOCKED_QUEUE(tso)                    \
    ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
    if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
      W_[blocked_queue_hd] = tso;                       \
    } else {                                            \
      StgTSO_link(W_[blocked_queue_tl]) = tso;          \
    }                                                   \
    W_[blocked_queue_tl] = tso;
1971
/* waitRead# : block the current thread until the file descriptor in
 * R1 is ready for reading.  Non-threaded RTS only. */
waitReadzh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitRead# on threaded RTS");
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
    /* block_info records which fd we are waiting on */
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
1988
/* waitWrite# : block the current thread until the file descriptor in
 * R1 is ready for writing.  Non-threaded RTS only. */
waitWritezh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitWrite# on threaded RTS");
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
    /* block_info records which fd we are waiting on */
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2005
2006
STRING(stg_delayzh_malloc_str, "delayzh_fast")
/* delay# : suspend the current thread for at least R1 microseconds.
 * Non-threaded RTS only.  On mingw the delay is handed to the async
 * I/O manager; elsewhere the TSO is inserted into the time-ordered
 * sleeping queue. */
delayzh_fast
{
#ifdef mingw32_HOST_OS
    W_ ares;
    CInt reqID;
#else
    W_ t, prev, target;
#endif

#ifdef THREADED_RTS
    foreign "C" barf("delay# on threaded RTS");
#else

    /* args: R1 (microsecond delay amount) */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;

#ifdef mingw32_HOST_OS

    /* could probably allocate this on the heap instead */
    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_delayzh_malloc_str);
    /* hand the delay to the asynchronous I/O manager */
    reqID = foreign "C" addDelayRequest(R1);
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;

    /* Having all async-blocked threads reside on the blocked_queue
     * simplifies matters, so change the status to OnDoProc and put the
     * delayed thread on the blocked_queue.
     */
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async_void;

#else

    W_ time;
    time = foreign "C" getourtimeofday() [R1];
    /* wake-up time: R1 microseconds converted to timer ticks
     * (tickInterval appears to be in milliseconds, hence the *1000 -
     * NOTE(review): confirm the unit) added to the current time */
    target = (R1 / (TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags))*1000)) + time;
    StgTSO_block_info(CurrentTSO) = target;

    /* Insert the new thread in the sleeping queue. */
    /* the queue is kept sorted by wake-up time; scan for the insertion
     * point, keeping a trailing pointer */
    prev = NULL;
    t = W_[sleeping_queue];
while:
    if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
        prev = t;
        t = StgTSO_link(t);
        goto while;
    }

    StgTSO_link(CurrentTSO) = t;
    if (prev == NULL) {
        W_[sleeping_queue] = CurrentTSO;
    } else {
        StgTSO_link(prev) = CurrentTSO;
    }
    jump stg_block_noregs;
#endif
#endif /* !THREADED_RTS */
}
2071
2072
2073 #ifdef mingw32_HOST_OS
STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
/* asyncRead# : issue an asynchronous read via the Windows I/O manager
 * and block the current thread until it completes.  mingw32,
 * non-threaded RTS only. */
asyncReadzh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncRead# on threaded RTS");
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;

    /* could probably allocate this on the heap instead */
    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncReadzh_malloc_str)
                        [R1,R2,R3,R4];
    /* second argument 0 = read request (cf. asyncWritezh_fast) */
    reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    /* block_info points at the result record the I/O manager fills in */
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2101
STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
/* asyncWrite# : issue an asynchronous write via the Windows I/O
 * manager and block the current thread until it completes.  mingw32,
 * non-threaded RTS only. */
asyncWritezh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncWrite# on threaded RTS");
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncWritezh_malloc_str)
                        [R1,R2,R3,R4];
    /* second argument 1 = write request (cf. asyncReadzh_fast) */
    reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];

    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    /* block_info points at the result record the I/O manager fills in */
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2129
STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
/* asyncDoProc# : run an arbitrary procedure (R1, with parameter R2) on
 * a worker via the Windows I/O manager and block the current thread
 * until it finishes.  mingw32, non-threaded RTS only. */
asyncDoProczh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncDoProc# on threaded RTS");
#else

    /* args: R1 = proc, R2 = param */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;

    /* could probably allocate this on the heap instead */
    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncDoProczh_malloc_str) 
                                [R1,R2];
    reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    /* block_info points at the result record the I/O manager fills in */
    StgTSO_block_info(CurrentTSO) = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2157 #endif
2158
2159 /* -----------------------------------------------------------------------------
2160   ** temporary **
2161
2162    classes CCallable and CReturnable don't really exist, but the
2163    compiler insists on generating dictionaries containing references
2164    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
2165    for these.  Some C compilers can't cope with zero-length static arrays,
2166    so we have to make these one element long.
2167   --------------------------------------------------------------------------- */
2168
/* one-word dummy static closures (see the "temporary" note above) */
section "rodata" {
  GHC_ZCCCallable_static_info:   W_ 0;
}

section "rodata" {
  GHC_ZCCReturnable_static_info: W_ 0;
}