Remove the itbls field of BCO, put itbls in with the literals
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check, the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *less* than the size of a word, make sure we
42  * round up to the nearest word for the size of the array.
43  */
44
/* Implements the newByteArray# primop.
 * Arg: R1 = requested size in *bytes*.  Allocates an ARR_WORDS object in
 * the current capability's local heap; the payload is the byte count
 * rounded up to a whole number of words (see comment above).  Returns the
 * new MutableByteArray# in R1 via RET_P. */
45 newByteArrayzh_fast
46 {
47     W_ words, payload_words, n, p;
48     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
49     n = R1;
50     payload_words = ROUNDUP_BYTES_TO_WDS(n);
51     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
52     "ptr" p = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
53     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
54     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
55     StgArrWords_words(p) = payload_words;
56     RET_P(p);
57 }
58
/* Implements the newPinnedByteArray# primop.
 * Arg: R1 = requested size in bytes.  Like newByteArrayzh_fast, but the
 * object is allocated with allocatePinned() so the GC will not move it,
 * and the payload is arranged to start on an 8-byte boundary (see the
 * comments on the header-size adjustment below). */
59 newPinnedByteArrayzh_fast
60 {
61     W_ words, payload_words, n, p;
62
63     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
64     n = R1;
65     payload_words = ROUNDUP_BYTES_TO_WDS(n);
66
67     // We want an 8-byte aligned array.  allocatePinned() gives us
68     // 8-byte aligned memory by default, but we want to align the
69     // *goods* inside the ArrWords object, so we have to check the
70     // size of the ArrWords header and adjust our size accordingly.
71     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
72     if ((SIZEOF_StgArrWords & 7) != 0) {
73         words = words + 1;
74     }
75
76     "ptr" p = foreign "C" allocatePinned(words) [];
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78
79     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
80     // have to push the object forward one word so that the goods
81     // fall on an 8-byte boundary.
82     if ((SIZEOF_StgArrWords & 7) != 0) {
83         p = p + WDS(1);
84     }
85
86     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
87     StgArrWords_words(p) = payload_words;
88     RET_P(p);
89 }
90
/* Implements the newArray# primop.
 * Args: R1 = number of elements (words), R2 = initialisation value.
 * Allocates a MUT_ARR_PTRS (dirty) object and fills every slot with the
 * value in R2, then returns the array via RET_P. */
91 newArrayzh_fast
92 {
93     W_ words, n, init, arr, p;
94     /* Args: R1 = words, R2 = initialisation value */
95
96     n = R1;
97     MAYBE_GC(R2_PTR,newArrayzh_fast);
98
99     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
100     "ptr" arr = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
101     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
102
103     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
104     StgMutArrPtrs_ptrs(arr) = n;
105
106     // Initialise all elements of the array with the value in R2
107     init = R2;
108     p = arr + SIZEOF_StgMutArrPtrs;
109   for:
110     if (p < arr + WDS(words)) {
111         W_[p] = init;
112         p = p + WDS(1);
113         goto for;
114     }
115
116     RET_P(arr);
117 }
118
/* Implements the unsafeThawArray# primop: turn a frozen MUT_ARR_PTRS back
 * into a mutable (dirty) one.  The only subtlety is whether the object is
 * already on the generation's mutable list — see the long note below.
 * Arg: R1 = the (frozen) array.  Returns the same closure, re-tagged. */
119 unsafeThawArrayzh_fast
120 {
121   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
122   //
123   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
124   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
125   // it on the mutable list for the GC to remove (removing something from
126   // the mutable list is not easy, because the mut_list is only singly-linked).
127   // 
128   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
129   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
130   // to indicate that it is still on the mutable list.
131   //
132   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
133   // either it is on a mut_list, or it isn't.  We adopt the convention that
134   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
135   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
136   // we put it on the mutable list more than once, but it would get scavenged
137   // multiple times during GC, which would be unnecessarily slow.
138   //
139   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
140         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
141         recordMutable(R1, R1);
142         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
143         RET_P(R1);
144   } else {
145         // already on the mutable list (FROZEN0): just re-tag as dirty.
146         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
147         RET_P(R1);
148   }
149 }
149
150 /* -----------------------------------------------------------------------------
151    MutVar primitives
152    -------------------------------------------------------------------------- */
153
/* Implements the newMutVar# primop.
 * Arg: R1 = initial contents.  Allocates a MUT_VAR (dirty) closure on the
 * heap (ALLOC_PRIM bumps Hp) and returns it via RET_P. */
154 newMutVarzh_fast
155 {
156     W_ mv;
157     /* Args: R1 = initialisation value */
158
159     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
160
161     mv = Hp - SIZEOF_StgMutVar + WDS(1);
162     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
163     StgMutVar_var(mv) = R1;
164     
165     RET_P(mv);
166 }
167
/* Implements the atomicModifyMutVar# primop.
 * Args: R1 = MutVar#, R2 = update function f :: a -> (a,b).
 * Builds three thunks sharing the application (f x) — see the comment
 * block below — stores the sel_0 thunk as the new contents, and returns
 * the sel_1 thunk.  Under THREADED_RTS the whole read-modify-write is
 * serialised with atomic_modify_mutvar_mutex. */
168 atomicModifyMutVarzh_fast
169 {
170     W_ mv, z, x, y, r;
171     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
172
173     /* If x is the current contents of the MutVar#, then 
174        We want to make the new contents point to
175
176          (sel_0 (f x))
177  
178        and the return value is
179          
180          (sel_1 (f x))
181
182         obviously we can share (f x).
183
184          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
185          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
186          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
187     */
188
189 #if MIN_UPD_SIZE > 1
190 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
191 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
192 #else
193 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
194 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
195 #endif
196
197 #if MIN_UPD_SIZE > 2
198 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
199 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
200 #else
201 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
202 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
203 #endif
204
205 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
206
207    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
208
209 #if defined(THREADED_RTS)
210     foreign "C" ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
211 #endif
212
213    x = StgMutVar_var(R1);
214
215    // z = (f x), an application thunk of f to x
216    TICK_ALLOC_THUNK_2();
217    CCCS_ALLOC(THUNK_2_SIZE);
218    z = Hp - THUNK_2_SIZE + WDS(1);
219    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
220    LDV_RECORD_CREATE(z);
221    StgThunk_payload(z,0) = R2;
222    StgThunk_payload(z,1) = x;
223
224    // y = sel_0 z, the new contents of the MutVar#
225    TICK_ALLOC_THUNK_1();
226    CCCS_ALLOC(THUNK_1_SIZE);
227    y = z - THUNK_1_SIZE;
228    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
229    LDV_RECORD_CREATE(y);
230    StgThunk_payload(y,0) = z;
231
232    StgMutVar_var(R1) = y;
233    foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];
234
235    // r = sel_1 z, the value returned to the caller
236    TICK_ALLOC_THUNK_1();
237    CCCS_ALLOC(THUNK_1_SIZE);
238    r = y - THUNK_1_SIZE;
239    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
240    LDV_RECORD_CREATE(r);
241    StgThunk_payload(r,0) = z;
242
243 #if defined(THREADED_RTS)
244     foreign "C" RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
245 #endif
246
247    RET_P(r);
248 }
246
247 /* -----------------------------------------------------------------------------
248    Weak Pointer Primitives
249    -------------------------------------------------------------------------- */
250
251 STRING(stg_weak_msg,"New weak pointer at %p\n")
252
/* Implements the mkWeak# primop.
 * Args: R1 = key, R2 = value, R3 = finalizer (or NULL, replaced by
 * stg_NO_FINALIZER_closure).  Allocates a WEAK closure, links it onto the
 * global weak_ptr_list, and returns it via RET_P. */
253 mkWeakzh_fast
254 {
255   /* R1 = key
256      R2 = value
257      R3 = finalizer (or NULL)
258   */
259   W_ w;
260
261   if (R3 == NULL) {
262     R3 = stg_NO_FINALIZER_closure;
263   }
264
265   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
266
267   w = Hp - SIZEOF_StgWeak + WDS(1);
268   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
269
270   StgWeak_key(w)       = R1;
271   StgWeak_value(w)     = R2;
272   StgWeak_finalizer(w) = R3;
273
274   // push onto the front of the global weak pointer list
275   StgWeak_link(w)       = W_[weak_ptr_list];
276   W_[weak_ptr_list]     = w;
277
278   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
279
280   RET_P(w);
281 }
281
282
/* Implements the finalizeWeak# primop: mark the weak pointer dead and
 * hand its finalizer (if any) back to the caller to run.
 * Arg: R1 = weak ptr.  Returns (# 0, no-finalizer #) if already dead or
 * there is no finalizer, otherwise (# 1, finalizer #). */
283 finalizzeWeakzh_fast
284 {
285   /* R1 = weak ptr
286    */
287   W_ w, f;
288
289   w = R1;
290
291   // already dead?
292   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
293       RET_NP(0,stg_NO_FINALIZER_closure);
294   }
295
296   // kill it
297 #ifdef PROFILING
298   // @LDV profiling
299   // A weak pointer is inherently used, so we do not need to call
300   // LDV_recordDead_FILL_SLOP_DYNAMIC():
301   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
302   // or, LDV_recordDead():
303   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
304   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
305   // large as weak pointers, so there is no need to fill the slop, either.
306   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
307 #endif
308
309   //
310   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
311   //
312   SET_INFO(w,stg_DEAD_WEAK_info);
313   LDV_RECORD_CREATE(w);
314
315   // grab the finalizer before overwriting the payload with the link
316   f = StgWeak_finalizer(w);
317   StgDeadWeak_link(w) = StgWeak_link(w);
318
319   /* return the finalizer */
320   if (f == stg_NO_FINALIZER_closure) {
321       RET_NP(0,stg_NO_FINALIZER_closure);
322   } else {
323       RET_NP(1,f);
324   }
325 }
325
/* Implements the deRefWeak# primop.
 * Arg: R1 = weak ptr.  Returns (# 1, value #) while the weak pointer is
 * still alive (info table is stg_WEAK_info), or (# 0, w #) once it has
 * been finalized/died. */
326 deRefWeakzh_fast
327 {
328   /* R1 = weak ptr */
329   W_ w, code, val;
330
331   w = R1;
332   if (GET_INFO(w) == stg_WEAK_info) {
333     code = 1;
334     val = StgWeak_value(w);
335   } else {
336     code = 0;
337     val = w;
338   }
339   RET_NP(code,val);
340 }
341
342 /* -----------------------------------------------------------------------------
343    Arbitrary-precision Integer operations.
344
345    There are some assumptions in this code that mp_limb_t == W_.  This is
346    the case for all the platforms that GHC supports, currently.
347    -------------------------------------------------------------------------- */
348
/* Implements the int2Integer# primop: convert an Int# to the GMP-style
 * (size, limb array) representation of an Integer.
 * Arg: R1 = Int#.  Returns (# s, ByteArray# #) where s is -1/0/1 and the
 * one-word ARR_WORDS holds the magnitude (inlined mpz_set_si). */
349 int2Integerzh_fast
350 {
351    /* arguments: R1 = Int# */
352
353    W_ val, s, p;        /* to avoid aliasing */
354
355    val = R1;
356    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
357
358    p = Hp - SIZEOF_StgArrWords;
359    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
360    StgArrWords_words(p) = 1;
361
362    /* mpz_set_si is inlined here, makes things simpler */
363    if (%lt(val,0)) { 
364         s  = -1;
365         Hp(0) = -val;
366    } else { 
367      if (%gt(val,0)) {
368         s = 1;
369         Hp(0) = val;
370      } else {
371         s = 0;
372      }
373   }
374
375    /* returns (# size  :: Int#, 
376                  data  :: ByteArray# 
377                #)
378    */
379    RET_NP(s,p);
380 }
381
/* Implements the word2Integer# primop: convert a Word# to the GMP-style
 * (size, limb array) Integer representation.
 * Arg: R1 = Word#.  Size is 1 for non-zero values, 0 for zero (never
 * negative, unlike int2Integerzh_fast above). */
382 word2Integerzh_fast
383 {
384    /* arguments: R1 = Word# */
385
386    W_ val, s, p;        /* to avoid aliasing */
387
388    val = R1;
389
390    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
391
392    p = Hp - SIZEOF_StgArrWords;
393    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
394    StgArrWords_words(p) = 1;
395
396    if (val != 0) {
397         s = 1;
398         W_[Hp] = val;
399    } else {
400         s = 0;
401    }
402
403    /* returns (# size  :: Int#, 
404                  data  :: ByteArray# #)
405    */
406    RET_NP(s,p);
407 }
408
409
410 /*
411  * 'long long' primops for converting to/from Integers.
412  */
413
414 #ifdef SUPPORT_LONG_LONGS
415
/* Convert an Int64# to the GMP-style Integer representation, for 32-bit
 * platforms (SUPPORT_LONG_LONGS).  The magnitude is stored little-end
 * first in one or two word-sized limbs; the sign is carried by the size
 * result (negated via `neg` at the end).
 * Arg: L1 = Int64#.  Returns (# size::Int#, ByteArray# #). */
416 int64ToIntegerzh_fast
417 {
418    /* arguments: L1 = Int64# */
419
420    L_ val;
421    W_ hi, lo, s, neg, words_needed, p;
422
423    val = L1;
424    neg = 0;
425
426    hi = TO_W_(val >> 32);
427    lo = TO_W_(val);
428
429    // two limbs only when the high word is not just sign extension
430    if ( hi != 0 && hi != 0xFFFFFFFF )  { 
431        words_needed = 2;
432    } else { 
433        // minimum is one word
434        words_needed = 1;
435    }
436
437    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
438                NO_PTRS, int64ToIntegerzh_fast );
439
440    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
441    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
442    StgArrWords_words(p) = words_needed;
443
444    // negate a negative value across the two words (two's complement)
445    if ( %lt(hi,0) ) {
446      neg = 1;
447      lo = -lo;
448      if(lo == 0) {
449        hi = -hi;
450      } else {
451        hi = -hi - 1;
452      }
453    }
454
455    if ( words_needed == 2 )  { 
456       s = 2;
457       Hp(-1) = lo;
458       Hp(0) = hi;
459    } else { 
460        if ( lo != 0 ) {
461            s = 1;
462            Hp(0) = lo;
463        } else /* val==0 */  {
464            s = 0;
465        }
466    }
467    if ( neg != 0 ) {
468         s = -s;
469    }
470
471    /* returns (# size  :: Int#, 
472                  data  :: ByteArray# #)
473    */
474    RET_NP(s,p);
475 }
/* Convert a Word64# to the GMP-style Integer representation, for 32-bit
 * platforms (SUPPORT_LONG_LONGS).  Like int64ToIntegerzh_fast but the
 * value is unsigned, so the size result is never negative.
 * Arg: L1 = Word64#.  Returns (# size::Int#, ByteArray# #). */
474 word64ToIntegerzh_fast
475 {
476    /* arguments: L1 = Word64# */
477
478    L_ val;
479    W_ hi, lo, s, words_needed, p;
480
481    val = L1;
482    hi = TO_W_(val >> 32);
483    lo = TO_W_(val);
484
485    if ( hi != 0 ) {
486       words_needed = 2;
487    } else {
488       words_needed = 1;
489    }
490
491    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
492                NO_PTRS, word64ToIntegerzh_fast );
493
494    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
495    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
496    StgArrWords_words(p) = words_needed;
497
498    if ( hi != 0 ) { 
499      s = 2;
500      Hp(-1) = lo;
501      Hp(0)  = hi;
502    } else {
503       if ( lo != 0 ) {
504         s = 1;
505         Hp(0) = lo;
506      } else /* val==0 */  {
507       s = 0;
508      }
509   }
510
511    /* returns (# size  :: Int#, 
512                  data  :: ByteArray# #)
513    */
514    RET_NP(s,p);
515 }
516
517
518
519 #endif /* SUPPORT_LONG_LONGS */
520
521 /* ToDo: this is shockingly inefficient */
522
/* Scratch MP_INT/limb storage for the GMP glue code below.
 * Non-threaded RTS: static bss buffers are fine (only one Haskell thread
 * runs at a time).  Threaded RTS: FETCH_MP_TEMP instead points each name
 * at a per-capability slot in the StgRegTable (OFFSET_StgRegTable_r<X>),
 * so concurrent primop calls do not share scratch space. */
523 #ifndef THREADED_RTS
524 section "bss" {
525   mp_tmp1:
526     bits8 [SIZEOF_MP_INT];
527 }
528
529 section "bss" {
530   mp_tmp2:
531     bits8 [SIZEOF_MP_INT];
532 }
533
534 section "bss" {
535   mp_result1:
536     bits8 [SIZEOF_MP_INT];
537 }
538
539 section "bss" {
540   mp_result2:
541     bits8 [SIZEOF_MP_INT];
542 }
543 #endif
544
545 #ifdef THREADED_RTS
546 #define FETCH_MP_TEMP(X) \
547 W_ X; \
548 X = BaseReg + (OFFSET_StgRegTable_r ## X);
549 #else
550 #define FETCH_MP_TEMP(X) /* Nothing */
551 #endif
552
/* GMP_TAKE2_RET1(name, mp_fun): define a primop `name` that takes two
 * Integers — (R1,R2) = (size, limbs) of the first, (R3,R4) of the second —
 * builds two stack/static MP_INTs aliasing the ByteArray payloads, calls
 * the two-argument GMP function mp_fun, and returns the result's (size,
 * ByteArray#).  NB. the returned data pointer is rewound by
 * SIZEOF_StgArrWords because __gmpz_init'd results come from the
 * GMP allocator, which the RTS points at the heap (ARR_WORDS payloads). */
553 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
554 name                                                                    \
555 {                                                                       \
556   CInt s1, s2;                                                          \
557   W_ d1, d2;                                                            \
558   FETCH_MP_TEMP(mp_tmp1);                                               \
559   FETCH_MP_TEMP(mp_tmp2);                                               \
560   FETCH_MP_TEMP(mp_result1)                                             \
561   FETCH_MP_TEMP(mp_result2);                                            \
562                                                                         \
563   /* call doYouWantToGC() */                                            \
564   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
565                                                                         \
566   s1 = W_TO_INT(R1);                                                    \
567   d1 = R2;                                                              \
568   s2 = W_TO_INT(R3);                                                    \
569   d2 = R4;                                                              \
570                                                                         \
571   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
572   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
573   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
574   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
575   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
576   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
577                                                                         \
578   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
579                                                                         \
580   /* Perform the operation */                                           \
581   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
582                                                                         \
583   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
584          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
585 }
586
/* GMP_TAKE1_RET1(name, mp_fun): like GMP_TAKE2_RET1 but for unary GMP
 * functions.  (R1,R2) = (size, limbs) of the single Integer argument;
 * returns the result's (size, ByteArray#). */
587 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
588 name                                                                    \
589 {                                                                       \
590   CInt s1;                                                              \
591   W_ d1;                                                                \
592   FETCH_MP_TEMP(mp_tmp1);                                               \
593   FETCH_MP_TEMP(mp_result1)                                             \
594                                                                         \
595   /* call doYouWantToGC() */                                            \
596   MAYBE_GC(R2_PTR, name);                                               \
597                                                                         \
598   d1 = R2;                                                              \
599   s1 = W_TO_INT(R1);                                                    \
600                                                                         \
601   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
602   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
603   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
604                                                                         \
605   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
606                                                                         \
607   /* Perform the operation */                                           \
608   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
609                                                                         \
610   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
611          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
612 }
613
/* GMP_TAKE2_RET2(name, mp_fun): like GMP_TAKE2_RET1 but for GMP functions
 * producing two results (e.g. quot/rem pairs).  Returns
 * (# size1, data1, size2, data2 #) via RET_NPNP. */
614 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
615 name                                                                                    \
616 {                                                                                       \
617   CInt s1, s2;                                                                          \
618   W_ d1, d2;                                                                            \
619   FETCH_MP_TEMP(mp_tmp1);                                                               \
620   FETCH_MP_TEMP(mp_tmp2);                                                               \
621   FETCH_MP_TEMP(mp_result1)                                                             \
622   FETCH_MP_TEMP(mp_result2)                                                             \
623                                                                                         \
624   /* call doYouWantToGC() */                                                            \
625   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
626                                                                                         \
627   s1 = W_TO_INT(R1);                                                                    \
628   d1 = R2;                                                                              \
629   s2 = W_TO_INT(R3);                                                                    \
630   d2 = R4;                                                                              \
631                                                                                         \
632   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
633   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
634   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
635   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
636   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
637   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
638                                                                                         \
639   foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
640   foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
641                                                                                         \
642   /* Perform the operation */                                                           \
643   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
644                                                                                         \
645   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
646            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
647            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
648            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
649 }
650
/* Instantiate the GMP wrapper macros above for each Integer primop,
 * pairing the primop's external name with the GMP function that does the
 * arithmetic. */
651 GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
652 GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
653 GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
654 GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
655 GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
656 GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
657 GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
658 GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
659 GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
660 GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
661 GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)
662
663 GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
664 GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
665
/* One-limb scratch word for gcdIntzh_fast / decode* below; per-capability
 * under THREADED_RTS (via FETCH_MP_TEMP), static otherwise. */
666 #ifndef THREADED_RTS
667 section "bss" {
668   mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
669 }
670 #endif
671
/* Implements the gcdInt# primop: gcd of two Int#s via GMP's __gmpn_gcd_1,
 * treating R1 as a one-limb number.
 * Args: R1, R2 = the two Int#s.  Result in R1; returns through the
 * info pointer at the top of the stack. */
672 gcdIntzh_fast
673 {
674     /* R1 = the first Int#; R2 = the second Int# */
675     W_ r; 
676     FETCH_MP_TEMP(mp_tmp_w);
677
678     W_[mp_tmp_w] = R1;
679     r = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
680
681     R1 = r;
682     /* Result parked in R1, return via info-pointer at TOS */
683     jump %ENTRY_CODE(Sp(0));
684 }
685
686
/* Implements the gcdIntegerInt# primop: gcd of an Integer (R1 = size,
 * R2 = limb ByteArray#) and an Int# (R3), via __gmpn_gcd_1 over the
 * Integer's limbs.  Result in R1; returns through the info pointer at
 * the top of the stack. */
687 gcdIntegerIntzh_fast
688 {
689     /* R1 = s1; R2 = d1; R3 = the int */
690     R1 = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
691     
692     /* Result parked in R1, return via info-pointer at TOS */
693     jump %ENTRY_CODE(Sp(0));
694 }
695
696
/* Implements the cmpIntegerInt# primop: compare an Integer (R1 = size,
 * R2 = limbs) with an Int# (R3).  Result in R1: negative / zero /
 * positive for LT / EQ / GT; returns through the info pointer at TOS.
 * The algorithm first compares signed sizes, then the single limb. */
697 cmpIntegerIntzh_fast
698 {
699     /* R1 = s1; R2 = d1; R3 = the int */
700     W_ usize, vsize, v_digit, u_digit;
701
702     usize = R1;
703     vsize = 0;
704     v_digit = R3;
705
706     // paraphrased from __gmpz_cmp_si() in the GMP sources
707     if (%gt(v_digit,0)) {
708         vsize = 1;
709     } else { 
710         if (%lt(v_digit,0)) {
711             vsize = -1;
712             v_digit = -v_digit;
713         }
714     }
715
716     // different signs/sizes decide the comparison immediately
717     if (usize != vsize) {
718         R1 = usize - vsize; 
719         jump %ENTRY_CODE(Sp(0));
720     }
721
722     if (usize == 0) {
723         R1 = 0; 
724         jump %ENTRY_CODE(Sp(0));
725     }
726
727     u_digit = W_[BYTE_ARR_CTS(R2)];
728
729     if (u_digit == v_digit) {
730         R1 = 0; 
731         jump %ENTRY_CODE(Sp(0));
732     }
733
734     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
735         R1 = usize; 
736     } else {
737         R1 = -usize; 
738     }
739
740     jump %ENTRY_CODE(Sp(0));
741 }
741
/* Implements the cmpInteger# primop: compare two Integers
 * (R1,R2) = (size, limbs) and (R3,R4) = (size, limbs).
 * Result in R1: negative / zero / positive for LT / EQ / GT; returns
 * through the info pointer at TOS.  Sizes decide first; equal-size
 * magnitudes are compared limb-wise with __gmpn_cmp and the answer is
 * flipped for negative numbers. */
742 cmpIntegerzh_fast
743 {
744     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
745     W_ usize, vsize, size, up, vp;
746     CInt cmp;
747
748     // paraphrased from __gmpz_cmp() in the GMP sources
749     usize = R1;
750     vsize = R3;
751
752     if (usize != vsize) {
753         R1 = usize - vsize; 
754         jump %ENTRY_CODE(Sp(0));
755     }
756
757     if (usize == 0) {
758         R1 = 0; 
759         jump %ENTRY_CODE(Sp(0));
760     }
761
762     if (%lt(usize,0)) { // NB. not <, which is unsigned
763         size = -usize;
764     } else {
765         size = usize;
766     }
767
768     up = BYTE_ARR_CTS(R2);
769     vp = BYTE_ARR_CTS(R4);
770
771     cmp = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];
772
773     if (cmp == 0 :: CInt) {
774         R1 = 0; 
775         jump %ENTRY_CODE(Sp(0));
776     }
777
778     // same sign of cmp and usize => the larger magnitude wins
779     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
780         R1 = 1;
781     } else {
782         R1 = (-1); 
783     }
784     /* Result parked in R1, return via info-pointer at TOS */
785     jump %ENTRY_CODE(Sp(0));
786 }
786
/* Implements the integer2Int# primop: truncate an Integer (R1 = size,
 * R2 = limbs) to an Int# by taking the low limb, negated when the size
 * is negative.  Result in R1; returns through the info pointer at TOS. */
787 integer2Intzh_fast
788 {
789     /* R1 = s; R2 = d */
790     W_ r, s;
791
792     s = R1;
793     if (s == 0) {
794         r = 0;
795     } else {
796         r = W_[R2 + SIZEOF_StgArrWords];
797         if (%lt(s,0)) {
798             r = -r;
799         }
800     }
801     /* Result parked in R1, return via info-pointer at TOS */
802     R1 = r;
803     jump %ENTRY_CODE(Sp(0));
804 }
805
/* Implements the integer2Word# primop: same truncation as
 * integer2Intzh_fast above, the result being interpreted as a Word# by
 * the caller.  Result in R1; returns through the info pointer at TOS. */
806 integer2Wordzh_fast
807 {
808   /* R1 = s; R2 = d */
809   W_ r, s;
810
811   s = R1;
812   if (s == 0) {
813     r = 0;
814   } else {
815     r = W_[R2 + SIZEOF_StgArrWords];
816     if (%lt(s,0)) {
817         r = -r;
818     }
819   }
820   /* Result parked in R1, return via info-pointer at TOS */
821   R1 = r;
822   jump %ENTRY_CODE(Sp(0));
823 }
824
/* Implements the decodeFloat# primop: split a Float# into mantissa and
 * exponent using the C helper __decodeFloat.  A one-word ARR_WORDS is
 * allocated to receive the mantissa limb (mp_tmp1._mp_d points into it).
 * Argument: F1 = Float#.
 * Returns (# expn::Int#, mantissa-size::Int#, mantissa::ByteArray# #).
 * NOTE: the opening brace of this function body was missing in this copy
 * of the file; restored here (the closing brace was already present). */
825 decodeFloatzh_fast
826 {
827     W_ p;
828     F_ arg;
829     FETCH_MP_TEMP(mp_tmp1);
830     FETCH_MP_TEMP(mp_tmp_w);
831     
832     /* arguments: F1 = Float# */
833     arg = F1;
834     
835     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
836     
837     /* Be prepared to tell Lennart-coded __decodeFloat
838        where mantissa._mp_d can be put (it does not care about the rest) */
839     p = Hp - SIZEOF_StgArrWords;
840     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
841     StgArrWords_words(p) = 1;
842     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
843     
844     /* Perform the operation */
845     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
846     
847     /* returns: (Int# (expn), Int#, ByteArray#) */
848     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
849 }
850
851 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
852 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
853
/* Implements the decodeDouble# primop: split a Double# into mantissa and
 * exponent using the C helper __decodeDouble.  An ARR_SIZE-byte ARR_WORDS
 * is allocated to receive the mantissa (mp_tmp1._mp_d points into it).
 * Argument: D1 = Double#.
 * Returns (# expn::Int#, mantissa-size::Int#, mantissa::ByteArray# #).
 * NOTE: the opening brace of this function body was missing in this copy
 * of the file; restored here (the closing brace was already present). */
854 decodeDoublezh_fast
855 {
856     D_ arg;
857     W_ p;
858     FETCH_MP_TEMP(mp_tmp1);
859     FETCH_MP_TEMP(mp_tmp_w);
860
861     /* arguments: D1 = Double# */
862     arg = D1;
863
864     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
865     
866     /* Be prepared to tell Lennart-coded __decodeDouble
867        where mantissa.d can be put (it does not care about the rest) */
868     p = Hp - ARR_SIZE + WDS(1);
869     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
870     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
871     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
872
873     /* Perform the operation */
874     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
875     
876     /* returns: (Int# (expn), Int#, ByteArray#) */
877     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
878 }
879
880 /* -----------------------------------------------------------------------------
881  * Concurrency primitives
882  * -------------------------------------------------------------------------- */
883
/* Implements the fork# primop: create a new IO thread running the given
 * closure and hand it to the scheduler, then request a context switch so
 * the new thread gets to run promptly.
 * Arg: R1 = closure to run.  Returns the new ThreadId# via RET_P. */
884 forkzh_fast
885 {
886   /* args: R1 = closure to spark */
887
888   MAYBE_GC(R1_PTR, forkzh_fast);
889
890   W_ closure;
891   W_ threadid;
892   closure = R1;
893
894   "ptr" threadid = foreign "C" createIOThread( MyCapability() "ptr", 
895                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
896                                 closure "ptr") [];
897   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
898
899   // switch at the earliest opportunity
900   CInt[context_switch] = 1 :: CInt;
901   
902   RET_P(threadid);
903 }
904
/* Implements the forkOn# primop: like forkzh_fast, but the new thread is
 * scheduled on a specific capability via scheduleThreadOn.
 * Args: R1 = cpu number, R2 = closure to run.  Returns the ThreadId#. */
905 forkOnzh_fast
906 {
907   /* args: R1 = cpu, R2 = closure to spark */
908
909   MAYBE_GC(R2_PTR, forkOnzh_fast);
910
911   W_ cpu;
912   W_ closure;
913   W_ threadid;
914   cpu = R1;
915   closure = R2;
916
917   "ptr" threadid = foreign "C" createIOThread( MyCapability() "ptr", 
918                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
919                                 closure "ptr") [];
920   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
921
922   // switch at the earliest opportunity
923   CInt[context_switch] = 1 :: CInt;
924   
925   RET_P(threadid);
926 }
927
/* Implements the yield# primop: defer entirely to the RTS stub that
 * yields the current thread. */
928 yieldzh_fast
929 {
930   jump stg_yield_noregs;
931 }
932
/* Implements the myThreadId# primop: return the current TSO. */
933 myThreadIdzh_fast
934 {
935   /* no args. */
936   RET_P(CurrentTSO);
937 }
938
/* Implements the labelThread# primop: attach a debug label to a thread.
 * Args: R1 = ThreadId#, R2 = Addr# of the label.  A no-op unless the RTS
 * was built with DEBUG; returns through the info pointer at TOS. */
939 labelThreadzh_fast
940 {
941   /* args: 
942         R1 = ThreadId#
943         R2 = Addr# */
944 #ifdef DEBUG
945   foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
946 #endif
947   jump %ENTRY_CODE(Sp(0));
948 }
949
/* Implements the isCurrentThreadBound# primop: ask the RTS whether the
 * current TSO is bound to an OS thread; returns the flag via RET_N. */
950 isCurrentThreadBoundzh_fast
951 {
952   /* no args */
953   W_ r;
954   r = foreign "C" isThreadBound(CurrentTSO) [];
955   RET_N(r);
956 }
957
958
959 /* -----------------------------------------------------------------------------
960  * TVar primitives
961  * -------------------------------------------------------------------------- */
962
/* In the unregisterised build R1 lives on the stack, so return addresses
 * sit one word further up (SP_OFF = 1) and the frame code must shuffle the
 * returned value around (IF_NOT_REG_R1 expands its argument only then). */
963 #ifdef REG_R1
964 #define SP_OFF 0
965 #define IF_NOT_REG_R1(x) 
966 #else
967 #define SP_OFF 1
968 #define IF_NOT_REG_R1(x) x
969 #endif
970
971 // Catch retry frame ------------------------------------------------------------
972
/* Vectored returns into a catch-retry frame are invalid: STM code always
 * returns via the direct entry.  These stubs just abort if ever entered. */
973 #define CATCH_RETRY_FRAME_ERROR(label) \
974   label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }
975
976 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
977 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
978 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
979 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
980 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
981 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
982 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
983 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)
984
985 #if MAX_VECTORED_RTN > 8
986 #error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
987 #endif
988
/* Frame layout: profiling builds carry two extra (CCS) words, reflected in
 * both the frame size and the pointer bitmap. */
989 #if defined(PROFILING)
990 #define CATCH_RETRY_FRAME_BITMAP 7
991 #define CATCH_RETRY_FRAME_WORDS  5
992 #else
993 #define CATCH_RETRY_FRAME_BITMAP 1
994 #define CATCH_RETRY_FRAME_WORDS  3
995 #endif
996
/* stg_catch_retry_frame: return address pushed by catchRetry#.
 * Entered when the code under a catchRetry# finishes normally: commit the
 * nested transaction; on success pop the frame and return, on failure
 * restart whichever branch (first/alt) was running. */
997 INFO_TABLE_RET(stg_catch_retry_frame,
998                CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
999                CATCH_RETRY_FRAME,
1000                stg_catch_retry_frame_0_ret,
1001                stg_catch_retry_frame_1_ret,
1002                stg_catch_retry_frame_2_ret,
1003                stg_catch_retry_frame_3_ret,
1004                stg_catch_retry_frame_4_ret,
1005                stg_catch_retry_frame_5_ret,
1006                stg_catch_retry_frame_6_ret,
1007                stg_catch_retry_frame_7_ret)
1008 {
1009    W_ r, frame, trec, outer;
      /* Unregisterised: the returned value was pushed on the stack; pop it
       * now and re-push it after the frame is removed. */
1010    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1011
1012    frame = Sp;
1013    trec = StgTSO_trec(CurrentTSO);
1014    "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1015    r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
1016    if (r != 0) {
1017      /* Succeeded (either first branch or second branch) */
1018      StgTSO_trec(CurrentTSO) = outer;
1019      Sp = Sp + SIZEOF_StgCatchRetryFrame;
1020      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1021      jump %ENTRY_CODE(Sp(SP_OFF));
1022    } else {
1023      /* Did not commit: re-execute */
1024      W_ new_trec;
        /* Start a fresh nested transaction and re-run whichever branch was
         * active when the commit failed. */
1025      "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1026      StgTSO_trec(CurrentTSO) = new_trec;
1027      if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1028        R1 = StgCatchRetryFrame_alt_code(frame);
1029      } else {
1030        R1 = StgCatchRetryFrame_first_code(frame);
1031      }
1032      jump stg_ap_v_fast;
1033    }
1034 }
1035
1036
1037 // Atomically frame -------------------------------------------------------------
1038
1039
/* As with catch-retry frames, vectored returns into an atomically frame
 * are invalid; these stubs abort if ever entered. */
1040 #define ATOMICALLY_FRAME_ERROR(label) \
1041   label { foreign "C" barf("atomically_frame incorrectly entered!"); }
1042
1043 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
1044 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
1045 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
1046 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
1047 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
1048 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
1049 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
1050 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)
1051
1052 #if MAX_VECTORED_RTN > 8
1053 #error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
1054 #endif
1055
1056 #if defined(PROFILING)
1057 #define ATOMICALLY_FRAME_BITMAP 3
1058 #define ATOMICALLY_FRAME_WORDS  4
1059 #else
1060 #define ATOMICALLY_FRAME_BITMAP 0
1061 #define ATOMICALLY_FRAME_WORDS  2
1062 #endif
1063
1064
/* stg_atomically_frame: return address pushed by atomically#.
 * Entered when the transaction body (or an invariant check) finishes:
 *  - first return: collect the invariants that need checking;
 *  - subsequent returns: retire the just-checked invariant;
 * then either run the next invariant in a nested transaction, or, when
 * none remain, attempt to commit the whole transaction.  A failed commit
 * re-runs the body from scratch. */
1065 INFO_TABLE_RET(stg_atomically_frame,
1066                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1067                ATOMICALLY_FRAME,
1068                stg_atomically_frame_0_ret,
1069                stg_atomically_frame_1_ret,
1070                stg_atomically_frame_2_ret,
1071                stg_atomically_frame_3_ret,
1072                stg_atomically_frame_4_ret,
1073                stg_atomically_frame_5_ret,
1074                stg_atomically_frame_6_ret,
1075                stg_atomically_frame_7_ret)
1076 {
1077   W_ frame, trec, valid, next_invariant, q, outer;
1078   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1079
1080   frame = Sp;
1081   trec = StgTSO_trec(CurrentTSO);
1082   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1083
      /* outer == NO_TREC distinguishes "body just finished" from "an
       * invariant-checking nested transaction just finished". */
1084   if (outer == NO_TREC) {
1085     /* First time back at the atomically frame -- pick up invariants */
1086     "ptr" q = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
1087     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
1088
1089   } else {
1090     /* Second/subsequent time back at the atomically frame -- abort the
1091      * tx that's checking the invariant and move on to the next one */
1092     StgTSO_trec(CurrentTSO) = outer;
1093     q = StgAtomicallyFrame_next_invariant_to_check(frame);
1094     StgInvariantCheckQueue_my_execution(q) = trec;
1095     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1096     /* Don't free trec -- it's linked from q and will be stashed in the
1097      * invariant if we eventually commit. */
1098     q = StgInvariantCheckQueue_next_queue_entry(q);
1099     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
1100     trec = outer;
1101   }
1102
1103   q = StgAtomicallyFrame_next_invariant_to_check(frame);
1104
1105   if (q != END_INVARIANT_CHECK_QUEUE) {
1106     /* We can't commit yet: another invariant to check */
       /* Run the invariant's code in a fresh nested transaction; we return
        * here afterwards (frame still on the stack). */
1107     "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
1108     StgTSO_trec(CurrentTSO) = trec;
1109
1110     next_invariant = StgInvariantCheckQueue_invariant(q);
1111     R1 = StgAtomicInvariant_code(next_invariant);
1112     jump stg_ap_v_fast;
1113
1114   } else {
1115
1116     /* We've got no more invariants to check, try to commit */
1117     valid = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
1118     if (valid != 0) {
1119       /* Transaction was valid: commit succeeded */
1120       StgTSO_trec(CurrentTSO) = NO_TREC;
1121       Sp = Sp + SIZEOF_StgAtomicallyFrame;
1122       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1123       jump %ENTRY_CODE(Sp(SP_OFF));
1124     } else {
1125       /* Transaction was not valid: try again */
1126       "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1127       StgTSO_trec(CurrentTSO) = trec;
1128       StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
1129       R1 = StgAtomicallyFrame_code(frame);
1130       jump stg_ap_v_fast;
1131     }
1132   }
1133 }
1134
/* stg_atomically_waiting_frame: variant of the atomically frame used while
 * the thread is blocked in retry#, waiting for one of the TVars it read to
 * change.  Entered on wake-up: revalidate the wait; if still valid go back
 * to sleep, otherwise restart the transaction body.  Shares the (aborting)
 * vectored-return stubs with stg_atomically_frame, since its layout and
 * invalid-entry behaviour are identical. */
1135 INFO_TABLE_RET(stg_atomically_waiting_frame,
1136                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1137                ATOMICALLY_FRAME,
1138                stg_atomically_frame_0_ret,
1139                stg_atomically_frame_1_ret,
1140                stg_atomically_frame_2_ret,
1141                stg_atomically_frame_3_ret,
1142                stg_atomically_frame_4_ret,
1143                stg_atomically_frame_5_ret,
1144                stg_atomically_frame_6_ret,
1145                stg_atomically_frame_7_ret)
1146 {
1147   W_ frame, trec, valid;
1148   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1149
1150   frame = Sp;
1151
1152   /* The TSO is currently waiting: should we stop waiting? */
1153   valid = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
1154   if (valid != 0) {
1155     /* Previous attempt is still valid: no point trying again yet */
        /* Unregisterised return convention: push a dummy unboxed-tuple
         * return frame before blocking again. */
1156           IF_NOT_REG_R1(Sp_adj(-2);
1157                         Sp(1) = stg_NO_FINALIZER_closure;
1158                         Sp(0) = stg_ut_1_0_unreg_info;)
1159     jump stg_block_noregs;
1160   } else {
1161     /* Previous attempt is no longer valid: try again */
1162     "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1163     StgTSO_trec(CurrentTSO) = trec;
        /* Flip the frame back to the normal atomically frame and re-run. */
1164     StgHeader_info(frame) = stg_atomically_frame_info;
1165     R1 = StgAtomicallyFrame_code(frame);
1166     jump stg_ap_v_fast;
1167   }
1168 }
1169
1170 // STM catch frame --------------------------------------------------------------
1171
/* Entry code for a catch-STM frame, shared between the direct entry and
 * the eight vectored-return entries (each instantiation differs only in
 * how it returns, via the `ret' argument).  Entered when the protected
 * code finishes without raising: commit the nested transaction; on
 * success pop the frame and return, on failure re-run the protected code
 * in a fresh nested transaction. */
1172 #define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)                                               \
1173    label                                                                                        \
1174    {                                                                                            \
1175       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )                                       \
1176       W_ r, frame, trec, outer;                                                                 \
1177       frame = Sp;                                                                               \
1178       trec = StgTSO_trec(CurrentTSO);                                                           \
1179       "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];                             \
1180       r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];          \
1181       if (r != 0) {                                                                             \
1182         /* Commit succeeded */                                                                  \
1183         StgTSO_trec(CurrentTSO) = outer;                                                        \
1184         Sp = Sp + SIZEOF_StgCatchSTMFrame;                                                      \
1185         IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)                                                \
1186         jump ret;                                                                               \
1187       } else {                                                                                  \
1188         /* Commit failed */                                                                     \
1189         W_ new_trec;                                                                            \
1190         "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") []; \
1191         StgTSO_trec(CurrentTSO) = new_trec;                                                     \
1192         R1 = StgCatchSTMFrame_code(frame);                                                      \
1193         jump stg_ap_v_fast;                                                                     \
1194       }                                                                                         \
1195    }
1196
/* SP_OFF is re-defined here to the same values as above (benign identical
 * redefinition); unlike the other STM frames, catch-STM frames accept
 * vectored returns, so each vector entry is a full template instantiation
 * returning through the appropriate vector slot. */
1197 #ifdef REG_R1
1198 #define SP_OFF 0
1199 #else
1200 #define SP_OFF 1
1201 #endif
1202
1203 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
1204 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
1205 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
1206 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
1207 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
1208 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
1209 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
1210 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
1211
1212 #if MAX_VECTORED_RTN > 8
1213 #error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
1214 #endif
1215
1216 #if defined(PROFILING)
1217 #define CATCH_STM_FRAME_BITMAP 3
1218 #define CATCH_STM_FRAME_WORDS  4
1219 #else
1220 #define CATCH_STM_FRAME_BITMAP 0
1221 #define CATCH_STM_FRAME_WORDS  2
1222 #endif
1223
1224 /* Catch frames are very similar to update frames, but when entering
1225  * one we just pop the frame off the stack and perform the correct
1226  * kind of return to the activation record underneath us on the stack.
1227  */
1228
/* Direct (non-vectored) entry: same shared template, returning through
 * the ordinary entry code of the frame beneath. */
1229 INFO_TABLE_RET(stg_catch_stm_frame,
1230                CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
1231                CATCH_STM_FRAME,
1232                stg_catch_stm_frame_0_ret,
1233                stg_catch_stm_frame_1_ret,
1234                stg_catch_stm_frame_2_ret,
1235                stg_catch_stm_frame_3_ret,
1236                stg_catch_stm_frame_4_ret,
1237                stg_catch_stm_frame_5_ret,
1238                stg_catch_stm_frame_6_ret,
1239                stg_catch_stm_frame_7_ret)
1240 CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1241
1242
1243 // Primop definition ------------------------------------------------------------
1244
/* atomically# :: STM a -> State# RealWorld -> (# State# RealWorld, a #)
 * Push an atomically frame, start a top-level transaction, and run the
 * STM action in R1.  Nested use (called while already inside a
 * transaction) raises NestedAtomically. */
1245 atomicallyzh_fast
1246 {
1247   W_ frame;
1248   W_ old_trec;
1249   W_ new_trec;
1250   
1251   // stmStartTransaction may allocate
1252   MAYBE_GC (R1_PTR, atomicallyzh_fast); 
1253
1254   /* Args: R1 = m :: STM a */
      /* Need room for the frame plus one word of headroom. */
1255   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1256
1257   old_trec = StgTSO_trec(CurrentTSO);
1258
1259   /* Nested transactions are not allowed; raise an exception */
1260   if (old_trec != NO_TREC) {
1261      R1 = base_GHCziIOBase_NestedAtomically_closure;
1262      jump raisezh_fast;
1263   }
1264
1265   /* Set up the atomically frame */
1266   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1267   frame = Sp;
1268
1269   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1270   StgAtomicallyFrame_code(frame) = R1;
1271   StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
1272
1273   /* Start the memory transaction */
      /* R1 is still live (the action we are about to apply). */
1274   "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
1275   StgTSO_trec(CurrentTSO) = new_trec;
1276
1277   /* Apply R1 to the realworld token */
1278   jump stg_ap_v_fast;
1279 }
1280
1281
/* catchSTM# :: STM a -> (Exception -> STM a) -> STM a
 * Push a catch-STM frame recording the handler, start a nested
 * transaction, and run the protected code in R1. */
1282 catchSTMzh_fast
1283 {
1284   W_ frame;
1285   
1286   /* Args: R1 :: STM a */
1287   /* Args: R2 :: Exception -> STM a */
1288   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1289
1290   /* Set up the catch frame */
1291   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1292   frame = Sp;
1293
1294   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1295   StgCatchSTMFrame_handler(frame) = R2;
1296   StgCatchSTMFrame_code(frame) = R1;
1297
1298   /* Start a nested transaction to run the body of the try block in */
1299   W_ cur_trec;  
1300   W_ new_trec;
1301   cur_trec = StgTSO_trec(CurrentTSO);
      /* NOTE(review): this is the only foreign call in the file without a
       * "[...]" live-register annotation; R1 is still needed by the jump
       * below.  Presumably the default is conservative -- verify, and
       * consider adding [R1] for consistency with atomicallyzh_fast. */
1302   "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
1303   StgTSO_trec(CurrentTSO) = new_trec;
1304
1305   /* Apply R1 to the realworld token */
1306   jump stg_ap_v_fast;
1307 }
1308
1309
/* catchRetry# :: STM a -> STM a -> STM a
 * Push a catch-retry frame (orElse): run the first alternative in a
 * nested transaction; if it calls retry#, retryzh_fast will switch to
 * the second alternative via the frame's running_alt_code flag. */
1310 catchRetryzh_fast
1311 {
1312   W_ frame;
1313   W_ new_trec;
1314   W_ trec;
1315
1316   // stmStartTransaction may allocate
1317   MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 
1318
1319   /* Args: R1 :: STM a */
1320   /* Args: R2 :: STM a */
1321   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1322
1323   /* Start a nested transaction within which to run the first code */
1324   trec = StgTSO_trec(CurrentTSO);
1325   "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
1326   StgTSO_trec(CurrentTSO) = new_trec;
1327
1328   /* Set up the catch-retry frame */
1329   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1330   frame = Sp;
1331   
1332   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1333   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1334   StgCatchRetryFrame_first_code(frame) = R1;
1335   StgCatchRetryFrame_alt_code(frame) = R2;
1336
1337   /* Apply R1 to the realworld token */
1338   jump stg_ap_v_fast;
1339 }
1340
1341
/* retry# : abandon the current transaction attempt.
 * Walk the stack for the nearest CATCH_RETRY_FRAME or ATOMICALLY_FRAME:
 *  - catch-retry frame in its first branch: abort and try the alternative;
 *  - catch-retry frame in its alternative: propagate the retry upwards;
 *  - atomically frame: validate, then block the thread on the TVars it
 *    read (stmWait), or restart immediately if the attempt was invalid. */
1342 retryzh_fast
1343 {
1344   W_ frame_type;
1345   W_ frame;
1346   W_ trec;
1347   W_ outer;
1348   W_ r;
1349
1350   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1351
1352   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1353 retry_pop_stack:
     // findRetryFrameHelper trims the stack down to the relevant frame,
     // so Sp must be saved to and restored from the TSO around the call.
1354   StgTSO_sp(CurrentTSO) = Sp;
1355   frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
1356   Sp = StgTSO_sp(CurrentTSO);
1357   frame = Sp;
1358   trec = StgTSO_trec(CurrentTSO);
1359   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1360
1361   if (frame_type == CATCH_RETRY_FRAME) {
1362     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1363     ASSERT(outer != NO_TREC);
1364     // Abort the transaction attempting the current branch
1365     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1366     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
       // NOTE(review): "!x != 0::I32" parses as "(!x) != 0", i.e. it tests
       // that running_alt_code is clear; consider the clearer
       // "... == 0::I32" -- confirm Cmm operator precedence before changing.
1367     if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1368       // Retry in the first branch: try the alternative
1369       "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1370       StgTSO_trec(CurrentTSO) = trec;
1371       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1372       R1 = StgCatchRetryFrame_alt_code(frame);
1373       jump stg_ap_v_fast;
1374     } else {
1375       // Retry in the alternative code: propagate the retry
1376       StgTSO_trec(CurrentTSO) = outer;
1377       Sp = Sp + SIZEOF_StgCatchRetryFrame;
1378       goto retry_pop_stack;
1379     }
1380   }
1381
1382   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1383   ASSERT(frame_type == ATOMICALLY_FRAME);
1384   if (outer != NO_TREC) {
1385     // We called retry while checking invariants, so abort the current
1386     // invariant check (merging its TVar accesses into the parents read
1387     // set so we'll wait on them)
1388     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1389     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
1390     trec = outer;
1391      StgTSO_trec(CurrentTSO) = trec;
1392     "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1393   }
1394   ASSERT(outer == NO_TREC);
1395
     // stmWait validates the transaction and, if valid, enqueues this TSO
     // on the watch queues of every TVar it read.
1396   r = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
1397   if (r != 0) {
1398     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1399     StgHeader_info(frame) = stg_atomically_waiting_frame_info;
1400     Sp = frame;
1401     // Fix up the stack in the unregisterised case: the return convention is different.
1402     IF_NOT_REG_R1(Sp_adj(-2); 
1403                   Sp(1) = stg_NO_FINALIZER_closure;
1404                   Sp(0) = stg_ut_1_0_unreg_info;)
1405     R3 = trec; // passing to stmWaitUnblock()
1406     jump stg_block_stmwait;
1407   } else {
1408     // Transaction was not valid: retry immediately
1409     "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1410     StgTSO_trec(CurrentTSO) = trec;
1411     R1 = StgAtomicallyFrame_code(frame);
1412     Sp = frame;
1413     jump stg_ap_v_fast;
1414   }
1415 }
1416
1417
/* check# : register a data invariant (closure in R1) with the current
 * transaction; it will be checked when the enclosing atomically# commits.
 * Returns unit via the continuation on top of the stack. */
1418 checkzh_fast
1419 {
1420   W_ trec, closure;
1421
1422   /* Args: R1 = invariant closure */
1423   MAYBE_GC (R1_PTR, checkzh_fast); 
1424
1425   trec = StgTSO_trec(CurrentTSO);
1426   closure = R1;
1427   foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
1428                                      trec "ptr",
1429                                      closure "ptr") [];
1430
1431   jump %ENTRY_CODE(Sp(0));
1432 }
1433
1434
/* newTVar# : allocate a fresh TVar holding the value in R1.
 * Allocation happens inside stmNewTVar, hence the MAYBE_GC. */
1435 newTVarzh_fast
1436 {
1437   W_ tv;
1438   W_ new_value;
1439
1440   /* Args: R1 = initialisation value */
1441
1442   MAYBE_GC (R1_PTR, newTVarzh_fast); 
1443   new_value = R1;
1444   "ptr" tv = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
1445   RET_P(tv);
1446 }
1447
1448
/* readTVar# : read a TVar within the current transaction.  The read is
 * recorded in the transaction record (trec) for later validation. */
1449 readTVarzh_fast
1450 {
1451   W_ trec;
1452   W_ tvar;
1453   W_ result;
1454
1455   /* Args: R1 = TVar closure */
1456
1457   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1458   trec = StgTSO_trec(CurrentTSO);
1459   tvar = R1;
1460   "ptr" result = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
1461
1462   RET_P(result);
1463 }
1464
1465
/* writeTVar# : write a TVar within the current transaction.  The write is
 * buffered in the trec and only hits the TVar at commit time.  Returns
 * unit via the continuation on top of the stack. */
1466 writeTVarzh_fast
1467 {
1468   W_ trec;
1469   W_ tvar;
1470   W_ new_value;
1471   
1472   /* Args: R1 = TVar closure */
1473   /*       R2 = New value    */
1474
1475   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1476   trec = StgTSO_trec(CurrentTSO);
1477   tvar = R1;
1478   new_value = R2;
1479   foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1480
1481   jump %ENTRY_CODE(Sp(0));
1482 }
1483
1484
1485 /* -----------------------------------------------------------------------------
1486  * MVar primitives
1487  *
1488  * take & putMVar work as follows.  Firstly, an important invariant:
1489  *
1490  *    If the MVar is full, then the blocking queue contains only
1491  *    threads blocked on putMVar, and if the MVar is empty then the
1492  *    blocking queue contains only threads blocked on takeMVar.
1493  *
1494  * takeMvar:
1495  *    MVar empty : then add ourselves to the blocking queue
1496  *    MVar full  : remove the value from the MVar, and
1497  *                 blocking queue empty     : return
1498  *                 blocking queue non-empty : perform the first blocked putMVar
1499  *                                            from the queue, and wake up the
1500  *                                            thread (MVar is now full again)
1501  *
1502  * putMVar is just the dual of the above algorithm.
1503  *
1504  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1505  * the stack of the thread waiting to do the putMVar.  See
1506  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1507  * the stack layout, and the PerformPut and PerformTake macros below.
1508  *
1509  * It is important that a blocked take or put is woken up with the
1510  * take/put already performed, because otherwise there would be a
1511  * small window of vulnerability where the thread could receive an
1512  * exception and never perform its take or put, and we'd end up with a
1513  * deadlock.
1514  *
1515  * -------------------------------------------------------------------------- */
1516
/* isEmptyMVar# : 1 if the MVar is empty, 0 if full, judged purely by the
 * closure's info pointer.  Note: unsynchronised read -- the answer can be
 * stale by the time the caller acts on it. */
1517 isEmptyMVarzh_fast
1518 {
1519     /* args: R1 = MVar closure */
1520
1521     if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
1522         RET_N(1);
1523     } else {
1524         RET_N(0);
1525     }
1526 }
1527
/* newMVar# : allocate an empty MVar.  head/tail form the queue of blocked
 * threads; END_TSO_QUEUE in `value' marks the MVar as holding nothing. */
1528 newMVarzh_fast
1529 {
1530     /* args: none */
1531     W_ mvar;
1532
1533     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1534   
        /* Hp points at the last allocated word; step back to the start. */
1535     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1536     SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
1537     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1538     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1539     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1540     RET_P(mvar);
1541 }
1542
1543
/* PerformTake/PerformPut complete a blocked thread's MVar operation on
 * its behalf by editing that thread's stack (layout pushed by
 * stg_block_takemvar / stg_block_putmvar in HeapStackCheck.cmm):
 *  - PerformTake writes the taken value plus a return frame onto the
 *    blocked taker's stack;
 *  - PerformPut pops the blocked putter's 3-word frame and extracts the
 *    value it was trying to put. */
1544 /* If R1 isn't available, pass it on the stack */
1545 #ifdef REG_R1
1546 #define PerformTake(tso, value)                         \
1547     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1548     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1549 #else
1550 #define PerformTake(tso, value)                                 \
1551     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1552     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1553 #endif
1554
1555 #define PerformPut(tso,lval)                    \
1556     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1557     lval = W_[StgTSO_sp(tso) - WDS(1)];
1558
/* takeMVar# : take the value from an MVar, blocking if empty.
 * Empty  -> enqueue ourselves on the MVar's blocked queue and block.
 * Full   -> take the value; if putters are queued, perform the first
 *           queued putMVar on its behalf and wake that thread (the MVar
 *           stays full), else mark the MVar empty.
 * In THREADED_RTS the MVar closure is locked for the whole operation. */
1559 takeMVarzh_fast
1560 {
1561     W_ mvar, val, info, tso;
1562
1563     /* args: R1 = MVar closure */
1564     mvar = R1;
1565
1566 #if defined(THREADED_RTS)
        /* lockClosure also returns the (pre-lock) info pointer. */
1567     "ptr" info = foreign "C" lockClosure(mvar "ptr") [];
1568 #else
1569     info = GET_INFO(mvar);
1570 #endif
1571
1572     /* If the MVar is empty, put ourselves on its blocking queue,
1573      * and wait until we're woken up.
1574      */
1575     if (info == stg_EMPTY_MVAR_info) {
1576         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1577             StgMVar_head(mvar) = CurrentTSO;
1578         } else {
1579             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1580         }
1581         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1582         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1583         StgTSO_block_info(CurrentTSO)  = mvar;
1584         StgMVar_tail(mvar) = CurrentTSO;
1585         
            /* stg_block_takemvar saves our state and releases the lock. */
1586         jump stg_block_takemvar;
1587   }
1588
1589   /* we got the value... */
1590   val = StgMVar_value(mvar);
1591
1592   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1593   {
1594       /* There are putMVar(s) waiting... 
1595        * wake up the first thread on the queue
1596        */
1597       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1598
1599       /* actually perform the putMVar for the thread that we just woke up */
1600       tso = StgMVar_head(mvar);
1601       PerformPut(tso,StgMVar_value(mvar));
        /* We mutated tso's stack on its behalf: mark it dirty for the GC. */
1602       dirtyTSO(tso);
1603
1604 #if defined(GRAN) || defined(PAR)
1605       /* ToDo: check 2nd arg (mvar) is right */
1606       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
1607       StgMVar_head(mvar) = tso;
1608 #else
        /* unblockOne returns the next thread on the queue (new head). */
1609       "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", 
1610                                          StgMVar_head(mvar) "ptr") [];
1611       StgMVar_head(mvar) = tso;
1612 #endif
1613
1614       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1615           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1616       }
1617
1618 #if defined(THREADED_RTS)
        /* Unlock, re-publishing the (still-full) info pointer. */
1619       foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
1620 #endif
1621       RET_P(val);
1622   } 
1623   else
1624   {
1625       /* No further putMVars, MVar is now empty */
1626       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1627  
1628 #if defined(THREADED_RTS)
1629       foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
1630 #else
1631       SET_INFO(mvar,stg_EMPTY_MVAR_info);
1632 #endif
1633
1634       RET_P(val);
1635   }
1636 }
1637
1638
/* tryTakeMVar# : non-blocking takeMVar.
 * Empty -> return (0, dummy closure) without blocking; otherwise take the
 * value exactly as takeMVarzh_fast does and return (1, value). */
1639 tryTakeMVarzh_fast
1640 {
1641     W_ mvar, val, info, tso;
1642
1643     /* args: R1 = MVar closure */
1644
1645     mvar = R1;
1646
1647 #if defined(THREADED_RTS)
1648     "ptr" info = foreign "C" lockClosure(mvar "ptr") [];
1649 #else
1650     info = GET_INFO(mvar);
1651 #endif
1652
1653     if (info == stg_EMPTY_MVAR_info) {
1654 #if defined(THREADED_RTS)
1655         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
1656 #endif
1657         /* HACK: we need a pointer to pass back, 
1658          * so we abuse NO_FINALIZER_closure
1659          */
1660         RET_NP(0, stg_NO_FINALIZER_closure);
1661     }
1662
1663     /* we got the value... */
1664     val = StgMVar_value(mvar);
1665
1666     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1667
1668         /* There are putMVar(s) waiting... 
1669          * wake up the first thread on the queue
1670          */
1671         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1672
1673         /* actually perform the putMVar for the thread that we just woke up */
1674         tso = StgMVar_head(mvar);
1675         PerformPut(tso,StgMVar_value(mvar));
            /* tso's stack was mutated on its behalf: mark it dirty for GC. */
1676         dirtyTSO(tso);
1677
1678 #if defined(GRAN) || defined(PAR)
1679         /* ToDo: check 2nd arg (mvar) is right */
1680         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
1681         StgMVar_head(mvar) = tso;
1682 #else
1683         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr",
1684                                            StgMVar_head(mvar) "ptr") [];
1685         StgMVar_head(mvar) = tso;
1686 #endif
1687
1688         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1689             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1690         }
1691 #if defined(THREADED_RTS)
            /* A queued putter refilled the MVar, so it stays full. */
1692         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
1693 #endif
1694     }
1695     else 
1696     {
1697         /* No further putMVars, MVar is now empty */
1698         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1699 #if defined(THREADED_RTS)
1700         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
1701 #else
1702         SET_INFO(mvar,stg_EMPTY_MVAR_info);
1703 #endif
1704     }
1705     
1706     RET_NP(1, val);
1707 }
1708
1709
/* putMVar# : put a value into an MVar, blocking if full (the dual of
 * takeMVarzh_fast).
 * Full  -> enqueue ourselves on the blocked queue and block.
 * Empty -> if takers are queued, hand the value straight to the first one
 *          (MVar stays empty) and wake it; else store the value and mark
 *          the MVar full.  Returns unit via the stack continuation. */
1710 putMVarzh_fast
1711 {
1712     W_ mvar, info, tso;
1713
1714     /* args: R1 = MVar, R2 = value */
1715     mvar = R1;
1716
1717 #if defined(THREADED_RTS)
        /* R2 (the value) must survive the call, hence the [R2] annotation. */
1718     "ptr" info = foreign "C" lockClosure(mvar "ptr") [R2];
1719 #else
1720     info = GET_INFO(mvar);
1721 #endif
1722
1723     if (info == stg_FULL_MVAR_info) {
1724         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1725             StgMVar_head(mvar) = CurrentTSO;
1726         } else {
1727             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1728         }
1729         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1730         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1731         StgTSO_block_info(CurrentTSO)  = mvar;
1732         StgMVar_tail(mvar) = CurrentTSO;
1733         
            /* stg_block_putmvar saves our state and releases the lock. */
1734         jump stg_block_putmvar;
1735     }
1736   
1737     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1738
1739         /* There are takeMVar(s) waiting: wake up the first one
1740          */
1741         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1742
1743         /* actually perform the takeMVar */
1744         tso = StgMVar_head(mvar);
1745         PerformTake(tso, R2);
            /* tso's stack was mutated on its behalf: mark it dirty for GC. */
1746         dirtyTSO(tso);
1747       
1748 #if defined(GRAN) || defined(PAR)
1749         /* ToDo: check 2nd arg (mvar) is right */
1750         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
1751         StgMVar_head(mvar) = tso;
1752 #else
1753         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
1754         StgMVar_head(mvar) = tso;
1755 #endif
1756
1757         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1758             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1759         }
1760
1761 #if defined(THREADED_RTS)
            /* The taker consumed the value, so the MVar stays empty. */
1762         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
1763 #endif
1764         jump %ENTRY_CODE(Sp(0));
1765     }
1766     else
1767     {
1768         /* No further takes, the MVar is now full. */
1769         StgMVar_value(mvar) = R2;
1770
1771 #if defined(THREADED_RTS)
1772         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
1773 #else
1774         SET_INFO(mvar,stg_FULL_MVAR_info);
1775 #endif
1776         jump %ENTRY_CODE(Sp(0));
1777     }
1778     
1779     /* ToDo: yield afterward for better communication performance? */
1780 }
1781
1782
/* tryPutMVar# : non-blocking putMVar.
 * Full -> return 0 without blocking; otherwise deliver the value exactly
 * as putMVarzh_fast does and return 1. */
1783 tryPutMVarzh_fast
1784 {
1785     W_ mvar, info, tso;
1786
1787     /* args: R1 = MVar, R2 = value */
1788     mvar = R1;
1789
1790 #if defined(THREADED_RTS)
        /* R2 (the value) must survive the call, hence the [R2] annotation. */
1791     "ptr" info = foreign "C" lockClosure(mvar "ptr") [R2];
1792 #else
1793     info = GET_INFO(mvar);
1794 #endif
1795
1796     if (info == stg_FULL_MVAR_info) {
1797 #if defined(THREADED_RTS)
1798         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
1799 #endif
1800         RET_N(0);
1801     }
1802   
1803     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1804
1805         /* There are takeMVar(s) waiting: wake up the first one
1806          */
1807         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1808         
1809         /* actually perform the takeMVar */
1810         tso = StgMVar_head(mvar);
1811         PerformTake(tso, R2);
            /* tso's stack was mutated on its behalf: mark it dirty for GC. */
1812         dirtyTSO(tso);
1813       
1814 #if defined(GRAN) || defined(PAR)
1815         /* ToDo: check 2nd arg (mvar) is right */
1816         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
1817         StgMVar_head(mvar) = tso;
1818 #else
1819         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
1820         StgMVar_head(mvar) = tso;
1821 #endif
1822
1823         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1824             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1825         }
1826
1827 #if defined(THREADED_RTS)
            /* The taker consumed the value, so the MVar stays empty. */
1828         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
1829 #endif
1830     }
1831     else
1832     {
1833         /* No further takes, the MVar is now full. */
1834         StgMVar_value(mvar) = R2;
1835
1836 #if defined(THREADED_RTS)
1837         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
1838 #else
1839         SET_INFO(mvar,stg_FULL_MVAR_info);
1840 #endif
1841     }
1842     
1843     RET_N(1);
1844     /* ToDo: yield afterward for better communication performance? */
1845 }
1846
1847
1848 /* -----------------------------------------------------------------------------
1849    Stable pointer primitives
1850    -------------------------------------------------------------------------  */
1851
makeStableNamezh_fast
{
    /* args: R1 = heap object to name.
     * Returns (RET_P) the StableName# closure for R1, creating one
     * only the first time the object is named — repeated calls on the
     * same object return the same StableName# closure.
     */
    W_ index, sn_obj;

    /* Reserve space for a StgStableName up front (may GC; R1 is live). */
    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
  
    index = foreign "C" lookupStableName(R1 "ptr") [];

    /* Is there already a StableName for this heap object?
     *  stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        /* No: build a fresh StgStableName in the reserved space and
         * record it in the table entry for this index. */
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        /* Yes: return the existing closure.  NOTE(review): the words
         * reserved by ALLOC_PRIM above appear to go unused on this
         * path — confirm the GC tolerates the slop. */
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }
    
    RET_P(sn_obj);
}
1874
1875
makeStablePtrzh_fast
{
    /* Args: R1 = a (the closure to pin).
     * Returns (RET_N) the StablePtr# obtained from the C stable-pointer
     * table; getStablePtr may allocate, hence the MAYBE_GC first.
     */
    W_ sp;
    MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
    "ptr" sp = foreign "C" getStablePtr(R1 "ptr") [];
    RET_N(sp);
}
1884
deRefStablePtrzh_fast
{
    /* Args: R1 = the stable ptr (an index into stable_ptr_table).
     * Returns (RET_P) the closure stored in that table entry's addr
     * field.  Pure table lookup — no allocation, no GC point.
     */
    W_ r, sp;
    sp = R1;
    r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
    RET_P(r);
}
1893
1894 /* -----------------------------------------------------------------------------
1895    Bytecode object primitives
1896    -------------------------------------------------------------------------  */
1897
newBCOzh_fast
{
    /* R1 = instrs
       R2 = literals
       R3 = ptrs
       R4 = arity
       R5 = bitmap array

       Allocates and returns (RET_P) a fresh BCO.  The arity/bitmap
       words are copied out of the ArrWords in R5 into the tail of the
       BCO itself, so the BCO does not retain the bitmap array.
    */
    W_ bco, bitmap_arr, bytes, words;
    
    bitmap_arr = R5;
    /* Size = fixed StgBCO fields plus one word per bitmap word. */
    words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
    bytes = WDS(words);

    /* All four heap-pointer args must survive a possible GC here. */
    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );

    bco = Hp - bytes + WDS(1);
    SET_HDR(bco, stg_BCO_info, W_[CCCS]);
    
    StgBCO_instrs(bco)     = R1;
    StgBCO_literals(bco)   = R2;
    StgBCO_ptrs(bco)       = R3;
    StgBCO_arity(bco)      = HALF_W_(R4);   /* arity stored as a half-word */
    StgBCO_size(bco)       = HALF_W_(words);
    
    // Copy the arity/bitmap info into the BCO
    W_ i;
    i = 0;
for:
    if (i < StgArrWords_words(bitmap_arr)) {
        StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
        i = i + 1;
        goto for;
    }
    
    RET_P(bco);
}
1935
1936
mkApUpd0zh_fast
{
    // R1 = the BCO# for the AP
    //
    // Wraps a zero-arity BCO in an updatable AP thunk and returns it
    // (RET_P).
    W_ ap;

    // This function is *only* used to wrap zero-arity BCOs in an
    // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
    // saturated and always points directly to a FUN or BCO.
    ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
           StgBCO_arity(R1) == HALF_W_(0));

    // Heap check plus ticky/cost-centre bookkeeping for the new thunk.
    HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
    TICK_ALLOC_UP_THK(0, 0);
    CCCS_ALLOC(SIZEOF_StgAP);

    ap = Hp - SIZEOF_StgAP + WDS(1);
    SET_HDR(ap, stg_AP_info, W_[CCCS]);
    
    // Zero arguments: the AP consists of just the function pointer.
    StgAP_n_args(ap) = HALF_W_(0);
    StgAP_fun(ap) = R1;
    
    RET_P(ap);
}
1961
infoPtrzh_fast
{
/* args: R1 = closure to analyze
 * Returns (RET_N) the closure's standard info-table pointer.
 */
   
  MAYBE_GC(R1_PTR, infoPtrzh_fast);

  W_ info;
  info = %GET_STD_INFO(R1);
  RET_N(info);
}
1972
closurePayloadzh_fast
{
/* args: R1 = closure to analyze
 * Splits the closure's payload into two freshly allocated arrays and
 * returns them (RET_PP): a frozen MutArrPtrs holding the pointer
 * fields, and an ArrWords holding the non-pointer words.  The ptrs /
 * nptrs counts come from the closure's own info table; the payload is
 * laid out pointers-first, so non-pointers start at offset `ptrs`.
 */
// TODO: Consider the absence of ptrs or nonptrs as a special case ?

    MAYBE_GC(R1_PTR, closurePayloadzh_fast);

    W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
    info  = %GET_STD_INFO(R1);
    ptrs  = TO_W_(%INFO_PTRS(info)); 
    nptrs = TO_W_(%INFO_NPTRS(info));
    p = 0;

    ALLOC_PRIM (SIZEOF_StgMutArrPtrs + WDS(ptrs), R1_PTR, closurePayloadzh_fast);
    ptrs_arr = Hp - SIZEOF_StgMutArrPtrs - WDS(ptrs) + WDS(1);
    SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
for:
    /* Copy the pointer fields into the MutArrPtrs payload. */
    if(p < ptrs) {
         W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(R1,p);
         p = p + 1;
         goto for;
    }
    
    /* NOTE(review): if this second heap check fails, ALLOC_PRIM
     * re-enters closurePayloadzh_fast from the top, so ptrs_arr above
     * is rebuilt from scratch and the first copy becomes garbage —
     * believed safe (the abandoned array is fully initialised) but
     * wasteful; confirm. */
    ALLOC_PRIM (SIZEOF_StgArrWords + WDS(nptrs), R1_PTR, closurePayloadzh_fast);
    nptrs_arr = Hp - SIZEOF_StgArrWords - WDS(nptrs) + WDS(1);
    SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(nptrs_arr) = nptrs;
    p = 0;
for2:
    /* Copy the non-pointer words, which follow the pointers in the
     * closure payload (hence the p+ptrs offset). */
    if(p < nptrs) {
         W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(R1, p+ptrs);
         p = p + 1;
         goto for2;
    }
    RET_PP(ptrs_arr, nptrs_arr);
}
2010
2011 /* -----------------------------------------------------------------------------
2012    Thread I/O blocking primitives
2013    -------------------------------------------------------------------------- */
2014
/* Add a thread to the end of the blocked queue. (C-- version of the C
 * macro in Schedule.h).  Maintains the blocked_queue_hd/tl pair: an
 * empty queue has hd == END_TSO_QUEUE.  No locking — the call sites
 * below only use this on the non-threaded RTS.
 */
#define APPEND_TO_BLOCKED_QUEUE(tso)                    \
    ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
    if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
      W_[blocked_queue_hd] = tso;                       \
    } else {                                            \
      StgTSO_link(W_[blocked_queue_tl]) = tso;          \
    }                                                   \
    W_[blocked_queue_tl] = tso;
2026
waitReadzh_fast
{
    /* args: R1 = file descriptor to wait on.
     * Blocks the current thread until the fd is readable; only
     * supported on the non-threaded RTS.
     */
#ifdef THREADED_RTS
    foreign "C" barf("waitRead# on threaded RTS");
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
    /* block_info records what the thread is waiting for (the fd). */
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2043
waitWritezh_fast
{
    /* args: R1 = file descriptor to wait on.
     * Blocks the current thread until the fd is writable; only
     * supported on the non-threaded RTS.
     */
#ifdef THREADED_RTS
    foreign "C" barf("waitWrite# on threaded RTS");
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
    /* block_info records what the thread is waiting for (the fd). */
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2060
2061
STRING(stg_delayzh_malloc_str, "delayzh_fast")
delayzh_fast
{
    /* args: R1 = microsecond delay amount.
     * Blocks the current thread for at least R1 microseconds.  Only
     * supported on the non-threaded RTS; on mingw32 it goes through
     * the async-I/O request machinery, elsewhere it inserts the thread
     * into the time-ordered sleeping_queue.
     */
#ifdef mingw32_HOST_OS
    W_ ares;
    CInt reqID;
#else
    W_ t, prev, target;
#endif

#ifdef THREADED_RTS
    foreign "C" barf("delay# on threaded RTS");
#else

    /* args: R1 (microsecond delay amount) */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;

#ifdef mingw32_HOST_OS

    /* could probably allocate this on the heap instead */
    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_delayzh_malloc_str);
    reqID = foreign "C" addDelayRequest(R1);
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;

    /* Having all async-blocked threads reside on the blocked_queue
     * simplifies matters, so change the status to OnDoProc put the
     * delayed thread on the blocked_queue.
     */
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async_void;

#else

    /* Convert the microsecond delay into a wake-up time measured in
     * scheduler ticks. */
    W_ time;
    W_ divisor;
    time = foreign "C" getourtimeofday() [R1];
    divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags))*1000;
    target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
           + time + 1; /* Add 1 as getourtimeofday rounds down */
    StgTSO_block_info(CurrentTSO) = target;

    /* Insert the new thread in the sleeping queue, which is kept
     * sorted by ascending wake-up time. */
    prev = NULL;
    t = W_[sleeping_queue];
while:
    /* Walk past entries that wake earlier than us. */
    if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
        prev = t;
        t = StgTSO_link(t);
        goto while;
    }

    /* Link in between prev and t (prev == NULL means new head). */
    StgTSO_link(CurrentTSO) = t;
    if (prev == NULL) {
        W_[sleeping_queue] = CurrentTSO;
    } else {
        StgTSO_link(prev) = CurrentTSO;
    }
    jump stg_block_noregs;
#endif
#endif /* !THREADED_RTS */
}
2129
2130
2131 #ifdef mingw32_HOST_OS
STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
asyncReadzh_fast
{
    /* Issue an asynchronous read request (mingw32 only) and block the
     * current thread until the I/O manager completes it.  The result
     * record (len/errCode) is filled in on completion and handed back
     * via stg_block_async.
     */
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncRead# on threaded RTS");
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;

    /* could probably allocate this on the heap instead */
    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncReadzh_malloc_str)
                        [R1,R2,R3,R4];
    /* 0 = FALSE: this is a read request. */
    reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2159
STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
asyncWritezh_fast
{
    /* Issue an asynchronous write request (mingw32 only) and block the
     * current thread until the I/O manager completes it.  Mirrors
     * asyncReadzh_fast, but passes 1 (TRUE) to addIORequest to mark a
     * write.
     */
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncWrite# on threaded RTS");
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

    /* could probably allocate this on the heap instead */
    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncWritezh_malloc_str)
                        [R1,R2,R3,R4];
    reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];

    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2187
STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
asyncDoProczh_fast
{
    /* Run an arbitrary procedure asynchronously on the I/O manager
     * (mingw32 only) and block the current thread until it finishes.
     */
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncDoProc# on threaded RTS");
#else

    /* args: R1 = proc, R2 = param */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;

    /* could probably allocate this on the heap instead */
    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncDoProczh_malloc_str) 
                                [R1,R2];
    reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO) = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2215 #endif
2216
2217 /* -----------------------------------------------------------------------------
2218   ** temporary **
2219
2220    classes CCallable and CReturnable don't really exist, but the
2221    compiler insists on generating dictionaries containing references
2222    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
2223    for these.  Some C compilers can't cope with zero-length static arrays,
2224    so we have to make these one element long.
2225   --------------------------------------------------------------------------- */
2226
/* Dummy one-word static info "tables" for the phantom CCallable /
 * CReturnable dictionaries; never dereferenced, only linked against. */
section "rodata" {
  GHC_ZCCCallable_static_info:   W_ 0;
}

section "rodata" {
  GHC_ZCCReturnable_static_info: W_ 0;
}