fix via-C compilation: import ghczmprim_GHCziBool_False_closure
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 #ifdef __PIC__
31 #ifndef mingw32_HOST_OS
32 import __gmpz_init;
33 import __gmpz_add;
34 import __gmpz_sub;
35 import __gmpz_mul;
36 import __gmpz_gcd;
37 import __gmpn_gcd_1;
38 import __gmpn_cmp;
39 import __gmpz_tdiv_q;
40 import __gmpz_tdiv_r;
41 import __gmpz_tdiv_qr;
42 import __gmpz_fdiv_qr;
43 import __gmpz_divexact;
44 import __gmpz_and;
45 import __gmpz_xor;
46 import __gmpz_ior;
47 import __gmpz_com;
48 #endif
49 import pthread_mutex_lock;
50 import pthread_mutex_unlock;
51 #endif
52 import base_ControlziExceptionziBase_nestedAtomically_closure;
53 import EnterCriticalSection;
54 import LeaveCriticalSection;
55 import ghczmprim_GHCziBool_False_closure;
56
57 /*-----------------------------------------------------------------------------
58   Array Primitives
59
60   Basically just new*Array - the others are all inline macros.
61
62   The size arg is always passed in R1, and the result returned in R1.
63
64   The slow entry point is for returning from a heap check, the saved
65   size argument must be re-loaded from the stack.
66   -------------------------------------------------------------------------- */
67
68 /* for objects that are *less* than the size of a word, make sure we
69  * round up to the nearest word for the size of the array.
70  */
71
/* newByteArray#: allocate an unpinned MutableByteArray# of R1 bytes in
 * the current capability's local nursery.  Sub-word sizes are rounded
 * up to a whole number of words (see note above).  Returns the array.
 */
newByteArrayzh_fast
{
    W_ words, payload_words, n, p;
    // GC-safe point; no pointer arguments are live across it.
    MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
85
/* newPinnedByteArray#: like newByteArray#, but the object is allocated
 * with allocatePinned() so the GC will never move it, and the payload
 * (not just the object) is 8-byte aligned.
 */
newPinnedByteArrayzh_fast
{
    W_ words, payload_words, n, p;

    MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);

    // We want an 8-byte aligned array.  allocatePinned() gives us
    // 8-byte aligned memory by default, but we want to align the
    // *goods* inside the ArrWords object, so we have to check the
    // size of the ArrWords header and adjust our size accordingly.
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    if ((SIZEOF_StgArrWords & 7) != 0) {
        words = words + 1;
    }

    ("ptr" p) = foreign "C" allocatePinned(words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    // Again, if the ArrWords header isn't a multiple of 8 bytes, we
    // have to push the object forward one word so that the goods
    // fall on an 8-byte boundary.
    if ((SIZEOF_StgArrWords & 7) != 0) {
        p = p + WDS(1);
    }

    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
117
/* newArray#: allocate a mutable pointer array of R1 elements, every
 * element initialised to the closure in R2.  The array is born DIRTY
 * (stg_MUT_ARR_PTRS_DIRTY_info) since its contents are unrecorded.
 */
newArrayzh_fast
{
    W_ words, n, init, arr, p;
    /* Args: R1 = words, R2 = initialisation value */

    n = R1;
    // R2 is a live pointer and must be saved/restored across a GC.
    MAYBE_GC(R2_PTR,newArrayzh_fast);

    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
    ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(arr) = n;

    // Initialise all elements of the array with the value in R2
    init = R2;
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + WDS(words)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    RET_P(arr);
}
145
/* unsafeThawArray#: convert a frozen array back to a mutable one in
 * place, adding it to the mutable list only if it isn't already there
 * (see the subtlety note below).
 */
unsafeThawArrayzh_fast
{
  // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
  //
  // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
  // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
  // it on the mutable list for the GC to remove (removing something from
  // the mutable list is not easy, because the mut_list is only singly-linked).
  // 
  // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
  // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
  // to indicate that it is still on the mutable list.
  //
  // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
  // either it is on a mut_list, or it isn't.  We adopt the convention that
  // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
  // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
  // we put it on the mutable list more than once, but it would get scavenged
  // multiple times during GC, which would be unnecessarily slow.
  //
  if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(R1, R1);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        RET_P(R1);
  } else {
        // Already on the mutable list (FROZEN0): just flip the info ptr.
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        RET_P(R1);
  }
}
176
177 /* -----------------------------------------------------------------------------
178    MutVar primitives
179    -------------------------------------------------------------------------- */
180
/* newMutVar#: allocate a MutVar# holding the closure in R1.  Born
 * DIRTY since its creation is not yet recorded on the mutable list.
 */
newMutVarzh_fast
{
    W_ mv;
    /* Args: R1 = initialisation value */

    ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);

    // Heap is bumped by ALLOC_PRIM; carve the object off the top.
    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
    StgMutVar_var(mv) = R1;
    
    RET_P(mv);
}
194
/* atomicModifyMutVar#: atomically apply f (R2) to the contents of the
 * MutVar# (R1).  Builds three thunks up front (z = f x, y = fst z,
 * r = snd z), then installs y as the new contents.  In the threaded
 * RTS the install is a CAS retry loop; otherwise a plain store.
 */
atomicModifyMutVarzh_fast
{
    W_ mv, f, z, x, y, r, h;
    /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */

    /* If x is the current contents of the MutVar#, then 
       We want to make the new contents point to

         (sel_0 (f x))
 
       and the return value is
         
         (sel_1 (f x))

        obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */

#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

   // One heap check covers all three thunks.
   HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);

   mv = R1;
   f = R2;

   TICK_ALLOC_THUNK_2();
   CCCS_ALLOC(THUNK_2_SIZE);
   z = Hp - THUNK_2_SIZE + WDS(1);
   SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(z);
   StgThunk_payload(z,0) = f;

   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   y = z - THUNK_1_SIZE;
   SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(y);
   StgThunk_payload(y,0) = z;

   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   r = y - THUNK_1_SIZE;
   SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(r);
   StgThunk_payload(r,0) = z;

 retry:
   // z's second argument (the old contents) is (re)written on every
   // retry, so the thunks stay consistent with the value we CAS against.
   x = StgMutVar_var(mv);
   StgThunk_payload(z,1) = x;
#ifdef THREADED_RTS
   (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) [];
   if (h != x) { goto retry; }
#else
   StgMutVar_var(mv) = y;
#endif

   // If the MutVar was clean, record the write for the generational GC.
   if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
     foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
   }

   RET_P(r);
}
276
277 /* -----------------------------------------------------------------------------
278    Weak Pointer Primitives
279    -------------------------------------------------------------------------- */
280
STRING(stg_weak_msg,"New weak pointer at %p\n")

/* mkWeak#: allocate a StgWeak with key R1, value R2 and finalizer R3
 * (NULL means no finalizer) and link it onto the global weak_ptr_list.
 */
mkWeakzh_fast
{
  /* R1 = key
     R2 = value
     R3 = finalizer (or NULL)
  */
  W_ w;

  if (R3 == NULL) {
    R3 = stg_NO_FINALIZER_closure;
  }

  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, W_[CCCS]);

  StgWeak_key(w)       = R1;
  StgWeak_value(w)     = R2;
  StgWeak_finalizer(w) = R3;

  // Push onto the head of the global weak pointer list.
  StgWeak_link(w)       = W_[weak_ptr_list];
  W_[weak_ptr_list]     = w;

  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

  RET_P(w);
}
311
312
/* finalizeWeak#: kill the weak pointer in R1 and return its finalizer.
 * Returns (0, no-finalizer) if already dead or no finalizer was set,
 * otherwise (1, finalizer).
 */
finalizzeWeakzh_fast
{
  /* R1 = weak ptr
   */
  W_ w, f;

  w = R1;

  // already dead?
  if (GET_INFO(w) == stg_DEAD_WEAK_info) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  }

  // kill it
#ifdef PROFILING
  // @LDV profiling
  // A weak pointer is inherently used, so we do not need to call
  // LDV_recordDead_FILL_SLOP_DYNAMIC():
  //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
  // or, LDV_recordDead():
  //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
  // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
  // large as weak pointers, so there is no need to fill the slop, either.
  // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
#endif

  //
  // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
  //
  SET_INFO(w,stg_DEAD_WEAK_info);
  LDV_RECORD_CREATE(w);

  // Read the finalizer before overwriting the payload with the link.
  f = StgWeak_finalizer(w);
  StgDeadWeak_link(w) = StgWeak_link(w);

  /* return the finalizer */
  if (f == stg_NO_FINALIZER_closure) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  } else {
      RET_NP(1,f);
  }
}
355
/* deRefWeak#: dereference the weak pointer in R1.  Returns (1, value)
 * while the weak pointer is alive, and (0, the weak ptr itself) once
 * its key has been collected.
 */
deRefWeakzh_fast
{
  /* R1 = weak ptr */
  W_ w, code, val;

  w = R1;
  // Default to the dead case, then override if still alive: a live
  // weak pointer still carries the stg_WEAK_info info table.
  code = 0;
  val = w;
  if (GET_INFO(w) == stg_WEAK_info) {
    code = 1;
    val = StgWeak_value(w);
  }
  RET_NP(code,val);
}
371
372 /* -----------------------------------------------------------------------------
373    Arbitrary-precision Integer operations.
374
375    There are some assumptions in this code that mp_limb_t == W_.  This is
376    the case for all the platforms that GHC supports, currently.
377    -------------------------------------------------------------------------- */
378
/* int2Integer#: convert an Int# into GMP (size, limb-array) form.
 * The sign lives in the returned size (-1, 0 or 1); the single limb
 * holds the magnitude.
 */
int2Integerzh_fast
{
   /* arguments: R1 = Int# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;
   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   /* mpz_set_si is inlined here, makes things simpler */
   if (%lt(val,0)) { 
        s  = -1;
        Hp(0) = -val;
   } else { 
     if (%gt(val,0)) {
        s = 1;
        Hp(0) = val;
     } else {
        // zero: size 0, limb left uninitialised (GMP convention)
        s = 0;
     }
  }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# 
               #)
   */
   RET_NP(s,p);
}
411
/* word2Integer#: convert a Word# into GMP (size, limb-array) form.
 * The size is 0 for zero, otherwise 1 (a Word# is never negative).
 */
word2Integerzh_fast
{
   /* arguments: R1 = Word# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   if (val != 0) {
        s = 1;
        W_[Hp] = val;
   } else {
        s = 0;
   }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
438
439
440 /*
441  * 'long long' primops for converting to/from Integers.
442  */
443
444 #ifdef SUPPORT_LONG_LONGS
445
/* int64ToInteger#: convert an Int64# into GMP (size, limb-array) form
 * on 32-bit platforms (SUPPORT_LONG_LONGS).  Uses one limb when the
 * high 32 bits are pure sign extension, two otherwise; a negative
 * value is stored as a negated size plus the magnitude limbs.
 */
int64ToIntegerzh_fast
{
   /* arguments: L1 = Int64# */

   L_ val;
   W_ hi, lo, s, neg, words_needed, p;

   val = L1;
   neg = 0;

   hi = TO_W_(val >> 32);
   lo = TO_W_(val);

   if ( hi == 0 || (hi == 0xFFFFFFFF && lo != 0) )  {
       // minimum is one word
       words_needed = 1;
   } else { 
       words_needed = 2;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, int64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   // Negate (hi,lo) as a 64-bit two's-complement pair to get the
   // magnitude; the carry out of lo decides whether hi gets the -1.
   if ( %lt(hi,0) ) {
     neg = 1;
     lo = -lo;
     if(lo == 0) {
       hi = -hi;
     } else {
       hi = -hi - 1;
     }
   }

   if ( words_needed == 2 )  { 
      s = 2;
      Hp(-1) = lo;
      Hp(0) = hi;
   } else { 
       if ( lo != 0 ) {
           s = 1;
           Hp(0) = lo;
       } else /* val==0 */  {
           s = 0;
       }
   }
   if ( neg != 0 ) {
        s = -s;
   }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
/* word64ToInteger#: convert a Word64# into GMP (size, limb-array)
 * form on 32-bit platforms.  One limb when the high word is zero,
 * otherwise two; the size is never negative.
 */
word64ToIntegerzh_fast
{
   /* arguments: L1 = Word64# */

   L_ val;
   W_ hi, lo, s, words_needed, p;

   val = L1;
   hi = TO_W_(val >> 32);
   lo = TO_W_(val);

   if ( hi != 0 ) {
      words_needed = 2;
   } else {
      words_needed = 1;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, word64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   if ( hi != 0 ) { 
     s = 2;
     Hp(-1) = lo;
     Hp(0)  = hi;
   } else {
      if ( lo != 0 ) {
        s = 1;
        Hp(0) = lo;
     } else /* val==0 */  {
      s = 0;
     }
  }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
546
547
548
549 #endif /* SUPPORT_LONG_LONGS */
550
551 /* ToDo: this is shockingly inefficient */
552
#ifndef THREADED_RTS
/* Static scratch MP_INTs for the GMP primops below.  These globals are
 * only safe in the non-threaded RTS; the threaded RTS keeps per-
 * Capability scratch space instead (see FETCH_MP_TEMP below).
 */
section "bss" {
  mp_tmp1:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_tmp2:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_result1:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_result2:
    bits8 [SIZEOF_MP_INT];
}
#endif
574
/* FETCH_MP_TEMP(X): bring the scratch MP_INT named X into scope.  In
 * the threaded RTS, X becomes a local pointing into this Capability's
 * register table (BaseReg + offset), so each capability has its own
 * scratch space; in the non-threaded RTS the static bss symbol above
 * is used directly and the macro expands to nothing.
 */
#ifdef THREADED_RTS
#define FETCH_MP_TEMP(X) \
W_ X; \
X = BaseReg + (OFFSET_StgRegTable_r ## X);
#else
#define FETCH_MP_TEMP(X) /* Nothing */
#endif
582
/* GMP_TAKE2_RET1(name, mp_fun): define primop `name` taking two
 * Integers as (size, data) pairs in (R1,R2) and (R3,R4), applying the
 * binary GMP function mp_fun, and returning one (size, data) result.
 * The MP_INT structs are faked up around the ByteArray# payloads so
 * no copying of limbs is needed on the way in; on the way out the
 * result ByteArray# is recovered by subtracting the header size from
 * GMP's limb pointer (GMP allocates via the RTS allocator).
 */
#define GMP_TAKE2_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1, s2;                                                          \
  W_ d1, d2;                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_tmp2);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
  FETCH_MP_TEMP(mp_result2);                                            \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
                                                                        \
  s1 = W_TO_INT(R1);                                                    \
  d1 = R2;                                                              \
  s2 = W_TO_INT(R3);                                                    \
  d2 = R4;                                                              \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
  MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
  MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
  MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
  MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
  MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}
616
/* GMP_TAKE1_RET1(name, mp_fun): as GMP_TAKE2_RET1, but for a unary
 * GMP function — one Integer in (R1,R2), one (size, data) result.
 */
#define GMP_TAKE1_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1;                                                              \
  W_ d1;                                                                \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR, name);                                               \
                                                                        \
  d1 = R2;                                                              \
  s1 = W_TO_INT(R1);                                                    \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}
643
/* GMP_TAKE2_RET2(name, mp_fun): as GMP_TAKE2_RET1, but mp_fun yields
 * two results (e.g. quotient and remainder), each returned as a
 * (size, data) pair — four components in total.
 */
#define GMP_TAKE2_RET2(name,mp_fun)                                                     \
name                                                                                    \
{                                                                                       \
  CInt s1, s2;                                                                          \
  W_ d1, d2;                                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                                               \
  FETCH_MP_TEMP(mp_tmp2);                                                               \
  FETCH_MP_TEMP(mp_result1)                                                             \
  FETCH_MP_TEMP(mp_result2)                                                             \
                                                                                        \
  /* call doYouWantToGC() */                                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
                                                                                        \
  s1 = W_TO_INT(R1);                                                                    \
  d1 = R2;                                                                              \
  s2 = W_TO_INT(R3);                                                                    \
  d2 = R4;                                                                              \
                                                                                        \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
  MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
  MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
  MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
                                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
  foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
                                                                                        \
  /* Perform the operation */                                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
                                                                                        \
  RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
           MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
           TO_W_(MP_INT__mp_size(mp_result2)),                                          \
           MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
}
680
/* Instantiate the Integer arithmetic primops from the macros above,
 * one per GMP entry point. */
GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)

GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
695
#ifndef THREADED_RTS
section "bss" {
  mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
}
#endif
701
/* gcdInt#: gcd of two Int#s via GMP's single-limb __gmpn_gcd_1.  The
 * first operand is staged in the one-limb scratch buffer mp_tmp_w.
 */
gcdIntzh_fast
{
    /* R1 = the first Int#; R2 = the second Int# */
    W_ r; 
    FETCH_MP_TEMP(mp_tmp_w);

    W_[mp_tmp_w] = R1;
    (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];

    R1 = r;
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
715
716
/* gcdIntegerInt#: gcd of an Integer (size R1, limbs R2) with an Int#
 * (R3), delegating to GMP's __gmpn_gcd_1 over the limb array.
 */
gcdIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ s1;
    (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
    R1 = s1;
    
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
727
728
/* cmpIntegerInt#: three-way compare of an Integer (size R1, limbs R2)
 * against an Int# (R3).  Result in R1: negative, zero or positive.
 * Compares signs first, then (for single-limb operands) magnitudes.
 */
cmpIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ usize, vsize, v_digit, u_digit;

    usize = R1;
    vsize = 0;
    v_digit = R3;

    // paraphrased from __gmpz_cmp_si() in the GMP sources
    if (%gt(v_digit,0)) {
        vsize = 1;
    } else { 
        if (%lt(v_digit,0)) {
            vsize = -1;
            v_digit = -v_digit;
        }
    }

    // Different signs (or sizes): the sign difference decides.
    if (usize != vsize) {
        R1 = usize - vsize; 
        jump %ENTRY_CODE(Sp(0));
    }

    // Both zero.
    if (usize == 0) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    // Same sign, both single-limb: compare magnitudes.
    u_digit = W_[BYTE_ARR_CTS(R2)];

    if (u_digit == v_digit) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
        R1 = usize; 
    } else {
        R1 = -usize; 
    }

    jump %ENTRY_CODE(Sp(0));
}
773
/* cmpInteger#: three-way compare of two Integers given as (size, limb)
 * pairs in (R1,R2) and (R3,R4).  Result in R1: negative, zero or
 * positive.  Sign/size comparison first, then __gmpn_cmp on the limbs.
 */
cmpIntegerzh_fast
{
    /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
    W_ usize, vsize, size, up, vp;
    CInt cmp;

    // paraphrased from __gmpz_cmp() in the GMP sources
    usize = R1;
    vsize = R3;

    if (usize != vsize) {
        R1 = usize - vsize; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (usize == 0) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (%lt(usize,0)) { // NB. not <, which is unsigned
        size = -usize;
    } else {
        size = usize;
    }

    up = BYTE_ARR_CTS(R2);
    vp = BYTE_ARR_CTS(R4);

    (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];

    if (cmp == 0 :: CInt) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    // Magnitude comparison agrees with the sign iff the result is
    // positive (for negative numbers the larger magnitude is smaller).
    if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
        R1 = 1;
    } else {
        R1 = (-1); 
    }
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
818
/* integer2Int#: truncate an Integer (size R1, limbs R2) to an Int#.
 * Takes the low limb and negates it if the size is negative.
 */
integer2Intzh_fast
{
    /* R1 = s; R2 = d */
    W_ r, s;

    s = R1;
    if (s == 0) {
        r = 0;
    } else {
        r = W_[R2 + SIZEOF_StgArrWords];
        if (%lt(s,0)) {
            r = -r;
        }
    }
    /* Result parked in R1, return via info-pointer at TOS */
    R1 = r;
    jump %ENTRY_CODE(Sp(0));
}
837
/* integer2Word#: truncate an Integer (size R1, limbs R2) to a Word#.
 * Same limb extraction as integer2Int#; negation wraps modulo 2^word.
 */
integer2Wordzh_fast
{
  /* R1 = s; R2 = d */
  W_ r, s;

  s = R1;
  if (s == 0) {
    r = 0;
  } else {
    r = W_[R2 + SIZEOF_StgArrWords];
    if (%lt(s,0)) {
        r = -r;
    }
  }
  /* Result parked in R1, return via info-pointer at TOS */
  R1 = r;
  jump %ENTRY_CODE(Sp(0));
}
856
/* decodeFloat#: split the Float# in F1 into exponent and mantissa via
 * the C helper __decodeFloat.  The mantissa limbs are written straight
 * into a freshly allocated one-word ByteArray#.
 *
 * Fix: the opening brace of the procedure body was missing (lost in
 * extraction); every other primop in this file opens with `{` on the
 * line after its label.
 */
decodeFloatzh_fast
{
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);
    
    /* arguments: F1 = Float# */
    arg = F1;
    
    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
    
    /* Be prepared to tell Lennart-coded __decodeFloat
       where mantissa._mp_d can be put (it does not care about the rest) */
    p = Hp - SIZEOF_StgArrWords;
    SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
    StgArrWords_words(p) = 1;
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
    
    /* Perform the operation */
    foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
    
    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
882
/* decodeFloat_Int#: split the Float# in F1 into (mantissa, exponent)
 * as two Int#s via __decodeFloat_Int, using per-capability scratch
 * words.  No heap allocation is needed.
 *
 * Fix: the opening brace of the procedure body was missing (lost in
 * extraction), matching the form of every other primop in this file.
 */
decodeFloatzuIntzh_fast
{
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);
    
    /* arguments: F1 = Float# */
    arg = F1;
    
    /* Perform the operation */
    foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
    
    /* returns: (Int# (mantissa), Int# (exponent)) */
    RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
}
899
#define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
#define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)

/* decodeDouble#: split the Double# in D1 into exponent and mantissa
 * via __decodeDouble; the mantissa limbs go into a fresh ByteArray#
 * sized for a double's mantissa.
 *
 * Fix: the opening brace of the procedure body was missing (lost in
 * extraction), matching the form of every other primop in this file.
 */
decodeDoublezh_fast
{
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: D1 = Double# */
    arg = D1;

    ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
    
    /* Be prepared to tell Lennart-coded __decodeDouble
       where mantissa.d can be put (it does not care about the rest) */
    p = Hp - ARR_SIZE + WDS(1);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

    /* Perform the operation */
    foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
    
    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
928
/* decodeDouble_2Int#: split the Double# in D1 into mantissa sign,
 * mantissa high/low words and exponent via __decodeDouble_2Int, using
 * four per-capability scratch words.  No heap allocation is needed.
 *
 * Fix: the opening brace of the procedure body was missing (lost in
 * extraction), matching the form of every other primop in this file.
 */
decodeDoublezu2Intzh_fast
{
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp2);
    FETCH_MP_TEMP(mp_result1);
    FETCH_MP_TEMP(mp_result2);

    /* arguments: D1 = Double# */
    arg = D1;

    /* Perform the operation */
    foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
                                    mp_result1 "ptr", mp_result2 "ptr",
                                    arg) [];

    /* returns:
       (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
    RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
}
950
951 /* -----------------------------------------------------------------------------
952  * Concurrency primitives
953  * -------------------------------------------------------------------------- */
954
/* fork# primop: create a new IO thread running the given closure and
 * hand it to the scheduler.  Returns the new ThreadId#.
 */
forkzh_fast
{
  /* args: R1 = closure to spark */

  MAYBE_GC(R1_PTR, forkzh_fast);

  W_ closure;
  W_ threadid;
  closure = R1;

  /* createIOThread allocates the TSO with the default initial stack size. */
  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
                                RtsFlags_GcFlags_initialStkSize(RtsFlags), 
                                closure "ptr") [];

  /* start blocked if the current thread is blocked:
     the child inherits the parent's BLOCKEX/INTERRUPTIBLE mask bits */
  StgTSO_flags(threadid) = 
     StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
                                (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));

  foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];

  // switch at the earliest opportunity
  Capability_context_switch(MyCapability()) = 1 :: CInt;
  
  RET_P(threadid);
}
981
/* forkOn# primop: like fork#, but pins the new thread to the given
 * capability (cpu) via scheduleThreadOn.  Returns the new ThreadId#.
 */
forkOnzh_fast
{
  /* args: R1 = cpu, R2 = closure to spark */

  MAYBE_GC(R2_PTR, forkOnzh_fast);

  W_ cpu;
  W_ closure;
  W_ threadid;
  cpu = R1;
  closure = R2;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
                                RtsFlags_GcFlags_initialStkSize(RtsFlags), 
                                closure "ptr") [];

  /* start blocked if the current thread is blocked:
     the child inherits the parent's BLOCKEX/INTERRUPTIBLE mask bits */
  StgTSO_flags(threadid) = 
     StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
                                (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));

  foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];

  // switch at the earliest opportunity
  Capability_context_switch(MyCapability()) = 1 :: CInt;
  
  RET_P(threadid);
}
1010
/* yield# primop: give up the capability; the heavy lifting happens in
 * the stg_yield_noregs stub in HeapStackCheck.cmm. */
yieldzh_fast
{
  jump stg_yield_noregs;
}
1015
/* myThreadId# primop: the current TSO *is* the ThreadId#. */
myThreadIdzh_fast
{
  /* no args. */
  RET_P(CurrentTSO);
}
1021
/* labelThread# primop: attach a debug label to a thread.
 * A no-op unless the RTS was built with DEBUG. */
labelThreadzh_fast
{
  /* args: 
        R1 = ThreadId#
        R2 = Addr# */
#ifdef DEBUG
  foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
#endif
  /* return to the continuation on top of the stack */
  jump %ENTRY_CODE(Sp(0));
}
1032
/* isCurrentThreadBound# primop: ask the RTS whether the current thread
 * is bound to an OS thread; returns an Int# (0 or 1 from isThreadBound). */
isCurrentThreadBoundzh_fast
{
  /* no args */
  W_ r;
  (r) = foreign "C" isThreadBound(CurrentTSO) [];
  RET_N(r);
}
1040
/* threadStatus# primop: return an Int# status code for a thread.
 * 16 = complete, 17 = killed, otherwise the TSO's why_blocked value.
 * The numeric codes must match the decoding in GHC.Conc.threadStatus. */
threadStatuszh_fast
{
    /* args: R1 :: ThreadId# */
    W_ tso;
    W_ why_blocked;
    W_ what_next;
    W_ ret;

    tso = R1;
    /* chase ThreadRelocated indirections to the current TSO */
    loop:
      if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
          tso = StgTSO__link(tso);
          goto loop;
      }

    what_next   = TO_W_(StgTSO_what_next(tso));
    why_blocked = TO_W_(StgTSO_why_blocked(tso));
    // Note: these two reads are not atomic, so they might end up
    // being inconsistent.  It doesn't matter, since we
    // only return one or the other.  If we wanted to return the
    // contents of block_info too, then we'd have to do some synchronisation.

    if (what_next == ThreadComplete) {
        ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
    } else {
        if (what_next == ThreadKilled) {
            ret = 17;
        } else {
            ret = why_blocked;
        }
    }
    RET_N(ret);
}
1074
1075 /* -----------------------------------------------------------------------------
1076  * TVar primitives
1077  * -------------------------------------------------------------------------- */
1078
1079 #define SP_OFF 0
1080
1081 // Catch retry frame ------------------------------------------------------------
1082
/* Return code for a CATCH_RETRY_FRAME (pushed by catchRetry#).
 * Reached when the code under the frame returns normally: try to commit
 * the nested transaction; on failure, restart whichever branch we were
 * running. */
INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5)
{
   W_ r, frame, trec, outer;

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
   (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
   if (r != 0) {
     /* Succeeded (either first branch or second branch) */
     StgTSO_trec(CurrentTSO) = outer;
     Sp = Sp + SIZEOF_StgCatchRetryFrame;
     jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
     /* Did not commit: re-execute */
     W_ new_trec;
     ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
     StgTSO_trec(CurrentTSO) = new_trec;
     /* re-run whichever branch we were in the middle of */
     if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
       R1 = StgCatchRetryFrame_alt_code(frame);
     } else {
       R1 = StgCatchRetryFrame_first_code(frame);
     }
     jump stg_ap_v_fast;
   }
}
1113
1114
1115 // Atomically frame ------------------------------------------------------------
1116
/* Return code for an ATOMICALLY_FRAME (pushed by atomically#).
 * Reached when the transaction body (or an invariant check) returns:
 * run any pending invariant checks in their own nested transactions,
 * then attempt to commit; if the commit fails, restart from scratch. */
INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
  W_ frame, trec, valid, next_invariant, q, outer;

  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (outer == NO_TREC) {
    /* First time back at the atomically frame -- pick up invariants */
    ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;

  } else {
    /* Second/subsequent time back at the atomically frame -- abort the
     * tx that's checking the invariant and move on to the next one */
    StgTSO_trec(CurrentTSO) = outer;
    q = StgAtomicallyFrame_next_invariant_to_check(frame);
    StgInvariantCheckQueue_my_execution(q) = trec;
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    /* Don't free trec -- it's linked from q and will be stashed in the
     * invariant if we eventually commit. */
    q = StgInvariantCheckQueue_next_queue_entry(q);
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    trec = outer;
  }

  q = StgAtomicallyFrame_next_invariant_to_check(frame);

  if (q != END_INVARIANT_CHECK_QUEUE) {
    /* We can't commit yet: another invariant to check */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;

    next_invariant = StgInvariantCheckQueue_invariant(q);
    R1 = StgAtomicInvariant_code(next_invariant);
    jump stg_ap_v_fast;

  } else {

    /* We've got no more invariants to check, try to commit */
    (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
    if (valid != 0) {
      /* Transaction was valid: commit succeeded */
      StgTSO_trec(CurrentTSO) = NO_TREC;
      Sp = Sp + SIZEOF_StgAtomicallyFrame;
      jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
      /* Transaction was not valid: try again from a fresh top-level trec */
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
      R1 = StgAtomicallyFrame_code(frame);
      jump stg_ap_v_fast;
    }
  }
}
1178
/* Return code for an ATOMICALLY_FRAME whose thread is blocked in retry.
 * Reached when the thread is woken: re-check whether waiting is still
 * justified, and if not, restart the transaction from the beginning. */
INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
  W_ frame, trec, valid;

  frame = Sp;

  /* The TSO is currently waiting: should we stop waiting? */
  (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
  if (valid != 0) {
    /* Previous attempt is still valid: no point trying again yet */
    jump stg_block_noregs;
  } else {
    /* Previous attempt is no longer valid: try again */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    /* flip the frame back to the ordinary atomically frame */
    StgHeader_info(frame) = stg_atomically_frame_info;
    R1 = StgAtomicallyFrame_code(frame);
    jump stg_ap_v_fast;
  }
}
1203
1204 // STM catch frame --------------------------------------------------------------
1205
1206 #define SP_OFF 0
1207
1208 /* Catch frames are very similar to update frames, but when entering
1209  * one we just pop the frame off the stack and perform the correct
1210  * kind of return to the activation record underneath us on the stack.
1211  */
1212
/* Return code for a CATCH_STM_FRAME (pushed by catchSTM#).
 * Reached when the protected code returns normally: commit the nested
 * transaction; if the commit fails, re-run the protected code in a fresh
 * nested transaction. */
INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
   {
      W_ r, frame, trec, outer;
      frame = Sp;
      trec = StgTSO_trec(CurrentTSO);
      ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
      (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
      if (r != 0) {
        /* Commit succeeded */
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchSTMFrame;
        /* NOTE(review): other frames in this file return via
           %ENTRY_CODE(Sp(SP_OFF)); the bare Sp(SP_OFF) here looks
           inconsistent for unregisterised builds -- confirm. */
        jump Sp(SP_OFF);
      } else {
        /* Commit failed */
        W_ new_trec;
        ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
        StgTSO_trec(CurrentTSO) = new_trec;
        R1 = StgCatchSTMFrame_code(frame);
        jump stg_ap_v_fast;
      }
   }
1238
1239
1240 // Primop definition ------------------------------------------------------------
1241
/* atomically# primop: push an ATOMICALLY_FRAME, start a top-level
 * transaction, and run the STM action in R1 under it.  Nested use
 * (calling atomically inside a transaction) raises nestedAtomically. */
atomicallyzh_fast
{
  W_ frame;
  W_ old_trec;
  W_ new_trec;
  
  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR, atomicallyzh_fast); 

  /* Args: R1 = m :: STM a */
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);

  old_trec = StgTSO_trec(CurrentTSO);

  /* Nested transactions are not allowed; raise an exception */
  if (old_trec != NO_TREC) {
     R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
     jump raisezh_fast;
  }

  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
  StgAtomicallyFrame_code(frame) = R1;
  StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;

  /* Start the memory transaction */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1277
1278
/* catchSTM# primop: push a CATCH_STM_FRAME recording the handler, start
 * a nested transaction, and run the protected STM action in R1. */
catchSTMzh_fast
{
  W_ frame;
  
  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */
  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
  StgCatchSTMFrame_handler(frame) = R2;
  StgCatchSTMFrame_code(frame) = R1;

  /* Start a nested transaction to run the body of the try block in */
  W_ cur_trec;  
  W_ new_trec;
  cur_trec = StgTSO_trec(CurrentTSO);
  /* NOTE(review): this foreign call has no [] liveness annotation,
     unlike every other stmStartTransaction call in the file -- confirm
     whether R1/R2 need to be listed. */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1305
1306
/* catchRetry# primop (orElse): start a nested transaction for the first
 * alternative (R1), and push a CATCH_RETRY_FRAME remembering both
 * alternatives so retry# can fall through to the second (R2). */
catchRetryzh_fast
{
  W_ frame;
  W_ new_trec;
  W_ trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 

  /* Args: R1 :: STM a */
  /* Args: R2 :: STM a */
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;
  
  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1337
1338
/* retry# primop: unwind the stack to the nearest CATCH_RETRY_FRAME or
 * ATOMICALLY_FRAME.  At a catch-retry frame, switch to (or propagate
 * past) the alternative branch; at the atomically frame, wait on the
 * TVars read so far, or restart immediately if the read set is already
 * out of date. */
retryzh_fast
{
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

  MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
  StgTSO_sp(CurrentTSO) = Sp;
  (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
  Sp = StgTSO_sp(CurrentTSO);
  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
    // Abort the transaction attempting the current branch
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    // (!x) != 0 is just !x: true when running_alt_code is 0,
    // i.e. the retry came from the *first* branch
    if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
      // Retry in the first branch: try the alternative
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
      R1 = StgCatchRetryFrame_alt_code(frame);
      jump stg_ap_v_fast;
    } else {
      // Retry in the alternative code: propagate the retry
      StgTSO_trec(CurrentTSO) = outer;
      Sp = Sp + SIZEOF_StgCatchRetryFrame;
      goto retry_pop_stack;
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait 
  ASSERT(frame_type == ATOMICALLY_FRAME);
  if (outer != NO_TREC) {
    // We called retry while checking invariants, so abort the current
    // invariant check (merging its TVar accesses into the parent's read
    // set so we'll wait on them)
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    trec = outer;
    StgTSO_trec(CurrentTSO) = trec;
    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
  }
  ASSERT(outer == NO_TREC);

  (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
  if (r != 0) {
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
    StgHeader_info(frame) = stg_atomically_waiting_frame_info;
    Sp = frame;
    // Fix up the stack in the unregisterised case: the return convention is different.
    R3 = trec; // passing to stmWaitUnblock()
    jump stg_block_stmwait;
  } else {
    // Transaction was not valid: retry immediately
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
    jump stg_ap_v_fast;
  }
}
1410
1411
/* check# primop: register an STM invariant closure to be checked when
 * the current transaction commits. */
checkzh_fast
{
  W_ trec, closure;

  /* Args: R1 = invariant closure */
  MAYBE_GC (R1_PTR, checkzh_fast); 

  trec = StgTSO_trec(CurrentTSO);
  closure = R1;
  foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
                                     trec "ptr",
                                     closure "ptr") [];

  /* return to the continuation on top of the stack */
  jump %ENTRY_CODE(Sp(0));
}
1427
1428
/* newTVar# primop: allocate a TVar holding the given initial value. */
newTVarzh_fast
{
  W_ tv;
  W_ new_value;

  /* Args: R1 = initialisation value */

  MAYBE_GC (R1_PTR, newTVarzh_fast); 
  new_value = R1;
  ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
  RET_P(tv);
}
1441
1442
/* readTVar# primop: read a TVar inside the current transaction,
 * recording the access in the transaction record. */
readTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ result;

  /* Args: R1 = TVar closure */

  MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];

  RET_P(result);
}
1458
/* readTVarIO# primop: read a TVar outside any transaction.  Spin while
 * the current value is a TREC_HEADER, i.e. while another capability is
 * mid-commit on this TVar. */
readTVarIOzh_fast
{
    W_ result;

again:
    result = StgTVar_current_value(R1);
    if (%INFO_PTR(result) == stg_TREC_HEADER_info) {
        goto again;
    }
    RET_P(result);
}
1470
/* writeTVar# primop: write a TVar inside the current transaction,
 * recording the update in the transaction record. */
writeTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ new_value;
  
  /* Args: R1 = TVar closure */
  /*       R2 = New value    */

  MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  new_value = R2;
  foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];

  /* return to the continuation on top of the stack */
  jump %ENTRY_CODE(Sp(0));
}
1488
1489
1490 /* -----------------------------------------------------------------------------
1491  * MVar primitives
1492  *
1493  * take & putMVar work as follows.  Firstly, an important invariant:
1494  *
1495  *    If the MVar is full, then the blocking queue contains only
1496  *    threads blocked on putMVar, and if the MVar is empty then the
1497  *    blocking queue contains only threads blocked on takeMVar.
1498  *
1499  * takeMvar:
1500  *    MVar empty : then add ourselves to the blocking queue
1501  *    MVar full  : remove the value from the MVar, and
1502  *                 blocking queue empty     : return
1503  *                 blocking queue non-empty : perform the first blocked putMVar
1504  *                                            from the queue, and wake up the
1505  *                                            thread (MVar is now full again)
1506  *
1507  * putMVar is just the dual of the above algorithm.
1508  *
1509  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1510  * the stack of the thread waiting to do the putMVar.  See
1511  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1512  * the stack layout, and the PerformPut and PerformTake macros below.
1513  *
1514  * It is important that a blocked take or put is woken up with the
1515  * take/put already performed, because otherwise there would be a
1516  * small window of vulnerability where the thread could receive an
1517  * exception and never perform its take or put, and we'd end up with a
1518  * deadlock.
1519  *
1520  * -------------------------------------------------------------------------- */
1521
/* isEmptyMVar# primop: 1 if the MVar holds no value, 0 otherwise.
 * An empty MVar stores stg_END_TSO_QUEUE_closure in its value field. */
isEmptyMVarzh_fast
{
    /* args: R1 = MVar closure */

    if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
        RET_N(1);
    } else {
        RET_N(0);
    }
}
1532
/* newMVar# primop: allocate an empty MVar (head, tail and value all set
 * to the END_TSO_QUEUE sentinel). */
newMVarzh_fast
{
    /* args: none */
    W_ mvar;

    ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
  
    mvar = Hp - SIZEOF_StgMVar + WDS(1);
    SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
        // MVARs start dirty: generation 0 has no mutable list
    StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
    RET_P(mvar);
}
1548
1549
/* PerformTake: complete a blocked takeMVar on another thread's behalf by
   writing the value and a return-with-R1 info pointer onto its stack. */
#define PerformTake(tso, value)                         \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;

/* PerformPut: complete a blocked putMVar on another thread's behalf by
   popping its stg_block_putmvar frame and reading out the stored value. */
#define PerformPut(tso,lval)                    \
    StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
    lval = W_[StgTSO_sp(tso) - WDS(1)];
1557
/* takeMVar# primop: take the value from an MVar, blocking if it is
 * empty.  If putMVar-ers are queued, perform the first queued put so
 * the MVar refills and its thread can be woken.
 *
 * Fix: the TSO_DIRTY test was written `flags & TSO_DIRTY == 0`, which
 * under C-style precedence parses as `flags & (TSO_DIRTY == 0)` and is
 * always false; parenthesised to pin the intended meaning regardless of
 * the C-- parser's precedence table.
 */
takeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif
        
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
    }

    /* If the MVar is empty, put ourselves on its blocking queue,
     * and wait until we're woken up.
     */
    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            foreign "C" setTSOLink(MyCapability() "ptr", 
                                   StgMVar_tail(mvar) "ptr",
                                   CurrentTSO) [];
        }
        StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        R1 = mvar;
        jump stg_block_takemvar;
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
    {
        /* There are putMVar(s) waiting... 
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));

        /* mark the woken TSO dirty if it isn't already */
        if ((TO_W_(StgTSO_flags(tso)) & TSO_DIRTY) == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }

        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        RET_P(val);
    } 
    else
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
 
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif

        RET_P(val);
    }
}
1642
1643
/* tryTakeMVar# primop: non-blocking takeMVar.  Returns (0, dummy) when
 * the MVar is empty, (1, value) when a value was taken; queued
 * putMVar-ers are serviced just as in takeMVar#.
 *
 * Fix: parenthesised the TSO_DIRTY test (see takeMVarzh_fast) so it
 * cannot parse as `flags & (TSO_DIRTY == 0)`.
 */
tryTakeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */

    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        unlockClosure(mvar, info);
#endif
        /* HACK: we need a pointer to pass back, 
         * so we abuse NO_FINALIZER_closure
         */
        RET_NP(0, stg_NO_FINALIZER_closure);
    }

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are putMVar(s) waiting... 
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));
        /* mark the woken TSO dirty if it isn't already */
        if ((TO_W_(StgTSO_flags(tso)) & TSO_DIRTY) == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }

        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else 
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    
    RET_NP(1, val);
}
1715
1716
/* putMVar# primop: put a value into an MVar, blocking if it is full.
 * If takeMVar-ers are queued, hand the value straight to the first one
 * and wake it, leaving the MVar empty.
 *
 * Fix: parenthesised the TSO_DIRTY test (see takeMVarzh_fast) so it
 * cannot parse as `flags & (TSO_DIRTY == 0)`.
 */
putMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;
    val  = R2;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* MVar full: queue ourselves on its blocking queue and block. */
    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            foreign "C" setTSOLink(MyCapability() "ptr", 
                                   StgMVar_tail(mvar) "ptr",
                                   CurrentTSO) [];
        }
        StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        R1 = mvar;
        R2 = val;
        jump stg_block_putmvar;
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, val);
        /* mark the woken TSO dirty if it isn't already */
        if ((TO_W_(StgTSO_flags(tso)) & TSO_DIRTY) == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }
      
        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = val;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    
    /* ToDo: yield afterward for better communication performance? */
}
1796
1797
/* tryPutMVar# primop: non-blocking putMVar.  Returns 0 when the MVar is
 * already full, 1 when the value was stored (or handed directly to a
 * queued takeMVar-er).
 *
 * Fix: parenthesised the TSO_DIRTY test (see takeMVarzh_fast) so it
 * cannot parse as `flags & (TSO_DIRTY == 0)`.
 */
tryPutMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        unlockClosure(mvar, info);
#endif
        RET_N(0);
    }
  
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
        
        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        /* mark the woken TSO dirty if it isn't already */
        if ((TO_W_(StgTSO_flags(tso)) & TSO_DIRTY) == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }
      
        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    
    RET_N(1);
    /* ToDo: yield afterward for better communication performance? */
}
1864
1865
1866 /* -----------------------------------------------------------------------------
1867    Stable pointer primitives
1868    -------------------------------------------------------------------------  */
1869
/* makeStableName# primop: return the (unique) StableName object for a
 * heap object, allocating one and recording it in stable_ptr_table only
 * the first time the object is named. */
makeStableNamezh_fast
{
    W_ index, sn_obj;

    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
  
    (index) = foreign "C" lookupStableName(R1 "ptr") [];

    /* Is there already a StableName for this heap object?
     *  stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }
    
    RET_P(sn_obj);
}
1892
1893
/* makeStablePtr# primop: register R1 in the stable-pointer table and
 * return the resulting StablePtr# (an index, returned as a non-pointer). */
makeStablePtrzh_fast
{
    /* Args: R1 = a */
    W_ sp;
    MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
    ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
    RET_N(sp);
}
1902
/* deRefStablePtr# primop: look the StablePtr# up in stable_ptr_table
 * and return the heap object it refers to. */
deRefStablePtrzh_fast
{
    /* Args: R1 = the stable ptr */
    W_ r, sp;
    sp = R1;
    r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
    RET_P(r);
}
1911
1912 /* -----------------------------------------------------------------------------
1913    Bytecode object primitives
1914    -------------------------------------------------------------------------  */
1915
/* newBCO# - construct a new bytecode object.
 *
 * The BCO is allocated with enough room after the fixed fields to hold
 * a copy of the bitmap array's payload (the arity/bitmap info used by
 * the GC to scan a BCO's stack frame).  Returns the new BCO.
 */
newBCOzh_fast
{
    /* R1 = instrs
       R2 = literals
       R3 = ptrs
       R4 = arity
       R5 = bitmap array
    */
    W_ bco, bitmap_arr, bytes, words;
    
    bitmap_arr = R5;

    // Total size = fixed StgBCO header/fields + one word per bitmap word.
    words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
    bytes = WDS(words);

    // May GC: every pointer argument must be kept live across it.
    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );

    bco = Hp - bytes + WDS(1);
    SET_HDR(bco, stg_BCO_info, W_[CCCS]);
    
    StgBCO_instrs(bco)     = R1;
    StgBCO_literals(bco)   = R2;
    StgBCO_ptrs(bco)       = R3;
    StgBCO_arity(bco)      = HALF_W_(R4);   // arity/size are half-word fields
    StgBCO_size(bco)       = HALF_W_(words);
    
    // Copy the arity/bitmap info into the BCO
    W_ i;
    i = 0;
for:
    if (i < StgArrWords_words(bitmap_arr)) {
        StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
        i = i + 1;
        goto for;
    }
    
    RET_P(bco);
}
1954
1955
/* mkApUpd0# - wrap a zero-arity BCO in an updatable AP thunk.
 *
 * Builds an StgAP with no arguments whose fun field is the BCO in R1,
 * and returns it.  See the ASSERT below for the precondition.
 */
mkApUpd0zh_fast
{
    // R1 = the BCO# for the AP
    //  
    W_ ap;

    // This function is *only* used to wrap zero-arity BCOs in an
    // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
    // saturated and always points directly to a FUN or BCO.
    ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
           StgBCO_arity(R1) == HALF_W_(0));

    // Heap check (may GC, R1 kept live), then ticky/profiling bookkeeping
    // for the thunk allocation.
    HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
    TICK_ALLOC_UP_THK(0, 0);
    CCCS_ALLOC(SIZEOF_StgAP);

    ap = Hp - SIZEOF_StgAP + WDS(1);
    SET_HDR(ap, stg_AP_info, W_[CCCS]);
    
    StgAP_n_args(ap) = HALF_W_(0);
    StgAP_fun(ap) = R1;
    
    RET_P(ap);
}
1980
/* unpackClosure# - reflect on a closure's layout.
 *
 * Returns three values: the closure's (untagged) info pointer, a frozen
 * array of its pointer fields, and a byte array of its non-pointer
 * words.  For closure types with non-standard layout the pointer/
 * non-pointer counts are special-cased below rather than read from the
 * info table.
 */
unpackClosurezh_fast
{
/* args: R1 = closure to analyze */
// TODO: Consider the absence of ptrs or nonptrs as a special case ?

    W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
    info  = %GET_STD_INFO(UNTAG(R1));

    // Some closures have non-standard layout, so we omit those here.
    W_ type;
    type = TO_W_(%INFO_TYPE(info));
    switch [0 .. N_CLOSURE_TYPES] type {
    case THUNK_SELECTOR : {
        // A selector thunk has exactly one pointer (the selectee).
        ptrs = 1;
        nptrs = 0;
        goto out;
    }
    case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
         THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
        // Non-standard layout: report no fields at all.
        ptrs = 0;
        nptrs = 0;
        goto out;
    }
    default: {
        // Standard layout: counts come straight from the info table.
        ptrs  = TO_W_(%INFO_PTRS(info)); 
        nptrs = TO_W_(%INFO_NPTRS(info));
        goto out;
    }}
out:

    W_ ptrs_arr_sz, nptrs_arr_sz;
    nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
    ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);

    // One allocation for both result arrays; may GC, R1 kept live.
    ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);

    // Re-untag after the potential GC point.
    W_ clos;
    clos = UNTAG(R1);

    // The two arrays sit back-to-back at the top of the nursery:
    // [ ptrs_arr | nptrs_arr ] ending at Hp.
    ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
    nptrs_arr = Hp - nptrs_arr_sz + WDS(1);

    SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
    p = 0;
for:
    if(p < ptrs) {
         W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
         p = p + 1;
         goto for;
    }
    
    SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(nptrs_arr) = nptrs;
    p = 0;
for2:
    // Non-pointer words follow the pointer fields in the payload.
    if(p < nptrs) {
         W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
         p = p + 1;
         goto for2;
    }
    RET_NPP(info, ptrs_arr, nptrs_arr);
}
2044
2045 /* -----------------------------------------------------------------------------
2046    Thread I/O blocking primitives
2047    -------------------------------------------------------------------------- */
2048
/* Add a thread to the end of the blocked queue. (C-- version of the C
 * macro in Schedule.h).
 *
 * Precondition: tso is not already linked into any queue.  If the queue
 * is empty, tso becomes the head; otherwise it is linked after the
 * current tail via setTSOLink (which also marks the old tail dirty).
 * In both cases the tail pointer is updated to tso.  Only used by the
 * non-threaded-RTS blocking primops below, so no locking is needed.
 */
#define APPEND_TO_BLOCKED_QUEUE(tso)                    \
    ASSERT(StgTSO__link(tso) == END_TSO_QUEUE);         \
    if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
      W_[blocked_queue_hd] = tso;                       \
    } else {                                            \
      foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \
    }                                                   \
    W_[blocked_queue_tl] = tso;
2060
/* waitRead# - block the current thread until R1 (a file descriptor,
 * stashed in block_info for the scheduler) is ready for reading.
 * Non-threaded RTS only; the threaded RTS uses blocking foreign calls
 * instead, so this barfs there.
 */
waitReadzh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitRead# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2077
/* waitWrite# - block the current thread until R1 (a file descriptor,
 * stashed in block_info for the scheduler) is ready for writing.
 * Mirror image of waitReadzh_fast; non-threaded RTS only.
 */
waitWritezh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitWrite# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2094
2095
STRING(stg_delayzh_malloc_str, "delayzh_fast")
/* delay# - block the current thread for (at least) R1 microseconds.
 *
 * Non-threaded RTS only.  On mingw32 the delay is handed to the async
 * I/O manager as a request and the thread parks on the blocked queue;
 * elsewhere a wake-up target in scheduler ticks is computed and the
 * thread is inserted into the sleeping queue, kept sorted by wake-up
 * time.
 */
delayzh_fast
{
#ifdef mingw32_HOST_OS
    W_ ares;
    CInt reqID;
#else
    W_ t, prev, target;
#endif

#ifdef THREADED_RTS
    foreign "C" barf("delay# on threaded RTS") never returns;
#else

    /* args: R1 (microsecond delay amount) */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;

#ifdef mingw32_HOST_OS

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_delayzh_malloc_str);
    (reqID) = foreign "C" addDelayRequest(R1);
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;

    /* Having all async-blocked threads reside on the blocked_queue
     * simplifies matters, so change the status to OnDoProc put the
     * delayed thread on the blocked_queue.
     */
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async_void;

#else

    W_ time;
    W_ divisor;
    (time) = foreign "C" getourtimeofday() [R1];
    // tickInterval is in units of 1/1000 s; convert R1 (microseconds)
    // into ticks, rounding the delay up.  0 means "use the default".
    divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
    if (divisor == 0) {
        divisor = 50;
    }
    divisor = divisor * 1000;
    target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
           + time + 1; /* Add 1 as getourtimeofday rounds down */
    StgTSO_block_info(CurrentTSO) = target;

    /* Insert the new thread in the sleeping queue. */
    // Linear scan for the first entry with a later wake-up time, so the
    // queue stays sorted by block_info (the wake-up tick).
    prev = NULL;
    t = W_[sleeping_queue];
while:
    if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
        prev = t;
        t = StgTSO__link(t);
        goto while;
    }

    StgTSO__link(CurrentTSO) = t;
    if (prev == NULL) {
        W_[sleeping_queue] = CurrentTSO;
    } else {
        // setTSOLink also marks prev dirty for the generational GC.
        foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) [];
    }
    jump stg_block_noregs;
#endif
#endif /* !THREADED_RTS */
}
2167
2168
2169 #ifdef mingw32_HOST_OS
STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
/* asyncRead# - issue an asynchronous read via the Win32 I/O manager and
 * block the current thread until it completes.  An StgAsyncIOResult is
 * malloc'd to receive the completion data and stored in block_info.
 * Non-threaded RTS only (mingw32).
 */
asyncReadzh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncRead# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncReadzh_malloc_str)
                        [R1,R2,R3,R4];
    // Third 0/1 argument to addIORequest selects write (1) vs read (0).
    (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2197
STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
/* asyncWrite# - issue an asynchronous write via the Win32 I/O manager
 * and block the current thread until it completes.  Mirror image of
 * asyncReadzh_fast.  Non-threaded RTS only (mingw32).
 */
asyncWritezh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncWrite# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncWritezh_malloc_str)
                        [R1,R2,R3,R4];
    // Third 0/1 argument to addIORequest selects write (1) vs read (0).
    (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];

    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2225
STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
/* asyncDoProc# - run a procedure (R1, with parameter R2) on the async
 * I/O manager's worker and block the current thread until it finishes.
 * Non-threaded RTS only (mingw32).
 */
asyncDoProczh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
#else

    /* args: R1 = proc, R2 = param */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncDoProczh_malloc_str) 
                                [R1,R2];
    (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO) = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2253 #endif
2254
// noDuplicate# tries to ensure that none of the thunks under
// evaluation by the current thread are also under evaluation by
// another thread.  It relies on *both* threads doing noDuplicate#;
// the second one will get blocked if they are duplicating some work.
noDuplicatezh_fast
{
    // Flush the STG registers to the TSO so threadPaused() can walk
    // this thread's stack.
    SAVE_THREAD_STATE();
    ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
    foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
    
    // threadPaused() may leave the thread marked ThreadKilled; finish
    // it in that case, otherwise reload state and resume at the
    // continuation on top of the stack.
    if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
        jump stg_threadFinished;
    } else {
        LOAD_THREAD_STATE();
        ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
        jump %ENTRY_CODE(Sp(0));
    }
}
2273
/* getApStackVal# - fetch a payload word from an AP_STACK closure.
 *
 * Returns (# ok, val #): when R1 really is an AP_STACK, ok=1 and val is
 * the payload word at offset R2; otherwise ok=0 and val is just R1
 * unchanged, so the caller can tell the lookup did not apply.
 */
getApStackValzh_fast
{
   W_ ap_stack, offset, val, ok;

   /* args: R1 = AP_STACK, R2 = offset */
   ap_stack = R1;
   offset   = R2;

   if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
        ok = 1;
        val = StgAP_STACK_payload(ap_stack,offset); 
   } else {
        ok = 0;
        val = R1;
   }
   RET_NP(ok,val);
}
2291
/* getSpark# - try to steal a spark from this capability's spark pool.
 *
 * Returns (# gotOne, spark #): (1, spark) when findSpark yields one,
 * and (0, False) otherwise.  The False closure is only a placeholder
 * for the unused pointer component of the failure result.  On the
 * non-threaded RTS there is no spark pool, so it always fails.
 *
 * NOTE(review): referencing ghczmprim_GHCziBool_False_closure needs a
 * matching `import` near the top of this file for via-C compilation --
 * verify it is present (the import section is above this chunk).
 */
getSparkzh_fast
{
   W_ spark;

#ifndef THREADED_RTS
   RET_NP(0,ghczmprim_GHCziBool_False_closure);
#else
   (spark) = foreign "C" findSpark(MyCapability());
   if (spark != 0) {
      RET_NP(1,spark);
   } else {
      RET_NP(0,ghczmprim_GHCziBool_False_closure);
   }
#endif
}