Ensure runhaskell is rebuilt in stage2
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 #ifdef __PIC__
31 #ifndef mingw32_HOST_OS
32 import __gmpz_init;
33 import __gmpz_add;
34 import __gmpz_sub;
35 import __gmpz_mul;
36 import __gmpz_gcd;
37 import __gmpn_gcd_1;
38 import __gmpn_cmp;
39 import __gmpz_tdiv_q;
40 import __gmpz_tdiv_r;
41 import __gmpz_tdiv_qr;
42 import __gmpz_fdiv_qr;
43 import __gmpz_divexact;
44 import __gmpz_and;
45 import __gmpz_xor;
46 import __gmpz_ior;
47 import __gmpz_com;
48 #endif
49 import pthread_mutex_lock;
50 import pthread_mutex_unlock;
51 #endif
52 import base_GHCziIOBase_NestedAtomically_closure;
53 import EnterCriticalSection;
54 import LeaveCriticalSection;
55
56 /*-----------------------------------------------------------------------------
57   Array Primitives
58
59   Basically just new*Array - the others are all inline macros.
60
61   The size arg is always passed in R1, and the result returned in R1.
62
63   The slow entry point is for returning from a heap check, the saved
64   size argument must be re-loaded from the stack.
65   -------------------------------------------------------------------------- */
66
67 /* for objects that are *less* than the size of a word, make sure we
68  * round up to the nearest word for the size of the array.
69  */
70
71 newByteArrayzh_fast
72 {
    /* newByteArray# :: Int# -> State# s -> (# State# s, MutableByteArray# s #)
       R1 = requested size in bytes.  Allocates an ARR_WORDS closure whose
       payload is R1 bytes rounded up to a whole number of words. */
73     W_ words, payload_words, n, p;
74     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
75     n = R1;
76     payload_words = ROUNDUP_BYTES_TO_WDS(n);
77     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    // allocated via the C allocator rather than an Hp bump — presumably
    // because the request may exceed the remaining nursery; confirm.
78     ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
79     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
80     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
81     StgArrWords_words(p) = payload_words;
82     RET_P(p);
83 }
84
85 newPinnedByteArrayzh_fast
86 {
    /* Like newByteArrayzh_fast, but allocated with allocatePinned() so the
       GC never moves it, and with the payload aligned to 8 bytes. */
87     W_ words, payload_words, n, p;
88
89     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
90     n = R1;
91     payload_words = ROUNDUP_BYTES_TO_WDS(n);
92
93     // We want an 8-byte aligned array.  allocatePinned() gives us
94     // 8-byte aligned memory by default, but we want to align the
95     // *goods* inside the ArrWords object, so we have to check the
96     // size of the ArrWords header and adjust our size accordingly.
97     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
98     if ((SIZEOF_StgArrWords & 7) != 0) {
99         words = words + 1;
100     }
101
102     ("ptr" p) = foreign "C" allocatePinned(words) [];
103     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
104
105     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
106     // have to push the object forward one word so that the goods
107     // fall on an 8-byte boundary.
108     if ((SIZEOF_StgArrWords & 7) != 0) {
109         p = p + WDS(1);
110     }
111
112     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
113     StgArrWords_words(p) = payload_words;
114     RET_P(p);
115 }
116
117 newArrayzh_fast
118 {
119     W_ words, n, init, arr, p;
120     /* Args: R1 = words, R2 = initialisation value */
    /* Returns a MUT_ARR_PTRS_DIRTY of n slots, each set to R2.  Born
       dirty: a fresh array is always on its generation's mutable list. */
121
122     n = R1;
123     MAYBE_GC(R2_PTR,newArrayzh_fast);
124
125     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
126     ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
127     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
128
129     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
130     StgMutArrPtrs_ptrs(arr) = n;
131
132     // Initialise all elements of the array with the value in R2
133     init = R2;
134     p = arr + SIZEOF_StgMutArrPtrs;
135   for:
136     if (p < arr + WDS(words)) {
137         W_[p] = init;
138         p = p + WDS(1);
139         goto for;
140     }
141
142     RET_P(arr);
143 }
144
145 unsafeThawArrayzh_fast
146 {
  /* R1 = a MUT_ARR_PTRS_FROZEN(0) closure; thaws it in place back to
     MUT_ARR_PTRS_DIRTY and returns it. */
147   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
148   //
149   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
150   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
151   // it on the mutable list for the GC to remove (removing something from
152   // the mutable list is not easy, because the mut_list is only singly-linked).
153   // 
154   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
155   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
156   // to indicate that it is still on the mutable list.
157   //
158   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
159   // either it is on a mut_list, or it isn't.  We adopt the convention that
160   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
161   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
162   // we put it on the mutable list more than once, but it would get scavenged
163   // multiple times during GC, which would be unnecessarily slow.
164   //
165   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
166         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        // not on a mut_list yet, so it must be recorded now
167         recordMutable(R1, R1);
168         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
169         RET_P(R1);
170   } else {
        // FROZEN0: already on a mut_list, just flip the info pointer
171         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
172         RET_P(R1);
173   }
174 }
175
176 /* -----------------------------------------------------------------------------
177    MutVar primitives
178    -------------------------------------------------------------------------- */
179
180 newMutVarzh_fast
181 {
182     W_ mv;
183     /* Args: R1 = initialisation value */
    /* Returns a fresh MUT_VAR_DIRTY (MutVar#) containing R1, allocated
       inline from Hp via ALLOC_PRIM. */
184
185     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
186
    // ALLOC_PRIM bumps Hp past the object; step back to its first word
187     mv = Hp - SIZEOF_StgMutVar + WDS(1);
188     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
189     StgMutVar_var(mv) = R1;
190     
191     RET_P(mv);
192 }
193
194 atomicModifyMutVarzh_fast
195 {
196     W_ mv, z, x, y, r;
197     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
198
199     /* If x is the current contents of the MutVar#, then 
200        We want to make the new contents point to

202          (sel_0 (f x))
203  
204        and the return value is
205          
206          (sel_1 (f x))

208         obviously we can share (f x).

210          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
211          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
212          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
213     */
214
    /* Each thunk must be at least MIN_UPD_SIZE words so it can later be
       overwritten with an indirection/blackhole when updated. */
215 #if MIN_UPD_SIZE > 1
216 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
217 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
218 #else
219 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
220 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
221 #endif
222
223 #if MIN_UPD_SIZE > 2
224 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
225 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
226 #else
227 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
228 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
229 #endif
230
231 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
232
    // Reserve space for all three thunks up front, so no GC can occur
    // between reading and writing the MutVar below.
233    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
234
235 #if defined(THREADED_RTS)
    // NOTE(review): one shared mutex appears to serialise all
    // atomicModifyMutVar# calls in the threaded RTS — confirm.
236     ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
237 #endif
238
    // x = current contents of the MutVar#
239    x = StgMutVar_var(R1);
240
241    TICK_ALLOC_THUNK_2();
242    CCCS_ALLOC(THUNK_2_SIZE);
243    z = Hp - THUNK_2_SIZE + WDS(1);
244    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
245    LDV_RECORD_CREATE(z);
246    StgThunk_payload(z,0) = R2;
247    StgThunk_payload(z,1) = x;
248
249    TICK_ALLOC_THUNK_1();
250    CCCS_ALLOC(THUNK_1_SIZE);
251    y = z - THUNK_1_SIZE;
252    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
253    LDV_RECORD_CREATE(y);
254    StgThunk_payload(y,0) = z;
255
256    StgMutVar_var(R1) = y;
257    foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];
258
259    TICK_ALLOC_THUNK_1();
260    CCCS_ALLOC(THUNK_1_SIZE);
261    r = y - THUNK_1_SIZE;
262    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
263    LDV_RECORD_CREATE(r);
264    StgThunk_payload(r,0) = z;
265
266 #if defined(THREADED_RTS)
267     RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
268 #endif
269
270    RET_P(r);
271 }
272
273 /* -----------------------------------------------------------------------------
274    Weak Pointer Primitives
275    -------------------------------------------------------------------------- */
276
277 STRING(stg_weak_msg,"New weak pointer at %p\n")
278
279 mkWeakzh_fast
280 {
281   /* R1 = key
282      R2 = value
283      R3 = finalizer (or NULL)
284   */
  /* Allocates a WEAK closure and links it onto the global weak_ptr_list. */
285   W_ w;
286
287   if (R3 == NULL) {
288     R3 = stg_NO_FINALIZER_closure;
289   }
290
291   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
292
293   w = Hp - SIZEOF_StgWeak + WDS(1);
294   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
295
296   StgWeak_key(w)       = R1;
297   StgWeak_value(w)     = R2;
298   StgWeak_finalizer(w) = R3;
299
  // cons onto the global list so the GC can find all live weak pointers
300   StgWeak_link(w)       = W_[weak_ptr_list];
301   W_[weak_ptr_list]     = w;
302
303   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
304
305   RET_P(w);
306 }
307
308
309 finalizzeWeakzh_fast
310 {
311   /* R1 = weak ptr
312    */
  /* Kills the weak pointer (turns it into DEAD_WEAK) and returns its
     finalizer as (# 1, finalizer #), or (# 0, NO_FINALIZER #) if it was
     already dead or had no finalizer. */
313   W_ w, f;
314
315   w = R1;
316
317   // already dead?
318   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
319       RET_NP(0,stg_NO_FINALIZER_closure);
320   }
321
322   // kill it
323 #ifdef PROFILING
324   // @LDV profiling
325   // A weak pointer is inherently used, so we do not need to call
326   // LDV_recordDead_FILL_SLOP_DYNAMIC():
327   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
328   // or, LDV_recordDead():
329   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
330   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
331   // large as weak pointers, so there is no need to fill the slop, either.
332   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
333 #endif
334
335   //
336   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
337   //
338   SET_INFO(w,stg_DEAD_WEAK_info);
339   LDV_RECORD_CREATE(w);
340
  // read the finalizer before overwriting fields with the DEAD_WEAK layout
341   f = StgWeak_finalizer(w);
342   StgDeadWeak_link(w) = StgWeak_link(w);
343
344   /* return the finalizer */
345   if (f == stg_NO_FINALIZER_closure) {
346       RET_NP(0,stg_NO_FINALIZER_closure);
347   } else {
348       RET_NP(1,f);
349   }
350 }
351
deRefWeakzh_fast
{
  /* R1 = weak ptr.
     Returns (# flag :: Int#, val #): flag is 1 with the stored value while
     the weak pointer is still alive (info ptr is stg_WEAK_info); otherwise
     flag is 0 and val is the weak pointer itself. */
  W_ w, code, val;

  w = R1;

  // Assume the "dead" answer, then overwrite it if the weak ptr is live.
  code = 0;
  val = w;
  if (GET_INFO(w) == stg_WEAK_info) {
      code = 1;
      val = StgWeak_value(w);
  }
  RET_NP(code,val);
}
367
368 /* -----------------------------------------------------------------------------
369    Arbitrary-precision Integer operations.
370
371    There are some assumptions in this code that mp_limb_t == W_.  This is
372    the case for all the platforms that GHC supports, currently.
373    -------------------------------------------------------------------------- */
374
375 int2Integerzh_fast
376 {
377    /* arguments: R1 = Int# */
    /* Converts an Int# to a GMP-style Integer: a sign-carrying size
       (-1, 0 or 1 here) plus a one-limb ByteArray# holding |R1|. */
378
379    W_ val, s, p;        /* to avoid aliasing */
380
381    val = R1;
382    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
383
384    p = Hp - SIZEOF_StgArrWords;
385    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
386    StgArrWords_words(p) = 1;
387
388    /* mpz_set_si is inlined here, makes things simpler */
389    if (%lt(val,0)) { 
390         s  = -1;
391         Hp(0) = -val;
392    } else { 
393      if (%gt(val,0)) {
394         s = 1;
395         Hp(0) = val;
396      } else {
        // val == 0: size 0, limb word left uninitialised (never read)
397         s = 0;
398      }
399   }
400
401    /* returns (# size  :: Int#, 
402                  data  :: ByteArray# 
403                #)
404    */
405    RET_NP(s,p);
406 }
407
408 word2Integerzh_fast
409 {
410    /* arguments: R1 = Word# */
    /* Like int2Integerzh_fast but for Word#: the result size is 0 or 1,
       never negative. */
411
412    W_ val, s, p;        /* to avoid aliasing */
413
414    val = R1;
415
416    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
417
418    p = Hp - SIZEOF_StgArrWords;
419    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
420    StgArrWords_words(p) = 1;
421
422    if (val != 0) {
423         s = 1;
424         W_[Hp] = val;
425    } else {
426         s = 0;
427    }
428
429    /* returns (# size  :: Int#, 
430                  data  :: ByteArray# #)
431    */
432    RET_NP(s,p);
433 }
434
435
436 /*
437  * 'long long' primops for converting to/from Integers.
438  */
439
440 #ifdef SUPPORT_LONG_LONGS
441
442 int64ToIntegerzh_fast
443 {
444    /* arguments: L1 = Int64# */
    /* Converts a signed 64-bit value to an Integer as (size, limbs).
       NOTE(review): the hi/lo split via ">> 32" assumes W_ is 32 bits;
       this code is under SUPPORT_LONG_LONGS, presumably 32-bit-only —
       confirm. */
445
446    L_ val;
447    W_ hi, lo, s, neg, words_needed, p;
448
449    val = L1;
450    neg = 0;
451
452    hi = TO_W_(val >> 32);
453    lo = TO_W_(val);
454
    // hi == 0: small non-negative; hi == -1 with lo /= 0: small negative.
455    if ( hi == 0 || (hi == 0xFFFFFFFF && lo != 0) )  {
456        // minimum is one word
457        words_needed = 1;
458    } else { 
459        words_needed = 2;
460    }
461
462    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
463                NO_PTRS, int64ToIntegerzh_fast );
464
465    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
466    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
467    StgArrWords_words(p) = words_needed;
468
    // store the magnitude; negate (hi,lo) as a 64-bit two's-complement pair
469    if ( %lt(hi,0) ) {
470      neg = 1;
471      lo = -lo;
472      if(lo == 0) {
473        hi = -hi;
474      } else {
475        hi = -hi - 1;
476      }
477    }
478
479    if ( words_needed == 2 )  { 
480       s = 2;
481       Hp(-1) = lo;
482       Hp(0) = hi;
483    } else { 
484        if ( lo != 0 ) {
485            s = 1;
486            Hp(0) = lo;
487        } else /* val==0 */  {
488            s = 0;
489        }
490    }
    // GMP convention: sign lives in the size field
491    if ( neg != 0 ) {
492         s = -s;
493    }
494
495    /* returns (# size  :: Int#, 
496                  data  :: ByteArray# #)
497    */
498    RET_NP(s,p);
499 }
500 word64ToIntegerzh_fast
501 {
502    /* arguments: L1 = Word64# */
    /* Unsigned analogue of int64ToIntegerzh_fast: result size is 0, 1
       or 2, never negative.  Same 32-bit word-size assumption applies. */
503
504    L_ val;
505    W_ hi, lo, s, words_needed, p;
506
507    val = L1;
508    hi = TO_W_(val >> 32);
509    lo = TO_W_(val);
510
511    if ( hi != 0 ) {
512       words_needed = 2;
513    } else {
514       words_needed = 1;
515    }
516
517    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
518                NO_PTRS, word64ToIntegerzh_fast );
519
520    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
521    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
522    StgArrWords_words(p) = words_needed;
523
524    if ( hi != 0 ) { 
525      s = 2;
526      Hp(-1) = lo;
527      Hp(0)  = hi;
528    } else {
529       if ( lo != 0 ) {
530         s = 1;
531         Hp(0) = lo;
532      } else /* val==0 */  {
533       s = 0;
534      }
535   }
536
537    /* returns (# size  :: Int#, 
538                  data  :: ByteArray# #)
539    */
540    RET_NP(s,p);
541 }
542
543
544
545 #endif /* SUPPORT_LONG_LONGS */
546
547 /* ToDo: this is shockingly inefficient */
548
/* In the non-threaded RTS the GMP temporaries are plain static buffers;
   the threaded RTS instead keeps them per-capability in the register
   table, fetched via FETCH_MP_TEMP below. */
549 #ifndef THREADED_RTS
550 section "bss" {
551   mp_tmp1:
552     bits8 [SIZEOF_MP_INT];
553 }
554
555 section "bss" {
556   mp_tmp2:
557     bits8 [SIZEOF_MP_INT];
558 }
559
560 section "bss" {
561   mp_result1:
562     bits8 [SIZEOF_MP_INT];
563 }
564
565 section "bss" {
566   mp_result2:
567     bits8 [SIZEOF_MP_INT];
568 }
569 #endif
570
/* FETCH_MP_TEMP(X): in the threaded RTS, declare X as a local pointing at
   the capability-local scratch slot rX in the register table; in the
   non-threaded RTS it expands to nothing and X names the static buffer
   above. */
571 #ifdef THREADED_RTS
572 #define FETCH_MP_TEMP(X) \
573 W_ X; \
574 X = BaseReg + (OFFSET_StgRegTable_r ## X);
575 #else
576 #define FETCH_MP_TEMP(X) /* Nothing */
577 #endif
578
/* GMP_TAKE2_RET1(name, mp_fun): defines primop `name` taking two Integers
   as (size, limbs-ByteArray#) pairs in R1/R2 and R3/R4, applying the binary
   GMP function `mp_fun`, and returning (# size, ByteArray# #).
   NOTE(review): the result ByteArray# is recovered by subtracting the
   ArrWords header from GMP's _mp_d pointer — this relies on the RTS
   supplying GMP's allocator so limb data lives inside an ArrWords;
   confirm against the GMP hooks elsewhere in the RTS. */
579 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
580 name                                                                    \
581 {                                                                       \
582   CInt s1, s2;                                                          \
583   W_ d1, d2;                                                            \
584   FETCH_MP_TEMP(mp_tmp1);                                               \
585   FETCH_MP_TEMP(mp_tmp2);                                               \
586   FETCH_MP_TEMP(mp_result1)                                             \
587   FETCH_MP_TEMP(mp_result2);                                            \
588                                                                         \
589   /* call doYouWantToGC() */                                            \
590   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
591                                                                         \
592   s1 = W_TO_INT(R1);                                                    \
593   d1 = R2;                                                              \
594   s2 = W_TO_INT(R3);                                                    \
595   d2 = R4;                                                              \
596                                                                         \
597   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
598   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
599   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
600   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
601   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
602   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
603                                                                         \
604   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
605                                                                         \
606   /* Perform the operation */                                           \
607   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
608                                                                         \
609   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
610          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
611 }
612
/* GMP_TAKE1_RET1(name, mp_fun): unary version of GMP_TAKE2_RET1 — one
   Integer argument in R1/R2, result returned the same way. */
613 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
614 name                                                                    \
615 {                                                                       \
616   CInt s1;                                                              \
617   W_ d1;                                                                \
618   FETCH_MP_TEMP(mp_tmp1);                                               \
619   FETCH_MP_TEMP(mp_result1)                                             \
620                                                                         \
621   /* call doYouWantToGC() */                                            \
622   MAYBE_GC(R2_PTR, name);                                               \
623                                                                         \
624   d1 = R2;                                                              \
625   s1 = W_TO_INT(R1);                                                    \
626                                                                         \
627   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
628   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
629   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
630                                                                         \
631   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
632                                                                         \
633   /* Perform the operation */                                           \
634   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
635                                                                         \
636   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
637          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
638 }
639
/* GMP_TAKE2_RET2(name, mp_fun): like GMP_TAKE2_RET1 but for GMP functions
   producing two results (e.g. tdiv_qr), returned as two (size, ByteArray#)
   pairs via RET_NPNP. */
640 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
641 name                                                                                    \
642 {                                                                                       \
643   CInt s1, s2;                                                                          \
644   W_ d1, d2;                                                                            \
645   FETCH_MP_TEMP(mp_tmp1);                                                               \
646   FETCH_MP_TEMP(mp_tmp2);                                                               \
647   FETCH_MP_TEMP(mp_result1)                                                             \
648   FETCH_MP_TEMP(mp_result2)                                                             \
649                                                                                         \
650   /* call doYouWantToGC() */                                                            \
651   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
652                                                                                         \
653   s1 = W_TO_INT(R1);                                                                    \
654   d1 = R2;                                                                              \
655   s2 = W_TO_INT(R3);                                                                    \
656   d2 = R4;                                                                              \
657                                                                                         \
658   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
659   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
660   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
661   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
662   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
663   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
664                                                                                         \
665   foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
666   foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
667                                                                                         \
668   /* Perform the operation */                                                           \
669   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
670                                                                                         \
671   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
672            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
673            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
674            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
675 }
676
/* Instantiate the arithmetic and bitwise Integer primops from the
   macros above, one per GMP entry point. */
677 GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
678 GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
679 GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
680 GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
681 GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
682 GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
683 GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
684 GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
685 GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
686 GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
687 GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)
688
689 GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
690 GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
691
692 #ifndef THREADED_RTS
693 section "bss" {
694   mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
695 }
696 #endif
697
698 gcdIntzh_fast
699 {
700     /* R1 = the first Int#; R2 = the second Int# */
    /* gcd of two machine ints via GMP's mpn layer: R1 is boxed into a
       one-limb buffer, R2 passed directly as the single-limb argument. */
701     W_ r; 
702     FETCH_MP_TEMP(mp_tmp_w);
703
704     W_[mp_tmp_w] = R1;
705     (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
706
707     R1 = r;
708     /* Result parked in R1, return via info-pointer at TOS */
709     jump %ENTRY_CODE(Sp(0));
710 }
711
712
713 gcdIntegerIntzh_fast
714 {
715     /* R1 = s1; R2 = d1; R3 = the int */
    /* gcd of an Integer (size R1, limbs R2) and an Int# (R3).
       NOTE(review): __gmpn_gcd_1 requires a positive limb count and
       non-zero operands — presumably guaranteed by the Haskell caller. */
716     W_ s1;
717     (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
718     R1 = s1;
719     
720     /* Result parked in R1, return via info-pointer at TOS */
721     jump %ENTRY_CODE(Sp(0));
722 }
723
724
725 cmpIntegerIntzh_fast
726 {
727     /* R1 = s1; R2 = d1; R3 = the int */
    /* Compares an Integer against an Int#; returns <0, 0 or >0 in R1. */
728     W_ usize, vsize, v_digit, u_digit;
729
730     usize = R1;
731     vsize = 0;
732     v_digit = R3;
733
734     // paraphrased from __gmpz_cmp_si() in the GMP sources
735     if (%gt(v_digit,0)) {
736         vsize = 1;
737     } else { 
738         if (%lt(v_digit,0)) {
739             vsize = -1;
740             v_digit = -v_digit;
741         }
742     }
743
    // different signs (or different magnitudes of size): sign decides
744     if (usize != vsize) {
745         R1 = usize - vsize; 
746         jump %ENTRY_CODE(Sp(0));
747     }
748
749     if (usize == 0) {
750         R1 = 0; 
751         jump %ENTRY_CODE(Sp(0));
752     }
753
    // sizes equal and non-zero (both +/-1): compare the single limbs
754     u_digit = W_[BYTE_ARR_CTS(R2)];
755
756     if (u_digit == v_digit) {
757         R1 = 0; 
758         jump %ENTRY_CODE(Sp(0));
759     }
760
761     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
762         R1 = usize; 
763     } else {
764         R1 = -usize; 
765     }
766
767     jump %ENTRY_CODE(Sp(0));
768 }
769
770 cmpIntegerzh_fast
771 {
772     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
    /* Compares two Integers; returns <0, 0 or >0 in R1. */
773     W_ usize, vsize, size, up, vp;
774     CInt cmp;
775
776     // paraphrased from __gmpz_cmp() in the GMP sources
777     usize = R1;
778     vsize = R3;
779
    // sizes carry the sign, so unequal sizes settle it immediately
780     if (usize != vsize) {
781         R1 = usize - vsize; 
782         jump %ENTRY_CODE(Sp(0));
783     }
784
785     if (usize == 0) {
786         R1 = 0; 
787         jump %ENTRY_CODE(Sp(0));
788     }
789
790     if (%lt(usize,0)) { // NB. not <, which is unsigned
791         size = -usize;
792     } else {
793         size = usize;
794     }
795
796     up = BYTE_ARR_CTS(R2);
797     vp = BYTE_ARR_CTS(R4);
798
799     (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];
800
801     if (cmp == 0 :: CInt) {
802         R1 = 0; 
803         jump %ENTRY_CODE(Sp(0));
804     }
805
    // flip the magnitude comparison when both operands are negative
806     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
807         R1 = 1;
808     } else {
809         R1 = (-1); 
810     }
811     /* Result parked in R1, return via info-pointer at TOS */
812     jump %ENTRY_CODE(Sp(0));
813 }
814
815 integer2Intzh_fast
816 {
817     /* R1 = s; R2 = d */
    /* Truncates an Integer to an Int#: takes the least-significant limb
       and negates it when the size (sign) is negative. */
818     W_ r, s;
819
820     s = R1;
821     if (s == 0) {
822         r = 0;
823     } else {
824         r = W_[R2 + SIZEOF_StgArrWords];
825         if (%lt(s,0)) {
826             r = -r;
827         }
828     }
829     /* Result parked in R1, return via info-pointer at TOS */
830     R1 = r;
831     jump %ENTRY_CODE(Sp(0));
832 }
833
integer2Wordzh_fast
{
  /* R1 = s; R2 = d.
     Truncates an Integer to a Word#: the least-significant limb,
     two's-complement-negated when the size (sign) is negative. */
  W_ r, s;

  s = R1;
  r = 0;
  if (s != 0) {
    // first limb lives just past the ArrWords header
    r = W_[R2 + SIZEOF_StgArrWords];
    if (%lt(s,0)) {
        r = -r;
    }
  }

  /* Result parked in R1, return via info-pointer at TOS */
  R1 = r;
  jump %ENTRY_CODE(Sp(0));
}
852
decodeFloatzh_fast
{   /* FIX: the opening brace was missing; every other primop in this file
       opens its body with one, and the matching close brace is present. */
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);
    
    /* arguments: F1 = Float# */
    arg = F1;
    
    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
    
    /* Be prepared to tell Lennart-coded __decodeFloat
       where mantissa._mp_d can be put (it does not care about the rest) */
    p = Hp - SIZEOF_StgArrWords;
    SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
    StgArrWords_words(p) = 1;
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
    
    /* Perform the operation */
    foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
    
    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
878
decodeFloatzuIntzh_fast
{   /* FIX: restored the missing opening brace (present on every sibling
       primop; the close brace below was unmatched). */
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);
    
    /* arguments: F1 = Float# */
    arg = F1;
    
    /* Perform the operation — no heap allocation, results land in the
       two scratch words */
    foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
    
    /* returns: (Int# (mantissa), Int# (exponent)) */
    RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
}
895
896 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
897 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
898
decodeDoublezh_fast
{   /* FIX: restored the missing opening brace (present on every sibling
       primop; the close brace below was unmatched). */
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: D1 = Double# */
    arg = D1;

    ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
    
    /* Be prepared to tell Lennart-coded __decodeDouble
       where mantissa.d can be put (it does not care about the rest) */
    p = Hp - ARR_SIZE + WDS(1);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

    /* Perform the operation */
    foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
    
    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
924
decodeDoublezu2Intzh_fast
{   /* FIX: restored the missing opening brace (present on every sibling
       primop; the close brace below was unmatched). */
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp2);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: D1 = Double# */
    arg = D1;

    /* Perform the operation — no heap allocation, results land in the
       three scratch words */
    foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr", mp_tmp_w "ptr", arg) [];
    
    /* returns: (Int# (mant high), Int# (mant low), Int# (expn)) */
    RET_NNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_tmp_w]);
}
942
943 /* -----------------------------------------------------------------------------
944  * Concurrency primitives
945  * -------------------------------------------------------------------------- */
946
947 forkzh_fast
948 {
949   /* args: R1 = closure to spark */
  /* fork#: creates a new IO thread running R1, schedules it on the
     current capability, and returns the new ThreadId#. */
950
951   MAYBE_GC(R1_PTR, forkzh_fast);
952
953   W_ closure;
954   W_ threadid;
955   closure = R1;
956
957   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
958                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
959                                 closure "ptr") [];
960
961   /* start blocked if the current thread is blocked */
  // i.e. the child inherits the parent's BLOCKEX/INTERRUPTIBLE mask
962   StgTSO_flags(threadid) = 
963      StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
964                                 (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));
965
966   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
967
968   // switch at the earliest opportunity
969   CInt[context_switch] = 1 :: CInt;
970   
971   RET_P(threadid);
972 }
973
/*
 * forkOn# :: Int# -> IO () -> ... -> (# ..., ThreadId# #)
 *
 * Like fork#, but pins the new thread to the given capability (cpu)
 * via scheduleThreadOn instead of scheduleThread.
 */
forkOnzh_fast
{
  /* args: R1 = cpu, R2 = closure to spark */

  MAYBE_GC(R2_PTR, forkOnzh_fast);

  W_ cpu;
  W_ closure;
  W_ threadid;
  cpu = R1;
  closure = R2;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
                                RtsFlags_GcFlags_initialStkSize(RtsFlags), 
                                closure "ptr") [];

  /* start blocked if the current thread is blocked */
  /* copy the parent's TSO_BLOCKEX / TSO_INTERRUPTIBLE flag bits so the
     child inherits the exception-masking state */
  StgTSO_flags(threadid) = 
     StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
                                (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));

  foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];

  // switch at the earliest opportunity
  CInt[context_switch] = 1 :: CInt;
  
  RET_P(threadid);
}
1002
/* yield# : give up the capability; implemented entirely by the
   stg_yield_noregs stack-switching code. */
yieldzh_fast
{
  jump stg_yield_noregs;
}
1007
/* myThreadId# : return the current thread's TSO as a ThreadId#. */
myThreadIdzh_fast
{
  /* no args. */
  RET_P(CurrentTSO);
}
1013
/* labelThread# : attach a debug label (C string) to a thread.
   A no-op unless the RTS was built with DEBUG. */
labelThreadzh_fast
{
  /* args: 
        R1 = ThreadId#
        R2 = Addr# */
#ifdef DEBUG
  foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
#endif
  jump %ENTRY_CODE(Sp(0));
}
1024
/* isCurrentThreadBound# : 1 if the current thread is bound to an OS
   thread (per isThreadBound in the C RTS), 0 otherwise. */
isCurrentThreadBoundzh_fast
{
  /* no args */
  W_ r;
  (r) = foreign "C" isThreadBound(CurrentTSO) [];
  RET_N(r);
}
1032
1033
1034 /* -----------------------------------------------------------------------------
1035  * TVar primitives
1036  * -------------------------------------------------------------------------- */
1037
/* Registerised builds return through Sp(0) directly; unregisterised
   builds have the returned value at Sp(0), so the return address sits
   one word further down (SP_OFF = 1) and IF_NOT_REG_R1 expands the
   extra stack-fixup code.  */
#ifdef REG_R1
#define SP_OFF 0
#define IF_NOT_REG_R1(x) 
#else
#define SP_OFF 1
#define IF_NOT_REG_R1(x) x
#endif
1045
1046 // Catch retry frame ------------------------------------------------------------
1047
/*
 * Return into a CATCH_RETRY_FRAME: the branch (first or alternative)
 * completed without retrying.  Try to commit its nested transaction
 * into the enclosing one; on failure restart the branch that was
 * running in a fresh nested transaction.
 */
INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5)
{
   W_ r, frame, trec, outer;
   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
   (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
   if (r != 0) {
     /* Succeeded (either first branch or second branch) */
     StgTSO_trec(CurrentTSO) = outer;
     Sp = Sp + SIZEOF_StgCatchRetryFrame;
     IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
     jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
     /* Did not commit: re-execute */
     W_ new_trec;
     ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
     StgTSO_trec(CurrentTSO) = new_trec;
     /* re-run whichever branch was in progress when the commit failed */
     if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
       R1 = StgCatchRetryFrame_alt_code(frame);
     } else {
       R1 = StgCatchRetryFrame_first_code(frame);
     }
     jump stg_ap_v_fast;
   }
}
1080
1081
1082 // Atomically frame ------------------------------------------------------------
1083
/*
 * Return into an ATOMICALLY_FRAME: the transaction body (or one of its
 * invariant checks) finished.  Walk the queue of invariants to check,
 * running each in its own nested transaction, and finally attempt to
 * commit the whole transaction; on commit failure, restart it.
 */
INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
  W_ frame, trec, valid, next_invariant, q, outer;
  IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (outer == NO_TREC) {
    /* First time back at the atomically frame -- pick up invariants */
    ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;

  } else {
    /* Second/subsequent time back at the atomically frame -- abort the
     * tx that's checking the invariant and move on to the next one */
    StgTSO_trec(CurrentTSO) = outer;
    q = StgAtomicallyFrame_next_invariant_to_check(frame);
    StgInvariantCheckQueue_my_execution(q) = trec;
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    /* Don't free trec -- it's linked from q and will be stashed in the
     * invariant if we eventually commit. */
    q = StgInvariantCheckQueue_next_queue_entry(q);
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    trec = outer;
  }

  q = StgAtomicallyFrame_next_invariant_to_check(frame);

  if (q != END_INVARIANT_CHECK_QUEUE) {
    /* We can't commit yet: another invariant to check */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;

    next_invariant = StgInvariantCheckQueue_invariant(q);
    R1 = StgAtomicInvariant_code(next_invariant);
    jump stg_ap_v_fast;

  } else {

    /* We've got no more invariants to check, try to commit */
    (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
    if (valid != 0) {
      /* Transaction was valid: commit succeeded */
      StgTSO_trec(CurrentTSO) = NO_TREC;
      Sp = Sp + SIZEOF_StgAtomicallyFrame;
      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
      jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
      /* Transaction was not valid: try again */
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
      R1 = StgAtomicallyFrame_code(frame);
      jump stg_ap_v_fast;
    }
  }
}
1147
/*
 * Return into an ATOMICALLY_FRAME whose thread was blocked in retry.
 * Ask the STM (stmReWait) whether the wait is still justified: if so,
 * block again; otherwise restart the transaction from scratch.
 */
INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
  W_ frame, trec, valid;
  IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

  frame = Sp;

  /* The TSO is currently waiting: should we stop waiting? */
  (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
  if (valid != 0) {
    /* Previous attempt is still valid: no point trying again yet */
          IF_NOT_REG_R1(Sp_adj(-2);
                        Sp(1) = stg_NO_FINALIZER_closure;
                        Sp(0) = stg_ut_1_0_unreg_info;)
    jump stg_block_noregs;
  } else {
    /* Previous attempt is no longer valid: try again */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    /* flip the frame back to the normal atomically frame before re-running */
    StgHeader_info(frame) = stg_atomically_frame_info;
    R1 = StgAtomicallyFrame_code(frame);
    jump stg_ap_v_fast;
  }
}
1176
1177 // STM catch frame --------------------------------------------------------------
1178
/* NOTE(review): SP_OFF is re-#defined here with the same values as in
   the TVar section above; identical redefinition is accepted by the
   preprocessor, so this is harmless but redundant. */
#ifdef REG_R1
#define SP_OFF 0
#else
#define SP_OFF 1
#endif
1184
1185 /* Catch frames are very similar to update frames, but when entering
1186  * one we just pop the frame off the stack and perform the correct
1187  * kind of return to the activation record underneath us on the stack.
1188  */
1189
/*
 * Return into a CATCH_STM_FRAME: the protected body finished without
 * raising.  Commit its nested transaction into the enclosing one; if
 * the commit fails, re-run the body in a fresh nested transaction.
 */
INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
   {
      IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
      W_ r, frame, trec, outer;
      frame = Sp;
      trec = StgTSO_trec(CurrentTSO);
      ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
      (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
      if (r != 0) {
        /* Commit succeeded */
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchSTMFrame;
        IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
        jump Sp(SP_OFF);
      } else {
        /* Commit failed */
        W_ new_trec;
        ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
        StgTSO_trec(CurrentTSO) = new_trec;
        R1 = StgCatchSTMFrame_code(frame);
        jump stg_ap_v_fast;
      }
   }
1217
1218
1219 // Primop definition ------------------------------------------------------------
1220
/*
 * atomically# :: STM a -> IO a
 *
 * Push an atomically frame, start a top-level transaction, and run the
 * STM action.  Nesting atomically inside a transaction is an error and
 * raises NestedAtomically.
 */
atomicallyzh_fast
{
  W_ frame;
  W_ old_trec;
  W_ new_trec;
  
  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR, atomicallyzh_fast); 

  /* Args: R1 = m :: STM a */
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);

  old_trec = StgTSO_trec(CurrentTSO);

  /* Nested transactions are not allowed; raise an exception */
  if (old_trec != NO_TREC) {
     R1 = base_GHCziIOBase_NestedAtomically_closure;
     jump raisezh_fast;
  }

  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
  StgAtomicallyFrame_code(frame) = R1;
  StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;

  /* Start the memory transaction */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1256
1257
/*
 * catchSTM# :: STM a -> (Exception -> STM a) -> STM a
 *
 * Push a catch-STM frame recording the handler, then run the body in a
 * nested transaction so its effects can be rolled back if it raises.
 */
catchSTMzh_fast
{
  W_ frame;
  
  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */
  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
  StgCatchSTMFrame_handler(frame) = R2;
  StgCatchSTMFrame_code(frame) = R1;

  /* Start a nested transaction to run the body of the try block in */
  W_ cur_trec;  
  W_ new_trec;
  cur_trec = StgTSO_trec(CurrentTSO);
  /* NOTE(review): this is the only foreign call in this section with no
     live-register annotation ([...]) -- confirm that was intended. */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1284
1285
/*
 * catchRetry# :: STM a -> STM a -> STM a
 *
 * Push a catch-retry frame (running_alt_code = false) and run the first
 * branch in a nested transaction; a retry# inside it will switch to the
 * alternative branch (see retryzh_fast).
 */
catchRetryzh_fast
{
  W_ frame;
  W_ new_trec;
  W_ trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 

  /* Args: R1 :: STM a */
  /* Args: R2 :: STM a */
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;
  
  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1316
1317
/*
 * retry# :: STM a
 *
 * Unwind the stack to the nearest CATCH_RETRY_FRAME or
 * ATOMICALLY_FRAME.  At a catch-retry frame: if the first branch was
 * running, abort it and start the alternative; if the alternative was
 * running, propagate the retry outwards.  At the atomically frame:
 * call stmWait to put the thread on the queues of the TVars it read
 * and block; if the transaction has meanwhile become invalid, restart
 * it immediately instead.
 */
retryzh_fast
{
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

  MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
  StgTSO_sp(CurrentTSO) = Sp;
  (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
  Sp = StgTSO_sp(CurrentTSO);
  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
    // Abort the transaction attempting the current branch
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    // FIX: was "!StgCatchRetryFrame_running_alt_code(frame) != 0::I32";
    // either parse of that expression reduces to "field == 0", so use
    // the equivalent, readable comparison directly.
    if (StgCatchRetryFrame_running_alt_code(frame) == 0::I32) {
      // Retry in the first branch: try the alternative
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
      R1 = StgCatchRetryFrame_alt_code(frame);
      jump stg_ap_v_fast;
    } else {
      // Retry in the alternative code: propagate the retry
      StgTSO_trec(CurrentTSO) = outer;
      Sp = Sp + SIZEOF_StgCatchRetryFrame;
      goto retry_pop_stack;
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait 
  ASSERT(frame_type == ATOMICALLY_FRAME);
  if (outer != NO_TREC) {
    // We called retry while checking invariants, so abort the current
    // invariant check (merging its TVar accesses into the parent's read
    // set so we'll wait on them)
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    trec = outer;
    StgTSO_trec(CurrentTSO) = trec;
    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
  }
  ASSERT(outer == NO_TREC);

  (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
  if (r != 0) {
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
    StgHeader_info(frame) = stg_atomically_waiting_frame_info;
    Sp = frame;
    // Fix up the stack in the unregisterised case: the return convention is different.
    IF_NOT_REG_R1(Sp_adj(-2); 
                  Sp(1) = stg_NO_FINALIZER_closure;
                  Sp(0) = stg_ut_1_0_unreg_info;)
    R3 = trec; // passing to stmWaitUnblock()
    jump stg_block_stmwait;
  } else {
    // Transaction was not valid: retry immediately
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
    jump stg_ap_v_fast;
  }
}
1392
1393
/*
 * check# :: STM a -> STM ()
 *
 * Register an invariant closure with the current transaction; it will
 * be checked when the enclosing atomically frame commits.
 */
checkzh_fast
{
  W_ trec, closure;

  /* Args: R1 = invariant closure */
  MAYBE_GC (R1_PTR, checkzh_fast); 

  trec = StgTSO_trec(CurrentTSO);
  closure = R1;
  foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
                                     trec "ptr",
                                     closure "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1409
1410
/* newTVar# : allocate a fresh TVar holding the given initial value. */
newTVarzh_fast
{
  W_ tv;
  W_ new_value;

  /* Args: R1 = initialisation value */

  MAYBE_GC (R1_PTR, newTVarzh_fast); 
  new_value = R1;
  ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
  RET_P(tv);
}
1423
1424
/* readTVar# : read a TVar inside the current transaction record. */
readTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ result;

  /* Args: R1 = TVar closure */

  MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];

  RET_P(result);
}
1440
1441
/* writeTVar# : write a TVar inside the current transaction record. */
writeTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ new_value;
  
  /* Args: R1 = TVar closure */
  /*       R2 = New value    */

  MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  new_value = R2;
  foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1459
1460
1461 /* -----------------------------------------------------------------------------
1462  * MVar primitives
1463  *
1464  * take & putMVar work as follows.  Firstly, an important invariant:
1465  *
1466  *    If the MVar is full, then the blocking queue contains only
1467  *    threads blocked on putMVar, and if the MVar is empty then the
1468  *    blocking queue contains only threads blocked on takeMVar.
1469  *
1470  * takeMvar:
1471  *    MVar empty : then add ourselves to the blocking queue
1472  *    MVar full  : remove the value from the MVar, and
1473  *                 blocking queue empty     : return
1474  *                 blocking queue non-empty : perform the first blocked putMVar
1475  *                                            from the queue, and wake up the
1476  *                                            thread (MVar is now full again)
1477  *
1478  * putMVar is just the dual of the above algorithm.
1479  *
1480  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1481  * the stack of the thread waiting to do the putMVar.  See
1482  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1483  * the stack layout, and the PerformPut and PerformTake macros below.
1484  *
1485  * It is important that a blocked take or put is woken up with the
1486  * take/put already performed, because otherwise there would be a
1487  * small window of vulnerability where the thread could receive an
1488  * exception and never perform its take or put, and we'd end up with a
1489  * deadlock.
1490  *
1491  * -------------------------------------------------------------------------- */
1492
/* isEmptyMVar# : 1 if the MVar holds no value, else 0.
   NOTE(review): reads the value field without taking the closure lock,
   so the answer is only a snapshot -- presumably acceptable for this
   primop's semantics; confirm. */
isEmptyMVarzh_fast
{
    /* args: R1 = MVar closure */

    if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
        RET_N(1);
    } else {
        RET_N(0);
    }
}
1503
/* newMVar# : allocate an empty MVar (value and both queue ends set to
   the END_TSO_QUEUE sentinel). */
newMVarzh_fast
{
    /* args: none */
    W_ mvar;

    ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
  
    mvar = Hp - SIZEOF_StgMVar + WDS(1);
    SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
        // MVARs start dirty: generation 0 has no mutable list
    StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
    RET_P(mvar);
}
1519
1520
/* If R1 isn't available, pass it on the stack */
/* PerformTake: store the taken value and a return-frame info pointer on
   a blocked thread's stack, so it resumes as if its takeMVar returned
   that value under the prevailing return convention. */
#ifdef REG_R1
#define PerformTake(tso, value)                         \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
#else
#define PerformTake(tso, value)                                 \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
#endif

/* PerformPut: pop a blocked thread's 3-word putMVar frame and read out
   the value (at the old Sp + 2 words) that it was trying to put. */
#define PerformPut(tso,lval)                    \
    StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
    lval = W_[StgTSO_sp(tso) - WDS(1)];
1535
/*
 * takeMVar# :: MVar# s a -> State# s -> (# State# s, a #)
 *
 * Lock the MVar (THREADED_RTS), mark it dirty if clean, then:
 *   empty MVar  -> enqueue ourselves on its blocking queue and block;
 *   full MVar   -> take the value and, if a putMVar is queued, perform
 *                  that put on behalf of the first waiter and wake it.
 */
takeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif
        
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* If the MVar is empty, put ourselves on its blocking queue,
     * and wait until we're woken up.
     */
    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        jump stg_block_takemvar;
  }

  /* we got the value... */
  val = StgMVar_value(mvar);

  if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
  {
      /* There are putMVar(s) waiting... 
       * wake up the first thread on the queue
       */
      ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

      /* actually perform the putMVar for the thread that we just woke up */
      tso = StgMVar_head(mvar);
      PerformPut(tso,StgMVar_value(mvar));
      dirtyTSO(tso);

#if defined(GRAN) || defined(PAR)
      /* ToDo: check 2nd arg (mvar) is right */
      ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
      StgMVar_head(mvar) = tso;
#else
      ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", 
                                         StgMVar_head(mvar) "ptr") [];
      StgMVar_head(mvar) = tso;
#endif

      if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
          StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
      }

#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
      SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
      RET_P(val);
  } 
  else
  {
      /* No further putMVars, MVar is now empty */
      StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
 
#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
      SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif

      RET_P(val);
  }
}
1620
1621
/*
 * tryTakeMVar# :: MVar# s a -> State# s -> (# State# s, Int#, a #)
 *
 * Non-blocking takeMVar#: returns (0, NO_FINALIZER_closure) when the
 * MVar is empty (the closure is only a dummy pointer for the unboxed
 * tuple), or (1, value) after taking; wakes one queued putMVar if any.
 */
tryTakeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */

    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        /* release with the original info pointer -- nothing was modified */
        unlockClosure(mvar, info);
#endif
        /* HACK: we need a pointer to pass back, 
         * so we abuse NO_FINALIZER_closure
         */
        RET_NP(0, stg_NO_FINALIZER_closure);
    }

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are putMVar(s) waiting... 
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));
        dirtyTSO(tso);

#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr",
                                           StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else 
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    
    RET_NP(1, val);
}
1697
1698
/*
 * putMVar# :: MVar# s a -> a -> State# s -> State# s
 *
 * Dual of takeMVar#: if the MVar is full, enqueue ourselves and block;
 * otherwise either hand the value (R2) directly to the first queued
 * takeMVar waiter, or store it in the MVar.
 */
putMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    /* R2 is live across the call: keep it in the liveness annotation */
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        jump stg_block_putmvar;
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        dirtyTSO(tso);
      
#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    
    /* ToDo: yield afterward for better communication performance? */
}
1776
1777
/*
 * tryPutMVar# :: MVar# s a -> a -> State# s -> (# State# s, Int# #)
 *
 * Non-blocking putMVar#: returns 0 if the MVar is already full, else
 * delivers the value (to a queued takeMVar waiter if any, otherwise
 * into the MVar) and returns 1.
 */
tryPutMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    /* R2 is live across the call: keep it in the liveness annotation */
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        /* release with the original info pointer -- nothing was modified */
        unlockClosure(mvar, info);
#endif
        RET_N(0);
    }
  
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
        
        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        dirtyTSO(tso);
      
#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    
    RET_N(1);
    /* ToDo: yield afterward for better communication performance? */
}
1847
1848
1849 /* -----------------------------------------------------------------------------
1850    Stable pointer primitives
1851    -------------------------------------------------------------------------  */
1852
/*
 * makeStableName# :: a -> State# RealWorld -> (# ..., StableName# a #)
 *
 * Look up (or allocate) the stable-name table entry for the closure in
 * R1 and return its StableName heap object, reusing an existing one so
 * equal stable names are pointer-equal.
 */
makeStableNamezh_fast
{
    W_ index, sn_obj;

    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
  
    (index) = foreign "C" lookupStableName(R1 "ptr") [];

    /* Is there already a StableName for this heap object?
     *  stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        /* cache the new object in the table for next time */
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }
    
    RET_P(sn_obj);
}
1875
1876
/* makeStablePtr# : register R1 in the stable-pointer table and return
   the resulting StablePtr (an index, hence RET_N). */
makeStablePtrzh_fast
{
    /* Args: R1 = a */
    W_ sp;
    MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
    ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
    RET_N(sp);
}
1885
/* deRefStablePtr# : index the stable-pointer table with the StablePtr
   in R1 and return the closure it addresses. */
deRefStablePtrzh_fast
{
    /* Args: R1 = the stable ptr */
    W_ r, sp;
    sp = R1;
    r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
    RET_P(r);
}
1894
1895 /* -----------------------------------------------------------------------------
1896    Bytecode object primitives
1897    -------------------------------------------------------------------------  */
1898
/*
 * newBCO# : allocate a byte-code object from its component parts,
 * copying the arity/bitmap words from the bitmap ByteArray# inline
 * into the BCO.
 */
newBCOzh_fast
{
    /* R1 = instrs
       R2 = literals
       R3 = ptrs
       R4 = arity
       R5 = bitmap array
    */
    W_ bco, bitmap_arr, bytes, words;
    
    bitmap_arr = R5;

    /* size of the BCO = fixed header/fields + the whole bitmap payload */
    words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
    bytes = WDS(words);

    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );

    bco = Hp - bytes + WDS(1);
    SET_HDR(bco, stg_BCO_info, W_[CCCS]);
    
    StgBCO_instrs(bco)     = R1;
    StgBCO_literals(bco)   = R2;
    StgBCO_ptrs(bco)       = R3;
    StgBCO_arity(bco)      = HALF_W_(R4);
    StgBCO_size(bco)       = HALF_W_(words);
    
    // Copy the arity/bitmap info into the BCO
    W_ i;
    i = 0;
for:
    if (i < StgArrWords_words(bitmap_arr)) {
        StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
        i = i + 1;
        goto for;
    }
    
    RET_P(bco);
}
1937
1938
/* mkApUpd0# primop: wrap a zero-arity BCO in an updatable AP thunk.
 * Entry: R1 = the BCO.  Returns the new AP closure.
 */
1939 mkApUpd0zh_fast
1940 {
1941     // R1 = the BCO# for the AP
1942     //  
1943     W_ ap;
1944
1945     // This function is *only* used to wrap zero-arity BCOs in an
1946     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1947     // saturated and always points directly to a FUN or BCO.
1948     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1949            StgBCO_arity(R1) == HALF_W_(0));
1950
     // Heap check (may GC; R1 is a pointer), plus ticky/profiling
     // bookkeeping for the allocation.
1951     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1952     TICK_ALLOC_UP_THK(0, 0);
1953     CCCS_ALLOC(SIZEOF_StgAP);
1954
1955     ap = Hp - SIZEOF_StgAP + WDS(1);
1956     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1957     
     // Zero pending arguments: the AP is already saturated.
1958     StgAP_n_args(ap) = HALF_W_(0);
1959     StgAP_fun(ap) = R1;
1960     
1961     RET_P(ap);
1962 }
1963
/* unpackClosure# primop: reflect on a closure's representation.
 * Entry: R1 = closure to analyze (possibly tagged).
 * Returns (info pointer, frozen array of the pointer fields, byte array
 * of the non-pointer words).  For closure types with non-standard
 * layout (thunks, AP/PAP, BCO, selectors) the payload is not copied.
 */
1964 unpackClosurezh_fast
1965 {
1966 /* args: R1 = closure to analyze */
1967 // TODO: Consider the absence of ptrs or nonptrs as a special case ?
1968
1969     W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
     // Strip the pointer tag before reading the info table.
1970     info  = %GET_STD_INFO(UNTAG(R1));
1971
1972     // Some closures have non-standard layout, so we omit those here.
1973     W_ type;
1974     type = TO_W_(%INFO_TYPE(info));
1975     switch [0 .. N_CLOSURE_TYPES] type {
     // A selector thunk has exactly one pointer (the selectee).
1976     case THUNK_SELECTOR : {
1977         ptrs = 1;
1978         nptrs = 0;
1979         goto out;
1980     }
     // Thunks/APs/PAPs/BCOs: payload layout is non-standard, so report
     // no fields rather than copy them incorrectly.
1981     case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
1982          THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
1983         ptrs = 0;
1984         nptrs = 0;
1985         goto out;
1986     }
     // Standard layout: field counts come straight from the info table.
1987     default: {
1988         ptrs  = TO_W_(%INFO_PTRS(info)); 
1989         nptrs = TO_W_(%INFO_NPTRS(info));
1990         goto out;
1991     }}
1992 out:
1993
1994     W_ ptrs_arr_sz, nptrs_arr_sz;
1995     nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
1996     ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);
1997
     // One allocation covers both result arrays; may GC, R1 kept live.
1998     ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);
1999
2000     W_ clos;
2001     clos = UNTAG(R1);
2002
     // Layout in the new allocation: [ptrs_arr][nptrs_arr] ending at Hp.
2003     ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
2004     nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
2005
     // FROZEN: the copy is immutable from the caller's point of view.
2006     SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
2007     StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
2008     p = 0;
2009 for:
2010     if(p < ptrs) {
2011          W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
2012          p = p + 1;
2013          goto for;
2014     }
2015     
2016     SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
2017     StgArrWords_words(nptrs_arr) = nptrs;
2018     p = 0;
2019 for2:
     // Non-pointer words follow the pointers in the closure payload.
2020     if(p < nptrs) {
2021          W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
2022          p = p + 1;
2023          goto for2;
2024     }
2025     RET_NPP(info, ptrs_arr, nptrs_arr);
2026 }
2027
2028 /* -----------------------------------------------------------------------------
2029    Thread I/O blocking primitives
2030    -------------------------------------------------------------------------- */
2031
2032 /* Add a thread to the end of the blocked queue. (C-- version of the C
2033  * macro in Schedule.h).
2034  */
/* Precondition: tso is not already on any queue (its link field is
 * END_TSO_QUEUE, asserted below).  Maintains both blocked_queue_hd
 * (set when the queue was empty) and blocked_queue_tl.  Used only on
 * the non-threaded RTS, so no locking is performed.
 */
2035 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
2036     ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
2037     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
2038       W_[blocked_queue_hd] = tso;                       \
2039     } else {                                            \
2040       StgTSO_link(W_[blocked_queue_tl]) = tso;          \
2041     }                                                   \
2042     W_[blocked_queue_tl] = tso;
2043
/* waitRead# primop: block the current thread until the file descriptor
 * in R1 is readable.  Not supported on the threaded RTS (barfs there);
 * on the non-threaded RTS the thread is marked BlockedOnRead, queued on
 * the blocked queue, and descheduled.
 */
2044 waitReadzh_fast
2045 {
2046     /* args: R1 */
2047 #ifdef THREADED_RTS
2048     foreign "C" barf("waitRead# on threaded RTS") never returns;
2049 #else
2050
2051     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2052     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
     /* block_info carries the fd the scheduler should poll */
2053     StgTSO_block_info(CurrentTSO) = R1;
2054     // No locking - we're not going to use this interface in the
2055     // threaded RTS anyway.
2056     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2057     jump stg_block_noregs;
2058 #endif
2059 }
2060
/* waitWrite# primop: block the current thread until the file descriptor
 * in R1 is writable.  Mirror image of waitReadzh_fast: barfs on the
 * threaded RTS, otherwise marks the thread BlockedOnWrite and queues it.
 */
2061 waitWritezh_fast
2062 {
2063     /* args: R1 */
2064 #ifdef THREADED_RTS
2065     foreign "C" barf("waitWrite# on threaded RTS") never returns;
2066 #else
2067
2068     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2069     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
     /* block_info carries the fd the scheduler should poll */
2070     StgTSO_block_info(CurrentTSO) = R1;
2071     // No locking - we're not going to use this interface in the
2072     // threaded RTS anyway.
2073     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2074     jump stg_block_noregs;
2075 #endif
2076 }
2077
2078
/* delay# primop: suspend the current thread for R1 microseconds.
 * Not supported on the threaded RTS.  On mingw32 the delay is handed to
 * the async I/O manager as a request; elsewhere the thread is inserted
 * into the sleeping_queue, kept sorted by wake-up time.
 */
2079 STRING(stg_delayzh_malloc_str, "delayzh_fast")
2080 delayzh_fast
2081 {
2082 #ifdef mingw32_HOST_OS
2083     W_ ares;
2084     CInt reqID;
2085 #else
2086     W_ t, prev, target;
2087 #endif
2088
2089 #ifdef THREADED_RTS
2090     foreign "C" barf("delay# on threaded RTS") never returns;
2091 #else
2092
2093     /* args: R1 (microsecond delay amount) */
2094     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2095     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
2096
2097 #ifdef mingw32_HOST_OS
2098
2099     /* could probably allocate this on the heap instead */
2100     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2101                                             stg_delayzh_malloc_str);
2102     (reqID) = foreign "C" addDelayRequest(R1);
2103     StgAsyncIOResult_reqID(ares)   = reqID;
2104     StgAsyncIOResult_len(ares)     = 0;
2105     StgAsyncIOResult_errCode(ares) = 0;
2106     StgTSO_block_info(CurrentTSO)  = ares;
2107
2108     /* Having all async-blocked threads reside on the blocked_queue
2109      * simplifies matters, so change the status to OnDoProc put the
2110      * delayed thread on the blocked_queue.
2111      */
2112     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2113     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2114     jump stg_block_async_void;
2115
2116 #else
2117
     /* Compute the wake-up time in scheduler ticks.  divisor is the
      * length of one tick in microseconds (tickInterval appears to be in
      * milliseconds here -- NOTE(review): confirm units against RtsFlags),
      * with a fallback of 50 when the flag is unset. */
2118     W_ time;
2119     W_ divisor;
2120     (time) = foreign "C" getourtimeofday() [R1];
2121     divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
2122     if (divisor == 0) {
2123         divisor = 50;
2124     }
2125     divisor = divisor * 1000;
2126     target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
2127            + time + 1; /* Add 1 as getourtimeofday rounds down */
2128     StgTSO_block_info(CurrentTSO) = target;
2129
2130     /* Insert the new thread in the sleeping queue. */
     /* The queue is kept sorted ascending by block_info (wake-up tick);
      * walk to the first entry with a later target. */
2131     prev = NULL;
2132     t = W_[sleeping_queue];
2133 while:
2134     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
2135         prev = t;
2136         t = StgTSO_link(t);
2137         goto while;
2138     }
2139
2140     StgTSO_link(CurrentTSO) = t;
2141     if (prev == NULL) {
2142         W_[sleeping_queue] = CurrentTSO;
2143     } else {
2144         StgTSO_link(prev) = CurrentTSO;
2145     }
2146     jump stg_block_noregs;
2147 #endif
2148 #endif /* !THREADED_RTS */
2149 }
2150
2151
2152 #ifdef mingw32_HOST_OS
/* asyncRead# primop (Windows only): issue an asynchronous read request
 * to the I/O manager and block the current thread until it completes.
 * The StgAsyncIOResult is malloc'd on the C heap and stashed in the
 * TSO's block_info for the I/O manager to fill in.
 */
2153 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
2154 asyncReadzh_fast
2155 {
2156     W_ ares;
2157     CInt reqID;
2158
2159 #ifdef THREADED_RTS
2160     foreign "C" barf("asyncRead# on threaded RTS") never returns;
2161 #else
2162
2163     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2164     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2165     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
2166
2167     /* could probably allocate this on the heap instead */
2168     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2169                                             stg_asyncReadzh_malloc_str)
2170                         [R1,R2,R3,R4];
     /* 0 = read request (cf. asyncWritezh_fast, which passes 1) */
2171     (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
2172     StgAsyncIOResult_reqID(ares)   = reqID;
2173     StgAsyncIOResult_len(ares)     = 0;
2174     StgAsyncIOResult_errCode(ares) = 0;
2175     StgTSO_block_info(CurrentTSO)  = ares;
2176     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2177     jump stg_block_async;
2178 #endif
2179 }
2180
/* asyncWrite# primop (Windows only): issue an asynchronous write
 * request to the I/O manager and block the current thread until it
 * completes.  Mirror image of asyncReadzh_fast.
 */
2181 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
2182 asyncWritezh_fast
2183 {
2184     W_ ares;
2185     CInt reqID;
2186
2187 #ifdef THREADED_RTS
2188     foreign "C" barf("asyncWrite# on threaded RTS") never returns;
2189 #else
2190
2191     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2192     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2193     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2194
     /* result record lives on the C heap; stashed in block_info below */
2195     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2196                                             stg_asyncWritezh_malloc_str)
2197                         [R1,R2,R3,R4];
     /* 1 = write request (cf. asyncReadzh_fast, which passes 0) */
2198     (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
2199
2200     StgAsyncIOResult_reqID(ares)   = reqID;
2201     StgAsyncIOResult_len(ares)     = 0;
2202     StgAsyncIOResult_errCode(ares) = 0;
2203     StgTSO_block_info(CurrentTSO)  = ares;
2204     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2205     jump stg_block_async;
2206 #endif
2207 }
2208
/* asyncDoProc# primop (Windows only): run (proc)(param) on a worker via
 * the async I/O manager and block the current thread until it finishes.
 * Same blocking protocol as asyncRead#/asyncWrite#.
 */
2209 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
2210 asyncDoProczh_fast
2211 {
2212     W_ ares;
2213     CInt reqID;
2214
2215 #ifdef THREADED_RTS
2216     foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
2217 #else
2218
2219     /* args: R1 = proc, R2 = param */
2220     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2221     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2222
2223     /* could probably allocate this on the heap instead */
2224     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2225                                             stg_asyncDoProczh_malloc_str) 
2226                                 [R1,R2];
2227     (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
2228     StgAsyncIOResult_reqID(ares)   = reqID;
2229     StgAsyncIOResult_len(ares)     = 0;
2230     StgAsyncIOResult_errCode(ares) = 0;
2231     StgTSO_block_info(CurrentTSO) = ares;
2232     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2233     jump stg_block_async;
2234 #endif
2235 }
2236 #endif
2237
2238 // noDuplicate# tries to ensure that none of the thunks under
2239 // evaluation by the current thread are also under evaluation by
2240 // another thread.  It relies on *both* threads doing noDuplicate#;
2241 // the second one will get blocked if they are duplicating some work.
2242 noDuplicatezh_fast
2243 {
     /* Flush this thread's stack state so threadPaused can walk it and
      * claim (black-hole) the thunks under evaluation. */
2244     SAVE_THREAD_STATE();
2245     ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
2246     foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
2247     
     /* threadPaused may have decided this thread should die (e.g. it was
      * duplicating work); otherwise reload state and resume normally. */
2248     if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
2249         jump stg_threadFinished;
2250     } else {
2251         LOAD_THREAD_STATE();
2252         ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
2253         jump %ENTRY_CODE(Sp(0));
2254     }
2255 }
2256
/* getApStackVal# primop: peek at a payload slot of an AP_STACK closure.
 * Entry: R1 = suspected AP_STACK, R2 = payload offset.
 * Returns (ok, val): ok=1 and the payload word when R1 really is an
 * AP_STACK; ok=0 and R1 itself otherwise (no error is raised).
 */
2257 getApStackValzh_fast
2258 {
2259    W_ ap_stack, offset, val, ok;
2260
2261    /* args: R1 = AP_STACK, R2 = offset */
2262    ap_stack = R1;
2263    offset   = R2;
2264
2265    if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
2266         ok = 1;
2267         val = StgAP_STACK_payload(ap_stack,offset); 
2268    } else {
2269         ok = 0;
2270         val = R1;
2271    }
2272    RET_NP(ok,val);
2273 }