atomicModifyIORef: use a local cas() instead of the global lock
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 #ifdef __PIC__
31 #ifndef mingw32_HOST_OS
32 import __gmpz_init;
33 import __gmpz_add;
34 import __gmpz_sub;
35 import __gmpz_mul;
36 import __gmpz_gcd;
37 import __gmpn_gcd_1;
38 import __gmpn_cmp;
39 import __gmpz_tdiv_q;
40 import __gmpz_tdiv_r;
41 import __gmpz_tdiv_qr;
42 import __gmpz_fdiv_qr;
43 import __gmpz_divexact;
44 import __gmpz_and;
45 import __gmpz_xor;
46 import __gmpz_ior;
47 import __gmpz_com;
48 #endif
49 import pthread_mutex_lock;
50 import pthread_mutex_unlock;
51 #endif
52 import base_ControlziExceptionziBase_nestedAtomically_closure;
53 import EnterCriticalSection;
54 import LeaveCriticalSection;
55
56 /*-----------------------------------------------------------------------------
57   Array Primitives
58
59   Basically just new*Array - the others are all inline macros.
60
61   The size arg is always passed in R1, and the result returned in R1.
62
63   The slow entry point is for returning from a heap check, the saved
64   size argument must be re-loaded from the stack.
65   -------------------------------------------------------------------------- */
66
67 /* for objects that are *less* than the size of a word, make sure we
68  * round up to the nearest word for the size of the array.
69  */
70
/* newByteArray# :: Int# -> State# s -> (# State# s, MutableByteArray# s #)
 * R1 = number of bytes requested.
 * Allocates an uninitialised StgArrWords in the current Capability's
 * nursery; the byte count is rounded up to a whole number of words.
 */
newByteArrayzh_fast
{
    W_ words, payload_words, n, p;
    MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
    n = R1;
    // round the request up to a whole number of words
    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
84
/* newPinnedByteArray# :: Int# -> State# s -> (# State# s, MutableByteArray# s #)
 * R1 = number of bytes requested.
 * Like newByteArray#, but the object is allocated with allocatePinned()
 * so the GC will never move it, and the payload is 8-byte aligned.
 */
newPinnedByteArrayzh_fast
{
    W_ words, payload_words, n, p;

    MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);

    // We want an 8-byte aligned array.  allocatePinned() gives us
    // 8-byte aligned memory by default, but we want to align the
    // *goods* inside the ArrWords object, so we have to check the
    // size of the ArrWords header and adjust our size accordingly.
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    if ((SIZEOF_StgArrWords & 7) != 0) {
        // one extra word of slack so we can shift the object forward below
        words = words + 1;
    }

    ("ptr" p) = foreign "C" allocatePinned(words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    // Again, if the ArrWords header isn't a multiple of 8 bytes, we
    // have to push the object forward one word so that the goods
    // fall on an 8-byte boundary.
    if ((SIZEOF_StgArrWords & 7) != 0) {
        p = p + WDS(1);
    }

    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
116
/* newArray# :: Int# -> a -> State# s -> (# State# s, MutableArray# s a #)
 * Args: R1 = number of elements, R2 = initialisation value.
 * Allocates a MUT_ARR_PTRS (born dirty, so it is already on the
 * mutable list) and fills every slot with the value in R2.
 */
newArrayzh_fast
{
    W_ words, n, init, arr, p;
    /* Args: R1 = words, R2 = initialisation value */

    n = R1;
    MAYBE_GC(R2_PTR,newArrayzh_fast);

    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
    ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(arr) = n;

    // Initialise all elements of the the array with the value in R2
    init = R2;
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + WDS(words)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    RET_P(arr);
}
144
/* unsafeThawArray# :: Array# a -> State# s -> (# State# s, MutableArray# s a #)
 * R1 = the frozen array.  Flips the info pointer back to DIRTY, adding
 * the array to the mutable list only if it is not already there.
 */
unsafeThawArrayzh_fast
{
  // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
  //
  // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
  // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
  // it on the mutable list for the GC to remove (removing something from
  // the mutable list is not easy, because the mut_list is only singly-linked).
  // 
  // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
  // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
  // to indicate that it is still on the mutable list.
  //
  // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
  // either it is on a mut_list, or it isn't.  We adopt the convention that
  // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
  // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
  // we put it on the mutable list more than once, but it would get scavenged
  // multiple times during GC, which would be unnecessarily slow.
  //
  if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        // not on the mutable list yet: add it
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(R1, R1);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        RET_P(R1);
  } else {
        // FROZEN0: already on the mutable list, just flip the info ptr
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        RET_P(R1);
  }
}
175
176 /* -----------------------------------------------------------------------------
177    MutVar primitives
178    -------------------------------------------------------------------------- */
179
/* newMutVar# :: a -> State# s -> (# State# s, MutVar# s a #)
 * R1 = initialisation value.
 * Allocates a MUT_VAR (born dirty) holding R1.
 */
newMutVarzh_fast
{
    W_ mv;
    /* Args: R1 = initialisation value */

    ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);

    // ALLOC_PRIM bumped Hp; carve the MutVar out of the space just allocated
    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
    StgMutVar_var(mv) = R1;
    
    RET_P(mv);
}
193
/* atomicModifyMutVar# :: MutVar# s a -> (a -> (a,b)) -> State# s -> (# State# s, b #)
 * R1 = the MutVar#, R2 = the modification function f.
 * Lock-free: builds the thunks (f x), (sel_0 (f x)) and (sel_1 (f x)),
 * then installs the sel_0 thunk as the new contents with a cas() retry
 * loop (threaded RTS) or a plain store (non-threaded RTS).
 */
atomicModifyMutVarzh_fast
{
    W_ mv, f, z, x, y, r, h;
    /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */

    /* If x is the current contents of the MutVar#, then 
       We want to make the new contents point to

         (sel_0 (f x))
 
       and the return value is
         
         (sel_1 (f x))

        obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */

/* Thunks must be at least MIN_UPD_SIZE words so they can be updated
   in place; pad the sizes (and the ticky allocation counts) to match. */
#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

   // one heap check up front for all three thunks
   HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);

   mv = R1;
   f = R2;

   // z = [stg_ap_2 f x]; x is filled in inside the retry loop below
   TICK_ALLOC_THUNK_2();
   CCCS_ALLOC(THUNK_2_SIZE);
   z = Hp - THUNK_2_SIZE + WDS(1);
   SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(z);
   StgThunk_payload(z,0) = f;

   // y = [stg_sel_0 z] -- the new contents of the MutVar
   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   y = z - THUNK_1_SIZE;
   SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(y);
   StgThunk_payload(y,0) = z;

   // r = [stg_sel_1 z] -- the value returned to the caller
   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   r = y - THUNK_1_SIZE;
   SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(r);
   StgThunk_payload(r,0) = z;

 retry:
   // re-read the current contents and patch it into z before the CAS,
   // so z always applies f to the value we are trying to replace
   x = StgMutVar_var(mv);
   StgThunk_payload(z,1) = x;
#ifdef THREADED_RTS
   (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) [];
   if (h != x) { goto retry; }
#else
   StgMutVar_var(mv) = y;
#endif

   // tell the GC the MutVar may now point into a younger generation
   if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
     foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
   }

   RET_P(r);
}
275
276 /* -----------------------------------------------------------------------------
277    Weak Pointer Primitives
278    -------------------------------------------------------------------------- */
279
STRING(stg_weak_msg,"New weak pointer at %p\n")

/* mkWeak# :: o -> b -> c -> State# RealWorld -> (# State# RealWorld, Weak# b #)
 * R1 = key, R2 = value, R3 = finalizer (or NULL for none).
 * Allocates a StgWeak and links it onto the global weak_ptr_list.
 */
mkWeakzh_fast
{
  /* R1 = key
     R2 = value
     R3 = finalizer (or NULL)
  */
  W_ w;

  // normalise "no finalizer" to the canonical closure
  if (R3 == NULL) {
    R3 = stg_NO_FINALIZER_closure;
  }

  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, W_[CCCS]);

  StgWeak_key(w)       = R1;
  StgWeak_value(w)     = R2;
  StgWeak_finalizer(w) = R3;

  // push onto the front of the global weak pointer list
  StgWeak_link(w)       = W_[weak_ptr_list];
  W_[weak_ptr_list]     = w;

  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

  RET_P(w);
}
310
311
/* finalizeWeak# :: Weak# a -> State# RealWorld -> (# State# RealWorld, Int#, b #)
 * R1 = the weak pointer.  Marks it dead and returns (1, finalizer) if
 * there is a finalizer to run, otherwise (0, NO_FINALIZER).
 */
finalizzeWeakzh_fast
{
  /* R1 = weak ptr
   */
  W_ w, f;

  w = R1;

  // already dead?
  if (GET_INFO(w) == stg_DEAD_WEAK_info) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  }

  // kill it
#ifdef PROFILING
  // @LDV profiling
  // A weak pointer is inherently used, so we do not need to call
  // LDV_recordDead_FILL_SLOP_DYNAMIC():
  //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
  // or, LDV_recordDead():
  //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
  // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
  // large as weak pointers, so there is no need to fill the slop, either.
  // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
#endif

  //
  // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
  //
  SET_INFO(w,stg_DEAD_WEAK_info);
  LDV_RECORD_CREATE(w);

  // read the finalizer before overwriting the link field (StgDeadWeak
  // overlays StgWeak)
  f = StgWeak_finalizer(w);
  StgDeadWeak_link(w) = StgWeak_link(w);

  /* return the finalizer */
  if (f == stg_NO_FINALIZER_closure) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  } else {
      RET_NP(1,f);
  }
}
354
/* deRefWeak# :: Weak# v -> State# RealWorld -> (# State# RealWorld, Int#, v #)
 * R1 = the weak pointer.  Returns (1, value) while the weak pointer is
 * still alive, and (0, the weak pointer itself) once it has died.
 */
deRefWeakzh_fast
{
  /* R1 = weak ptr */
  W_ wptr, alive, payload;

  wptr = R1;
  // dead until proven otherwise
  alive = 0;
  payload = wptr;
  if (GET_INFO(wptr) == stg_WEAK_info) {
    alive = 1;
    payload = StgWeak_value(wptr);
  }
  RET_NP(alive,payload);
}
370
371 /* -----------------------------------------------------------------------------
372    Arbitrary-precision Integer operations.
373
374    There are some assumptions in this code that mp_limb_t == W_.  This is
375    the case for all the platforms that GHC supports, currently.
376    -------------------------------------------------------------------------- */
377
/* int2Integer# :: Int# -> (# Int#, ByteArray# #)
 * R1 = the Int#.  Returns a GMP-style (size, limbs) pair: size is the
 * sign (-1/0/1 here, since one limb suffices) and the ByteArray# holds
 * the magnitude.
 */
int2Integerzh_fast
{
   /* arguments: R1 = Int# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;
   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   /* mpz_set_si is inlined here, makes things simpler */
   if (%lt(val,0)) { 
        s  = -1;
        Hp(0) = -val;   // store the magnitude, sign lives in s
   } else { 
     if (%gt(val,0)) {
        s = 1;
        Hp(0) = val;
     } else {
        s = 0;          // zero: size 0, limb array contents unused
     }
  }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# 
               #)
   */
   RET_NP(s,p);
}
410
/* word2Integer# :: Word# -> (# Int#, ByteArray# #)
 * R1 = the Word#.  Like int2Integer# but the input is unsigned, so the
 * size is 0 or 1, never negative.
 */
word2Integerzh_fast
{
   /* arguments: R1 = Word# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   if (val != 0) {
        s = 1;
        W_[Hp] = val;
   } else {
        s = 0;          // zero: size 0, limb array contents unused
   }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
437
438
439 /*
440  * 'long long' primops for converting to/from Integers.
441  */
442
443 #ifdef SUPPORT_LONG_LONGS
444
/* int64ToInteger# :: Int64# -> (# Int#, ByteArray# #)
 * L1 = the Int64#.  Only compiled on 32-bit platforms
 * (SUPPORT_LONG_LONGS).  Splits the 64-bit value into two 32-bit
 * limbs, allocating one limb when the value fits in 32 bits signed,
 * and negating the size for negative values (GMP convention).
 */
int64ToIntegerzh_fast
{
   /* arguments: L1 = Int64# */

   L_ val;
   W_ hi, lo, s, neg, words_needed, p;

   val = L1;
   neg = 0;

   hi = TO_W_(val >> 32);
   lo = TO_W_(val);

   // one limb suffices when the top word is just sign-extension:
   // all zeros, or all ones with a non-zero low word
   if ( hi == 0 || (hi == 0xFFFFFFFF && lo != 0) )  {
       // minimum is one word
       words_needed = 1;
   } else { 
       words_needed = 2;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, int64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   // convert two's-complement (hi,lo) to sign-and-magnitude
   if ( %lt(hi,0) ) {
     neg = 1;
     lo = -lo;
     if(lo == 0) {
       hi = -hi;
     } else {
       hi = -hi - 1;    // borrow from the high word
     }
   }

   if ( words_needed == 2 )  { 
      s = 2;
      Hp(-1) = lo;      // least-significant limb first
      Hp(0) = hi;
   } else { 
       if ( lo != 0 ) {
           s = 1;
           Hp(0) = lo;
       } else /* val==0 */  {
           s = 0;
       }
   }
   if ( neg != 0 ) {
        s = -s;         // negative size encodes a negative Integer
   }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
/* word64ToInteger# :: Word64# -> (# Int#, ByteArray# #)
 * L1 = the Word64#.  Only compiled on 32-bit platforms
 * (SUPPORT_LONG_LONGS).  Unsigned version of int64ToInteger#: the size
 * is 0, 1 or 2, never negative.
 */
word64ToIntegerzh_fast
{
   /* arguments: L1 = Word64# */

   L_ val;
   W_ hi, lo, s, words_needed, p;

   val = L1;
   hi = TO_W_(val >> 32);
   lo = TO_W_(val);

   if ( hi != 0 ) {
      words_needed = 2;
   } else {
      words_needed = 1;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, word64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   if ( hi != 0 ) { 
     s = 2;
     Hp(-1) = lo;       // least-significant limb first
     Hp(0)  = hi;
   } else {
      if ( lo != 0 ) {
        s = 1;
        Hp(0) = lo;
     } else /* val==0 */  {
      s = 0;
     }
  }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
545
546
547
548 #endif /* SUPPORT_LONG_LONGS */
549
/* ToDo: this is shockingly inefficient */

/* Static scratch MP_INT structures for the GMP primops below.  In the
 * non-threaded RTS a single global set suffices; in the threaded RTS
 * each Capability keeps its own copies in its register table, fetched
 * with FETCH_MP_TEMP so concurrent Capabilities do not clobber each
 * other.
 */
#ifndef THREADED_RTS
section "bss" {
  mp_tmp1:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_tmp2:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_result1:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_result2:
    bits8 [SIZEOF_MP_INT];
}
#endif

#ifdef THREADED_RTS
/* Bind X to the address of the per-Capability scratch slot rX in the
 * register table. */
#define FETCH_MP_TEMP(X) \
W_ X; \
X = BaseReg + (OFFSET_StgRegTable_r ## X);
#else
#define FETCH_MP_TEMP(X) /* Nothing */
#endif
581
/* Generate a primop taking two Integers (R1/R2 = size/limbs of the
 * first, R3/R4 = size/limbs of the second) and returning one, by
 * wrapping a two-argument GMP mpz function.  The result's limb array
 * is the StgArrWords that GMP's allocator (redirected to the GHC heap)
 * placed the limbs in.
 */
#define GMP_TAKE2_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1, s2;                                                          \
  W_ d1, d2;                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_tmp2);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
  FETCH_MP_TEMP(mp_result2);                                            \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
                                                                        \
  s1 = W_TO_INT(R1);                                                    \
  d1 = R2;                                                              \
  s2 = W_TO_INT(R3);                                                    \
  d2 = R4;                                                              \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
  MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
  MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
  MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
  MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
  MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}
615
/* Generate a primop taking one Integer (R1/R2 = size/limbs) and
 * returning one, by wrapping a one-argument GMP mpz function.
 */
#define GMP_TAKE1_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1;                                                              \
  W_ d1;                                                                \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR, name);                                               \
                                                                        \
  d1 = R2;                                                              \
  s1 = W_TO_INT(R1);                                                    \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}
642
/* Generate a primop taking two Integers and returning two (e.g.
 * quotRem/divMod), by wrapping a GMP mpz function that fills in two
 * result mpz structures.
 */
#define GMP_TAKE2_RET2(name,mp_fun)                                                     \
name                                                                                    \
{                                                                                       \
  CInt s1, s2;                                                                          \
  W_ d1, d2;                                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                                               \
  FETCH_MP_TEMP(mp_tmp2);                                                               \
  FETCH_MP_TEMP(mp_result1)                                                             \
  FETCH_MP_TEMP(mp_result2)                                                             \
                                                                                        \
  /* call doYouWantToGC() */                                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
                                                                                        \
  s1 = W_TO_INT(R1);                                                                    \
  d1 = R2;                                                                              \
  s2 = W_TO_INT(R3);                                                                    \
  d2 = R4;                                                                              \
                                                                                        \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
  MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
  MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
  MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
                                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
  foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
                                                                                        \
  /* Perform the operation */                                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
                                                                                        \
  RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
           MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
           TO_W_(MP_INT__mp_size(mp_result2)),                                          \
           MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
}
679
/* The GMP-backed Integer arithmetic primops, generated from the
 * macros above. */
GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)

GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
694
/* One-word scratch slot for gcdIntzh_fast and the decode* primops. */
#ifndef THREADED_RTS
section "bss" {
  mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
}
#endif
700
/* gcdInt# :: Int# -> Int# -> Int#
 * R1, R2 = the two Int#s.  Delegates to GMP's mpn-level single-limb
 * gcd, treating R1 as a one-limb number held in the scratch slot.
 */
gcdIntzh_fast
{
    /* R1 = the first Int#; R2 = the second Int# */
    W_ r; 
    FETCH_MP_TEMP(mp_tmp_w);

    W_[mp_tmp_w] = R1;
    (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];

    R1 = r;
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
714
715
/* gcdIntegerInt# :: Int# -> ByteArray# -> Int# -> Int#
 * R1 = size of the Integer, R2 = its limb array, R3 = the Int#.
 * Delegates to GMP's mpn-level gcd-with-a-limb routine.
 */
gcdIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ g;

    (g) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
    R1 = g;

    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
726
727
/* cmpIntegerInt# :: Int# -> ByteArray# -> Int# -> Int#
 * R1 = size of the Integer, R2 = its limbs, R3 = the Int#.
 * Returns <0, 0 or >0 as the Integer compares against the Int#.
 */
cmpIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ usize, vsize, v_digit, u_digit;

    usize = R1;
    vsize = 0;
    v_digit = R3;

    // paraphrased from __gmpz_cmp_si() in the GMP sources
    // first derive the "size" (sign) of the plain int
    if (%gt(v_digit,0)) {
        vsize = 1;
    } else { 
        if (%lt(v_digit,0)) {
            vsize = -1;
            v_digit = -v_digit;   // compare magnitudes below
        }
    }

    // different signs/sizes decide the comparison immediately
    if (usize != vsize) {
        R1 = usize - vsize; 
        jump %ENTRY_CODE(Sp(0));
    }

    // both zero
    if (usize == 0) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    // same sign, one limb each: compare the magnitudes
    u_digit = W_[BYTE_ARR_CTS(R2)];

    if (u_digit == v_digit) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
        R1 = usize; 
    } else {
        R1 = -usize; 
    }

    jump %ENTRY_CODE(Sp(0));
}
772
/* cmpInteger# :: Int# -> ByteArray# -> Int# -> ByteArray# -> Int#
 * R1/R2 = size/limbs of the first Integer, R3/R4 of the second.
 * Returns <0, 0 or >0 as the first compares against the second.
 */
cmpIntegerzh_fast
{
    /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
    W_ usize, vsize, size, up, vp;
    CInt cmp;

    // paraphrased from __gmpz_cmp() in the GMP sources
    usize = R1;
    vsize = R3;

    // different signs/limb counts decide the comparison immediately
    if (usize != vsize) {
        R1 = usize - vsize; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (usize == 0) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    // same size: compare |u| and |v| limb-wise
    if (%lt(usize,0)) { // NB. not <, which is unsigned
        size = -usize;
    } else {
        size = usize;
    }

    up = BYTE_ARR_CTS(R2);
    vp = BYTE_ARR_CTS(R4);

    (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];

    if (cmp == 0 :: CInt) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    // flip the magnitude comparison when both numbers are negative
    if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
        R1 = 1;
    } else {
        R1 = (-1); 
    }
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
817
/* integer2Int# :: Int# -> ByteArray# -> Int#
 * R1 = size word, R2 = limb array.  Truncates the Integer to its
 * least-significant limb, negated when the size word is negative;
 * a zero size yields zero.
 */
integer2Intzh_fast
{
    /* R1 = s; R2 = d */
    W_ lo, size;

    size = R1;
    if (size == 0) {
        R1 = 0;
    } else {
        lo = W_[R2 + SIZEOF_StgArrWords];
        if (%lt(size,0)) {
            lo = -lo;
        }
        R1 = lo;
    }
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
836
/* integer2Word# :: Int# -> ByteArray# -> Word#
 * R1 = size word, R2 = limb array.  Identical computation to
 * integer2Int#: the least-significant limb, two's-complement-negated
 * for negative Integers; a zero size yields zero.
 */
integer2Wordzh_fast
{
  /* R1 = s; R2 = d */
  W_ lo, size;

  size = R1;
  if (size == 0) {
    R1 = 0;
  } else {
    lo = W_[R2 + SIZEOF_StgArrWords];
    if (%lt(size,0)) {
      lo = -lo;
    }
    R1 = lo;
  }
  /* Result parked in R1, return via info-pointer at TOS */
  jump %ENTRY_CODE(Sp(0));
}
855
/* decodeFloat# :: Float# -> (# Int#, Int#, ByteArray# #)
 * F1 = the Float#.  Returns (exponent, mantissa size, mantissa limbs).
 * Fix: the opening brace of the function body was missing (a syntax
 * error); every other primop in this file opens with one.
 */
decodeFloatzh_fast
{
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);
    
    /* arguments: F1 = Float# */
    arg = F1;
    
    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
    
    /* Be prepared to tell Lennart-coded __decodeFloat
       where mantissa._mp_d can be put (it does not care about the rest) */
    p = Hp - SIZEOF_StgArrWords;
    SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
    StgArrWords_words(p) = 1;
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
    
    /* Perform the operation */
    foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
    
    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
881
/* decodeFloat_Int# :: Float# -> (# Int#, Int# #)
 * F1 = the Float#.  Returns (mantissa, exponent) as plain Int#s.
 * Fix: the opening brace of the function body was missing (a syntax
 * error); every other primop in this file opens with one.
 */
decodeFloatzuIntzh_fast
{
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);
    
    /* arguments: F1 = Float# */
    arg = F1;
    
    /* Perform the operation */
    foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
    
    /* returns: (Int# (mantissa), Int# (exponent)) */
    RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
}
898
#define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
#define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)

/* decodeDouble# :: Double# -> (# Int#, Int#, ByteArray# #)
 * D1 = the Double#.  Returns (exponent, mantissa size, mantissa limbs).
 * Fix: the opening brace of the function body was missing (a syntax
 * error); every other primop in this file opens with one.
 */
decodeDoublezh_fast
{
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: D1 = Double# */
    arg = D1;

    ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
    
    /* Be prepared to tell Lennart-coded __decodeDouble
       where mantissa.d can be put (it does not care about the rest) */
    p = Hp - ARR_SIZE + WDS(1);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

    /* Perform the operation */
    foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
    
    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
927
/* decodeDouble_2Int# :: Double# -> (# Int#, Word#, Word#, Int# #)
 * D1 = the Double#.  Returns (mantissa sign, mantissa high word,
 * mantissa low word, exponent).
 * Fix: the opening brace of the function body was missing (a syntax
 * error); every other primop in this file opens with one.
 */
decodeDoublezu2Intzh_fast
{
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp2);
    FETCH_MP_TEMP(mp_result1);
    FETCH_MP_TEMP(mp_result2);

    /* arguments: D1 = Double# */
    arg = D1;

    /* Perform the operation */
    foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
                                    mp_result1 "ptr", mp_result2 "ptr",
                                    arg) [];

    /* returns:
       (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
    RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
}
949
950 /* -----------------------------------------------------------------------------
951  * Concurrency primitives
952  * -------------------------------------------------------------------------- */
953
/* fork# :: a -> State# RealWorld -> (# State# RealWorld, ThreadId# #)
 * Create a new thread running the given closure, hand it to the scheduler
 * on this Capability, and return its ThreadId#. */
forkzh_fast
{
  /* args: R1 = closure to spark */

  MAYBE_GC(R1_PTR, forkzh_fast);

  W_ closure;
  W_ threadid;
  closure = R1;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
                                RtsFlags_GcFlags_initialStkSize(RtsFlags), 
                                closure "ptr") [];

  /* start blocked if the current thread is blocked: copy the parent's
     TSO_BLOCKEX / TSO_INTERRUPTIBLE bits into the child so the child
     inherits the async-exception mask in force at fork time */
  StgTSO_flags(threadid) = 
     StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
                                (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));

  foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];

  // switch at the earliest opportunity, so the new thread gets to run soon
  Capability_context_switch(MyCapability()) = 1 :: CInt;
  
  RET_P(threadid);
}
980
/* forkOn# :: Int# -> a -> State# RealWorld -> (# State# RealWorld, ThreadId# #)
 * Like fork#, but pins the new thread to the given capability number
 * via scheduleThreadOn instead of scheduleThread. */
forkOnzh_fast
{
  /* args: R1 = cpu, R2 = closure to spark */

  MAYBE_GC(R2_PTR, forkOnzh_fast);

  W_ cpu;
  W_ closure;
  W_ threadid;
  cpu = R1;
  closure = R2;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
                                RtsFlags_GcFlags_initialStkSize(RtsFlags), 
                                closure "ptr") [];

  /* start blocked if the current thread is blocked: the child inherits
     the parent's TSO_BLOCKEX / TSO_INTERRUPTIBLE exception-mask bits */
  StgTSO_flags(threadid) = 
     StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
                                (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));

  foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];

  // switch at the earliest opportunity, so the new thread gets to run soon
  Capability_context_switch(MyCapability()) = 1 :: CInt;
  
  RET_P(threadid);
}
1009
/* yield#: give up the Capability voluntarily — re-enter the scheduler via
 * the no-registers yield stub. */
yieldzh_fast
{
  jump stg_yield_noregs;
}
1014
/* myThreadId#: return the running TSO itself as the ThreadId#. */
myThreadIdzh_fast
{
  /* no args. */
  RET_P(CurrentTSO);
}
1020
/* labelThread#: attach a debugging label (a C string) to a thread.
 * Only has an effect in a DEBUG RTS; otherwise it is a no-op return. */
labelThreadzh_fast
{
  /* args: 
        R1 = ThreadId#
        R2 = Addr# */
#ifdef DEBUG
  foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
#endif
  jump %ENTRY_CODE(Sp(0));
}
1031
/* isCurrentThreadBound#: ask the RTS whether the current TSO is bound to
 * an OS thread; returns 1 or 0 as an unboxed Int#. */
isCurrentThreadBoundzh_fast
{
  /* no args */
  W_ r;
  (r) = foreign "C" isThreadBound(CurrentTSO) [];
  RET_N(r);
}
1039
/* threadStatus#: return a status code for the given thread.
 * 16 = finished, 17 = died, otherwise the TSO's why_blocked value.
 * The magic numbers must stay in sync with GHC.Conc.threadStatus. */
threadStatuszh_fast
{
    /* args: R1 :: ThreadId# */
    W_ tso;
    W_ why_blocked;
    W_ what_next;
    W_ ret;

    tso = R1;
    loop:
      /* chase ThreadRelocated forwarding links to the TSO's current home */
      if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
          tso = StgTSO__link(tso);
          goto loop;
      }

    what_next   = TO_W_(StgTSO_what_next(tso));
    why_blocked = TO_W_(StgTSO_why_blocked(tso));
    // Note: these two reads are not atomic, so they might end up
    // being inconsistent.  It doesn't matter, since we
    // only return one or the other.  If we wanted to return the
    // contents of block_info too, then we'd have to do some synchronisation.

    if (what_next == ThreadComplete) {
        ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
    } else {
        if (what_next == ThreadKilled) {
            ret = 17;  // NB. magic, matches up with GHC.Conc.threadStatus
        } else {
            ret = why_blocked;
        }
    }
    RET_N(ret);
}
1073
1074 /* -----------------------------------------------------------------------------
1075  * TVar primitives
1076  * -------------------------------------------------------------------------- */
1077
/* Offset of the return address within the frame (frames here carry the
 * info pointer at Sp(0)). */
#define SP_OFF 0

// Catch retry frame ------------------------------------------------------------

/* Return into a CATCH_RETRY_FRAME: the branch we were running finished
 * normally.  Try to commit the nested transaction; on success pop the
 * frame, on failure (conflict) restart whichever branch was running. */
INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5)
{
   W_ r, frame, trec, outer;

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
   (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
   if (r != 0) {
     /* Succeeded (either first branch or second branch) */
     StgTSO_trec(CurrentTSO) = outer;
     Sp = Sp + SIZEOF_StgCatchRetryFrame;
     jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
     /* Did not commit: re-execute the same branch in a fresh nested trec */
     W_ new_trec;
     ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
     StgTSO_trec(CurrentTSO) = new_trec;
     if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
       R1 = StgCatchRetryFrame_alt_code(frame);
     } else {
       R1 = StgCatchRetryFrame_first_code(frame);
     }
     jump stg_ap_v_fast;
   }
}
1112
1113
1114 // Atomically frame ------------------------------------------------------------
1115
/* Return into an ATOMICALLY_FRAME: the transaction body (or an invariant
 * check) has finished.  Walk the queue of invariants to check, running
 * each in its own nested transaction; once the queue is empty, attempt
 * to commit.  On commit failure the whole transaction is re-run. */
INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
  W_ frame, trec, valid, next_invariant, q, outer;

  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (outer == NO_TREC) {
    /* First time back at the atomically frame -- pick up invariants */
    ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;

  } else {
    /* Second/subsequent time back at the atomically frame -- abort the
     * tx that's checking the invariant and move on to the next one */
    StgTSO_trec(CurrentTSO) = outer;
    q = StgAtomicallyFrame_next_invariant_to_check(frame);
    StgInvariantCheckQueue_my_execution(q) = trec;
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    /* Don't free trec -- it's linked from q and will be stashed in the
     * invariant if we eventually commit. */
    q = StgInvariantCheckQueue_next_queue_entry(q);
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    trec = outer;
  }

  q = StgAtomicallyFrame_next_invariant_to_check(frame);

  if (q != END_INVARIANT_CHECK_QUEUE) {
    /* We can't commit yet: another invariant to check.  Run its code in
     * a fresh nested transaction so its reads can be merged or aborted. */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;

    next_invariant = StgInvariantCheckQueue_invariant(q);
    R1 = StgAtomicInvariant_code(next_invariant);
    jump stg_ap_v_fast;

  } else {

    /* We've got no more invariants to check, try to commit */
    (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
    if (valid != 0) {
      /* Transaction was valid: commit succeeded */
      StgTSO_trec(CurrentTSO) = NO_TREC;
      Sp = Sp + SIZEOF_StgAtomicallyFrame;
      jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
      /* Transaction was not valid: try again from scratch */
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
      R1 = StgAtomicallyFrame_code(frame);
      jump stg_ap_v_fast;
    }
  }
}
1177
/* Return into an ATOMICALLY_FRAME while the TSO is blocked in retry#:
 * ask the STM whether the wait is still valid; if so keep blocking,
 * otherwise restart the transaction from the beginning. */
INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
  W_ frame, trec, valid;

  frame = Sp;

  /* The TSO is currently waiting: should we stop waiting? */
  (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
  if (valid != 0) {
    /* Previous attempt is still valid: no point trying again yet */
    jump stg_block_noregs;
  } else {
    /* Previous attempt is no longer valid: try again.  Flip the frame
     * back to the normal atomically frame before re-running the body. */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    StgHeader_info(frame) = stg_atomically_frame_info;
    R1 = StgAtomicallyFrame_code(frame);
    jump stg_ap_v_fast;
  }
}
1202
1203 // STM catch frame --------------------------------------------------------------
1204
/* NOTE(review): SP_OFF is re-#defined here with the same value as above
 * (no intervening #undef); harmless since both are 0, but a CPP
 * redefinition warning is possible depending on the preprocessor. */
#define SP_OFF 0

/* Catch frames are very similar to update frames, but when entering
 * one we just pop the frame off the stack and perform the correct
 * kind of return to the activation record underneath us on the stack.
 */

/* Return into a CATCH_STM_FRAME: the protected body completed without
 * raising.  Commit the nested transaction; if the commit fails, re-run
 * the body in a fresh nested transaction. */
INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
   {
      W_ r, frame, trec, outer;
      frame = Sp;
      trec = StgTSO_trec(CurrentTSO);
      ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
      (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
      if (r != 0) {
        /* Commit succeeded */
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchSTMFrame;
        jump Sp(SP_OFF);
      } else {
        /* Commit failed: retry the protected code */
        W_ new_trec;
        ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
        StgTSO_trec(CurrentTSO) = new_trec;
        R1 = StgCatchSTMFrame_code(frame);
        jump stg_ap_v_fast;
      }
   }
1237
1238
1239 // Primop definition ------------------------------------------------------------
1240
/* atomically# :: STM a -> State# RealWorld -> (# State# RealWorld, a #)
 * Push an ATOMICALLY_FRAME, start a top-level transaction, and run the
 * STM action.  Nested calls (atomically inside atomically) raise
 * nestedAtomically instead of deadlocking. */
atomicallyzh_fast
{
  W_ frame;
  W_ old_trec;
  W_ new_trec;
  
  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR, atomicallyzh_fast); 

  /* Args: R1 = m :: STM a */
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);

  old_trec = StgTSO_trec(CurrentTSO);

  /* Nested transactions are not allowed; raise an exception */
  if (old_trec != NO_TREC) {
     R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
     jump raisezh_fast;
  }

  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
  StgAtomicallyFrame_code(frame) = R1;
  StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;

  /* Start the memory transaction */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1276
1277
/* catchSTM# :: STM a -> (e -> STM a) -> State# RealWorld -> ...
 * Push a CATCH_STM_FRAME holding the handler and run the body inside a
 * nested transaction, so its effects can be rolled back if it raises. */
catchSTMzh_fast
{
  W_ frame;
  
  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */
  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
  StgCatchSTMFrame_handler(frame) = R2;
  StgCatchSTMFrame_code(frame) = R1;

  /* Start a nested transaction to run the body of the try block in */
  W_ cur_trec;  
  W_ new_trec;
  cur_trec = StgTSO_trec(CurrentTSO);
  /* NOTE(review): this foreign call has no trailing "[...]" live-register
     annotation, unlike every sibling call in this file — confirm this is
     intentional and not a lost annotation. */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1304
1305
/* catchRetry# :: STM a -> STM a -> State# RealWorld -> ...
 * Push a CATCH_RETRY_FRAME recording both alternatives and run the first
 * one in a nested transaction; if it calls retry#, retryzh_fast will
 * switch to the second alternative. */
catchRetryzh_fast
{
  W_ frame;
  W_ new_trec;
  W_ trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 

  /* Args: R1 :: STM a */
  /* Args: R2 :: STM a */
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;
  
  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1336
1337
/* retry# :: State# RealWorld -> (# State# RealWorld, a #)
 * Abandon the current transaction attempt.  Unwind the stack to the
 * nearest CATCH_RETRY_FRAME (switch to its alternative, or propagate)
 * or to the ATOMICALLY_FRAME (block on the TVars read so far). */
retryzh_fast
{
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

  MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
  /* findRetryFrameHelper walks the TSO stack, so Sp must be saved to the
   * TSO before the call and reloaded afterwards */
  StgTSO_sp(CurrentTSO) = Sp;
  (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
  Sp = StgTSO_sp(CurrentTSO);
  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
    // Abort the transaction attempting the current branch
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    /* condition reads as: running_alt_code is false */
    if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
      // Retry in the first branch: try the alternative
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
      R1 = StgCatchRetryFrame_alt_code(frame);
      jump stg_ap_v_fast;
    } else {
      // Retry in the alternative code: propagate the retry
      StgTSO_trec(CurrentTSO) = outer;
      Sp = Sp + SIZEOF_StgCatchRetryFrame;
      goto retry_pop_stack;
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait 
  ASSERT(frame_type == ATOMICALLY_FRAME);
  if (outer != NO_TREC) {
    // We called retry while checking invariants, so abort the current
    // invariant check (merging its TVar accesses into the parents read
    // set so we'll wait on them)
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    trec = outer;
    StgTSO_trec(CurrentTSO) = trec;
    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
  }
  ASSERT(outer == NO_TREC);

  (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
  if (r != 0) {
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
    StgHeader_info(frame) = stg_atomically_waiting_frame_info;
    Sp = frame;
    // Fix up the stack in the unregisterised case: the return convention is different.
    R3 = trec; // passing to stmWaitUnblock()
    jump stg_block_stmwait;
  } else {
    // Transaction was not valid: retry immediately
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
    jump stg_ap_v_fast;
  }
}
1409
1410
/* check# :: STM a -> State# RealWorld -> State# RealWorld
 * Register an invariant closure with the current transaction; it will be
 * checked when the enclosing atomically frame commits. */
checkzh_fast
{
  W_ trec, closure;

  /* Args: R1 = invariant closure */
  MAYBE_GC (R1_PTR, checkzh_fast); 

  trec = StgTSO_trec(CurrentTSO);
  closure = R1;
  foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
                                     trec "ptr",
                                     closure "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1426
1427
/* newTVar# :: a -> State# s -> (# State# s, TVar# s a #)
 * Allocate a fresh TVar holding the given initial value. */
newTVarzh_fast
{
  W_ tv;
  W_ new_value;

  /* Args: R1 = initialisation value */

  MAYBE_GC (R1_PTR, newTVarzh_fast); 
  new_value = R1;
  ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
  RET_P(tv);
}
1440
1441
/* readTVar# :: TVar# s a -> State# s -> (# State# s, a #)
 * Read a TVar through the current transaction record, so the read is
 * logged for later validation. */
readTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ result;

  /* Args: R1 = TVar closure */

  MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];

  RET_P(result);
}
1457
1458
/* writeTVar# :: TVar# s a -> a -> State# s -> State# s
 * Log a write to a TVar in the current transaction record; the TVar
 * itself is only updated if/when the transaction commits. */
writeTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ new_value;
  
  /* Args: R1 = TVar closure */
  /*       R2 = New value    */

  MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  new_value = R2;
  foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1476
1477
1478 /* -----------------------------------------------------------------------------
1479  * MVar primitives
1480  *
1481  * take & putMVar work as follows.  Firstly, an important invariant:
1482  *
1483  *    If the MVar is full, then the blocking queue contains only
1484  *    threads blocked on putMVar, and if the MVar is empty then the
1485  *    blocking queue contains only threads blocked on takeMVar.
1486  *
 * takeMVar:
1488  *    MVar empty : then add ourselves to the blocking queue
1489  *    MVar full  : remove the value from the MVar, and
1490  *                 blocking queue empty     : return
1491  *                 blocking queue non-empty : perform the first blocked putMVar
1492  *                                            from the queue, and wake up the
1493  *                                            thread (MVar is now full again)
1494  *
1495  * putMVar is just the dual of the above algorithm.
1496  *
1497  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1498  * the stack of the thread waiting to do the putMVar.  See
1499  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1500  * the stack layout, and the PerformPut and PerformTake macros below.
1501  *
1502  * It is important that a blocked take or put is woken up with the
1503  * take/put already performed, because otherwise there would be a
1504  * small window of vulnerability where the thread could receive an
1505  * exception and never perform its take or put, and we'd end up with a
1506  * deadlock.
1507  *
1508  * -------------------------------------------------------------------------- */
1509
/* isEmptyMVar# — 1 if the MVar's value slot holds the END_TSO_QUEUE
 * sentinel (i.e. the MVar is empty), 0 otherwise.  Unsynchronised read:
 * the answer can be stale by the time the caller sees it. */
isEmptyMVarzh_fast
{
    /* args: R1 = MVar closure */

    if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
        RET_N(1);
    } else {
        RET_N(0);
    }
}
1520
/* newMVar# — allocate an empty MVar: head, tail and value all set to the
 * END_TSO_QUEUE sentinel. */
newMVarzh_fast
{
    /* args: none */
    W_ mvar;

    ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
  
    mvar = Hp - SIZEOF_StgMVar + WDS(1);
    SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
        // MVARs start dirty: generation 0 has no mutable list
    StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
    RET_P(mvar);
}
1536
1537
/* Complete a blocked takeMVar on behalf of a woken thread: write the
 * value into the thread's stack and overwrite the return frame. */
#define PerformTake(tso, value)                         \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;

/* Complete a blocked putMVar on behalf of a woken thread: pop its
 * putMVar frame and read the value it was trying to put. */
#define PerformPut(tso,lval)                    \
    StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
    lval = W_[StgTSO_sp(tso) - WDS(1)];

/* takeMVar# — take the value from an MVar, blocking if it is empty.
 * If a putMVar is queued, perform it for the blocked thread and wake it,
 * so the MVar refills atomically with respect to this take. */
takeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */
    mvar = R1;

#if defined(THREADED_RTS)
    /* lockClosure spins until it owns the MVar's info pointer */
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif
        
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
    }

    /* If the MVar is empty, put ourselves on its blocking queue,
     * and wait until we're woken up.
     */
    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            foreign "C" setTSOLink(MyCapability() "ptr", 
                                   StgMVar_tail(mvar) "ptr",
                                   CurrentTSO) [];
        }
        StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        /* stg_block_takemvar also unlocks the MVar for us */
        R1 = mvar;
        jump stg_block_takemvar;
  }

  /* we got the value... */
  val = StgMVar_value(mvar);

  if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
  {
      /* There are putMVar(s) waiting... 
       * wake up the first thread on the queue
       */
      ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

      /* actually perform the putMVar for the thread that we just woke up */
      tso = StgMVar_head(mvar);
      PerformPut(tso,StgMVar_value(mvar));

      /* NOTE(review): assumes C-- parses this as
         (flags & TSO_DIRTY) == 0 — i.e. '&' binds tighter than '=='
         here, unlike in C.  TODO confirm against CmmParse.y precedence. */
      if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
          foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
      }

      ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                            StgMVar_head(mvar) "ptr", 1) [];
      StgMVar_head(mvar) = tso;

      if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
          StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
      }

#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
      SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
      RET_P(val);
  } 
  else
  {
      /* No further putMVars, MVar is now empty */
      StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
 
#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
      SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif

      RET_P(val);
  }
}
1630
1631
/* tryTakeMVar# — non-blocking takeMVar.
 * Returns (0, dummy) if the MVar is empty, (1, value) otherwise; never
 * queues the current thread. */
tryTakeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */

    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        /* restore the original (possibly CLEAN) info pointer: nothing changed */
        unlockClosure(mvar, info);
#endif
        /* HACK: we need a pointer to pass back, 
         * so we abuse NO_FINALIZER_closure
         */
        RET_NP(0, stg_NO_FINALIZER_closure);
    }

    if (info == stg_MVAR_CLEAN_info) {
        /* NOTE(review): no "[...]" annotation on this call, unlike the
           equivalent call in takeMVarzh_fast — confirm intentional. */
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are putMVar(s) waiting... 
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));
        /* NOTE(review): assumes parsed as (flags & TSO_DIRTY) == 0 —
           TODO confirm C-- '&'/'==' precedence differs from C here. */
        if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }

        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else 
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    
    RET_NP(1, val);
}
1703
1704
/* putMVar# — put a value into an MVar, blocking if it is full.
 * If a takeMVar is queued, hand the value straight to the blocked thread
 * and wake it, so the MVar stays empty; otherwise store the value. */
putMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;
    val  = R2;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* MVar full: queue ourselves on its blocking list and block */
    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            foreign "C" setTSOLink(MyCapability() "ptr", 
                                   StgMVar_tail(mvar) "ptr",
                                   CurrentTSO) [];
        }
        StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        /* stg_block_putmvar also unlocks the MVar for us */
        R1 = mvar;
        R2 = val;
        jump stg_block_putmvar;
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, val);
        /* NOTE(review): assumes parsed as (flags & TSO_DIRTY) == 0 —
           TODO confirm C-- '&'/'==' precedence differs from C here. */
        if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }
      
        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = val;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    
    /* ToDo: yield afterward for better communication performance? */
}
1784
1785
/* tryPutMVar# — non-blocking putMVar.
 * Returns 0 if the MVar is already full, 1 after a successful put; never
 * queues the current thread.  The value stays in R2 throughout. */
tryPutMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        /* nothing changed: restore the original info pointer */
        unlockClosure(mvar, info);
#endif
        RET_N(0);
    }
  
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
        
        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        /* NOTE(review): assumes parsed as (flags & TSO_DIRTY) == 0 —
           TODO confirm C-- '&'/'==' precedence differs from C here. */
        if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }
      
        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    
    RET_N(1);
    /* ToDo: yield afterward for better communication performance? */
}
1852
1853
1854 /* -----------------------------------------------------------------------------
1855    Stable pointer primitives
1856    -------------------------------------------------------------------------  */
1857
/* makeStableName# — return the StableName# for a heap object, allocating
 * and registering a new one in the stable-pointer table the first time
 * that object is named. */
makeStableNamezh_fast
{
    W_ index, sn_obj;

    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
  
    (index) = foreign "C" lookupStableName(R1 "ptr") [];

    /* Is there already a StableName for this heap object?
     *  stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        /* no: build one in the space ALLOC_PRIM reserved and record it */
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }
    
    RET_P(sn_obj);
}
1880
1881
/* makeStablePtr# — register R1 in the stable-pointer table and return the
 * resulting StablePtr (a non-pointer word from the GC's perspective). */
makeStablePtrzh_fast
{
    /* Args: R1 = a */
    W_ sp;
    MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
    ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
    RET_N(sp);
}
1890
/* deRefStablePtr# — index the stable-pointer table with the StablePtr
 * and return the heap object it refers to.  No validity check is made. */
deRefStablePtrzh_fast
{
    /* Args: R1 = the stable ptr */
    W_ r, sp;
    sp = R1;
    r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
    RET_P(r);
}
1899
1900 /* -----------------------------------------------------------------------------
1901    Bytecode object primitives
1902    -------------------------------------------------------------------------  */
1903
newBCOzh_fast
{
    /* R1 = instrs
       R2 = literals
       R3 = ptrs
       R4 = arity
       R5 = bitmap array

       Allocates a fresh BCO closure; the bitmap words are copied out
       of the ArrWords object in R5 into the BCO's own payload.
    */
    W_ bco, bitmap_arr, bytes, words;
    
    bitmap_arr = R5;

    /* The BCO is variable-sized: fixed header plus one word per
     * bitmap word. */
    words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
    bytes = WDS(words);

    /* All four heap arguments stay live across a possible GC. */
    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );

    bco = Hp - bytes + WDS(1);
    SET_HDR(bco, stg_BCO_info, W_[CCCS]);
    
    StgBCO_instrs(bco)     = R1;
    StgBCO_literals(bco)   = R2;
    StgBCO_ptrs(bco)       = R3;
    StgBCO_arity(bco)      = HALF_W_(R4);
    StgBCO_size(bco)       = HALF_W_(words);
    
    // Copy the arity/bitmap info into the BCO
    W_ i;
    i = 0;
for:
    if (i < StgArrWords_words(bitmap_arr)) {
        StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
        i = i + 1;
        goto for;
    }
    
    RET_P(bco);
}
1942
1943
mkApUpd0zh_fast
{
    // R1 = the BCO# for the AP
    //
    // Wraps a zero-arity BCO in an updatable AP thunk and returns it.
    W_ ap;

    // This function is *only* used to wrap zero-arity BCOs in an
    // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
    // saturated and always points directly to a FUN or BCO.
    ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
           StgBCO_arity(R1) == HALF_W_(0));

    // Reserve heap for the AP; the ticky and cost-centre counters are
    // bumped here to match the allocation.
    HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
    TICK_ALLOC_UP_THK(0, 0);
    CCCS_ALLOC(SIZEOF_StgAP);

    ap = Hp - SIZEOF_StgAP + WDS(1);
    SET_HDR(ap, stg_AP_info, W_[CCCS]);
    
    // Zero arguments: the AP's payload is just the BCO itself.
    StgAP_n_args(ap) = HALF_W_(0);
    StgAP_fun(ap) = R1;
    
    RET_P(ap);
}
1968
unpackClosurezh_fast
{
/* args: R1 = closure to analyze */
// TODO: Consider the absence of ptrs or nonptrs as a special case ?
//
// Returns (info ptr, array of pointer fields, array of non-pointer
// words) describing the closure's layout.

    W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
    info  = %GET_STD_INFO(UNTAG(R1));

    // Some closures have non-standard layout, so we omit those here.
    W_ type;
    type = TO_W_(%INFO_TYPE(info));
    switch [0 .. N_CLOSURE_TYPES] type {
    case THUNK_SELECTOR : {
        ptrs = 1;
        nptrs = 0;
        goto out;
    }
    case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
         THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
        // Non-standard layouts: report no fields rather than guess.
        ptrs = 0;
        nptrs = 0;
        goto out;
    }
    default: {
        // Standard layout: counts come straight from the info table.
        ptrs  = TO_W_(%INFO_PTRS(info)); 
        nptrs = TO_W_(%INFO_NPTRS(info));
        goto out;
    }}
out:

    // Allocate both result arrays in one ALLOC_PRIM; the ptrs array
    // sits below the nptrs array in the newly claimed space.
    W_ ptrs_arr_sz, nptrs_arr_sz;
    nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
    ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);

    ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);

    W_ clos;
    clos = UNTAG(R1);

    ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
    nptrs_arr = Hp - nptrs_arr_sz + WDS(1);

    // Copy the pointer fields into a frozen MUT_ARR_PTRS.
    SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
    p = 0;
for:
    if(p < ptrs) {
         W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
         p = p + 1;
         goto for;
    }
    
    // Copy the non-pointer words (which follow the pointers in the
    // closure's payload) into an ARR_WORDS.
    SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(nptrs_arr) = nptrs;
    p = 0;
for2:
    if(p < nptrs) {
         W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
         p = p + 1;
         goto for2;
    }
    RET_NPP(info, ptrs_arr, nptrs_arr);
}
2032
2033 /* -----------------------------------------------------------------------------
2034    Thread I/O blocking primitives
2035    -------------------------------------------------------------------------- */
2036
2037 /* Add a thread to the end of the blocked queue. (C-- version of the C
2038  * macro in Schedule.h).
2039  */
/* Requires that tso is not already on any queue (asserted below).
 * Single-threaded RTS only: callers (waitRead#/waitWrite#/async*)
 * rely on there being no locking here. */
#define APPEND_TO_BLOCKED_QUEUE(tso)                    \
    ASSERT(StgTSO__link(tso) == END_TSO_QUEUE);         \
    if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
      W_[blocked_queue_hd] = tso;                       \
    } else {                                            \
      foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \
    }                                                   \
    W_[blocked_queue_tl] = tso;
2048
waitReadzh_fast
{
    /* args: R1 */
    /* Block the current thread until R1 (presumably a file
     * descriptor -- confirm against callers) is readable.
     * Non-threaded RTS only. */
#ifdef THREADED_RTS
    foreign "C" barf("waitRead# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2065
waitWritezh_fast
{
    /* args: R1 */
    /* Block the current thread until R1 (presumably a file
     * descriptor -- confirm against callers) is writable.
     * Non-threaded RTS only; mirrors waitReadzh_fast above. */
#ifdef THREADED_RTS
    foreign "C" barf("waitWrite# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2082
2083
STRING(stg_delayzh_malloc_str, "delayzh_fast")

/* delay# : suspend the current thread for (at least) R1 microseconds.
 * On mingw32 this goes through the async I/O manager; elsewhere the
 * thread is inserted (sorted by wake-up time) into sleeping_queue.
 * Non-threaded RTS only. */
delayzh_fast
{
#ifdef mingw32_HOST_OS
    W_ ares;
    CInt reqID;
#else
    W_ t, prev, target;
#endif

#ifdef THREADED_RTS
    foreign "C" barf("delay# on threaded RTS") never returns;
#else

    /* args: R1 (microsecond delay amount) */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;

#ifdef mingw32_HOST_OS

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_delayzh_malloc_str);
    (reqID) = foreign "C" addDelayRequest(R1);
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;

    /* Having all async-blocked threads reside on the blocked_queue
     * simplifies matters, so change the status to OnDoProc and put the
     * delayed thread on the blocked_queue.
     */
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async_void;

#else

    /* Convert the microsecond delay into a number of scheduler ticks
     * and compute the absolute wake-up time. */
    W_ time;
    W_ divisor;
    (time) = foreign "C" getourtimeofday() [R1];
    divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
    if (divisor == 0) {
        /* fallback when no tick interval is configured; presumably a
         * 50ms default -- confirm against RtsFlags defaults */
        divisor = 50;
    }
    divisor = divisor * 1000;
    target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
           + time + 1; /* Add 1 as getourtimeofday rounds down */
    StgTSO_block_info(CurrentTSO) = target;

    /* Insert the new thread in the sleeping queue, which is kept
     * ordered by increasing wake-up time. */
    prev = NULL;
    t = W_[sleeping_queue];
while:
    if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
        prev = t;
        t = StgTSO__link(t);
        goto while;
    }

    StgTSO__link(CurrentTSO) = t;
    if (prev == NULL) {
        W_[sleeping_queue] = CurrentTSO;
    } else {
        foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) [];
    }
    jump stg_block_noregs;
#endif
#endif /* !THREADED_RTS */
}
2155
2156
2157 #ifdef mingw32_HOST_OS
STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")

/* asyncRead# : issue an asynchronous read request via the Windows
 * async I/O manager and block the current thread until it completes.
 * Non-threaded RTS only. */
asyncReadzh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncRead# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncReadzh_malloc_str)
                        [R1,R2,R3,R4];
    /* 0/FALSE selects a read request (cf. asyncWritezh_fast below) */
    (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2185
STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")

/* asyncWrite# : issue an asynchronous write request via the Windows
 * async I/O manager and block the current thread until it completes.
 * Non-threaded RTS only; mirrors asyncReadzh_fast above. */
asyncWritezh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncWrite# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncWritezh_malloc_str)
                        [R1,R2,R3,R4];
    /* 1/TRUE selects a write request (cf. asyncReadzh_fast above) */
    (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];

    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2213
STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")

/* asyncDoProc# : run the procedure R1 with parameter R2 on a worker
 * via the Windows async I/O manager, blocking the current thread
 * until it finishes.  Non-threaded RTS only. */
asyncDoProczh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
#else

    /* args: R1 = proc, R2 = param */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncDoProczh_malloc_str) 
                                [R1,R2];
    (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO) = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2241 #endif
2242
2243 // noDuplicate# tries to ensure that none of the thunks under
2244 // evaluation by the current thread are also under evaluation by
2245 // another thread.  It relies on *both* threads doing noDuplicate#;
2246 // the second one will get blocked if they are duplicating some work.
noDuplicatezh_fast
{
    /* Commit the thread's saved state, then let threadPaused() walk
     * its stack; the duplicate-work detection happens inside that C
     * call. */
    SAVE_THREAD_STATE();
    ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
    foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
    
    /* threadPaused can mark the thread killed -- NOTE(review):
     * inferred from this check alone; confirm against threadPaused()
     * in Threads.c. */
    if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
        jump stg_threadFinished;
    } else {
        /* Still runnable: restore the state we saved and resume at
         * the return address on top of the stack. */
        LOAD_THREAD_STATE();
        ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
        jump %ENTRY_CODE(Sp(0));
    }
}
2261
/* getApStackVal# : fetch payload word 'offset' from an AP_STACK
 * closure.  Returns (1, value) on success, or (0, the closure itself)
 * when R1 turns out not to be an AP_STACK.
 */
getApStackValzh_fast
{
   W_ frame, idx, valid, result;

   /* args: R1 = AP_STACK, R2 = offset */
   frame = R1;
   idx   = R2;

   /* Assume failure, then override if the info pointer matches.
    * Results are staged in locals so RET_NP cannot read a clobbered
    * register. */
   valid  = 0;
   result = frame;
   if (%INFO_PTR(frame) == stg_AP_STACK_info) {
        valid  = 1;
        result = StgAP_STACK_payload(frame,idx);
   }
   RET_NP(valid,result);
}