get exception names from Control.Exception.Base instead of Control.Exception
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 #ifdef __PIC__
31 #ifndef mingw32_HOST_OS
32 import __gmpz_init;
33 import __gmpz_add;
34 import __gmpz_sub;
35 import __gmpz_mul;
36 import __gmpz_gcd;
37 import __gmpn_gcd_1;
38 import __gmpn_cmp;
39 import __gmpz_tdiv_q;
40 import __gmpz_tdiv_r;
41 import __gmpz_tdiv_qr;
42 import __gmpz_fdiv_qr;
43 import __gmpz_divexact;
44 import __gmpz_and;
45 import __gmpz_xor;
46 import __gmpz_ior;
47 import __gmpz_com;
48 #endif
49 import pthread_mutex_lock;
50 import pthread_mutex_unlock;
51 #endif
52 import base_ControlziExceptionziBase_nestedAtomically_closure;
53 import EnterCriticalSection;
54 import LeaveCriticalSection;
55
56 /*-----------------------------------------------------------------------------
57   Array Primitives
58
59   Basically just new*Array - the others are all inline macros.
60
61   The size arg is always passed in R1, and the result returned in R1.
62
63   The slow entry point is for returning from a heap check, the saved
64   size argument must be re-loaded from the stack.
65   -------------------------------------------------------------------------- */
66
67 /* for objects that are *less* than the size of a word, make sure we
68  * round up to the nearest word for the size of the array.
69  */
70
/* Entry point for the newByteArray# primop.
 * R1 = requested size in bytes.  Allocates an ARR_WORDS (byte array)
 * object via allocateLocal(), rounding the payload up to whole words,
 * and returns a pointer to it. */
71 newByteArrayzh_fast
72 {
73     W_ words, payload_words, n, p;
74     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
75     n = R1;
76     payload_words = ROUNDUP_BYTES_TO_WDS(n);
77     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
78     ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
79     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
80     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
81     StgArrWords_words(p) = payload_words;
82     RET_P(p);
83 }
84
/* Entry point for the newPinnedByteArray# primop.
 * R1 = requested size in bytes.  Like newByteArrayzh_fast, but uses
 * allocatePinned() and nudges the object so the payload (the "goods")
 * starts on an 8-byte boundary. */
85 newPinnedByteArrayzh_fast
86 {
87     W_ words, payload_words, n, p;
88
89     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
90     n = R1;
91     payload_words = ROUNDUP_BYTES_TO_WDS(n);
92
93     // We want an 8-byte aligned array.  allocatePinned() gives us
94     // 8-byte aligned memory by default, but we want to align the
95     // *goods* inside the ArrWords object, so we have to check the
96     // size of the ArrWords header and adjust our size accordingly.
97     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
98     if ((SIZEOF_StgArrWords & 7) != 0) {
99         words = words + 1;
100     }
101
102     ("ptr" p) = foreign "C" allocatePinned(words) [];
103     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
104
105     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
106     // have to push the object forward one word so that the goods
107     // fall on an 8-byte boundary.
108     if ((SIZEOF_StgArrWords & 7) != 0) {
109         p = p + WDS(1);
110     }
111
112     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
113     StgArrWords_words(p) = payload_words;
114     RET_P(p);
115 }
116
/* Entry point for the newArray# primop.
 * R1 = number of elements, R2 = initial value for every slot.
 * Allocates a MUT_ARR_PTRS (starting DIRTY, i.e. on the mutable list)
 * and fills each element with R2. */
117 newArrayzh_fast
118 {
119     W_ words, n, init, arr, p;
120     /* Args: R1 = words, R2 = initialisation value */
121
122     n = R1;
123     MAYBE_GC(R2_PTR,newArrayzh_fast);
124
125     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
126     ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
127     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
128
129     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
130     StgMutArrPtrs_ptrs(arr) = n;
131
132     // Initialise all elements of the the array with the value in R2
133     init = R2;
134     p = arr + SIZEOF_StgMutArrPtrs;
135   for:
136     if (p < arr + WDS(words)) {
137         W_[p] = init;
138         p = p + WDS(1);
139         goto for;
140     }
141
142     RET_P(arr);
143 }
144
/* Entry point for the unsafeThawArray# primop.
 * R1 = frozen array.  Flips the info pointer back to
 * MUT_ARR_PTRS_DIRTY, re-adding the object to the mutable list only
 * when it is not already there (see the FROZEN0 subtlety below). */
145 unsafeThawArrayzh_fast
146 {
147   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
148   //
149   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
150   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
151   // it on the mutable list for the GC to remove (removing something from
152   // the mutable list is not easy, because the mut_list is only singly-linked).
153   // 
154   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
155   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
156   // to indicate that it is still on the mutable list.
157   //
158   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
159   // either it is on a mut_list, or it isn't.  We adopt the convention that
160   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
161   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
162   // we put it on the mutable list more than once, but it would get scavenged
163   // multiple times during GC, which would be unnecessarily slow.
164   //
165   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
166         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
167         recordMutable(R1, R1);
168         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
169         RET_P(R1);
170   } else {
171         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
172         RET_P(R1);
173   }
174 }
175
176 /* -----------------------------------------------------------------------------
177    MutVar primitives
178    -------------------------------------------------------------------------- */
179
/* Entry point for the newMutVar# primop.
 * R1 = initial contents.  Allocates a MUT_VAR (DIRTY, so it starts on
 * the mutable list) holding R1 and returns it. */
180 newMutVarzh_fast
181 {
182     W_ mv;
183     /* Args: R1 = initialisation value */
184
185     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
186
187     mv = Hp - SIZEOF_StgMutVar + WDS(1);
188     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
189     StgMutVar_var(mv) = R1;
190     
191     RET_P(mv);
192 }
193
/* Entry point for the atomicModifyMutVar# primop.
 * R1 = MutVar#, R2 = update function f :: a -> (a,b).
 * Builds three thunks in one allocation — z = f x, y = fst z (the new
 * contents) and r = snd z (the result) — so f is applied lazily and
 * shared.  In the threaded RTS the whole update is performed under
 * atomic_modify_mutvar_mutex.
 * NOTE(review): THUNK_1_SIZE/THUNK_2_SIZE/SIZE are #defined here and
 * not #undef'd afterwards, so they stay visible for the rest of the
 * file — presumably intentional; confirm before reusing the names. */
194 atomicModifyMutVarzh_fast
195 {
196     W_ mv, z, x, y, r;
197     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
198
199     /* If x is the current contents of the MutVar#, then 
200        We want to make the new contents point to
201
202          (sel_0 (f x))
203  
204        and the return value is
205          
206          (sel_1 (f x))
207
208         obviously we can share (f x).
209
210          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
211          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
212          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
213     */
214
215 #if MIN_UPD_SIZE > 1
216 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
217 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
218 #else
219 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
220 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
221 #endif
222
223 #if MIN_UPD_SIZE > 2
224 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
225 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
226 #else
227 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
228 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
229 #endif
230
231 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
232
233    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
234
235 #if defined(THREADED_RTS)
236     ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
237 #endif
238
239    x = StgMutVar_var(R1);
240
241    TICK_ALLOC_THUNK_2();
242    CCCS_ALLOC(THUNK_2_SIZE);
243    z = Hp - THUNK_2_SIZE + WDS(1);
244    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
245    LDV_RECORD_CREATE(z);
246    StgThunk_payload(z,0) = R2;
247    StgThunk_payload(z,1) = x;
248
249    TICK_ALLOC_THUNK_1();
250    CCCS_ALLOC(THUNK_1_SIZE);
251    y = z - THUNK_1_SIZE;
252    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
253    LDV_RECORD_CREATE(y);
254    StgThunk_payload(y,0) = z;
255
256    StgMutVar_var(R1) = y;
257    foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];
258
259    TICK_ALLOC_THUNK_1();
260    CCCS_ALLOC(THUNK_1_SIZE);
261    r = y - THUNK_1_SIZE;
262    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
263    LDV_RECORD_CREATE(r);
264    StgThunk_payload(r,0) = z;
265
266 #if defined(THREADED_RTS)
267     RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
268 #endif
269
270    RET_P(r);
271 }
272
273 /* -----------------------------------------------------------------------------
274    Weak Pointer Primitives
275    -------------------------------------------------------------------------- */
276
277 STRING(stg_weak_msg,"New weak pointer at %p\n")
278
/* Entry point for the mkWeak# primop.
 * R1 = key, R2 = value, R3 = finalizer (NULL means none).
 * Allocates a WEAK object and pushes it onto the global weak_ptr_list
 * so the GC will find it. */
279 mkWeakzh_fast
280 {
281   /* R1 = key
282      R2 = value
283      R3 = finalizer (or NULL)
284   */
285   W_ w;
286
287   if (R3 == NULL) {
288     R3 = stg_NO_FINALIZER_closure;
289   }
290
291   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
292
293   w = Hp - SIZEOF_StgWeak + WDS(1);
294   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
295
296   StgWeak_key(w)       = R1;
297   StgWeak_value(w)     = R2;
298   StgWeak_finalizer(w) = R3;
299
300   StgWeak_link(w)       = W_[weak_ptr_list];
301   W_[weak_ptr_list]     = w;
302
303   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
304
305   RET_P(w);
306 }
307
308
/* Entry point for the finalizeWeak# primop.
 * R1 = weak pointer.  Marks it DEAD_WEAK and returns (0, no-finalizer)
 * if it was already dead or had no finalizer, otherwise (1, finalizer)
 * so the caller can run the finalizer. */
309 finalizzeWeakzh_fast
310 {
311   /* R1 = weak ptr
312    */
313   W_ w, f;
314
315   w = R1;
316
317   // already dead?
318   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
319       RET_NP(0,stg_NO_FINALIZER_closure);
320   }
321
322   // kill it
323 #ifdef PROFILING
324   // @LDV profiling
325   // A weak pointer is inherently used, so we do not need to call
326   // LDV_recordDead_FILL_SLOP_DYNAMIC():
327   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
328   // or, LDV_recordDead():
329   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
330   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
331   // large as weak pointers, so there is no need to fill the slop, either.
332   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
333 #endif
334
335   //
336   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
337   //
338   SET_INFO(w,stg_DEAD_WEAK_info);
339   LDV_RECORD_CREATE(w);
340
341   f = StgWeak_finalizer(w);
342   StgDeadWeak_link(w) = StgWeak_link(w);
343
344   /* return the finalizer */
345   if (f == stg_NO_FINALIZER_closure) {
346       RET_NP(0,stg_NO_FINALIZER_closure);
347   } else {
348       RET_NP(1,f);
349   }
350 }
351
/* Entry point for the deRefWeak# primop.
 * R1 = weak pointer.  Returns (1, value) while the weak pointer is
 * still alive (info == WEAK), otherwise (0, w) where the second
 * component is not to be used. */
352 deRefWeakzh_fast
353 {
354   /* R1 = weak ptr */
355   W_ w, code, val;
356
357   w = R1;
358   if (GET_INFO(w) == stg_WEAK_info) {
359     code = 1;
360     val = StgWeak_value(w);
361   } else {
362     code = 0;
363     val = w;
364   }
365   RET_NP(code,val);
366 }
367
368 /* -----------------------------------------------------------------------------
369    Arbitrary-precision Integer operations.
370
371    There are some assumptions in this code that mp_limb_t == W_.  This is
372    the case for all the platforms that GHC supports, currently.
373    -------------------------------------------------------------------------- */
374
/* Entry point for the int2Integer# primop.
 * R1 = Int#.  Builds a one-limb ByteArray# holding the magnitude and
 * returns (sign-size, data): s is -1, 0 or 1, mirroring GMP's
 * sign-carrying size field (inlined mpz_set_si). */
375 int2Integerzh_fast
376 {
377    /* arguments: R1 = Int# */
378
379    W_ val, s, p;        /* to avoid aliasing */
380
381    val = R1;
382    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
383
384    p = Hp - SIZEOF_StgArrWords;
385    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
386    StgArrWords_words(p) = 1;
387
388    /* mpz_set_si is inlined here, makes things simpler */
389    if (%lt(val,0)) { 
390         s  = -1;
391         Hp(0) = -val;
392    } else { 
393      if (%gt(val,0)) {
394         s = 1;
395         Hp(0) = val;
396      } else {
397         s = 0;
398      }
399   }
400
401    /* returns (# size  :: Int#, 
402                  data  :: ByteArray# 
403                #)
404    */
405    RET_NP(s,p);
406 }
407
/* Entry point for the word2Integer# primop.
 * R1 = Word#.  Like int2Integerzh_fast but unsigned, so the returned
 * size is only ever 0 or 1. */
408 word2Integerzh_fast
409 {
410    /* arguments: R1 = Word# */
411
412    W_ val, s, p;        /* to avoid aliasing */
413
414    val = R1;
415
416    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
417
418    p = Hp - SIZEOF_StgArrWords;
419    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
420    StgArrWords_words(p) = 1;
421
422    if (val != 0) {
423         s = 1;
424         W_[Hp] = val;
425    } else {
426         s = 0;
427    }
428
429    /* returns (# size  :: Int#, 
430                  data  :: ByteArray# #)
431    */
432    RET_NP(s,p);
433 }
434
435
436 /*
437  * 'long long' primops for converting to/from Integers.
438  */
439
440 #ifdef SUPPORT_LONG_LONGS
441
/* Entry point for the int64ToInteger# primop (32-bit platforms only,
 * guarded by SUPPORT_LONG_LONGS).
 * L1 = Int64#.  Splits the value into hi/lo 32-bit limbs, stores the
 * magnitude in a 1- or 2-limb ByteArray#, and returns a signed size
 * (negated when the input was negative). */
442 int64ToIntegerzh_fast
443 {
444    /* arguments: L1 = Int64# */
445
446    L_ val;
447    W_ hi, lo, s, neg, words_needed, p;
448
449    val = L1;
450    neg = 0;
451
452    hi = TO_W_(val >> 32);
453    lo = TO_W_(val);
454
455    if ( hi == 0 || (hi == 0xFFFFFFFF && lo != 0) )  {
456        // minimum is one word
457        words_needed = 1;
458    } else { 
459        words_needed = 2;
460    }
461
462    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
463                NO_PTRS, int64ToIntegerzh_fast );
464
465    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
466    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
467    StgArrWords_words(p) = words_needed;
468
469    // negative: take the two's-complement magnitude, carrying into hi
470    // when the low limb is zero
471    if ( %lt(hi,0) ) {
472      neg = 1;
473      lo = -lo;
474      if(lo == 0) {
475        hi = -hi;
476      } else {
477        hi = -hi - 1;
478      }
479    }
480
481    if ( words_needed == 2 )  { 
482       s = 2;
483       Hp(-1) = lo;
484       Hp(0) = hi;
485    } else { 
486        if ( lo != 0 ) {
487            s = 1;
488            Hp(0) = lo;
489        } else /* val==0 */  {
490            s = 0;
491        }
492    }
493    if ( neg != 0 ) {
494         s = -s;
495    }
496
497    /* returns (# size  :: Int#, 
498                  data  :: ByteArray# #)
499    */
500    RET_NP(s,p);
501 }
/* Entry point for the word64ToInteger# primop (32-bit platforms only,
 * guarded by SUPPORT_LONG_LONGS).
 * L1 = Word64#.  Unsigned variant of int64ToIntegerzh_fast: size is
 * 0, 1 or 2, never negative. */
500 word64ToIntegerzh_fast
501 {
502    /* arguments: L1 = Word64# */
503
504    L_ val;
505    W_ hi, lo, s, words_needed, p;
506
507    val = L1;
508    hi = TO_W_(val >> 32);
509    lo = TO_W_(val);
510
511    if ( hi != 0 ) {
512       words_needed = 2;
513    } else {
514       words_needed = 1;
515    }
516
517    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
518                NO_PTRS, word64ToIntegerzh_fast );
519
520    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
521    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
522    StgArrWords_words(p) = words_needed;
523
524    if ( hi != 0 ) { 
525      s = 2;
526      Hp(-1) = lo;
527      Hp(0)  = hi;
528    } else {
529       if ( lo != 0 ) {
530         s = 1;
531         Hp(0) = lo;
532      } else /* val==0 */  {
533       s = 0;
534      }
535   }
536
537    /* returns (# size  :: Int#, 
538                  data  :: ByteArray# #)
539    */
540    RET_NP(s,p);
541 }
542
543
544
545 #endif /* SUPPORT_LONG_LONGS */
546
547 /* ToDo: this is shockingly inefficient */
548
// Static MP_INT scratch slots used by the GMP wrapper macros below.
// Only needed in the non-threaded RTS; the threaded RTS uses
// per-capability slots in the register table instead (see
// FETCH_MP_TEMP, which reads BaseReg + OFFSET_StgRegTable_rX).
549 #ifndef THREADED_RTS
550 section "bss" {
551   mp_tmp1:
552     bits8 [SIZEOF_MP_INT];
553 }
554
555 section "bss" {
556   mp_tmp2:
557     bits8 [SIZEOF_MP_INT];
558 }
559
560 section "bss" {
561   mp_result1:
562     bits8 [SIZEOF_MP_INT];
563 }
564
565 section "bss" {
566   mp_result2:
567     bits8 [SIZEOF_MP_INT];
568 }
569 #endif
570
// FETCH_MP_TEMP(X): in the threaded RTS, declare X and point it at the
// per-capability scratch slot rX in the StgRegTable; in the
// non-threaded RTS it expands to nothing and the global BSS symbol of
// the same name is used directly.
571 #ifdef THREADED_RTS
572 #define FETCH_MP_TEMP(X) \
573 W_ X; \
574 X = BaseReg + (OFFSET_StgRegTable_r ## X);
575 #else
576 #define FETCH_MP_TEMP(X) /* Nothing */
577 #endif
578
// GMP_TAKE2_RET1(name, mp_fun): define primop entry point `name` that
// takes two Integers as (size, limbs) pairs in (R1,R2) and (R3,R4),
// wraps them in stack/static MP_INT structs whose limb pointers alias
// the ByteArray# payloads, applies the binary GMP function mp_fun, and
// returns the result as a (size, ByteArray#) pair.  The returned data
// pointer is mp_result1's limb buffer rewound by SIZEOF_StgArrWords,
// i.e. the enclosing ByteArray# object.
// NOTE(review): FETCH_MP_TEMP(mp_result1) has no trailing semicolon,
// unlike its siblings — appears harmless since the macro expansion is
// self-terminating, but confirm against the upstream file.
579 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
580 name                                                                    \
581 {                                                                       \
582   CInt s1, s2;                                                          \
583   W_ d1, d2;                                                            \
584   FETCH_MP_TEMP(mp_tmp1);                                               \
585   FETCH_MP_TEMP(mp_tmp2);                                               \
586   FETCH_MP_TEMP(mp_result1)                                             \
587   FETCH_MP_TEMP(mp_result2);                                            \
588                                                                         \
589   /* call doYouWantToGC() */                                            \
590   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
591                                                                         \
592   s1 = W_TO_INT(R1);                                                    \
593   d1 = R2;                                                              \
594   s2 = W_TO_INT(R3);                                                    \
595   d2 = R4;                                                              \
596                                                                         \
597   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
598   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
599   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
600   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
601   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
602   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
603                                                                         \
604   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
605                                                                         \
606   /* Perform the operation */                                           \
607   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
608                                                                         \
609   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
610          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
611 }
612
// GMP_TAKE1_RET1(name, mp_fun): like GMP_TAKE2_RET1 but for a unary
// GMP function — one Integer in (R1,R2), one (size, ByteArray#)
// result.
613 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
614 name                                                                    \
615 {                                                                       \
616   CInt s1;                                                              \
617   W_ d1;                                                                \
618   FETCH_MP_TEMP(mp_tmp1);                                               \
619   FETCH_MP_TEMP(mp_result1)                                             \
620                                                                         \
621   /* call doYouWantToGC() */                                            \
622   MAYBE_GC(R2_PTR, name);                                               \
623                                                                         \
624   d1 = R2;                                                              \
625   s1 = W_TO_INT(R1);                                                    \
626                                                                         \
627   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
628   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
629   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
630                                                                         \
631   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
632                                                                         \
633   /* Perform the operation */                                           \
634   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
635                                                                         \
636   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
637          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
638 }
639
// GMP_TAKE2_RET2(name, mp_fun): like GMP_TAKE2_RET1 but for GMP
// functions producing two results (e.g. quotient and remainder from
// __gmpz_tdiv_qr) — returns two (size, ByteArray#) pairs via RET_NPNP.
640 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
641 name                                                                                    \
642 {                                                                                       \
643   CInt s1, s2;                                                                          \
644   W_ d1, d2;                                                                            \
645   FETCH_MP_TEMP(mp_tmp1);                                                               \
646   FETCH_MP_TEMP(mp_tmp2);                                                               \
647   FETCH_MP_TEMP(mp_result1)                                                             \
648   FETCH_MP_TEMP(mp_result2)                                                             \
649                                                                                         \
650   /* call doYouWantToGC() */                                                            \
651   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
652                                                                                         \
653   s1 = W_TO_INT(R1);                                                                    \
654   d1 = R2;                                                                              \
655   s2 = W_TO_INT(R3);                                                                    \
656   d2 = R4;                                                                              \
657                                                                                         \
658   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
659   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
660   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
661   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
662   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
663   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
664                                                                                         \
665   foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
666   foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
667                                                                                         \
668   /* Perform the operation */                                                           \
669   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
670                                                                                         \
671   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
672            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
673            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
674            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
675 }
676
// Instantiate the wrapper macros to produce the actual Integer
// arithmetic primop entry points, each delegating to the corresponding
// GMP routine.
677 GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
678 GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
679 GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
680 GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
681 GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
682 GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
683 GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
684 GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
685 GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
686 GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
687 GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)
688
689 GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
690 GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
691
// Single-word scratch limb for gcdIntzh_fast / the decode* primops
// (non-threaded RTS only; threaded RTS uses FETCH_MP_TEMP).
692 #ifndef THREADED_RTS
693 section "bss" {
694   mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
695 }
696 #endif
697
/* Entry point for the gcdInt# primop.
 * R1, R2 = the two Int#s.  Stages R1 as a one-limb number in the
 * scratch word and calls GMP's __gmpn_gcd_1. */
698 gcdIntzh_fast
699 {
700     /* R1 = the first Int#; R2 = the second Int# */
701     W_ r; 
702     FETCH_MP_TEMP(mp_tmp_w);
703
704     W_[mp_tmp_w] = R1;
705     (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
706
707     R1 = r;
708     /* Result parked in R1, return via info-pointer at TOS */
709     jump %ENTRY_CODE(Sp(0));
710 }
711
712
/* Entry point for the gcdIntegerInt# primop: gcd of an Integer
 * (size R1, limbs R2) and an Int# (R3), via __gmpn_gcd_1 over the
 * Integer's limb array. */
713 gcdIntegerIntzh_fast
714 {
715     /* R1 = s1; R2 = d1; R3 = the int */
716     W_ s1;
717     (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
718     R1 = s1;
719     
720     /* Result parked in R1, return via info-pointer at TOS */
721     jump %ENTRY_CODE(Sp(0));
722 }
723
724
/* Entry point for the cmpIntegerInt# primop: compare an Integer
 * (size R1, limbs R2) against an Int# (R3).  Returns in R1 a value
 * whose sign is the ordering (negative / zero / positive), following
 * GMP's __gmpz_cmp_si: compare signed sizes first, then the single
 * limb as an unsigned mp_limb_t. */
725 cmpIntegerIntzh_fast
726 {
727     /* R1 = s1; R2 = d1; R3 = the int */
728     W_ usize, vsize, v_digit, u_digit;
729
730     usize = R1;
731     vsize = 0;
732     v_digit = R3;
733
734     // paraphrased from __gmpz_cmp_si() in the GMP sources
735     if (%gt(v_digit,0)) {
736         vsize = 1;
737     } else { 
738         if (%lt(v_digit,0)) {
739             vsize = -1;
740             v_digit = -v_digit;
741         }
742     }
743
744     if (usize != vsize) {
745         R1 = usize - vsize; 
746         jump %ENTRY_CODE(Sp(0));
747     }
748
749     if (usize == 0) {
750         R1 = 0; 
751         jump %ENTRY_CODE(Sp(0));
752     }
753
754     u_digit = W_[BYTE_ARR_CTS(R2)];
755
756     if (u_digit == v_digit) {
757         R1 = 0; 
758         jump %ENTRY_CODE(Sp(0));
759     }
760
761     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
762         R1 = usize; 
763     } else {
764         R1 = -usize; 
765     }
766
767     jump %ENTRY_CODE(Sp(0));
768 }
769
/* Entry point for the cmpInteger# primop: compare two Integers
 * (R1,R2) vs (R3,R4).  Returns in R1 a value whose sign is the
 * ordering.  Follows GMP's __gmpz_cmp: signed sizes decide first; on a
 * tie, __gmpn_cmp compares the limb arrays and its sign is flipped for
 * negative numbers. */
770 cmpIntegerzh_fast
771 {
772     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
773     W_ usize, vsize, size, up, vp;
774     CInt cmp;
775
776     // paraphrased from __gmpz_cmp() in the GMP sources
777     usize = R1;
778     vsize = R3;
779
780     if (usize != vsize) {
781         R1 = usize - vsize; 
782         jump %ENTRY_CODE(Sp(0));
783     }
784
785     if (usize == 0) {
786         R1 = 0; 
787         jump %ENTRY_CODE(Sp(0));
788     }
789
790     if (%lt(usize,0)) { // NB. not <, which is unsigned
791         size = -usize;
792     } else {
793         size = usize;
794     }
795
796     up = BYTE_ARR_CTS(R2);
797     vp = BYTE_ARR_CTS(R4);
798
799     (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];
800
801     if (cmp == 0 :: CInt) {
802         R1 = 0; 
803         jump %ENTRY_CODE(Sp(0));
804     }
805
806     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
807         R1 = 1;
808     } else {
809         R1 = (-1); 
810     }
811     /* Result parked in R1, return via info-pointer at TOS */
812     jump %ENTRY_CODE(Sp(0));
813 }
814
/* Entry point for the integer2Int# primop.
 * R1 = size, R2 = limb array.  Returns the least-significant limb,
 * negated when the Integer is negative (truncating conversion). */
815 integer2Intzh_fast
816 {
817     /* R1 = s; R2 = d */
818     W_ r, s;
819
820     s = R1;
821     if (s == 0) {
822         r = 0;
823     } else {
824         r = W_[R2 + SIZEOF_StgArrWords];
825         if (%lt(s,0)) {
826             r = -r;
827         }
828     }
829     /* Result parked in R1, return via info-pointer at TOS */
830     R1 = r;
831     jump %ENTRY_CODE(Sp(0));
832 }
833
/* Entry point for the integer2Word# primop.  Identical logic to
 * integer2Intzh_fast — the low limb, two's-complemented for negative
 * inputs — since the truncation is the same bit pattern either way. */
834 integer2Wordzh_fast
835 {
836   /* R1 = s; R2 = d */
837   W_ r, s;
838
839   s = R1;
840   if (s == 0) {
841     r = 0;
842   } else {
843     r = W_[R2 + SIZEOF_StgArrWords];
844     if (%lt(s,0)) {
845         r = -r;
846     }
847   }
848   /* Result parked in R1, return via info-pointer at TOS */
849   R1 = r;
850   jump %ENTRY_CODE(Sp(0));
851 }
852
/* Entry point for the decodeFloat# primop.
 * F1 = Float#.  Delegates to the C helper __decodeFloat, which writes
 * the mantissa limb into the freshly allocated ByteArray# (via the
 * MP_INT limb pointer) and the exponent into mp_tmp_w.
 * Returns (exponent, mantissa size, mantissa ByteArray#).
 * FIX: restored the opening brace after the entry-point label — it was
 * lost in transcription (cf. every other entry point in this file). */
853 decodeFloatzh_fast
854 {
855     W_ p;
856     F_ arg;
857     FETCH_MP_TEMP(mp_tmp1);
858     FETCH_MP_TEMP(mp_tmp_w);
859     
860     /* arguments: F1 = Float# */
861     arg = F1;
862     
863     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
864     
865     /* Be prepared to tell Lennart-coded __decodeFloat
866        where mantissa._mp_d can be put (it does not care about the rest) */
867     p = Hp - SIZEOF_StgArrWords;
868     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
869     StgArrWords_words(p) = 1;
870     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
871     
872     /* Perform the operation */
873     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
874     
875     /* returns: (Int# (expn), Int#, ByteArray#) */
876     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
877 }
878
/* Entry point for the decodeFloat_Int# primop.
 * F1 = Float#.  No heap allocation: the C helper __decodeFloat_Int
 * writes mantissa and exponent directly into two scratch words, which
 * are returned as two Int#s.
 * FIX: restored the opening brace after the entry-point label — it was
 * lost in transcription (cf. every other entry point in this file). */
879 decodeFloatzuIntzh_fast
880 {
881     W_ p;
882     F_ arg;
883     FETCH_MP_TEMP(mp_tmp1);
884     FETCH_MP_TEMP(mp_tmp_w);
885     
886     /* arguments: F1 = Float# */
887     arg = F1;
888     
889     /* Perform the operation */
890     foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
891     
892     /* returns: (Int# (mantissa), Int# (exponent)) */
893     RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
894 }
895
896 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
897 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
898
/* Entry point for the decodeDouble# primop.
 * D1 = Double#.  Like decodeFloatzh_fast but the mantissa ByteArray#
 * is sized to hold a full double mantissa (DOUBLE_MANTISSA_SIZE
 * bytes), filled by the C helper __decodeDouble.
 * Returns (exponent, mantissa size, mantissa ByteArray#).
 * FIX: restored the opening brace after the entry-point label — it was
 * lost in transcription (cf. every other entry point in this file). */
899 decodeDoublezh_fast
900 {
901     D_ arg;
902     W_ p;
903     FETCH_MP_TEMP(mp_tmp1);
904     FETCH_MP_TEMP(mp_tmp_w);
905
906     /* arguments: D1 = Double# */
907     arg = D1;
908
909     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
910     
911     /* Be prepared to tell Lennart-coded __decodeDouble
912        where mantissa.d can be put (it does not care about the rest) */
913     p = Hp - ARR_SIZE + WDS(1);
914     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
915     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
916     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
917
918     /* Perform the operation */
919     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
920     
921     /* returns: (Int# (expn), Int#, ByteArray#) */
922     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
923 }
924
/* Entry point for the decodeDouble_2Int# primop.
 * D1 = Double#.  No heap allocation: the C helper __decodeDouble_2Int
 * writes sign, high/low mantissa words and exponent into four scratch
 * words, returned as (Int#, Word#, Word#, Int#).
 * FIX: restored the opening brace after the entry-point label — it was
 * lost in transcription (cf. every other entry point in this file). */
925 decodeDoublezu2Intzh_fast
926 {
927     D_ arg;
928     W_ p;
929     FETCH_MP_TEMP(mp_tmp1);
930     FETCH_MP_TEMP(mp_tmp2);
931     FETCH_MP_TEMP(mp_result1);
932     FETCH_MP_TEMP(mp_result2);
933
934     /* arguments: D1 = Double# */
935     arg = D1;
936
937     /* Perform the operation */
938     foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
939                                     mp_result1 "ptr", mp_result2 "ptr",
940                                     arg) [];
941
942     /* returns:
943        (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
944     RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
945 }
946
947 /* -----------------------------------------------------------------------------
948  * Concurrency primitives
949  * -------------------------------------------------------------------------- */
950
/* Entry point for the fork# primop.
 * R1 = IO action to run in the new thread.  Creates a fresh TSO via
 * createIOThread, copies the parent's blocked-exceptions flags to it,
 * hands it to the scheduler, and requests an immediate context switch
 * so the child gets a chance to run. */
951 forkzh_fast
952 {
953   /* args: R1 = closure to spark */
954
955   MAYBE_GC(R1_PTR, forkzh_fast);
956
957   W_ closure;
958   W_ threadid;
959   closure = R1;
960
961   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
962                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
963                                 closure "ptr") [];
964
965   /* start blocked if the current thread is blocked */
966   StgTSO_flags(threadid) = 
967      StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
968                                 (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));
969
970   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
971
972   // switch at the earliest opportunity
973   CInt[context_switch] = 1 :: CInt;
974   
975   RET_P(threadid);
976 }
977
/* forkOn# primop: like fork#, but pins the new thread to a given capability.
 *
 * Arguments: R1 = target cpu/capability number, R2 = closure to run.
 * Returns:   the new ThreadId# via RET_P.
 *
 * Identical to forkzh_fast except the thread is handed to
 * scheduleThreadOn() with the requested cpu instead of scheduleThread().
 */
forkOnzh_fast
{
  /* args: R1 = cpu, R2 = closure to spark */

  MAYBE_GC(R2_PTR, forkOnzh_fast);

  W_ cpu;
  W_ closure;
  W_ threadid;
  cpu = R1;
  closure = R2;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
                                RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                closure "ptr") [];

  /* start blocked if the current thread is blocked: copy the parent's
     TSO_BLOCKEX / TSO_INTERRUPTIBLE flag bits into the child */
  StgTSO_flags(threadid) =
     StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) &
                                (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));

  foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];

  // switch at the earliest opportunity
  CInt[context_switch] = 1 :: CInt;

  RET_P(threadid);
}
1006
/* yield# primop: give up the capability; just defer to the generic
 * yield entry point, which returns to the scheduler. */
yieldzh_fast
{
  jump stg_yield_noregs;
}
1011
/* myThreadId# primop: the ThreadId# of a thread is simply a pointer to
 * its TSO, so return CurrentTSO. */
myThreadIdzh_fast
{
  /* no args. */
  RET_P(CurrentTSO);
}
1017
/* labelThread# primop: attach a debug label to a thread.
 *
 * Arguments: R1 = ThreadId#, R2 = Addr# of the label string.
 * The label is only recorded in DEBUG builds; otherwise this is a no-op
 * that falls straight through to the return address on the stack.
 */
labelThreadzh_fast
{
  /* args:
        R1 = ThreadId#
        R2 = Addr# */
#ifdef DEBUG
  foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
#endif
  jump %ENTRY_CODE(Sp(0));
}
1028
/* isCurrentThreadBound# primop: ask the RTS whether the current thread
 * is bound to an OS thread; returns the C result (0 or non-zero) as an
 * unboxed Int#. */
isCurrentThreadBoundzh_fast
{
  /* no args */
  W_ r;
  (r) = foreign "C" isThreadBound(CurrentTSO) [];
  RET_N(r);
}
1036
/* threadStatus# primop: report the scheduling status of a thread.
 *
 * Argument:  R1 = ThreadId# (a TSO pointer).
 * Returns:   an Int# status code via RET_N: 16 for ThreadComplete,
 *            17 for ThreadKilled, otherwise the TSO's why_blocked value.
 *            The numeric codes must stay in sync with GHC.Conc.threadStatus.
 */
threadStatuszh_fast
{
    /* args: R1 :: ThreadId# */
    W_ tso;
    W_ why_blocked;
    W_ what_next;
    W_ ret;

    tso = R1;
    loop:
      // follow the forwarding chain left by the GC when a TSO is moved
      if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
          tso = StgTSO__link(tso);
          goto loop;
      }

    what_next   = TO_W_(StgTSO_what_next(tso));
    why_blocked = TO_W_(StgTSO_why_blocked(tso));
    // Note: these two reads are not atomic, so they might end up
    // being inconsistent.  It doesn't matter, since we
    // only return one or the other.  If we wanted to return the
    // contents of block_info too, then we'd have to do some synchronisation.

    if (what_next == ThreadComplete) {
        ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
    } else {
        if (what_next == ThreadKilled) {
            ret = 17;
        } else {
            ret = why_blocked;
        }
    }
    RET_N(ret);
}
1070
1071 /* -----------------------------------------------------------------------------
1072  * TVar primitives
1073  * -------------------------------------------------------------------------- */
1074
1075 #define SP_OFF 0
1076
1077 // Catch retry frame ------------------------------------------------------------
1078
/* Return frame pushed by catchRetry#.  On return, try to commit the
 * nested transaction that ran one of the two branches:
 *   - commit succeeds: pop the frame and return to the frame below;
 *   - commit fails: open a fresh nested transaction and re-run whichever
 *     branch was executing (first_code or alt_code, per running_alt_code).
 */
INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5)
{
   W_ r, frame, trec, outer;

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
   (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
   if (r != 0) {
     /* Succeeded (either first branch or second branch) */
     StgTSO_trec(CurrentTSO) = outer;
     Sp = Sp + SIZEOF_StgCatchRetryFrame;
     jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
     /* Did not commit: re-execute */
     W_ new_trec;
     ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
     StgTSO_trec(CurrentTSO) = new_trec;
     if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
       R1 = StgCatchRetryFrame_alt_code(frame);
     } else {
       R1 = StgCatchRetryFrame_first_code(frame);
     }
     jump stg_ap_v_fast;
   }
}
1109
1110
1111 // Atomically frame ------------------------------------------------------------
1112
/* Return frame pushed by atomically#.  Control returns here both when the
 * transaction body finishes and after each invariant check finishes.
 *
 * outer == NO_TREC means this is the first return (body just completed):
 * collect the invariants to check.  Otherwise we are returning from an
 * invariant check: abort that nested trec and advance to the next entry
 * in the queue.  When the queue is empty, attempt the real commit; on
 * failure, restart the whole transaction from scratch.
 */
INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
  W_ frame, trec, valid, next_invariant, q, outer;

  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (outer == NO_TREC) {
    /* First time back at the atomically frame -- pick up invariants */
    ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;

  } else {
    /* Second/subsequent time back at the atomically frame -- abort the
     * tx that's checking the invariant and move on to the next one */
    StgTSO_trec(CurrentTSO) = outer;
    q = StgAtomicallyFrame_next_invariant_to_check(frame);
    StgInvariantCheckQueue_my_execution(q) = trec;
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    /* Don't free trec -- it's linked from q and will be stashed in the
     * invariant if we eventually commit. */
    q = StgInvariantCheckQueue_next_queue_entry(q);
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    trec = outer;
  }

  q = StgAtomicallyFrame_next_invariant_to_check(frame);

  if (q != END_INVARIANT_CHECK_QUEUE) {
    /* We can't commit yet: another invariant to check */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;

    next_invariant = StgInvariantCheckQueue_invariant(q);
    R1 = StgAtomicInvariant_code(next_invariant);
    jump stg_ap_v_fast;

  } else {

    /* We've got no more invariants to check, try to commit */
    (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
    if (valid != 0) {
      /* Transaction was valid: commit succeeded */
      StgTSO_trec(CurrentTSO) = NO_TREC;
      Sp = Sp + SIZEOF_StgAtomicallyFrame;
      jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
      /* Transaction was not valid: try again from the beginning */
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
      R1 = StgAtomicallyFrame_code(frame);
      jump stg_ap_v_fast;
    }
  }
}
1174
/* Variant of the atomically frame used while the TSO is blocked in
 * retry#, waiting for one of its TVars to change.  On wake-up, ask the
 * STM whether the previous attempt is still valid: if so keep blocking,
 * otherwise start a fresh transaction, restore the normal atomically
 * frame info pointer, and re-run the transaction body.
 */
INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
  W_ frame, trec, valid;

  frame = Sp;

  /* The TSO is currently waiting: should we stop waiting? */
  (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
  if (valid != 0) {
    /* Previous attempt is still valid: no point trying again yet */
    jump stg_block_noregs;
  } else {
    /* Previous attempt is no longer valid: try again */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    StgHeader_info(frame) = stg_atomically_frame_info;
    R1 = StgAtomicallyFrame_code(frame);
    jump stg_ap_v_fast;
  }
}
1199
1200 // STM catch frame --------------------------------------------------------------
1201
1202 #define SP_OFF 0
1203
1204 /* Catch frames are very similar to update frames, but when entering
1205  * one we just pop the frame off the stack and perform the correct
1206  * kind of return to the activation record underneath us on the stack.
1207  */
1208
/* Return frame pushed by catchSTM#.  The protected code ran in a nested
 * transaction; on normal return, try to commit that nested transaction:
 *   - commit succeeds: pop the frame and return to the caller;
 *   - commit fails: open a fresh nested transaction and re-run the
 *     protected code.
 */
INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
   {
      W_ r, frame, trec, outer;
      frame = Sp;
      trec = StgTSO_trec(CurrentTSO);
      ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
      (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
      if (r != 0) {
        /* Commit succeeded */
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchSTMFrame;
        jump Sp(SP_OFF);
      } else {
        /* Commit failed: re-run the protected code in a new nested trec */
        W_ new_trec;
        ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
        StgTSO_trec(CurrentTSO) = new_trec;
        R1 = StgCatchSTMFrame_code(frame);
        jump stg_ap_v_fast;
      }
   }
1234
1235
1236 // Primop definition ------------------------------------------------------------
1237
/* atomically# primop: run an STM action as a top-level transaction.
 *
 * Argument:  R1 = m :: STM a.
 * Pushes an stg_atomically_frame, starts a top-level transaction, and
 * tail-calls into the action.  Nested use (calling atomically inside a
 * transaction) raises the nestedAtomically exception instead.
 */
atomicallyzh_fast
{
  W_ frame;
  W_ old_trec;
  W_ new_trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR, atomicallyzh_fast);

  /* Args: R1 = m :: STM a */
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);

  old_trec = StgTSO_trec(CurrentTSO);

  /* Nested transactions are not allowed; raise an exception */
  if (old_trec != NO_TREC) {
     R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
     jump raisezh_fast;
  }

  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
  StgAtomicallyFrame_code(frame) = R1;
  StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;

  /* Start the memory transaction */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1273
1274
/* catchSTM# primop: run an STM action with an exception handler.
 *
 * Arguments: R1 = body :: STM a, R2 = handler :: Exception -> STM a.
 * Pushes an stg_catch_stm_frame recording both closures, opens a nested
 * transaction for the body, and tail-calls into the body.
 */
catchSTMzh_fast
{
  W_ frame;

  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */
  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
  StgCatchSTMFrame_handler(frame) = R2;
  StgCatchSTMFrame_code(frame) = R1;

  /* Start a nested transaction to run the body of the try block in */
  W_ cur_trec;
  W_ new_trec;
  cur_trec = StgTSO_trec(CurrentTSO);
  /* NOTE(review): this foreign call has no trailing "[]" live-register
     annotation, unlike every sibling stmStartTransaction call -- confirm
     this is intentional. */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1301
1302
/* catchRetry# primop: run the first STM action, falling back to the
 * second if the first calls retry#.
 *
 * Arguments: R1 = first :: STM a, R2 = alternative :: STM a.
 * Opens a nested transaction for the first branch, pushes an
 * stg_catch_retry_frame (running_alt_code = false) recording both
 * branches, and tail-calls into the first branch.
 */
catchRetryzh_fast
{
  W_ frame;
  W_ new_trec;
  W_ trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Args: R1 :: STM a */
  /* Args: R2 :: STM a */
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1333
1334
/* retry# primop: abandon the current transaction attempt.
 *
 * Walks the stack (via findRetryFrameHelper) to the nearest
 * CATCH_RETRY_FRAME or ATOMICALLY_FRAME.  At a catch-retry frame, the
 * current branch's transaction is aborted and either the alternative
 * branch is started or the retry propagates outward.  At the atomically
 * frame, stmWait registers the TSO on its read set's TVar queues and the
 * thread blocks until one of them changes.
 */
retryzh_fast
{
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

  MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
  StgTSO_sp(CurrentTSO) = Sp;
  (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
  Sp = StgTSO_sp(CurrentTSO);
  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
    // Abort the transaction attempting the current branch
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    /* NOTE(review): the condition below applies '!' to the field and then
       compares with 0::I32 -- presumably equivalent to "not running the
       alternative code"; confirm against the C-- operator semantics. */
    if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
      // Retry in the first branch: try the alternative
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
      R1 = StgCatchRetryFrame_alt_code(frame);
      jump stg_ap_v_fast;
    } else {
      // Retry in the alternative code: propagate the retry
      StgTSO_trec(CurrentTSO) = outer;
      Sp = Sp + SIZEOF_StgCatchRetryFrame;
      goto retry_pop_stack;
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait
  ASSERT(frame_type == ATOMICALLY_FRAME);
  if (outer != NO_TREC) {
    // We called retry while checking invariants, so abort the current
    // invariant check (merging its TVar accesses into the parents read
    // set so we'll wait on them)
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    trec = outer;
    StgTSO_trec(CurrentTSO) = trec;
    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
  }
  ASSERT(outer == NO_TREC);

  (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
  if (r != 0) {
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
    StgHeader_info(frame) = stg_atomically_waiting_frame_info;
    Sp = frame;
    // Fix up the stack in the unregisterised case: the return convention is different.
    R3 = trec; // passing to stmWaitUnblock()
    jump stg_block_stmwait;
  } else {
    // Transaction was not valid: retry immediately
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
    jump stg_ap_v_fast;
  }
}
1406
1407
/* check# primop: register an STM invariant with the current transaction.
 *
 * Argument:  R1 = the invariant closure.
 * Adds the closure to the current trec's invariant-check list (checked
 * at commit time by the atomically frame) and returns to the caller.
 */
checkzh_fast
{
  W_ trec, closure;

  /* Args: R1 = invariant closure */
  MAYBE_GC (R1_PTR, checkzh_fast);

  trec = StgTSO_trec(CurrentTSO);
  closure = R1;
  foreign "C" stmAddInvariantToCheck(MyCapability() "ptr",
                                     trec "ptr",
                                     closure "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1423
1424
/* newTVar# primop: allocate a fresh TVar holding the value in R1.
 * Returns the new TVar# via RET_P. */
newTVarzh_fast
{
  W_ tv;
  W_ new_value;

  /* Args: R1 = initialisation value */

  MAYBE_GC (R1_PTR, newTVarzh_fast);  // stmNewTVar may allocate
  new_value = R1;
  ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
  RET_P(tv);
}
1437
1438
/* readTVar# primop: read a TVar within the current transaction.
 *
 * Argument:  R1 = the TVar#.
 * Delegates to stmReadTVar, which records the read in the current trec,
 * and returns the value seen via RET_P.
 */
readTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ result;

  /* Args: R1 = TVar closure */

  MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];

  RET_P(result);
}
1454
1455
/* writeTVar# primop: write a TVar within the current transaction.
 *
 * Arguments: R1 = the TVar#, R2 = the new value.
 * Delegates to stmWriteTVar, which records the write in the current
 * trec, then returns to the caller.
 */
writeTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ new_value;

  /* Args: R1 = TVar closure */
  /*       R2 = New value    */

  MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  new_value = R2;
  foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1473
1474
1475 /* -----------------------------------------------------------------------------
1476  * MVar primitives
1477  *
1478  * take & putMVar work as follows.  Firstly, an important invariant:
1479  *
1480  *    If the MVar is full, then the blocking queue contains only
1481  *    threads blocked on putMVar, and if the MVar is empty then the
1482  *    blocking queue contains only threads blocked on takeMVar.
1483  *
 * takeMVar:
1485  *    MVar empty : then add ourselves to the blocking queue
1486  *    MVar full  : remove the value from the MVar, and
1487  *                 blocking queue empty     : return
1488  *                 blocking queue non-empty : perform the first blocked putMVar
1489  *                                            from the queue, and wake up the
1490  *                                            thread (MVar is now full again)
1491  *
1492  * putMVar is just the dual of the above algorithm.
1493  *
1494  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1495  * the stack of the thread waiting to do the putMVar.  See
1496  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1497  * the stack layout, and the PerformPut and PerformTake macros below.
1498  *
1499  * It is important that a blocked take or put is woken up with the
1500  * take/put already performed, because otherwise there would be a
1501  * small window of vulnerability where the thread could receive an
1502  * exception and never perform its take or put, and we'd end up with a
1503  * deadlock.
1504  *
1505  * -------------------------------------------------------------------------- */
1506
/* isEmptyMVar# primop: return 1 if the MVar in R1 currently holds no
 * value (its value slot is the END_TSO_QUEUE sentinel), 0 otherwise.
 * Unsynchronised read: the answer may be stale by the time it is used. */
isEmptyMVarzh_fast
{
    /* args: R1 = MVar closure */

    if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
        RET_N(1);
    } else {
        RET_N(0);
    }
}
1517
/* newMVar# primop: allocate a fresh, empty MVar.
 * The head/tail blocking-queue fields and the value slot are all set to
 * the END_TSO_QUEUE sentinel (empty queue / no value).  Returns the new
 * MVar# via RET_P. */
newMVarzh_fast
{
    /* args: none */
    W_ mvar;

    ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );

    mvar = Hp - SIZEOF_StgMVar + WDS(1);
    SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
        // MVARs start dirty: generation 0 has no mutable list
    StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
    RET_P(mvar);
}
1533
1534
/* Complete a blocked takeMVar on tso's behalf: store the taken value and
 * a return-info pointer into the two words at the top of tso's stack
 * (layout per stg_block_takemvar in HeapStackCheck.c -- see comment above). */
#define PerformTake(tso, value)                         \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;

/* Complete a blocked putMVar on tso's behalf: pop its 3-word putMVar
 * frame and read out the value it was trying to put into lval
 * (layout per stg_block_putmvar in HeapStackCheck.c). */
#define PerformPut(tso,lval)                    \
    StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
    lval = W_[StgTSO_sp(tso) - WDS(1)];
1542
/* takeMVar# primop.
 *
 * Argument:  R1 = the MVar#.
 * If the MVar is empty, enqueue the current thread on its blocking queue
 * and block (stg_block_takemvar).  If full, take the value; if a putMVar
 * is queued, perform that put on the waiter's behalf and wake it so the
 * MVar becomes full again.  Returns the taken value via RET_P.
 */
takeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
    }

    /* If the MVar is empty, put ourselves on its blocking queue,
     * and wait until we're woken up.
     */
    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            foreign "C" setTSOLink(MyCapability() "ptr",
                                   StgMVar_tail(mvar) "ptr",
                                   CurrentTSO) [];
        }
        StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;

        R1 = mvar;
        jump stg_block_takemvar;
  }

  /* we got the value... */
  val = StgMVar_value(mvar);

  if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
  {
      /* There are putMVar(s) waiting...
       * wake up the first thread on the queue
       */
      ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

      /* actually perform the putMVar for the thread that we just woke up */
      tso = StgMVar_head(mvar);
      PerformPut(tso,StgMVar_value(mvar));

      /* NOTE(review): relies on C-- operator precedence where '&' binds
         tighter than '==' (unlike C) -- confirm against CmmParse.y */
      if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
          foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
      }

      ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr",
                                            StgMVar_head(mvar) "ptr", 1) [];
      StgMVar_head(mvar) = tso;

      if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
          StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
      }

#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
      SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
      RET_P(val);
  }
  else
  {
      /* No further putMVars, MVar is now empty */
      StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;

#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
      SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif

      RET_P(val);
  }
}
1627
1628
/* tryTakeMVar# primop: non-blocking variant of takeMVar#.
 *
 * Argument:  R1 = the MVar#.
 * Returns (0, NO_FINALIZER_closure) if the MVar is empty (the dummy
 * pointer is just a placeholder for the unboxed-tuple slot), otherwise
 * (1, value), performing a queued putMVar on a waiter's behalf as in
 * takeMVarzh_fast.
 */
tryTakeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */

    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        unlockClosure(mvar, info);
#endif
        /* HACK: we need a pointer to pass back,
         * so we abuse NO_FINALIZER_closure
         */
        RET_NP(0, stg_NO_FINALIZER_closure);
    }

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are putMVar(s) waiting...
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));
        /* NOTE(review): relies on C-- operator precedence where '&' binds
           tighter than '==' (unlike C) -- confirm against CmmParse.y */
        if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }

        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr",
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }

    RET_NP(1, val);
}
1700
1701
/* putMVar# primop.
 *
 * Arguments: R1 = the MVar#, R2 = the value to put.
 * If the MVar is full, enqueue the current thread on its blocking queue
 * and block (stg_block_putmvar).  If empty, either hand the value
 * directly to the first queued takeMVar waiter (performing its take and
 * waking it), or store the value so the MVar becomes full.
 */
putMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;
    val  = R2;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* MVar already full: block until a takeMVar empties it */
    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            foreign "C" setTSOLink(MyCapability() "ptr",
                                   StgMVar_tail(mvar) "ptr",
                                   CurrentTSO) [];
        }
        StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;

        R1 = mvar;
        R2 = val;
        jump stg_block_putmvar;
    }

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, val);
        /* NOTE(review): relies on C-- operator precedence where '&' binds
           tighter than '==' (unlike C) -- confirm against CmmParse.y */
        if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }

        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr",
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = val;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }

    /* ToDo: yield afterward for better communication performance? */
}
1781
1782
/* tryPutMVar# primop: non-blocking variant of putMVar#.
 *
 * Arguments: R1 = the MVar#, R2 = the value to put.
 * Returns 0 (via RET_N) if the MVar is already full; otherwise stores or
 * hands off the value exactly as putMVarzh_fast does and returns 1.
 * Note: the value stays in R2 throughout (kept live across lockClosure).
 */
tryPutMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        unlockClosure(mvar, info);
#endif
        RET_N(0);
    }

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        /* NOTE(review): relies on C-- operator precedence where '&' binds
           tighter than '==' (unlike C) -- confirm against CmmParse.y */
        if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }

        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr",
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }

    RET_N(1);
    /* ToDo: yield afterward for better communication performance? */
}
1849
1850
1851 /* -----------------------------------------------------------------------------
1852    Stable pointer primitives
1853    -------------------------------------------------------------------------  */
1854
/* makeStableName# primop.
 *
 * Argument:  R1 = the heap object to name.
 * Looks up (or creates) the object's slot in the stable-pointer table;
 * if no StableName closure exists for that slot yet, allocates one and
 * caches it in the table, otherwise reuses the cached one.  Returns the
 * StableName# via RET_P.
 */
makeStableNamezh_fast
{
    W_ index, sn_obj;

    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );

    (index) = foreign "C" lookupStableName(R1 "ptr") [];

    /* Is there already a StableName for this heap object?
     *  stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }

    RET_P(sn_obj);
}
1877
1878
/* makeStablePtr# primop: register R1 in the stable-pointer table and
 * return the resulting StablePtr# (a non-pointer word) via RET_N. */
makeStablePtrzh_fast
{
    /* Args: R1 = a */
    W_ sp;
    MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
    ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
    RET_N(sp);
}
1887
/* deRefStablePtr# primop: index the stable-pointer table with the
 * StablePtr# in R1 and return the referenced closure via RET_P. */
deRefStablePtrzh_fast
{
    /* Args: R1 = the stable ptr */
    W_ r, sp;
    sp = R1;
    r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
    RET_P(r);
}
1896
1897 /* -----------------------------------------------------------------------------
1898    Bytecode object primitives
1899    -------------------------------------------------------------------------  */
1900
/* newBCO# primop: allocate and initialise a bytecode object (BCO).
 * The BCO is variable-sized: a fixed StgBCO header plus a copy of the
 * bitmap array's payload words appended at the end.
 */
1901 newBCOzh_fast
1902 {
1903     /* R1 = instrs
1904        R2 = literals
1905        R3 = ptrs
1906        R4 = arity
1907        R5 = bitmap array
1908     */
1909     W_ bco, bitmap_arr, bytes, words;
1910     
1911     bitmap_arr = R5;
1912
     // Total size = fixed BCO header/fields + one word per bitmap word.
1913     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1914     bytes = WDS(words);
1915
     // All of R1,R2,R3,R5 are heap pointers that must survive a GC here
     // (R4, the arity, is a plain word).
1916     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );
1917
1918     bco = Hp - bytes + WDS(1);
1919     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1920     
1921     StgBCO_instrs(bco)     = R1;
1922     StgBCO_literals(bco)   = R2;
1923     StgBCO_ptrs(bco)       = R3;
     // arity and size are stored as half-words in the BCO.
1924     StgBCO_arity(bco)      = HALF_W_(R4);
1925     StgBCO_size(bco)       = HALF_W_(words);
1926     
1927     // Copy the arity/bitmap info into the BCO
1928     W_ i;
1929     i = 0;
1930 for:
1931     if (i < StgArrWords_words(bitmap_arr)) {
1932         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1933         i = i + 1;
1934         goto for;
1935     }
1936     
1937     RET_P(bco);
1938 }
1939
1940
/* mkApUpd0# primop: wrap a zero-arity BCO in an updatable AP thunk
 * with no arguments, so that entering the AP evaluates the BCO and
 * updates the thunk with the result.
 */
1941 mkApUpd0zh_fast
1942 {
1943     // R1 = the BCO# for the AP
1944     //  
1945     W_ ap;
1946
1947     // This function is *only* used to wrap zero-arity BCOs in an
1948     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1949     // saturated and always points directly to a FUN or BCO.
1950     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1951            StgBCO_arity(R1) == HALF_W_(0));
1952
     // Heap check (R1 kept live as a pointer) plus ticky/cost-centre
     // accounting for the new thunk.
1953     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1954     TICK_ALLOC_UP_THK(0, 0);
1955     CCCS_ALLOC(SIZEOF_StgAP);
1956
1957     ap = Hp - SIZEOF_StgAP + WDS(1);
1958     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1959     
     // Zero arguments: the AP's payload is just the BCO itself.
1960     StgAP_n_args(ap) = HALF_W_(0);
1961     StgAP_fun(ap) = R1;
1962     
1963     RET_P(ap);
1964 }
1965
/* unpackClosure# primop: return a closure's info pointer together with
 * two freshly allocated arrays: a frozen mutable array of its pointer
 * fields and a byte array (word-aligned) of its non-pointer words.
 * Thunk-like closures are special-cased below so that we never read a
 * payload whose layout the standard info table does not describe.
 */
1966 unpackClosurezh_fast
1967 {
1968 /* args: R1 = closure to analyze */
1969 // TODO: Consider the absence of ptrs or nonptrs as a special case ?
1970
1971     W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
     // R1 may carry a pointer tag; strip it before reading the header.
1972     info  = %GET_STD_INFO(UNTAG(R1));
1973
1974     // Some closures have non-standard layout, so we omit those here.
1975     W_ type;
1976     type = TO_W_(%INFO_TYPE(info));
1977     switch [0 .. N_CLOSURE_TYPES] type {
     // A selector thunk's first payload word is its selectee: report
     // exactly one pointer and no non-pointers.
1978     case THUNK_SELECTOR : {
1979         ptrs = 1;
1980         nptrs = 0;
1981         goto out;
1982     }
     // Thunks, (partial) applications and BCOs: report nothing rather
     // than decode their non-standard payloads.
1983     case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
1984          THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
1985         ptrs = 0;
1986         nptrs = 0;
1987         goto out;
1988     }
     // Everything else has a standard layout: counts come straight from
     // the info table.
1989     default: {
1990         ptrs  = TO_W_(%INFO_PTRS(info)); 
1991         nptrs = TO_W_(%INFO_NPTRS(info));
1992         goto out;
1993     }}
1994 out:
1995
1996     W_ ptrs_arr_sz, nptrs_arr_sz;
1997     nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
1998     ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);
1999
     // One allocation covers both result arrays; R1 stays live across GC.
2000     ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);
2001
2002     W_ clos;
2003     clos = UNTAG(R1);
2004
     // Carve the two arrays out of the freshly allocated space:
     // [ ptrs_arr | nptrs_arr ] ending at Hp.
2005     ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
2006     nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
2007
     // FROZEN: the result array is immutable as far as Haskell is concerned.
2008     SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
2009     StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
2010     p = 0;
2011 for:
2012     if(p < ptrs) {
2013          W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
2014          p = p + 1;
2015          goto for;
2016     }
2017     
2018     SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
2019     StgArrWords_words(nptrs_arr) = nptrs;
2020     p = 0;
2021 for2:
     // Non-pointer words follow the pointer fields in the payload.
2022     if(p < nptrs) {
2023          W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
2024          p = p + 1;
2025          goto for2;
2026     }
2027     RET_NPP(info, ptrs_arr, nptrs_arr);
2028 }
2029
2030 /* -----------------------------------------------------------------------------
2031    Thread I/O blocking primitives
2032    -------------------------------------------------------------------------- */
2033
2034 /* Add a thread to the end of the blocked queue. (C-- version of the C
2035  * macro in Schedule.h).
2036  *
 * If the queue is empty, the head pointer is set directly; otherwise the
 * old tail is linked to the new TSO via the setTSOLink C helper
 * (presumably so the mutation is recorded for the generational GC --
 * confirm in Threads.c).  The caller must ensure the TSO is not already
 * on a queue; this is asserted via its _link field.
 */
2037 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
2038     ASSERT(StgTSO__link(tso) == END_TSO_QUEUE);         \
2039     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
2040       W_[blocked_queue_hd] = tso;                       \
2041     } else {                                            \
2042       foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \
2043     }                                                   \
2044     W_[blocked_queue_tl] = tso;
2045
/* waitRead# primop: block the current thread until the file descriptor
 * in R1 is readable.  Only supported on the non-threaded RTS; on the
 * threaded RTS this interface is not used and aborts with barf.
 */
2046 waitReadzh_fast
2047 {
2048     /* args: R1 */
2049 #ifdef THREADED_RTS
2050     foreign "C" barf("waitRead# on threaded RTS") never returns;
2051 #else
2052
2053     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
     // Record why (and on which fd) we are blocked, for the scheduler.
2054     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
2055     StgTSO_block_info(CurrentTSO) = R1;
2056     // No locking - we're not going to use this interface in the
2057     // threaded RTS anyway.
2058     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
     // Return to the scheduler with no result registers to save.
2059     jump stg_block_noregs;
2060 #endif
2061 }
2062
/* waitWrite# primop: block the current thread until the file descriptor
 * in R1 is writable.  Mirror image of waitReadzh_fast; non-threaded
 * RTS only.
 */
2063 waitWritezh_fast
2064 {
2065     /* args: R1 */
2066 #ifdef THREADED_RTS
2067     foreign "C" barf("waitWrite# on threaded RTS") never returns;
2068 #else
2069
2070     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
     // Record why (and on which fd) we are blocked, for the scheduler.
2071     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2072     StgTSO_block_info(CurrentTSO) = R1;
2073     // No locking - we're not going to use this interface in the
2074     // threaded RTS anyway.
2075     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
     // Return to the scheduler with no result registers to save.
2076     jump stg_block_noregs;
2077 #endif
2078 }
2079
2080
2081 STRING(stg_delayzh_malloc_str, "delayzh_fast")
/* delay# primop: block the current thread for at least R1 microseconds.
 * Non-threaded RTS only (the threaded RTS barfs here).  On Windows the
 * delay is handed to the async I/O manager; on Unix the thread is
 * inserted into the time-ordered sleeping_queue.
 */
2082 delayzh_fast
2083 {
2084 #ifdef mingw32_HOST_OS
2085     W_ ares;
2086     CInt reqID;
2087 #else
2088     W_ t, prev, target;
2089 #endif
2090
2091 #ifdef THREADED_RTS
2092     foreign "C" barf("delay# on threaded RTS") never returns;
2093 #else
2094
2095     /* args: R1 (microsecond delay amount) */
2096     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2097     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
2098
2099 #ifdef mingw32_HOST_OS
2100
2101     /* could probably allocate this on the heap instead */
     // Build an StgAsyncIOResult in C heap memory and register a delay
     // request with the Windows async I/O manager.
2102     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2103                                             stg_delayzh_malloc_str);
2104     (reqID) = foreign "C" addDelayRequest(R1);
2105     StgAsyncIOResult_reqID(ares)   = reqID;
2106     StgAsyncIOResult_len(ares)     = 0;
2107     StgAsyncIOResult_errCode(ares) = 0;
2108     StgTSO_block_info(CurrentTSO)  = ares;
2109
2110     /* Having all async-blocked threads reside on the blocked_queue
2111      * simplifies matters, so change the status to OnDoProc put the
2112      * delayed thread on the blocked_queue.
2113      */
2114     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2115     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2116     jump stg_block_async_void;
2117
2118 #else
2119
     // Unix path: convert the microsecond delay into scheduler ticks and
     // compute an absolute wake-up time.
2120     W_ time;
2121     W_ divisor;
2122     (time) = foreign "C" getourtimeofday() [R1];
2123     divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
     // Guard against a zero tick interval (would divide by zero below);
     // 50 is the fallback default (presumably milliseconds per tick --
     // confirm against RtsFlags).
2124     if (divisor == 0) {
2125         divisor = 50;
2126     }
2127     divisor = divisor * 1000;
2128     target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
2129            + time + 1; /* Add 1 as getourtimeofday rounds down */
2130     StgTSO_block_info(CurrentTSO) = target;
2131
2132     /* Insert the new thread in the sleeping queue. */
     // The sleeping queue is kept sorted by ascending wake-up time; walk
     // to the insertion point, remembering the predecessor.
2133     prev = NULL;
2134     t = W_[sleeping_queue];
2135 while:
2136     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
2137         prev = t;
2138         t = StgTSO__link(t);
2139         goto while;
2140     }
2141
2142     StgTSO__link(CurrentTSO) = t;
2143     if (prev == NULL) {
2144         W_[sleeping_queue] = CurrentTSO;
2145     } else {
2146         foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) [];
2147     }
2148     jump stg_block_noregs;
2149 #endif
2150 #endif /* !THREADED_RTS */
2151 }
2152
2153
2154 #ifdef mingw32_HOST_OS
2155 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
/* asyncRead# primop (Windows, non-threaded RTS only): issue an
 * asynchronous read request via the async I/O manager and block the
 * current thread until it completes.
 */
asyncReadzh_fast
2156 {
2157     W_ ares;
2158     CInt reqID;
2159
2160 #ifdef THREADED_RTS
2161     foreign "C" barf("asyncRead# on threaded RTS") never returns;
2162 #else
2163
2164     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2165     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2166     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
2167
2168     /* could probably allocate this on the heap instead */
     // StgAsyncIOResult lives in C heap memory; the I/O manager fills in
     // len/errCode when the request completes.
2169     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2170                                             stg_asyncReadzh_malloc_str)
2171                         [R1,R2,R3,R4];
     // Second argument 0 selects a read (cf. asyncWritezh_fast's 1).
2172     (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
2173     StgAsyncIOResult_reqID(ares)   = reqID;
2174     StgAsyncIOResult_len(ares)     = 0;
2175     StgAsyncIOResult_errCode(ares) = 0;
2176     StgTSO_block_info(CurrentTSO)  = ares;
2177     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2178     jump stg_block_async;
2179 #endif
2180 }
2182
2183 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
/* asyncWrite# primop (Windows, non-threaded RTS only): issue an
 * asynchronous write request via the async I/O manager and block the
 * current thread until it completes.  Mirror of asyncReadzh_fast.
 */
asyncWritezh_fast
2184 {
2185     W_ ares;
2186     CInt reqID;
2187
2188 #ifdef THREADED_RTS
2189     foreign "C" barf("asyncWrite# on threaded RTS") never returns;
2190 #else
2191
2192     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2193     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2194     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2195
     // StgAsyncIOResult lives in C heap memory; the I/O manager fills in
     // len/errCode when the request completes.
2196     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2197                                             stg_asyncWritezh_malloc_str)
2198                         [R1,R2,R3,R4];
     // Second argument 1 selects a write (cf. asyncReadzh_fast's 0).
2199     (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
2200
2201     StgAsyncIOResult_reqID(ares)   = reqID;
2202     StgAsyncIOResult_len(ares)     = 0;
2203     StgAsyncIOResult_errCode(ares) = 0;
2204     StgTSO_block_info(CurrentTSO)  = ares;
2205     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2206     jump stg_block_async;
2207 #endif
2208 }
2210
2211 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
/* asyncDoProc# primop (Windows, non-threaded RTS only): run the C
 * procedure R1 with parameter R2 on an I/O-manager worker and block the
 * current thread until it finishes.
 */
asyncDoProczh_fast
2212 {
2213     W_ ares;
2214     CInt reqID;
2215
2216 #ifdef THREADED_RTS
2217     foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
2218 #else
2219
2220     /* args: R1 = proc, R2 = param */
2221     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2222     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2223
2224     /* could probably allocate this on the heap instead */
     // StgAsyncIOResult lives in C heap memory; the I/O manager fills in
     // len/errCode when the request completes.
2225     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2226                                             stg_asyncDoProczh_malloc_str) 
2227                                 [R1,R2];
2228     (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
2229     StgAsyncIOResult_reqID(ares)   = reqID;
2230     StgAsyncIOResult_len(ares)     = 0;
2231     StgAsyncIOResult_errCode(ares) = 0;
2232     StgTSO_block_info(CurrentTSO) = ares;
2233     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2234     jump stg_block_async;
2235 #endif
2236 }
2238 #endif
2239
2240 // noDuplicate# tries to ensure that none of the thunks under
2241 // evaluation by the current thread are also under evaluation by
2242 // another thread.  It relies on *both* threads doing noDuplicate#;
2243 // the second one will get blocked if they are duplicating some work.
noDuplicatezh_fast
2245 {
     // Flush the STG registers into the TSO so the C code sees a
     // consistent thread state, then call threadPaused to walk the stack
     // (NOTE(review): presumably handling duplicated update frames --
     // confirm in Threads.c / ThreadPaused.c).
2246     SAVE_THREAD_STATE();
2247     ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
2248     foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
2249     
     // threadPaused may have marked this thread as killed; otherwise
     // restore the registers and resume at the return address on the stack.
2250     if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
2251         jump stg_threadFinished;
2252     } else {
2253         LOAD_THREAD_STATE();
2254         ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
2255         jump %ENTRY_CODE(Sp(0));
2256     }
2257 }
2258
/* getApStackVal# primop: fetch the payload word at the given offset
 * from an AP_STACK closure.  Returns (ok, val): ok=1 with the payload
 * word if R1 really is an AP_STACK, ok=0 with R1 itself otherwise
 * (e.g. the closure has since been evaluated/updated to something else).
 */
2259 getApStackValzh_fast
2260 {
2261    W_ ap_stack, offset, val, ok;
2262
2263    /* args: R1 = AP_STACK, R2 = offset */
2264    ap_stack = R1;
2265    offset   = R2;
2266
   // Only index the payload when the info pointer confirms AP_STACK;
   // any other closure type has a different layout.
2267    if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
2268         ok = 1;
2269         val = StgAP_STACK_payload(ap_stack,offset); 
2270    } else {
2271         ok = 0;
2272         val = R1;
2273    }
2274    RET_NP(ok,val);
2275 }