1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
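
/* For example, newByteArrayzh_fast below receives its Int# size argument in
 * R1 and returns its single ByteArray# result with RET_P; primops with
 * several results, such as int2Integerzh_fast, build their unboxed tuples
 * with RET_NP and friends.
 */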
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check; the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *less* than the size of a word, make sure we
42  * round up to the nearest word for the size of the array.
43  */
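
/* Worked example (illustrative, assuming 8-byte words): a request for 5
 * bytes gives payload_words = ROUNDUP_BYTES_TO_WDS(5) = 1, so we allocate
 * the ArrWords header plus one payload word; a request for 0 bytes still
 * allocates the header, with an empty payload.
 */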
44
45 newByteArrayzh_fast
46 {
47     W_ words, payload_words, n, p;
48     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
49     n = R1;
50     payload_words = ROUNDUP_BYTES_TO_WDS(n);
51     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
52     "ptr" p = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
53     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
54     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
55     StgArrWords_words(p) = payload_words;
56     RET_P(p);
57 }
58
59 newPinnedByteArrayzh_fast
60 {
61     W_ words, payload_words, n, p;
62
63     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
64     n = R1;
65     payload_words = ROUNDUP_BYTES_TO_WDS(n);
66
67     // We want an 8-byte aligned array.  allocatePinned() gives us
68     // 8-byte aligned memory by default, but we want to align the
69     // *goods* inside the ArrWords object, so we have to check the
70     // size of the ArrWords header and adjust our size accordingly.
71     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
72     if ((SIZEOF_StgArrWords & 7) != 0) {
73         words = words + 1;
74     }
75
76     "ptr" p = foreign "C" allocatePinned(words) [];
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78
79     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
80     // have to push the object forward one word so that the goods
81     // fall on an 8-byte boundary.
82     if ((SIZEOF_StgArrWords & 7) != 0) {
83         p = p + WDS(1);
84     }
85
86     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
87     StgArrWords_words(p) = payload_words;
88     RET_P(p);
89 }
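
/* Worked example (hypothetical numbers, purely illustrative): if
 * SIZEOF_StgArrWords were 12 bytes on some 32-bit configuration, the test
 * above would allocate one extra 4-byte word and bump p by WDS(1), so the
 * payload would start 16 bytes past the 8-byte aligned allocation, i.e.
 * back on an 8-byte boundary.  When the header is already a multiple of 8
 * bytes, neither adjustment fires.
 */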
90
91 newArrayzh_fast
92 {
93     W_ words, n, init, arr, p;
94     /* Args: R1 = words, R2 = initialisation value */
95
96     n = R1;
97     MAYBE_GC(R2_PTR,newArrayzh_fast);
98
99     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
100     "ptr" arr = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
101     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
102
103     SET_HDR(arr, stg_MUT_ARR_PTRS_info, W_[CCCS]);
104     StgMutArrPtrs_ptrs(arr) = n;
105
106     // Initialise all elements of the array with the value in R2
107     init = R2;
108     p = arr + SIZEOF_StgMutArrPtrs;
109   for:
110     if (p < arr + WDS(words)) {
111         W_[p] = init;
112         p = p + WDS(1);
113         goto for;
114     }
115
116     RET_P(arr);
117 }
118
119 unsafeThawArrayzh_fast
120 {
121   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
122   //
123   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
124   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
125   // it on the mutable list for the GC to remove (removing something from
126   // the mutable list is not easy, because the mut_list is only singly-linked).
127   // 
128   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
129   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0 to indicate
130   // that it is still on the mutable list.
131
132   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
133   // either it is on a mut_list, or it isn't.  We adopt the convention that
134   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
135   // and MUT_ARR_PTRS_FROZEN otherwise.
136   //
137   if (%INFO_TYPE(%GET_STD_INFO(R1)) != HALF_W_(MUT_ARR_PTRS_FROZEN0)) {
138         foreign "C" recordMutableLock(R1 "ptr") [R1];
139   }
140
141   SET_INFO(R1,stg_MUT_ARR_PTRS_info);
142
143   RET_P(R1);
144 }
145
146 /* -----------------------------------------------------------------------------
147    MutVar primitives
148    -------------------------------------------------------------------------- */
149
150 newMutVarzh_fast
151 {
152     W_ mv;
153     /* Args: R1 = initialisation value */
154
155     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
156
157     mv = Hp - SIZEOF_StgMutVar + WDS(1);
158     SET_HDR(mv,stg_MUT_VAR_info,W_[CCCS]);
159     StgMutVar_var(mv) = R1;
160     
161     RET_P(mv);
162 }
163
164 atomicModifyMutVarzh_fast
165 {
166     W_ mv, z, x, y, r;
167     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
168
169     /* If x is the current contents of the MutVar#, then 
170        we want to make the new contents point to
171
172          (sel_0 (f x))
173  
174        and the return value is
175          
176          (sel_1 (f x))
177
178         obviously we can share (f x).
179
180          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
181          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
182          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
183     */
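
    /* A hedged Haskell-level sketch of the effect being built (illustrative
       only, written against the boxed Data.IORef API rather than this primop;
       it is essentially what Data.IORef.atomicModifyIORef provides):

           import Data.IORef

           atomicModifySketch :: IORef a -> (a -> (a, b)) -> IO b
           atomicModifySketch ref f = do
               x <- readIORef ref
               let z = f x              -- shared thunk,   like z = [stg_ap_2 f x]
               writeIORef ref (fst z)   -- new contents,   like y = [stg_sel_0 z]
               return (snd z)           -- returned value, like r = [stg_sel_1 z]

       except that the read and the write of the MutVar# below happen as one
       step (under SMP the storage-manager lock is held around them).
    */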
184
185 #if MIN_UPD_SIZE > 1
186 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
187 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
188 #else
189 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
190 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
191 #endif
192
193 #if MIN_UPD_SIZE > 2
194 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
195 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
196 #else
197 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
198 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
199 #endif
200
201 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
202
203    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
204
205 #if defined(SMP)
206     foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
207 #endif
208
209    x = StgMutVar_var(R1);
210
211    TICK_ALLOC_THUNK_2();
212    CCCS_ALLOC(THUNK_2_SIZE);
213    z = Hp - THUNK_2_SIZE + WDS(1);
214    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
215    LDV_RECORD_CREATE(z);
216    StgThunk_payload(z,0) = R2;
217    StgThunk_payload(z,1) = x;
218
219    TICK_ALLOC_THUNK_1();
220    CCCS_ALLOC(THUNK_1_SIZE);
221    y = z - THUNK_1_SIZE;
222    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
223    LDV_RECORD_CREATE(y);
224    StgThunk_payload(y,0) = z;
225
226    StgMutVar_var(R1) = y;
227
228    TICK_ALLOC_THUNK_1();
229    CCCS_ALLOC(THUNK_1_SIZE);
230    r = y - THUNK_1_SIZE;
231    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
232    LDV_RECORD_CREATE(r);
233    StgThunk_payload(r,0) = z;
234
235 #if defined(SMP)
236     foreign "C" RELEASE_LOCK(sm_mutex "ptr") [];
237 #endif
238
239    RET_P(r);
240 }
241
242 /* -----------------------------------------------------------------------------
243    Weak Pointer Primitives
244    -------------------------------------------------------------------------- */
245
246 STRING(stg_weak_msg,"New weak pointer at %p\n")
247
248 mkWeakzh_fast
249 {
250   /* R1 = key
251      R2 = value
252      R3 = finalizer (or NULL)
253   */
254   W_ w;
255
256   if (R3 == NULL) {
257     R3 = stg_NO_FINALIZER_closure;
258   }
259
260   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
261
262   w = Hp - SIZEOF_StgWeak + WDS(1);
263   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
264
265   StgWeak_key(w)       = R1;
266   StgWeak_value(w)     = R2;
267   StgWeak_finalizer(w) = R3;
268
269   StgWeak_link(w)       = W_[weak_ptr_list];
270   W_[weak_ptr_list]     = w;
271
272   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
273
274   RET_P(w);
275 }
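
/* User-level view (a hedged sketch using System.Mem.Weak, which is layered
 * on top of this primop; demo is just an illustrative name):
 *
 *     import System.Mem.Weak
 *
 *     demo :: IO ()
 *     demo = do
 *       let key = [1, 2, 3] :: [Int]
 *       w  <- mkWeakPtr key (Just (putStrLn "key was garbage collected"))
 *       mv <- deRefWeak w          -- Just key while key is still reachable
 *       print mv
 */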
276
277
278 finalizzeWeakzh_fast
279 {
280   /* R1 = weak ptr
281    */
282   W_ w, f;
283
284   w = R1;
285
286   // already dead?
287   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
288       RET_NP(0,stg_NO_FINALIZER_closure);
289   }
290
291   // kill it
292 #ifdef PROFILING
293   // @LDV profiling
294   // A weak pointer is inherently used, so we do not need to call
295   // LDV_recordDead_FILL_SLOP_DYNAMIC():
296   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
297   // or, LDV_recordDead():
298   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
299   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
300   // large as weak pointers, so there is no need to fill the slop, either.
301   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
302 #endif
303
304   //
305   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
306   //
307   SET_INFO(w,stg_DEAD_WEAK_info);
308   LDV_RECORD_CREATE(w);
309
310   f = StgWeak_finalizer(w);
311   StgDeadWeak_link(w) = StgWeak_link(w);
312
313   /* return the finalizer */
314   if (f == stg_NO_FINALIZER_closure) {
315       RET_NP(0,stg_NO_FINALIZER_closure);
316   } else {
317       RET_NP(1,f);
318   }
319 }
320
321 deRefWeakzh_fast
322 {
323   /* R1 = weak ptr */
324   W_ w, code, val;
325
326   w = R1;
327   if (GET_INFO(w) == stg_WEAK_info) {
328     code = 1;
329     val = StgWeak_value(w);
330   } else {
331     code = 0;
332     val = w;
333   }
334   RET_NP(code,val);
335 }
336
337 /* -----------------------------------------------------------------------------
338    Arbitrary-precision Integer operations.
339
340    This code assumes that mp_limb_t == W_.  This is currently the case on
341    all the platforms that GHC supports.
342    -------------------------------------------------------------------------- */
343
344 int2Integerzh_fast
345 {
346    /* arguments: R1 = Int# */
347
348    W_ val, s, p;        /* to avoid aliasing */
349
350    val = R1;
351    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
352
353    p = Hp - SIZEOF_StgArrWords;
354    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
355    StgArrWords_words(p) = 1;
356
357    /* mpz_set_si is inlined here, makes things simpler */
358    if (%lt(val,0)) { 
359         s  = -1;
360         Hp(0) = -val;
361    } else { 
362      if (%gt(val,0)) {
363         s = 1;
364         Hp(0) = val;
365      } else {
366         s = 0;
367      }
368   }
369
370    /* returns (# size  :: Int#, 
371                  data  :: ByteArray# 
372                #)
373    */
374    RET_NP(s,p);
375 }
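
/* Worked example (illustrative): for R1 = -5 the code above returns s = -1
 * together with a one-word ByteArray# holding 5; for R1 = 0 it returns
 * s = 0 and the payload word is left unwritten (callers must not read it).
 */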
376
377 word2Integerzh_fast
378 {
379    /* arguments: R1 = Word# */
380
381    W_ val, s, p;        /* to avoid aliasing */
382
383    val = R1;
384
385    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
386
387    p = Hp - SIZEOF_StgArrWords;
388    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
389    StgArrWords_words(p) = 1;
390
391    if (val != 0) {
392         s = 1;
393         W_[Hp] = val;
394    } else {
395         s = 0;
396    }
397
398    /* returns (# size  :: Int#, 
399                  data  :: ByteArray# #)
400    */
401    RET_NP(s,p);
402 }
403
404
405 /*
406  * 'long long' primops for converting to/from Integers.
407  */
408
409 #ifdef SUPPORT_LONG_LONGS
410
411 int64ToIntegerzh_fast
412 {
413    /* arguments: L1 = Int64# */
414
415    L_ val;
416    W_ hi, s, neg, words_needed, p;
417
418    val = L1;
419    neg = 0;
420
421    if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) )  { 
422        words_needed = 2;
423    } else { 
424        // minimum is one word
425        words_needed = 1;
426    }
427
428    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
429                NO_PTRS, int64ToIntegerzh_fast );
430
431    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
432    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
433    StgArrWords_words(p) = words_needed;
434
435    if ( %lt(val,0::L_) ) {
436      neg = 1;
437      val = -val;
438    }
439
440    hi = TO_W_(val >> 32);
441
442    if ( words_needed == 2 )  { 
443       s = 2;
444       Hp(-1) = TO_W_(val);
445       Hp(0) = hi;
446    } else { 
447        if ( val != 0::L_ ) {
448            s = 1;
449            Hp(0) = TO_W_(val);
450        } else /* val==0 */  {
451            s = 0;
452        }
453    }
454    if ( neg != 0 ) {
455         s = -s;
456    }
457
458    /* returns (# size  :: Int#, 
459                  data  :: ByteArray# #)
460    */
461    RET_NP(s,p);
462 }
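
/* Worked example (illustrative; SUPPORT_LONG_LONGS implies 32-bit words):
 * converting 0x100000000 (2^32) needs two limbs, so s = 2 with the low word
 * 0 in Hp(-1) and the high word 1 in Hp(0); converting -1 needs one limb,
 * and the sign is folded into s, giving s = -1 with a single payload word 1.
 */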
463
464 word64ToIntegerzh_fast
465 {
466    /* arguments: L1 = Word64# */
467
468    L_ val;
469    W_ hi, s, words_needed, p;
470
471    val = L1;
472    if ( val >= 0x100000000::L_ ) {
473       words_needed = 2;
474    } else {
475       words_needed = 1;
476    }
477
478    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
479                NO_PTRS, word64ToIntegerzh_fast );
480
481    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
482    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
483    StgArrWords_words(p) = words_needed;
484
485    hi = TO_W_(val >> 32);
486    if ( val >= 0x100000000::L_ ) { 
487      s = 2;
488      Hp(-1) = TO_W_(val);
489      Hp(0)  = hi;
490    } else {
491       if ( val != 0::L_ ) {
492         s = 1;
493         Hp(0) = TO_W_(val);
494      } else /* val==0 */  {
495       s = 0;
496      }
497   }
498
499    /* returns (# size  :: Int#, 
500                  data  :: ByteArray# #)
501    */
502    RET_NP(s,p);
503 }
504
505
506 #endif /* SUPPORT_LONG_LONGS */
507
508 /* ToDo: this is shockingly inefficient */
509
510 #ifndef SMP
511 section "bss" {
512   mp_tmp1:
513     bits8 [SIZEOF_MP_INT];
514 }
515
516 section "bss" {
517   mp_tmp2:
518     bits8 [SIZEOF_MP_INT];
519 }
520
521 section "bss" {
522   mp_result1:
523     bits8 [SIZEOF_MP_INT];
524 }
525
526 section "bss" {
527   mp_result2:
528     bits8 [SIZEOF_MP_INT];
529 }
530 #endif
531
532 #ifdef SMP
533 #define FETCH_MP_TEMP(X) \
534 W_ X; \
535 X = BaseReg + (OFFSET_StgRegTable_r ## X);
536 #else
537 #define FETCH_MP_TEMP(X) /* Nothing */
538 #endif
539
540 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
541 name                                                                    \
542 {                                                                       \
543   CInt s1, s2;                                                          \
544   W_ d1, d2;                                                            \
545   FETCH_MP_TEMP(mp_tmp1);                                               \
546   FETCH_MP_TEMP(mp_tmp2);                                               \
547   FETCH_MP_TEMP(mp_result1)                                             \
548   FETCH_MP_TEMP(mp_result2);                                            \
549                                                                         \
550   /* call doYouWantToGC() */                                            \
551   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
552                                                                         \
553   s1 = W_TO_INT(R1);                                                    \
554   d1 = R2;                                                              \
555   s2 = W_TO_INT(R3);                                                    \
556   d2 = R4;                                                              \
557                                                                         \
558   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
559   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
560   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
561   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
562   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
563   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
564                                                                         \
565   foreign "C" mpz_init(mp_result1 "ptr") [];                            \
566                                                                         \
567   /* Perform the operation */                                           \
568   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
569                                                                         \
570   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
571          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
572 }
573
574 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
575 name                                                                    \
576 {                                                                       \
577   CInt s1;                                                              \
578   W_ d1;                                                                \
579   FETCH_MP_TEMP(mp_tmp1);                                               \
580   FETCH_MP_TEMP(mp_result1)                                             \
581                                                                         \
582   /* call doYouWantToGC() */                                            \
583   MAYBE_GC(R2_PTR, name);                                               \
584                                                                         \
585   d1 = R2;                                                              \
586   s1 = W_TO_INT(R1);                                                    \
587                                                                         \
588   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
589   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
590   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
591                                                                         \
592   foreign "C" mpz_init(mp_result1 "ptr") [];                            \
593                                                                         \
594   /* Perform the operation */                                           \
595   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
596                                                                         \
597   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
598          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
599 }
600
601 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
602 name                                                                                    \
603 {                                                                                       \
604   CInt s1, s2;                                                                          \
605   W_ d1, d2;                                                                            \
606   FETCH_MP_TEMP(mp_tmp1);                                                               \
607   FETCH_MP_TEMP(mp_tmp2);                                                               \
608   FETCH_MP_TEMP(mp_result1)                                                             \
609   FETCH_MP_TEMP(mp_result2)                                                             \
610                                                                                         \
611   /* call doYouWantToGC() */                                                            \
612   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
613                                                                                         \
614   s1 = W_TO_INT(R1);                                                                    \
615   d1 = R2;                                                                              \
616   s2 = W_TO_INT(R3);                                                                    \
617   d2 = R4;                                                                              \
618                                                                                         \
619   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
620   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
621   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
622   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
623   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
624   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
625                                                                                         \
626   foreign "C" mpz_init(mp_result1 "ptr") [];                                               \
627   foreign "C" mpz_init(mp_result2 "ptr") [];                                               \
628                                                                                         \
629   /* Perform the operation */                                                           \
630   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
631                                                                                         \
632   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
633            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
634            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
635            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
636 }
637
638 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add)
639 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub)
640 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul)
641 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd)
642 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q)
643 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r)
644 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact)
645 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and)
646 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior)
647 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor)
648 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com)
649
650 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr)
651 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr)
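
/* At the Haskell level these back the Integer operations (a hedged sketch of
 * the correspondence; the actual wrappers live in the GMP-based Integer
 * library, not here).  Note the two division flavours:
 *
 *   quotRemIntegerzh_fast / mpz_tdiv_qr  truncates towards zero   (quot/rem)
 *   divModIntegerzh_fast  / mpz_fdiv_qr  rounds towards -infinity (div/mod)
 *
 * e.g.  (-7) `quot` 2 == -3  and  (-7) `rem` 2 == -1,
 *  but  (-7) `div`  2 == -4  and  (-7) `mod` 2 ==  1.
 */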
652
653 #ifndef SMP
654 section "bss" {
655   mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
656 }
657 #endif
658
659 gcdIntzh_fast
660 {
661     /* R1 = the first Int#; R2 = the second Int# */
662     W_ r; 
663     FETCH_MP_TEMP(mp_tmp_w);
664
665     W_[mp_tmp_w] = R1;
666     r = foreign "C" mpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
667
668     R1 = r;
669     /* Result parked in R1, return via info-pointer at TOS */
670     jump %ENTRY_CODE(Sp(0));
671 }
672
673
674 gcdIntegerIntzh_fast
675 {
676     /* R1 = s1; R2 = d1; R3 = the int */
677     R1 = foreign "C" mpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
678     
679     /* Result parked in R1, return via info-pointer at TOS */
680     jump %ENTRY_CODE(Sp(0));
681 }
682
683
684 cmpIntegerIntzh_fast
685 {
686     /* R1 = s1; R2 = d1; R3 = the int */
687     W_ usize, vsize, v_digit, u_digit;
688
689     usize = R1;
690     vsize = 0;
691     v_digit = R3;
692
693     // paraphrased from mpz_cmp_si() in the GMP sources
694     if (%gt(v_digit,0)) {
695         vsize = 1;
696     } else { 
697         if (%lt(v_digit,0)) {
698             vsize = -1;
699             v_digit = -v_digit;
700         }
701     }
702
703     if (usize != vsize) {
704         R1 = usize - vsize; 
705         jump %ENTRY_CODE(Sp(0));
706     }
707
708     if (usize == 0) {
709         R1 = 0; 
710         jump %ENTRY_CODE(Sp(0));
711     }
712
713     u_digit = W_[BYTE_ARR_CTS(R2)];
714
715     if (u_digit == v_digit) {
716         R1 = 0; 
717         jump %ENTRY_CODE(Sp(0));
718     }
719
720     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
721         R1 = usize; 
722     } else {
723         R1 = -usize; 
724     }
725
726     jump %ENTRY_CODE(Sp(0));
727 }
728
729 cmpIntegerzh_fast
730 {
731     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
732     W_ usize, vsize, size, up, vp;
733     CInt cmp;
734
735     // paraphrased from mpz_cmp() in the GMP sources
736     usize = R1;
737     vsize = R3;
738
739     if (usize != vsize) {
740         R1 = usize - vsize; 
741         jump %ENTRY_CODE(Sp(0));
742     }
743
744     if (usize == 0) {
745         R1 = 0; 
746         jump %ENTRY_CODE(Sp(0));
747     }
748
749     if (%lt(usize,0)) { // NB. not <, which is unsigned
750         size = -usize;
751     } else {
752         size = usize;
753     }
754
755     up = BYTE_ARR_CTS(R2);
756     vp = BYTE_ARR_CTS(R4);
757
758     cmp = foreign "C" mpn_cmp(up "ptr", vp "ptr", size) [];
759
760     if (cmp == 0 :: CInt) {
761         R1 = 0; 
762         jump %ENTRY_CODE(Sp(0));
763     }
764
765     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
766         R1 = 1;
767     } else {
768         R1 = (-1); 
769     }
770     /* Result parked in R1, return via info-pointer at TOS */
771     jump %ENTRY_CODE(Sp(0));
772 }
773
774 integer2Intzh_fast
775 {
776     /* R1 = s; R2 = d */
777     W_ r, s;
778
779     s = R1;
780     if (s == 0) {
781         r = 0;
782     } else {
783         r = W_[R2 + SIZEOF_StgArrWords];
784         if (%lt(s,0)) {
785             r = -r;
786         }
787     }
788     /* Result parked in R1, return via info-pointer at TOS */
789     R1 = r;
790     jump %ENTRY_CODE(Sp(0));
791 }
792
793 integer2Wordzh_fast
794 {
795   /* R1 = s; R2 = d */
796   W_ r, s;
797
798   s = R1;
799   if (s == 0) {
800     r = 0;
801   } else {
802     r = W_[R2 + SIZEOF_StgArrWords];
803     if (%lt(s,0)) {
804         r = -r;
805     }
806   }
807   /* Result parked in R1, return via info-pointer at TOS */
808   R1 = r;
809   jump %ENTRY_CODE(Sp(0));
810 }
811
812 decodeFloatzh_fast
813 {
814     W_ p;
815     F_ arg;
816     FETCH_MP_TEMP(mp_tmp1);
817     FETCH_MP_TEMP(mp_tmp_w);
818     
819     /* arguments: F1 = Float# */
820     arg = F1;
821     
822     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
823     
824     /* Be prepared to tell Lennart-coded __decodeFloat
825        where mantissa._mp_d can be put (it does not care about the rest) */
826     p = Hp - SIZEOF_StgArrWords;
827     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
828     StgArrWords_words(p) = 1;
829     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
830     
831     /* Perform the operation */
832     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
833     
834     /* returns: (Int# (expn), Int#, ByteArray#) */
835     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
836 }
837
838 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
839 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
840
841 decodeDoublezh_fast
842 {
843     D_ arg;
844     W_ p;
845     FETCH_MP_TEMP(mp_tmp1);
846     FETCH_MP_TEMP(mp_tmp_w);
847
848     /* arguments: D1 = Double# */
849     arg = D1;
850
851     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
852     
853     /* Be prepared to tell Lennart-coded __decodeDouble
854        where mantissa.d can be put (it does not care about the rest) */
855     p = Hp - ARR_SIZE + WDS(1);
856     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
857     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
858     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
859
860     /* Perform the operation */
861     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
862     
863     /* returns: (Int# (expn), Int#, ByteArray#) */
864     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
865 }
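
/* User-level view (hedged sketch of the correspondence): these are what the
 * RealFloat decodeFloat method ultimately comes down to, e.g.
 *
 *     decodeFloat (0.5 :: Float)  == (8388608, -24)           -- 2^23 * 2^-24
 *     decodeFloat (0.5 :: Double) == (4503599627370496, -53)  -- 2^52 * 2^-53
 *
 * with the mantissa delivered through the ByteArray# written above and the
 * exponent through mp_tmp_w.
 */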
866
867 /* -----------------------------------------------------------------------------
868  * Concurrency primitives
869  * -------------------------------------------------------------------------- */
870
871 forkzh_fast
872 {
873   /* args: R1 = closure to spark */
874   
875   MAYBE_GC(R1_PTR, forkzh_fast);
876
877   // create it right now, return ThreadID in R1
878   "ptr" R1 = foreign "C" createIOThread( MyCapability() "ptr", 
879                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
880                                 R1 "ptr");
881   foreign "C" scheduleThread(MyCapability() "ptr", R1 "ptr");
882
883   // switch at the earliest opportunity
884   CInt[context_switch] = 1 :: CInt;
885   
886   RET_P(R1);
887 }
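
/* User-level view (hedged sketch): forkIO eventually comes down to this
 * primop, e.g.
 *
 *     import Control.Concurrent (forkIO, threadDelay)
 *
 *     main :: IO ()
 *     main = do
 *       _tid <- forkIO (putStrLn "child thread")   -- fork# underneath
 *       threadDelay 100000                         -- let the child run
 */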
888
889 yieldzh_fast
890 {
891   jump stg_yield_noregs;
892 }
893
894 myThreadIdzh_fast
895 {
896   /* no args. */
897   RET_P(CurrentTSO);
898 }
899
900 labelThreadzh_fast
901 {
902   /* args: 
903         R1 = ThreadId#
904         R2 = Addr# */
905 #ifdef DEBUG
906   foreign "C" labelThread(R1 "ptr", R2 "ptr");
907 #endif
908   jump %ENTRY_CODE(Sp(0));
909 }
910
911 isCurrentThreadBoundzh_fast
912 {
913   /* no args */
914   W_ r;
915   r = foreign "C" isThreadBound(CurrentTSO) [];
916   RET_N(r);
917 }
918
919
920 /* -----------------------------------------------------------------------------
921  * TVar primitives
922  * -------------------------------------------------------------------------- */
923
924 #ifdef REG_R1
925 #define SP_OFF 0
926 #define IF_NOT_REG_R1(x) 
927 #else
928 #define SP_OFF 1
929 #define IF_NOT_REG_R1(x) x
930 #endif
931
932 // Catch retry frame ------------------------------------------------------------
933
934 #define CATCH_RETRY_FRAME_ERROR(label) \
935   label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }
936
937 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
938 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
939 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
940 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
941 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
942 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
943 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
944 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)
945
946 #if MAX_VECTORED_RTN > 8
947 #error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
948 #endif
949
950 #if defined(PROFILING)
951 #define CATCH_RETRY_FRAME_BITMAP 7
952 #define CATCH_RETRY_FRAME_WORDS  6
953 #else
954 #define CATCH_RETRY_FRAME_BITMAP 1
955 #define CATCH_RETRY_FRAME_WORDS  4
956 #endif
957
958 INFO_TABLE_RET(stg_catch_retry_frame,
959                CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
960                CATCH_RETRY_FRAME,
961                stg_catch_retry_frame_0_ret,
962                stg_catch_retry_frame_1_ret,
963                stg_catch_retry_frame_2_ret,
964                stg_catch_retry_frame_3_ret,
965                stg_catch_retry_frame_4_ret,
966                stg_catch_retry_frame_5_ret,
967                stg_catch_retry_frame_6_ret,
968                stg_catch_retry_frame_7_ret)
969 {
970    W_ r, frame, trec, outer;
971    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
972
973    frame = Sp;
974    trec = StgTSO_trec(CurrentTSO);
975    "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
976    r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
977    if (r) {
978      /* Succeeded (either first branch or second branch) */
979      StgTSO_trec(CurrentTSO) = outer;
980      Sp = Sp + SIZEOF_StgCatchRetryFrame;
981      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
982      jump %ENTRY_CODE(Sp(SP_OFF));
983    } else {
984      /* Did not commit: retry */
985      W_ new_trec;
986      "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
987      StgTSO_trec(CurrentTSO) = new_trec;
988      if (StgCatchRetryFrame_running_alt_code(frame)) {
989        R1 = StgCatchRetryFrame_alt_code(frame);
990      } else {
991        R1 = StgCatchRetryFrame_first_code(frame);
992        StgCatchRetryFrame_first_code_trec(frame) = new_trec;
993      }
994      Sp_adj(-1);
995      jump RET_LBL(stg_ap_v);
996    }
997 }
998
999
1000 // Atomically frame -------------------------------------------------------------
1001
1002
1003 #define ATOMICALLY_FRAME_ERROR(label) \
1004   label { foreign "C" barf("atomically_frame incorrectly entered!"); }
1005
1006 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
1007 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
1008 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
1009 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
1010 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
1011 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
1012 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
1013 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)
1014
1015 #if MAX_VECTORED_RTN > 8
1016 #error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
1017 #endif
1018
1019 #if defined(PROFILING)
1020 #define ATOMICALLY_FRAME_BITMAP 7
1021 #define ATOMICALLY_FRAME_WORDS  4
1022 #else
1023 #define ATOMICALLY_FRAME_BITMAP 1
1024 #define ATOMICALLY_FRAME_WORDS  2
1025 #endif
1026
1027
1028 INFO_TABLE_RET(stg_atomically_frame,
1029                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1030                ATOMICALLY_FRAME,
1031                stg_atomically_frame_0_ret,
1032                stg_atomically_frame_1_ret,
1033                stg_atomically_frame_2_ret,
1034                stg_atomically_frame_3_ret,
1035                stg_atomically_frame_4_ret,
1036                stg_atomically_frame_5_ret,
1037                stg_atomically_frame_6_ret,
1038                stg_atomically_frame_7_ret)
1039 {
1040    W_ frame, trec, valid;
1041    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1042
1043    frame = Sp;
1044    trec = StgTSO_trec(CurrentTSO);
1045    if (StgAtomicallyFrame_waiting(frame)) {
1046      /* The TSO is currently waiting: should we stop waiting? */
1047      valid = foreign "C" stmReWait(CurrentTSO "ptr");
1048      if (valid) {
1049        /* Previous attempt is still valid: no point trying again yet */
1050           IF_NOT_REG_R1(Sp_adj(-2);
1051                         Sp(1) = stg_NO_FINALIZER_closure;
1052                         Sp(0) = stg_ut_1_0_unreg_info;)
1053        jump stg_block_noregs;
1054      } else {
1055        /* Previous attempt is no longer valid: try again */
1056        "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr");
1057        StgTSO_trec(CurrentTSO) = trec;
1058        StgAtomicallyFrame_waiting(frame) = 0 :: CInt; /* false; */
1059        R1 = StgAtomicallyFrame_code(frame);
1060        Sp_adj(-1);
1061        jump RET_LBL(stg_ap_v);
1062      }
1063    } else {
1064      /* The TSO is not currently waiting: try to commit the transaction */
1065      valid = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr");
1066      if (valid) {
1067        /* Transaction was valid: commit succeeded */
1068        StgTSO_trec(CurrentTSO) = NO_TREC;
1069        Sp = Sp + SIZEOF_StgAtomicallyFrame;
1070        IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1071        jump %ENTRY_CODE(Sp(SP_OFF));
1072      } else {
1073        /* Transaction was not valid: try again */
1074        "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr");
1075        StgTSO_trec(CurrentTSO) = trec;
1076        R1 = StgAtomicallyFrame_code(frame);
1077        Sp_adj(-1);
1078        jump RET_LBL(stg_ap_v);
1079      }
1080    }
1081 }
1082
1083
1084 // STM catch frame --------------------------------------------------------------
1085
1086 #define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)          \
1087    label                                                   \
1088    {                                                       \
1089       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )  \
1090       Sp = Sp + SIZEOF_StgCatchSTMFrame;                   \
1091       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)             \
1092       jump ret;                                            \
1093    }
1094
1095 #ifdef REG_R1
1096 #define SP_OFF 0
1097 #else
1098 #define SP_OFF 1
1099 #endif
1100
1101 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
1102 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
1103 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
1104 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
1105 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
1106 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
1107 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
1108 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
1109
1110 #if MAX_VECTORED_RTN > 8
1111 #error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
1112 #endif
1113
1114 #if defined(PROFILING)
1115 #define CATCH_STM_FRAME_BITMAP 3
1116 #define CATCH_STM_FRAME_WORDS  3
1117 #else
1118 #define CATCH_STM_FRAME_BITMAP 0
1119 #define CATCH_STM_FRAME_WORDS  1
1120 #endif
1121
1122 /* Catch frames are very similar to update frames, but when entering
1123  * one we just pop the frame off the stack and perform the correct
1124  * kind of return to the activation record underneath us on the stack.
1125  */
1126
1127 INFO_TABLE_RET(stg_catch_stm_frame,
1128                CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
1129                CATCH_STM_FRAME,
1130                stg_catch_stm_frame_0_ret,
1131                stg_catch_stm_frame_1_ret,
1132                stg_catch_stm_frame_2_ret,
1133                stg_catch_stm_frame_3_ret,
1134                stg_catch_stm_frame_4_ret,
1135                stg_catch_stm_frame_5_ret,
1136                stg_catch_stm_frame_6_ret,
1137                stg_catch_stm_frame_7_ret)
1138 CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1139
1140
1141 // Primop definition ------------------------------------------------------------
1142
1143 atomicallyzh_fast
1144 {
1145   W_ frame;
1146   W_ old_trec;
1147   W_ new_trec;
1148   
1149   // stmStartTransaction may allocate
1150   MAYBE_GC (R1_PTR, atomicallyzh_fast); 
1151
1152   /* Args: R1 = m :: STM a */
1153   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1154
1155   /* Set up the atomically frame */
1156   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1157   frame = Sp;
1158
1159   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1160   StgAtomicallyFrame_waiting(frame) = 0 :: CInt; // False
1161   StgAtomicallyFrame_code(frame) = R1;
1162
1163   /* Start the memory transaction */
1164   old_trec = StgTSO_trec(CurrentTSO);
1165   ASSERT(old_trec == NO_TREC);
1166   "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr");
1167   StgTSO_trec(CurrentTSO) = new_trec;
1168
1169   /* Apply R1 to the realworld token */
1170   Sp_adj(-1);
1171   jump RET_LBL(stg_ap_v);
1172 }
1173
1174
1175 catchSTMzh_fast
1176 {
1177   W_ frame;
1178   
1179   /* Args: R1 :: STM a */
1180   /* Args: R2 :: Exception -> STM a */
1181   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1182
1183   /* Set up the catch frame */
1184   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1185   frame = Sp;
1186
1187   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1188   StgCatchSTMFrame_handler(frame) = R2;
1189
1190   /* Apply R1 to the realworld token */
1191   Sp_adj(-1);
1192   jump RET_LBL(stg_ap_v);
1193 }
1194
1195
1196 catchRetryzh_fast
1197 {
1198   W_ frame;
1199   W_ new_trec;
1200   W_ trec;
1201
1202   // stmStartTransaction may allocate
1203   MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 
1204
1205   /* Args: R1 :: STM a */
1206   /* Args: R2 :: STM a */
1207   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1208
1209   /* Start a nested transaction within which to run the first code */
1210   trec = StgTSO_trec(CurrentTSO);
1211   "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr");
1212   StgTSO_trec(CurrentTSO) = new_trec;
1213
1214   /* Set up the catch-retry frame */
1215   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1216   frame = Sp;
1217   
1218   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1219   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1220   StgCatchRetryFrame_first_code(frame) = R1;
1221   StgCatchRetryFrame_alt_code(frame) = R2;
1222   StgCatchRetryFrame_first_code_trec(frame) = new_trec;
1223
1224   /* Apply R1 to the realworld token */
1225   Sp_adj(-1);
1226   jump RET_LBL(stg_ap_v);  
1227 }
1228
1229
1230 retryzh_fast
1231 {
1232   W_ frame_type;
1233   W_ frame;
1234   W_ trec;
1235   W_ outer;
1236   W_ r;
1237
1238   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1239
1240   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1241 retry_pop_stack:
1242   trec = StgTSO_trec(CurrentTSO);
1243   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
1244   StgTSO_sp(CurrentTSO) = Sp;
1245   frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr");
1246   Sp = StgTSO_sp(CurrentTSO);
1247   frame = Sp;
1248
1249   if (frame_type == CATCH_RETRY_FRAME) {
1250     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1251     ASSERT(outer != NO_TREC);
1252     if (!StgCatchRetryFrame_running_alt_code(frame)) {
1253       // Retry in the first code: try the alternative
1254       "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr");
1255       StgTSO_trec(CurrentTSO) = trec;
1256       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1257       R1 = StgCatchRetryFrame_alt_code(frame);
1258       Sp_adj(-1);
1259       jump RET_LBL(stg_ap_v);
1260     } else {
1261       // Retry in the alternative code: propagate
1262       W_ other_trec;
1263       other_trec = StgCatchRetryFrame_first_code_trec(frame);
1264       r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", other_trec "ptr");
1265       if (r) {
1266         r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr");
1267       }
1268       if (r) {
1269         // Merge between siblings succeeded: commit it back to enclosing transaction
1270         // and then propagate the retry
1271         StgTSO_trec(CurrentTSO) = outer;
1272         Sp = Sp + SIZEOF_StgCatchRetryFrame;
1273         goto retry_pop_stack;
1274       } else {
1275         // Merge failed: we mustn't propagate the retry.  Try both paths again.
1276         "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr");
1277         StgCatchRetryFrame_first_code_trec(frame) = trec;
1278         StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1279         StgTSO_trec(CurrentTSO) = trec;
1280         R1 = StgCatchRetryFrame_first_code(frame);
1281         Sp_adj(-1);
1282         jump RET_LBL(stg_ap_v);
1283       }
1284     }
1285   }
1286
1287   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1288   ASSERT(frame_type == ATOMICALLY_FRAME);
1289   ASSERT(outer == NO_TREC);
1290   r = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr");
1291   if (r) {
1292     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1293     StgAtomicallyFrame_waiting(frame) = 1 :: CInt; // true
1294     Sp = frame;
1295     // Fix up the stack in the unregisterised case: the return convention is different.
1296     IF_NOT_REG_R1(Sp_adj(-2); 
1297                   Sp(1) = stg_NO_FINALIZER_closure;
1298                   Sp(0) = stg_ut_1_0_unreg_info;)
1299     jump stg_block_noregs;
1300   } else {
1301     // Transaction was not valid: retry immediately
1302     "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr");
1303     StgTSO_trec(CurrentTSO) = trec;
1304     R1 = StgAtomicallyFrame_code(frame);
1305     Sp = frame;
1306     Sp_adj(-1);
1307     jump RET_LBL(stg_ap_v);
1308   }
1309 }
1310
1311
1312 newTVarzh_fast
1313 {
1314   W_ tv;
1315   W_ new_value;
1316
1317   /* Args: R1 = initialisation value */
1318
1319   MAYBE_GC (R1_PTR, newTVarzh_fast); 
1320   new_value = R1;
1321   "ptr" tv = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr");
1322   RET_P(tv);
1323 }
1324
1325
1326 readTVarzh_fast
1327 {
1328   W_ trec;
1329   W_ tvar;
1330   W_ result;
1331
1332   /* Args: R1 = TVar closure */
1333
1334   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1335   trec = StgTSO_trec(CurrentTSO);
1336   tvar = R1;
1337   "ptr" result = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
1338
1339   RET_P(result);
1340 }
1341
1342
1343 writeTVarzh_fast
1344 {
1345   W_ trec;
1346   W_ tvar;
1347   W_ new_value;
1348   
1349   /* Args: R1 = TVar closure */
1350   /*       R2 = New value    */
1351
1352   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1353   trec = StgTSO_trec(CurrentTSO);
1354   tvar = R1;
1355   new_value = R2;
1356   foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1357
1358   jump %ENTRY_CODE(Sp(0));
1359 }
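
/* User-level view (a hedged sketch of the correspondence, via GHC.Conc /
 * Control.Concurrent.STM): atomically is built on atomicallyzh_fast, catchSTM
 * on catchSTMzh_fast, orElse on catchRetryzh_fast and retry on retryzh_fast.
 * For example:
 *
 *     import Control.Concurrent.STM
 *
 *     takePositive :: TVar Int -> IO Int
 *     takePositive tv = atomically $ do
 *       x <- readTVar tv
 *       if x > 0
 *         then do writeTVar tv (x - 1); return x
 *         else retry                    -- block until a writer changes tv
 */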
1360
1361
1362 /* -----------------------------------------------------------------------------
1363  * MVar primitives
1364  *
1365  * take & putMVar work as follows.  Firstly, an important invariant:
1366  *
1367  *    If the MVar is full, then the blocking queue contains only
1368  *    threads blocked on putMVar, and if the MVar is empty then the
1369  *    blocking queue contains only threads blocked on takeMVar.
1370  *
1371  * takeMVar:
1372  *    MVar empty : then add ourselves to the blocking queue
1373  *    MVar full  : remove the value from the MVar, and
1374  *                 blocking queue empty     : return
1375  *                 blocking queue non-empty : perform the first blocked putMVar
1376  *                                            from the queue, and wake up the
1377  *                                            thread (MVar is now full again)
1378  *
1379  * putMVar is just the dual of the above algorithm.
1380  *
1381  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1382  * the stack of the thread waiting to do the putMVar.  See
1383  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1384  * the stack layout, and the PerformPut and PerformTake macros below.
1385  *
1386  * It is important that a blocked take or put is woken up with the
1387  * take/put already performed, because otherwise there would be a
1388  * small window of vulnerability where the thread could receive an
1389  * exception and never perform its take or put, and we'd end up with a
1390  * deadlock.
1391  *
1392  * -------------------------------------------------------------------------- */
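
/* User-level view (a hedged sketch with Control.Concurrent.MVar, which sits
 * on top of these primops; demo is just an illustrative name):
 *
 *     import Control.Concurrent (forkIO)
 *     import Control.Concurrent.MVar
 *
 *     demo :: IO ()
 *     demo = do
 *       m <- newEmptyMVar
 *       _ <- forkIO (putMVar m (42 :: Int))   -- would block if m were full
 *       v <- takeMVar m                       -- blocks until the put happens
 *       print v
 *
 * The invariant above is what makes the wake-one-thread logic in
 * takeMVarzh_fast and putMVarzh_fast below sufficient.
 */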
1393
1394 isEmptyMVarzh_fast
1395 {
1396     /* args: R1 = MVar closure */
1397
1398     if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
1399         RET_N(1);
1400     } else {
1401         RET_N(0);
1402     }
1403 }
1404
1405 newMVarzh_fast
1406 {
1407     /* args: none */
1408     W_ mvar;
1409
1410     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1411   
1412     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1413     SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
1414     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1415     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1416     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1417     RET_P(mvar);
1418 }
1419
1420
1421 /* If R1 isn't available, pass it on the stack */
1422 #ifdef REG_R1
1423 #define PerformTake(tso, value)                         \
1424     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1425     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1426 #else
1427 #define PerformTake(tso, value)                                 \
1428     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1429     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1430 #endif
1431
1432 #define PerformPut(tso,lval)                    \
1433     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1434     lval = W_[StgTSO_sp(tso) - WDS(1)];
1435
1436 takeMVarzh_fast
1437 {
1438     W_ mvar, val, info, tso;
1439
1440     /* args: R1 = MVar closure */
1441     mvar = R1;
1442
1443 #if defined(SMP)
1444     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1445 #else
1446     info = GET_INFO(mvar);
1447 #endif
1448
1449     /* If the MVar is empty, put ourselves on its blocking queue,
1450      * and wait until we're woken up.
1451      */
1452     if (info == stg_EMPTY_MVAR_info) {
1453         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1454             StgMVar_head(mvar) = CurrentTSO;
1455         } else {
1456             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1457         }
1458         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1459         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1460         StgTSO_block_info(CurrentTSO)  = mvar;
1461         StgMVar_tail(mvar) = CurrentTSO;
1462         
1463         jump stg_block_takemvar;
1464   }
1465
1466   /* we got the value... */
1467   val = StgMVar_value(mvar);
1468
1469   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1470   {
1471       /* There are putMVar(s) waiting... 
1472        * wake up the first thread on the queue
1473        */
1474       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1475
1476       /* actually perform the putMVar for the thread that we just woke up */
1477       tso = StgMVar_head(mvar);
1478       PerformPut(tso,StgMVar_value(mvar));
1479
1480 #if defined(GRAN) || defined(PAR)
1481       /* ToDo: check 2nd arg (mvar) is right */
1482       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
1483       StgMVar_head(mvar) = tso;
1484 #else
1485       "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", 
1486                                          StgMVar_head(mvar) "ptr") [];
1487       StgMVar_head(mvar) = tso;
1488 #endif
1489
1490       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1491           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1492       }
1493
1494 #if defined(SMP)
1495       foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1496 #endif
1497       RET_P(val);
1498   } 
1499   else
1500   {
1501       /* No further putMVars, MVar is now empty */
1502       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1503  
1504 #if defined(SMP)
1505       foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1506 #else
1507       SET_INFO(mvar,stg_EMPTY_MVAR_info);
1508 #endif
1509
1510       RET_P(val);
1511   }
1512 }
1513
1514
1515 tryTakeMVarzh_fast
1516 {
1517     W_ mvar, val, info, tso;
1518
1519     /* args: R1 = MVar closure */
1520
1521     mvar = R1;
1522
1523 #if defined(SMP)
1524     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1525 #else
1526     info = GET_INFO(mvar);
1527 #endif
1528
1529     if (info == stg_EMPTY_MVAR_info) {
1530 #if defined(SMP)
1531         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1532 #endif
1533         /* HACK: we need a pointer to pass back, 
1534          * so we abuse NO_FINALIZER_closure
1535          */
1536         RET_NP(0, stg_NO_FINALIZER_closure);
1537     }
1538
1539     /* we got the value... */
1540     val = StgMVar_value(mvar);
1541
1542     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1543
1544         /* There are putMVar(s) waiting... 
1545          * wake up the first thread on the queue
1546          */
1547         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1548
1549         /* actually perform the putMVar for the thread that we just woke up */
1550         tso = StgMVar_head(mvar);
1551         PerformPut(tso,StgMVar_value(mvar));
1552
1553 #if defined(GRAN) || defined(PAR)
1554         /* ToDo: check 2nd arg (mvar) is right */
1555         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
1556         StgMVar_head(mvar) = tso;
1557 #else
1558         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr",
1559                                            StgMVar_head(mvar) "ptr") [];
1560         StgMVar_head(mvar) = tso;
1561 #endif
1562
1563         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1564             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1565         }
1566 #if defined(SMP)
1567         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1568 #endif
1569     }
1570     else 
1571     {
1572         /* No further putMVars, MVar is now empty */
1573         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1574 #if defined(SMP)
1575         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1576 #else
1577         SET_INFO(mvar,stg_EMPTY_MVAR_info);
1578 #endif
1579     }
1580     
1581     RET_NP(1, val);
1582 }
1583
1584
1585 putMVarzh_fast
1586 {
1587     W_ mvar, info, tso;
1588
1589     /* args: R1 = MVar, R2 = value */
1590     mvar = R1;
1591
1592 #if defined(SMP)
1593     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1594 #else
1595     info = GET_INFO(mvar);
1596 #endif
1597
1598     if (info == stg_FULL_MVAR_info) {
1599         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1600             StgMVar_head(mvar) = CurrentTSO;
1601         } else {
1602             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1603         }
1604         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1605         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1606         StgTSO_block_info(CurrentTSO)  = mvar;
1607         StgMVar_tail(mvar) = CurrentTSO;
1608         
1609         jump stg_block_putmvar;
1610     }
1611   
1612     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1613
1614         /* There are takeMVar(s) waiting: wake up the first one
1615          */
1616         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1617
1618         /* actually perform the takeMVar */
1619         tso = StgMVar_head(mvar);
1620         PerformTake(tso, R2);
1621       
1622 #if defined(GRAN) || defined(PAR)
1623         /* ToDo: check 2nd arg (mvar) is right */
1624         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
1625         StgMVar_head(mvar) = tso;
1626 #else
1627         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
1628         StgMVar_head(mvar) = tso;
1629 #endif
1630
1631         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1632             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1633         }
1634
1635 #if defined(SMP)
1636         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1637 #endif
1638         jump %ENTRY_CODE(Sp(0));
1639     }
1640     else
1641     {
1642         /* No further takes, the MVar is now full. */
1643         StgMVar_value(mvar) = R2;
1644
1645 #if defined(SMP)
1646         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1647 #else
1648         SET_INFO(mvar,stg_FULL_MVAR_info);
1649 #endif
1650         jump %ENTRY_CODE(Sp(0));
1651     }
1652     
1653     /* ToDo: yield afterward for better communication performance? */
1654 }
1655
1656
1657 tryPutMVarzh_fast
1658 {
1659     W_ mvar, info, tso;
1660
1661     /* args: R1 = MVar, R2 = value */
1662     mvar = R1;
1663
1664 #if defined(SMP)
1665     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1666 #else
1667     info = GET_INFO(mvar);
1668 #endif
1669
1670     if (info == stg_FULL_MVAR_info) {
1671 #if defined(SMP)
1672         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1673 #endif
1674         RET_N(0);
1675     }
1676   
1677     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1678
1679         /* There are takeMVar(s) waiting: wake up the first one
1680          */
1681         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1682         
1683         /* actually perform the takeMVar */
1684         tso = StgMVar_head(mvar);
1685         PerformTake(tso, R2);
1686       
1687 #if defined(GRAN) || defined(PAR)
1688         /* ToDo: check 2nd arg (mvar) is right */
1689         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
1690         StgMVar_head(mvar) = tso;
1691 #else
1692         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
1693         StgMVar_head(mvar) = tso;
1694 #endif
1695
1696         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1697             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1698         }
1699
1700 #if defined(SMP)
1701         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1702 #endif
1703         jump %ENTRY_CODE(Sp(0));
1704     }
1705     else
1706     {
1707         /* No further takes, the MVar is now full. */
1708         StgMVar_value(mvar) = R2;
1709
1710 #if defined(SMP)
1711         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1712 #else
1713         SET_INFO(mvar,stg_FULL_MVAR_info);
1714 #endif
1715         jump %ENTRY_CODE(Sp(0));
1716     }
1717     
1718     /* ToDo: yield afterward for better communication performance? */
1719 }
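/* Not part of the RTS sources: a brief library-level sketch of how the MVar
 * primops above are reached.  takeMVar#, putMVar# and tryPutMVar# sit behind
 * the boxed operations in Control.Concurrent.MVar, so the blocking and
 * wake-up behaviour implemented here looks roughly like this from Haskell:
 *
 *   import Control.Concurrent
 *
 *   main :: IO ()
 *   main = do
 *     m  <- newEmptyMVar
 *     _  <- forkIO (putMVar m (42 :: Int))  -- fills the MVar, waking any blocked taker
 *     v  <- takeMVar m                      -- blocks until the MVar is full
 *     ok <- tryPutMVar m 0                  -- succeeds (True): the MVar is empty again
 *     print (v, ok)
 */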
1720
1721
1722 /* -----------------------------------------------------------------------------
1723    Stable name and stable pointer primitives
1724    -------------------------------------------------------------------------  */
1725
1726 makeStableNamezh_fast
1727 {
1728     W_ index, sn_obj;
1729
1730     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1731   
1732     index = foreign "C" lookupStableName(R1 "ptr") [];
1733
1734     /* Is there already a StableName for this heap object?
1735      *  stable_ptr_table is a pointer to an array of snEntry structs.
1736      */
1737     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1738         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1739         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1740         StgStableName_sn(sn_obj) = index;
1741         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1742     } else {
1743         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1744     }
1745     
1746     RET_P(sn_obj);
1747 }
1748
1749
1750 makeStablePtrzh_fast
1751 {
1752     /* Args: R1 = a */
1753     W_ sp;
1754     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1755     "ptr" sp = foreign "C" getStablePtr(R1 "ptr") [];
1756     RET_N(sp);
1757 }
1758
1759 deRefStablePtrzh_fast
1760 {
1761     /* Args: R1 = the stable ptr */
1762     W_ r, sp;
1763     sp = R1;
1764     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1765     RET_P(r);
1766 }
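/* Not part of the RTS sources: a sketch of the library view of these primops.
 * makeStableName# is exposed through System.Mem.StableName, and the stable
 * pointer operations through Foreign.StablePtr:
 *
 *   import System.Mem.StableName (makeStableName, hashStableName)
 *   import Foreign.StablePtr (newStablePtr, deRefStablePtr, freeStablePtr)
 *
 *   demo :: IO ()
 *   demo = do
 *     let xs = [1, 2, 3] :: [Int]
 *     sn1 <- makeStableName xs
 *     sn2 <- makeStableName xs
 *     print (sn1 == sn2, hashStableName sn1)  -- the table lookup above normally
 *                                             -- re-uses the first StableName
 *     sp <- newStablePtr xs                   -- keeps xs alive until freed
 *     ys <- deRefStablePtr sp
 *     print ys
 *     freeStablePtr sp
 */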
1767
1768 /* -----------------------------------------------------------------------------
1769    Bytecode object primitives
1770    -------------------------------------------------------------------------  */
1771
1772 newBCOzh_fast
1773 {
1774     /* R1 = instrs
1775        R2 = literals
1776        R3 = ptrs
1777        R4 = itbls
1778        R5 = arity
1779        R6 = bitmap array
1780     */
1781     W_ bco, bitmap_arr, bytes, words;
1782     
1783     bitmap_arr = R6;
1784     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1785     bytes = WDS(words);
1786
1787     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );
1788
1789     bco = Hp - bytes + WDS(1);
1790     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1791     
1792     StgBCO_instrs(bco)     = R1;
1793     StgBCO_literals(bco)   = R2;
1794     StgBCO_ptrs(bco)       = R3;
1795     StgBCO_itbls(bco)      = R4;
1796     StgBCO_arity(bco)      = HALF_W_(R5);
1797     StgBCO_size(bco)       = HALF_W_(words);
1798     
1799     // Copy the arity/bitmap info into the BCO
1800     W_ i;
1801     i = 0;
1802 for:
1803     if (i < StgArrWords_words(bitmap_arr)) {
1804         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1805         i = i + 1;
1806         goto for;
1807     }
1808     
1809     RET_P(bco);
1810 }
1811
1812
1813 mkApUpd0zh_fast
1814 {
1815     // R1 = the BCO# for the AP
1816     //  
1817     W_ ap;
1818
1819     // This function is *only* used to wrap zero-arity BCOs in an
1820     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1821     // saturated and always points directly to a FUN or BCO.
1822     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1823            StgBCO_arity(R1) == HALF_W_(0));
1824
1825     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1826     TICK_ALLOC_UP_THK(0, 0);
1827     CCCS_ALLOC(SIZEOF_StgAP);
1828
1829     ap = Hp - SIZEOF_StgAP + WDS(1);
1830     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1831     
1832     StgAP_n_args(ap) = HALF_W_(0);
1833     StgAP_fun(ap) = R1;
1834     
1835     RET_P(ap);
1836 }
1837
1838 /* -----------------------------------------------------------------------------
1839    Thread I/O blocking primitives
1840    -------------------------------------------------------------------------- */
1841
1842 /* Add a thread to the end of the blocked queue. (C-- version of the C
1843  * macro in Schedule.h).
1844  */
1845 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1846     ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
1847     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1848       W_[blocked_queue_hd] = tso;                       \
1849     } else {                                            \
1850       StgTSO_link(W_[blocked_queue_tl]) = tso;          \
1851     }                                                   \
1852     W_[blocked_queue_tl] = tso;
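/* Not part of the RTS sources: the macro above appends to a singly linked
 * queue kept as a head pointer plus a tail pointer.  A Haskell sketch of the
 * same logic, with IORefs standing in for blocked_queue_hd/blocked_queue_tl
 * (all names and types here are hypothetical, for exposition only):
 *
 *   import Data.IORef
 *
 *   data Node = Node { next :: IORef (Maybe Node) }
 *
 *   appendNode :: IORef (Maybe Node) -> IORef (Maybe Node) -> Node -> IO ()
 *   appendNode hd tl node = do
 *     front <- readIORef hd
 *     case front of
 *       Nothing -> writeIORef hd (Just node)     -- queue was empty: node is the new head
 *       Just _  -> do
 *         Just t <- readIORef tl                 -- non-empty, so the tail is valid
 *         writeIORef (next t) (Just node)        -- link node after the old tail
 *     writeIORef tl (Just node)                  -- node becomes the new tail
 */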
1853
1854 waitReadzh_fast
1855 {
1856     /* args: R1 */
1857 #ifdef THREADED_RTS
1858     foreign "C" barf("waitRead# on threaded RTS");
1859 #else
1860
1861     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1862     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1863     StgTSO_block_info(CurrentTSO) = R1;
1864     // No locking - we're not going to use this interface in the
1865     // threaded RTS anyway.
1866     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1867     jump stg_block_noregs;
1868 #endif
1869 }
1870
1871 waitWritezh_fast
1872 {
1873     /* args: R1 */
1874 #ifdef THREADED_RTS
1875     foreign "C" barf("waitWrite# on threaded RTS");
1876 #else
1877
1878     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1879     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1880     StgTSO_block_info(CurrentTSO) = R1;
1881     // No locking - we're not going to use this interface in the
1882     // threaded RTS anyway.
1883     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1884     jump stg_block_noregs;
1885 #endif
1886 }
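/* Not part of the RTS sources: on the non-threaded RTS, waitRead# and
 * waitWrite# are what Control.Concurrent's threadWaitRead/threadWaitWrite
 * call, blocking just the current Haskell thread until the file descriptor
 * is ready:
 *
 *   import Control.Concurrent (threadWaitRead)
 *   import System.Posix.Types (Fd (..))
 *
 *   waitForStdin :: IO ()
 *   waitForStdin = threadWaitRead (Fd 0)   -- block until fd 0 is readable
 */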
1887
1888
1889 STRING(stg_delayzh_malloc_str, "delayzh_fast")
1890 delayzh_fast
1891 {
1892 #ifdef mingw32_HOST_OS
1893     W_ ares;
1894     CInt reqID;
1895 #else
1896     W_ t, prev, target;
1897 #endif
1898
1899 #ifdef THREADED_RTS
1900     foreign "C" barf("delay# on threaded RTS");
1901 #else
1902
1903     /* args: R1 (microsecond delay amount) */
1904     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1905     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
1906
1907 #ifdef mingw32_HOST_OS
1908
1909     /* could probably allocate this on the heap instead */
1910     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1911                                             stg_delayzh_malloc_str);
1912     reqID = foreign "C" addDelayRequest(R1);
1913     StgAsyncIOResult_reqID(ares)   = reqID;
1914     StgAsyncIOResult_len(ares)     = 0;
1915     StgAsyncIOResult_errCode(ares) = 0;
1916     StgTSO_block_info(CurrentTSO)  = ares;
1917
1918     /* Having all async-blocked threads reside on the blocked_queue
1919      * simplifies matters, so change the status to OnDoProc and put the
1920      * delayed thread on the blocked_queue.
1921      */
1922     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1923     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1924     jump stg_block_async_void;
1925
1926 #else
1927
1928     W_ time;
1929     time = foreign "C" getourtimeofday();
1930     target = (R1 / (TICK_MILLISECS*1000)) + time;
1931     StgTSO_block_info(CurrentTSO) = target;
1932
1933     /* Insert the new thread in the sleeping queue. */
1934     prev = NULL;
1935     t = W_[sleeping_queue];
1936 while:
1937     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
1938         prev = t;
1939         t = StgTSO_link(t);
1940         goto while;
1941     }
1942
1943     StgTSO_link(CurrentTSO) = t;
1944     if (prev == NULL) {
1945         W_[sleeping_queue] = CurrentTSO;
1946     } else {
1947         StgTSO_link(prev) = CurrentTSO;
1948     }
1949     jump stg_block_noregs;
1950 #endif
1951 #endif /* !THREADED_RTS */
1952 }
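/* Not part of the RTS sources: delay# backs Control.Concurrent.threadDelay
 * on the non-threaded RTS.  On the non-Windows path above, the microsecond
 * argument is converted to ticks: for example, if TICK_MILLISECS were 20, a
 * 1000000-microsecond delay becomes a wake-up target 50 ticks past the
 * current time, and the thread is inserted into sleeping_queue, which is
 * kept sorted by wake-up time.
 *
 *   import Control.Concurrent (threadDelay)
 *
 *   pauseOneSecond :: IO ()
 *   pauseOneSecond = threadDelay 1000000   -- argument is in microseconds
 */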
1953
1954
1955 #ifdef mingw32_HOST_OS
1956 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
1957 asyncReadzh_fast
1958 {
1959     W_ ares;
1960     CInt reqID;
1961
1962 #ifdef THREADED_RTS
1963     foreign "C" barf("asyncRead# on threaded RTS");
1964 #endif
1965
1966     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1967     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1968     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1969
1970     /* could probably allocate this on the heap instead */
1971     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1972                                             stg_asyncReadzh_malloc_str);
1973     reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr");
1974     StgAsyncIOResult_reqID(ares)   = reqID;
1975     StgAsyncIOResult_len(ares)     = 0;
1976     StgAsyncIOResult_errCode(ares) = 0;
1977     StgTSO_block_info(CurrentTSO)  = ares;
1978     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1979     jump stg_block_async;
1980 }
1981
1982 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
1983 asyncWritezh_fast
1984 {
1985     W_ ares;
1986     CInt reqID;
1987
1988 #ifdef THREADED_RTS
1989     foreign "C" barf("asyncWrite# on threaded RTS");
1990 #endif
1991
1992     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1993     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1994     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1995
1996     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1997                                             stg_asyncWritezh_malloc_str);
1998     reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr");
1999
2000     StgAsyncIOResult_reqID(ares)   = reqID;
2001     StgAsyncIOResult_len(ares)     = 0;
2002     StgAsyncIOResult_errCode(ares) = 0;
2003     StgTSO_block_info(CurrentTSO)  = ares;
2004     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2005     jump stg_block_async;
2006 }
2007
2008 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
2009 asyncDoProczh_fast
2010 {
2011     W_ ares;
2012     CInt reqID;
2013
2014     /* args: R1 = proc, R2 = param */
2015     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2016     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2017
2018     /* could probably allocate this on the heap instead */
2019     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2020                                             stg_asyncDoProczh_malloc_str);
2021     reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr");
2022     StgAsyncIOResult_reqID(ares)   = reqID;
2023     StgAsyncIOResult_len(ares)     = 0;
2024     StgAsyncIOResult_errCode(ares) = 0;
2025     StgTSO_block_info(CurrentTSO) = ares;
2026     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2027     jump stg_block_async;
2028 }
2029 #endif
2030
2031 /* -----------------------------------------------------------------------------
2032   ** temporary **
2033
2034    classes CCallable and CReturnable don't really exist, but the
2035    compiler insists on generating dictionaries containing references
2036    to GHC_ZCCCallable_static_info etc., so we provide dummy symbols
2037    for these.  Some C compilers can't cope with zero-length static arrays,
2038    so we have to make these one element long.
2039   --------------------------------------------------------------------------- */
2040
2041 section "rodata" {
2042   GHC_ZCCCallable_static_info:   W_ 0;
2043 }
2044
2045 section "rodata" {
2046   GHC_ZCCReturnable_static_info: W_ 0;
2047 }