7b1c8aad1bbbd5dcd4c46deaeeb657498f0357ab
[ghc-hetmet.git] / ghc / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check, the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *less* than the size of a word, make sure we
42  * round up to the nearest word for the size of the array.
43  */
44
45 newByteArrayzh_fast
46 {
47     W_ words, payload_words, n, p;
48     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
49     n = R1;
50     payload_words = ROUNDUP_BYTES_TO_WDS(n);
51     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
52     "ptr" p = foreign "C" allocateLocal(BaseReg "ptr",words);
53     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
54     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
55     StgArrWords_words(p) = payload_words;
56     RET_P(p);
57 }
58
59 newPinnedByteArrayzh_fast
60 {
61     W_ words, payload_words, n, p;
62
63     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
64     n = R1;
65     payload_words = ROUNDUP_BYTES_TO_WDS(n);
66
67     // We want an 8-byte aligned array.  allocatePinned() gives us
68     // 8-byte aligned memory by default, but we want to align the
69     // *goods* inside the ArrWords object, so we have to check the
70     // size of the ArrWords header and adjust our size accordingly.
71     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
72     if ((SIZEOF_StgArrWords & 7) != 0) {
73         words = words + 1;
74     }
75
76     "ptr" p = foreign "C" allocatePinned(words);
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78
79     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
80     // have to push the object forward one word so that the goods
81     // fall on an 8-byte boundary.
82     if ((SIZEOF_StgArrWords & 7) != 0) {
83         p = p + WDS(1);
84     }
85
86     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
87     StgArrWords_words(p) = payload_words;
88     RET_P(p);
89 }
90
91 newArrayzh_fast
92 {
93     W_ words, n, init, arr, p;
94     /* Args: R1 = words, R2 = initialisation value */
95
96     n = R1;
97     MAYBE_GC(R2_PTR,newArrayzh_fast);
98
99     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
100     "ptr" arr = foreign "C" allocateLocal(BaseReg "ptr",words);
101     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
102
103     SET_HDR(arr, stg_MUT_ARR_PTRS_info, W_[CCCS]);
104     StgMutArrPtrs_ptrs(arr) = n;
105
106     // Initialise all elements of the the array with the value in R2
107     init = R2;
108     p = arr + SIZEOF_StgMutArrPtrs;
109   for:
110     if (p < arr + WDS(words)) {
111         W_[p] = init;
112         p = p + WDS(1);
113         goto for;
114     }
115
116     RET_P(arr);
117 }
118
119 unsafeThawArrayzh_fast
120 {
121   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
122   //
123   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
124   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
125   // it on the mutable list for the GC to remove (removing something from
126   // the mutable list is not easy, because the mut_list is only singly-linked).
127   // 
128   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
129   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0 to indicate
130   // that it is still on the mutable list.
131
132   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
133   // either it is on a mut_list, or it isn't.  We adopt the convention that
134   // the mut_link field is NULL if it isn't on a mut_list, and the GC
135   // maintains this invariant.
136   //
137   if (%INFO_TYPE(%GET_STD_INFO(R1)) != HALF_W_(MUT_ARR_PTRS_FROZEN0)) {
138         foreign "C" recordMutableLock(R1 "ptr");
139   }
140
141   SET_INFO(R1,stg_MUT_ARR_PTRS_info);
142
143   RET_P(R1);
144 }
145
146 /* -----------------------------------------------------------------------------
147    MutVar primitives
148    -------------------------------------------------------------------------- */
149
150 newMutVarzh_fast
151 {
152     W_ mv;
153     /* Args: R1 = initialisation value */
154
155     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
156
157     mv = Hp - SIZEOF_StgMutVar + WDS(1);
158     SET_HDR(mv,stg_MUT_VAR_info,W_[CCCS]);
159     StgMutVar_var(mv) = R1;
160     
161     RET_P(mv);
162 }
163
164 atomicModifyMutVarzh_fast
165 {
166     W_ mv, z, x, y, r;
167     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
168
169     /* If x is the current contents of the MutVar#, then 
170        We want to make the new contents point to
171
172          (sel_0 (f x))
173  
174        and the return value is
175          
176          (sel_1 (f x))
177
178         obviously we can share (f x).
179
180          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
181          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
182          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
183     */
184
185 #if MIN_UPD_SIZE > 1
186 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
187 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
188 #else
189 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
190 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
191 #endif
192
193 #if MIN_UPD_SIZE > 2
194 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
195 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
196 #else
197 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
198 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
199 #endif
200
201 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
202
203    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
204
205 #if defined(SMP)
206     foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
207 #endif
208
209    x = StgMutVar_var(R1);
210
211    TICK_ALLOC_THUNK_2();
212    CCCS_ALLOC(THUNK_2_SIZE);
213    z = Hp - THUNK_2_SIZE + WDS(1);
214    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
215    LDV_RECORD_CREATE(z);
216    StgThunk_payload(z,0) = R2;
217    StgThunk_payload(z,1) = x;
218
219    TICK_ALLOC_THUNK_1();
220    CCCS_ALLOC(THUNK_1_SIZE);
221    y = z - THUNK_1_SIZE;
222    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
223    LDV_RECORD_CREATE(y);
224    StgThunk_payload(y,0) = z;
225
226    StgMutVar_var(R1) = y;
227
228    TICK_ALLOC_THUNK_1();
229    CCCS_ALLOC(THUNK_1_SIZE);
230    r = y - THUNK_1_SIZE;
231    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
232    LDV_RECORD_CREATE(r);
233    StgThunk_payload(r,0) = z;
234
235 #if defined(SMP)
236     foreign "C" RELEASE_LOCK(sm_mutex "ptr");
237 #endif
238
239    RET_P(r);
240 }
241
242 /* -----------------------------------------------------------------------------
243    Foreign Object Primitives
244    -------------------------------------------------------------------------- */
245
246 mkForeignObjzh_fast
247 {
248   /* R1 = ptr to foreign object,
249   */
250   W_ result;
251
252   ALLOC_PRIM( SIZEOF_StgForeignObj, NO_PTRS, mkForeignObjzh_fast);
253
254   result = Hp - SIZEOF_StgForeignObj + WDS(1);
255   SET_HDR(result,stg_FOREIGN_info,W_[CCCS]);
256   StgForeignObj_data(result) = R1;
257
258   /* returns (# s#, ForeignObj# #) */
259   RET_P(result);
260 }
261
262 /* -----------------------------------------------------------------------------
263    Weak Pointer Primitives
264    -------------------------------------------------------------------------- */
265
266 STRING(stg_weak_msg,"New weak pointer at %p\n")
267
268 mkWeakzh_fast
269 {
270   /* R1 = key
271      R2 = value
272      R3 = finalizer (or NULL)
273   */
274   W_ w;
275
276   if (R3 == NULL) {
277     R3 = stg_NO_FINALIZER_closure;
278   }
279
280   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
281
282   w = Hp - SIZEOF_StgWeak + WDS(1);
283   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
284
285   StgWeak_key(w)       = R1;
286   StgWeak_value(w)     = R2;
287   StgWeak_finalizer(w) = R3;
288
289   StgWeak_link(w)       = W_[weak_ptr_list];
290   W_[weak_ptr_list]     = w;
291
292   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w));
293
294   RET_P(w);
295 }
296
297
298 finalizzeWeakzh_fast
299 {
300   /* R1 = weak ptr
301    */
302   W_ w, f;
303
304   w = R1;
305
306   // already dead?
307   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
308       RET_NP(0,stg_NO_FINALIZER_closure);
309   }
310
311   // kill it
312 #ifdef PROFILING
313   // @LDV profiling
314   // A weak pointer is inherently used, so we do not need to call
315   // LDV_recordDead_FILL_SLOP_DYNAMIC():
316   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
317   // or, LDV_recordDead():
318   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
319   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
320   // large as weak pointers, so there is no need to fill the slop, either.
321   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
322 #endif
323
324   //
325   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
326   //
327   SET_INFO(w,stg_DEAD_WEAK_info);
328   LDV_RECORD_CREATE(w);
329
330   f = StgWeak_finalizer(w);
331   StgDeadWeak_link(w) = StgWeak_link(w);
332
333   /* return the finalizer */
334   if (f == stg_NO_FINALIZER_closure) {
335       RET_NP(0,stg_NO_FINALIZER_closure);
336   } else {
337       RET_NP(1,f);
338   }
339 }
340
341 deRefWeakzh_fast
342 {
343   /* R1 = weak ptr */
344   W_ w, code, val;
345
346   w = R1;
347   if (GET_INFO(w) == stg_WEAK_info) {
348     code = 1;
349     val = StgWeak_value(w);
350   } else {
351     code = 0;
352     val = w;
353   }
354   RET_NP(code,val);
355 }
356
357 /* -----------------------------------------------------------------------------
358    Arbitrary-precision Integer operations.
359
360    There are some assumptions in this code that mp_limb_t == W_.  This is
361    the case for all the platforms that GHC supports, currently.
362    -------------------------------------------------------------------------- */
363
364 int2Integerzh_fast
365 {
366    /* arguments: R1 = Int# */
367
368    W_ val, s, p;        /* to avoid aliasing */
369
370    val = R1;
371    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
372
373    p = Hp - SIZEOF_StgArrWords;
374    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
375    StgArrWords_words(p) = 1;
376
377    /* mpz_set_si is inlined here, makes things simpler */
378    if (%lt(val,0)) { 
379         s  = -1;
380         Hp(0) = -val;
381    } else { 
382      if (%gt(val,0)) {
383         s = 1;
384         Hp(0) = val;
385      } else {
386         s = 0;
387      }
388   }
389
390    /* returns (# size  :: Int#, 
391                  data  :: ByteArray# 
392                #)
393    */
394    RET_NP(s,p);
395 }
396
397 word2Integerzh_fast
398 {
399    /* arguments: R1 = Word# */
400
401    W_ val, s, p;        /* to avoid aliasing */
402
403    val = R1;
404
405    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
406
407    p = Hp - SIZEOF_StgArrWords;
408    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
409    StgArrWords_words(p) = 1;
410
411    if (val != 0) {
412         s = 1;
413         W_[Hp] = val;
414    } else {
415         s = 0;
416    }
417
418    /* returns (# size  :: Int#, 
419                  data  :: ByteArray# #)
420    */
421    RET_NP(s,p);
422 }
423
424
425 /*
426  * 'long long' primops for converting to/from Integers.
427  */
428
429 #ifdef SUPPORT_LONG_LONGS
430
431 int64ToIntegerzh_fast
432 {
433    /* arguments: L1 = Int64# */
434
435    L_ val;
436    W_ hi, s, neg, words_needed, p;
437
438    val = L1;
439    neg = 0;
440
441    if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) )  { 
442        words_needed = 2;
443    } else { 
444        // minimum is one word
445        words_needed = 1;
446    }
447
448    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
449                NO_PTRS, int64ToIntegerzh_fast );
450
451    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
452    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
453    StgArrWords_words(p) = words_needed;
454
455    if ( %lt(val,0::L_) ) {
456      neg = 1;
457      val = -val;
458    }
459
460    hi = TO_W_(val >> 32);
461
462    if ( words_needed == 2 )  { 
463       s = 2;
464       Hp(-1) = TO_W_(val);
465       Hp(0) = hi;
466    } else { 
467        if ( val != 0::L_ ) {
468            s = 1;
469            Hp(0) = TO_W_(val);
470        } else /* val==0 */  {
471            s = 0;
472        }
473    }
474    if ( neg != 0 ) {
475         s = -s;
476    }
477
478    /* returns (# size  :: Int#, 
479                  data  :: ByteArray# #)
480    */
481    RET_NP(s,p);
482 }
483
484 word64ToIntegerzh_fast
485 {
486    /* arguments: L1 = Word64# */
487
488    L_ val;
489    W_ hi, s, words_needed, p;
490
491    val = L1;
492    if ( val >= 0x100000000::L_ ) {
493       words_needed = 2;
494    } else {
495       words_needed = 1;
496    }
497
498    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
499                NO_PTRS, word64ToIntegerzh_fast );
500
501    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
502    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
503    StgArrWords_words(p) = words_needed;
504
505    hi = TO_W_(val >> 32);
506    if ( val >= 0x100000000::L_ ) { 
507      s = 2;
508      Hp(-1) = TO_W_(val);
509      Hp(0)  = hi;
510    } else {
511       if ( val != 0::L_ ) {
512         s = 1;
513         Hp(0) = TO_W_(val);
514      } else /* val==0 */  {
515       s = 0;
516      }
517   }
518
519    /* returns (# size  :: Int#, 
520                  data  :: ByteArray# #)
521    */
522    RET_NP(s,p);
523 }
524
525
526 #endif /* SUPPORT_LONG_LONGS */
527
528 /* ToDo: this is shockingly inefficient */
529
530 #ifndef SMP
531 section "bss" {
532   mp_tmp1:
533     bits8 [SIZEOF_MP_INT];
534 }
535
536 section "bss" {
537   mp_tmp2:
538     bits8 [SIZEOF_MP_INT];
539 }
540
541 section "bss" {
542   mp_result1:
543     bits8 [SIZEOF_MP_INT];
544 }
545
546 section "bss" {
547   mp_result2:
548     bits8 [SIZEOF_MP_INT];
549 }
550 #endif
551
552 #ifdef SMP
553 #define FETCH_MP_TEMP(X) \
554 W_ X; \
555 X = BaseReg + (OFFSET_StgRegTable_r ## X);
556 #else
557 #define FETCH_MP_TEMP(X) /* Nothing */
558 #endif
559
560 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
561 name                                                                    \
562 {                                                                       \
563   CInt s1, s2;                                                          \
564   W_ d1, d2;                                                            \
565   FETCH_MP_TEMP(mp_tmp1);                                               \
566   FETCH_MP_TEMP(mp_tmp2);                                               \
567   FETCH_MP_TEMP(mp_result1)                                             \
568   FETCH_MP_TEMP(mp_result2);                                            \
569                                                                         \
570   /* call doYouWantToGC() */                                            \
571   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
572                                                                         \
573   s1 = W_TO_INT(R1);                                                    \
574   d1 = R2;                                                              \
575   s2 = W_TO_INT(R3);                                                    \
576   d2 = R4;                                                              \
577                                                                         \
578   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
579   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
580   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
581   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
582   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
583   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
584                                                                         \
585   foreign "C" mpz_init(mp_result1 "ptr");                               \
586                                                                         \
587   /* Perform the operation */                                           \
588   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr");   \
589                                                                         \
590   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
591          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
592 }
593
594 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
595 name                                                                    \
596 {                                                                       \
597   CInt s1;                                                              \
598   W_ d1;                                                                \
599   FETCH_MP_TEMP(mp_tmp1);                                               \
600   FETCH_MP_TEMP(mp_result1)                                             \
601                                                                         \
602   /* call doYouWantToGC() */                                            \
603   MAYBE_GC(R2_PTR, name);                                               \
604                                                                         \
605   d1 = R2;                                                              \
606   s1 = W_TO_INT(R1);                                                    \
607                                                                         \
608   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
609   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
610   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
611                                                                         \
612   foreign "C" mpz_init(mp_result1 "ptr");                               \
613                                                                         \
614   /* Perform the operation */                                           \
615   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr");                   \
616                                                                         \
617   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
618          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
619 }
620
621 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
622 name                                                                                    \
623 {                                                                                       \
624   CInt s1, s2;                                                                          \
625   W_ d1, d2;                                                                            \
626   FETCH_MP_TEMP(mp_tmp1);                                                               \
627   FETCH_MP_TEMP(mp_tmp2);                                                               \
628   FETCH_MP_TEMP(mp_result1)                                                             \
629   FETCH_MP_TEMP(mp_result2)                                                             \
630                                                                                         \
631   /* call doYouWantToGC() */                                                            \
632   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
633                                                                                         \
634   s1 = W_TO_INT(R1);                                                                    \
635   d1 = R2;                                                                              \
636   s2 = W_TO_INT(R3);                                                                    \
637   d2 = R4;                                                                              \
638                                                                                         \
639   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
640   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
641   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
642   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
643   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
644   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
645                                                                                         \
646   foreign "C" mpz_init(mp_result1 "ptr");                                               \
647   foreign "C" mpz_init(mp_result2 "ptr");                                               \
648                                                                                         \
649   /* Perform the operation */                                                           \
650   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr");    \
651                                                                                         \
652   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
653            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
654            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
655            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
656 }
657
658 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add)
659 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub)
660 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul)
661 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd)
662 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q)
663 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r)
664 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact)
665 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and)
666 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior)
667 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor)
668 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com)
669
670 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr)
671 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr)
672
673 #ifndef SMP
674 section "bss" {
675   mp_tmp_w:  W_; // NB. mp_tmp_w is really an here mp_limb_t
676 }
677 #endif
678
679 gcdIntzh_fast
680 {
681     /* R1 = the first Int#; R2 = the second Int# */
682     W_ r; 
683     FETCH_MP_TEMP(mp_tmp_w);
684
685     W_[mp_tmp_w] = R1;
686     r = foreign "C" mpn_gcd_1(mp_tmp_w "ptr", 1, R2);
687
688     R1 = r;
689     /* Result parked in R1, return via info-pointer at TOS */
690     jump %ENTRY_CODE(Sp(0));
691 }
692
693
694 gcdIntegerIntzh_fast
695 {
696     /* R1 = s1; R2 = d1; R3 = the int */
697     R1 = foreign "C" mpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3);
698     
699     /* Result parked in R1, return via info-pointer at TOS */
700     jump %ENTRY_CODE(Sp(0));
701 }
702
703
704 cmpIntegerIntzh_fast
705 {
706     /* R1 = s1; R2 = d1; R3 = the int */
707     W_ usize, vsize, v_digit, u_digit;
708
709     usize = R1;
710     vsize = 0;
711     v_digit = R3;
712
713     // paraphrased from mpz_cmp_si() in the GMP sources
714     if (%gt(v_digit,0)) {
715         vsize = 1;
716     } else { 
717         if (%lt(v_digit,0)) {
718             vsize = -1;
719             v_digit = -v_digit;
720         }
721     }
722
723     if (usize != vsize) {
724         R1 = usize - vsize; 
725         jump %ENTRY_CODE(Sp(0));
726     }
727
728     if (usize == 0) {
729         R1 = 0; 
730         jump %ENTRY_CODE(Sp(0));
731     }
732
733     u_digit = W_[BYTE_ARR_CTS(R2)];
734
735     if (u_digit == v_digit) {
736         R1 = 0; 
737         jump %ENTRY_CODE(Sp(0));
738     }
739
740     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
741         R1 = usize; 
742     } else {
743         R1 = -usize; 
744     }
745
746     jump %ENTRY_CODE(Sp(0));
747 }
748
749 cmpIntegerzh_fast
750 {
751     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
752     W_ usize, vsize, size, up, vp;
753     CInt cmp;
754
755     // paraphrased from mpz_cmp() in the GMP sources
756     usize = R1;
757     vsize = R3;
758
759     if (usize != vsize) {
760         R1 = usize - vsize; 
761         jump %ENTRY_CODE(Sp(0));
762     }
763
764     if (usize == 0) {
765         R1 = 0; 
766         jump %ENTRY_CODE(Sp(0));
767     }
768
769     if (%lt(usize,0)) { // NB. not <, which is unsigned
770         size = -usize;
771     } else {
772         size = usize;
773     }
774
775     up = BYTE_ARR_CTS(R2);
776     vp = BYTE_ARR_CTS(R4);
777
778     cmp = foreign "C" mpn_cmp(up "ptr", vp "ptr", size);
779
780     if (cmp == 0 :: CInt) {
781         R1 = 0; 
782         jump %ENTRY_CODE(Sp(0));
783     }
784
785     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
786         R1 = 1;
787     } else {
788         R1 = (-1); 
789     }
790     /* Result parked in R1, return via info-pointer at TOS */
791     jump %ENTRY_CODE(Sp(0));
792 }
793
794 integer2Intzh_fast
795 {
796     /* R1 = s; R2 = d */
797     W_ r, s;
798
799     s = R1;
800     if (s == 0) {
801         r = 0;
802     } else {
803         r = W_[R2 + SIZEOF_StgArrWords];
804         if (%lt(s,0)) {
805             r = -r;
806         }
807     }
808     /* Result parked in R1, return via info-pointer at TOS */
809     R1 = r;
810     jump %ENTRY_CODE(Sp(0));
811 }
812
813 integer2Wordzh_fast
814 {
815   /* R1 = s; R2 = d */
816   W_ r, s;
817
818   s = R1;
819   if (s == 0) {
820     r = 0;
821   } else {
822     r = W_[R2 + SIZEOF_StgArrWords];
823     if (%lt(s,0)) {
824         r = -r;
825     }
826   }
827   /* Result parked in R1, return via info-pointer at TOS */
828   R1 = r;
829   jump %ENTRY_CODE(Sp(0));
830 }
831
832 decodeFloatzh_fast
833
834     W_ p;
835     F_ arg;
836     FETCH_MP_TEMP(mp_tmp1);
837     FETCH_MP_TEMP(mp_tmp_w);
838     
839     /* arguments: F1 = Float# */
840     arg = F1;
841     
842     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
843     
844     /* Be prepared to tell Lennart-coded __decodeFloat
845        where mantissa._mp_d can be put (it does not care about the rest) */
846     p = Hp - SIZEOF_StgArrWords;
847     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
848     StgArrWords_words(p) = 1;
849     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
850     
851     /* Perform the operation */
852     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg);
853     
854     /* returns: (Int# (expn), Int#, ByteArray#) */
855     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
856 }
857
858 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
859 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
860
861 decodeDoublezh_fast
862
863     D_ arg;
864     W_ p;
865     FETCH_MP_TEMP(mp_tmp1);
866     FETCH_MP_TEMP(mp_tmp_w);
867
868     /* arguments: D1 = Double# */
869     arg = D1;
870
871     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
872     
873     /* Be prepared to tell Lennart-coded __decodeDouble
874        where mantissa.d can be put (it does not care about the rest) */
875     p = Hp - ARR_SIZE + WDS(1);
876     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
877     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
878     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
879
880     /* Perform the operation */
881     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg);
882     
883     /* returns: (Int# (expn), Int#, ByteArray#) */
884     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
885 }
886
887 /* -----------------------------------------------------------------------------
888  * Concurrency primitives
889  * -------------------------------------------------------------------------- */
890
891 forkzh_fast
892 {
893   /* args: R1 = closure to spark */
894   
895   MAYBE_GC(R1_PTR, forkzh_fast);
896
897   // create it right now, return ThreadID in R1
898   "ptr" R1 = foreign "C" createIOThread( RtsFlags_GcFlags_initialStkSize(RtsFlags), 
899                                    R1 "ptr");
900   foreign "C" scheduleThread(R1 "ptr");
901
902   // switch at the earliest opportunity
903   CInt[context_switch] = 1 :: CInt;
904   
905   RET_P(R1);
906 }
907
908 yieldzh_fast
909 {
910   jump stg_yield_noregs;
911 }
912
913 myThreadIdzh_fast
914 {
915   /* no args. */
916   RET_P(CurrentTSO);
917 }
918
919 labelThreadzh_fast
920 {
921   /* args: 
922         R1 = ThreadId#
923         R2 = Addr# */
924 #ifdef DEBUG
925   foreign "C" labelThread(R1 "ptr", R2 "ptr");
926 #endif
927   jump %ENTRY_CODE(Sp(0));
928 }
929
930 isCurrentThreadBoundzh_fast
931 {
932   /* no args */
933   W_ r;
934   r = foreign "C" isThreadBound(CurrentTSO);
935   RET_N(r);
936 }
937
938
939 /* -----------------------------------------------------------------------------
940  * TVar primitives
941  * -------------------------------------------------------------------------- */
942
943 #ifdef REG_R1
944 #define SP_OFF 0
945 #define IF_NOT_REG_R1(x) 
946 #else
947 #define SP_OFF 1
948 #define IF_NOT_REG_R1(x) x
949 #endif
950
951 // Catch retry frame ------------------------------------------------------------
952
953 #define CATCH_RETRY_FRAME_ERROR(label) \
954   label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }
955
956 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
957 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
958 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
959 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
960 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
961 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
962 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
963 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)
964
965 #if MAX_VECTORED_RTN > 8
966 #error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
967 #endif
968
969 #if defined(PROFILING)
970 #define CATCH_RETRY_FRAME_BITMAP 7
971 #define CATCH_RETRY_FRAME_WORDS  6
972 #else
973 #define CATCH_RETRY_FRAME_BITMAP 1
974 #define CATCH_RETRY_FRAME_WORDS  4
975 #endif
976
977 INFO_TABLE_RET(stg_catch_retry_frame,
978                CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
979                CATCH_RETRY_FRAME,
980                stg_catch_retry_frame_0_ret,
981                stg_catch_retry_frame_1_ret,
982                stg_catch_retry_frame_2_ret,
983                stg_catch_retry_frame_3_ret,
984                stg_catch_retry_frame_4_ret,
985                stg_catch_retry_frame_5_ret,
986                stg_catch_retry_frame_6_ret,
987                stg_catch_retry_frame_7_ret)
988 {
989    W_ r, frame, trec, outer;
990    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
991
992    frame = Sp;
993    trec = StgTSO_trec(CurrentTSO);
994    "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
995    r = foreign "C" stmCommitNestedTransaction(BaseReg "ptr", trec "ptr");
996    if (r) {
997      /* Succeeded (either first branch or second branch) */
998      StgTSO_trec(CurrentTSO) = outer;
999      Sp = Sp + SIZEOF_StgCatchRetryFrame;
1000      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1001      jump %ENTRY_CODE(Sp(SP_OFF));
1002    } else {
1003      /* Did not commit: retry */
1004      W_ new_trec;
1005      "ptr" new_trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
1006      StgTSO_trec(CurrentTSO) = new_trec;
1007      if (StgCatchRetryFrame_running_alt_code(frame)) {
1008        R1 = StgCatchRetryFrame_alt_code(frame);
1009      } else {
1010        R1 = StgCatchRetryFrame_first_code(frame);
1011        StgCatchRetryFrame_first_code_trec(frame) = new_trec;
1012      }
1013      Sp_adj(-1);
1014      jump RET_LBL(stg_ap_v);
1015    }
1016 }
1017
1018
1019 // Atomically frame -------------------------------------------------------------
1020
1021
1022 #define ATOMICALLY_FRAME_ERROR(label) \
1023   label { foreign "C" barf("atomically_frame incorrectly entered!"); }
1024
1025 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
1026 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
1027 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
1028 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
1029 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
1030 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
1031 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
1032 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)
1033
1034 #if MAX_VECTORED_RTN > 8
1035 #error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
1036 #endif
1037
1038 #if defined(PROFILING)
1039 #define ATOMICALLY_FRAME_BITMAP 7
1040 #define ATOMICALLY_FRAME_WORDS  4
1041 #else
1042 #define ATOMICALLY_FRAME_BITMAP 1
1043 #define ATOMICALLY_FRAME_WORDS  2
1044 #endif
1045
1046
1047 INFO_TABLE_RET(stg_atomically_frame,
1048                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1049                ATOMICALLY_FRAME,
1050                stg_atomically_frame_0_ret,
1051                stg_atomically_frame_1_ret,
1052                stg_atomically_frame_2_ret,
1053                stg_atomically_frame_3_ret,
1054                stg_atomically_frame_4_ret,
1055                stg_atomically_frame_5_ret,
1056                stg_atomically_frame_6_ret,
1057                stg_atomically_frame_7_ret)
1058 {
1059    W_ frame, trec, valid;
1060    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1061
1062    frame = Sp;
1063    trec = StgTSO_trec(CurrentTSO);
1064    if (StgAtomicallyFrame_waiting(frame)) {
1065      /* The TSO is currently waiting: should we stop waiting? */
1066      valid = foreign "C" stmReWait(CurrentTSO "ptr");
1067      if (valid) {
1068        /* Previous attempt is still valid: no point trying again yet */
1069           IF_NOT_REG_R1(Sp_adj(-2);
1070                         Sp(1) = stg_NO_FINALIZER_closure;
1071                         Sp(0) = stg_ut_1_0_unreg_info;)
1072        jump stg_block_noregs;
1073      } else {
1074        /* Previous attempt is no longer valid: try again */
1075        "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", NO_TREC "ptr");
1076        StgTSO_trec(CurrentTSO) = trec;
1077        StgAtomicallyFrame_waiting(frame) = 0 :: CInt; /* false; */
1078        R1 = StgAtomicallyFrame_code(frame);
1079        Sp_adj(-1);
1080        jump RET_LBL(stg_ap_v);
1081      }
1082    } else {
1083      /* The TSO is not currently waiting: try to commit the transaction */
1084      valid = foreign "C" stmCommitTransaction(BaseReg "ptr", trec "ptr");
1085      if (valid) {
1086        /* Transaction was valid: commit succeeded */
1087        StgTSO_trec(CurrentTSO) = NO_TREC;
1088        Sp = Sp + SIZEOF_StgAtomicallyFrame;
1089        IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1090        jump %ENTRY_CODE(Sp(SP_OFF));
1091      } else {
1092        /* Transaction was not valid: try again */
1093        "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", NO_TREC "ptr");
1094        StgTSO_trec(CurrentTSO) = trec;
1095        R1 = StgAtomicallyFrame_code(frame);
1096        Sp_adj(-1);
1097        jump RET_LBL(stg_ap_v);
1098      }
1099    }
1100 }
1101
1102
1103 // STM catch frame --------------------------------------------------------------
1104
1105 #define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)          \
1106    label                                                   \
1107    {                                                       \
1108       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )  \
1109       Sp = Sp + SIZEOF_StgCatchSTMFrame;                   \
1110       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)             \
1111       jump ret;                                            \
1112    }
1113
1114 #ifdef REG_R1
1115 #define SP_OFF 0
1116 #else
1117 #define SP_OFF 1
1118 #endif
1119
1120 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
1121 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
1122 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
1123 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
1124 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
1125 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
1126 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
1127 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
1128
1129 #if MAX_VECTORED_RTN > 8
1130 #error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
1131 #endif
1132
1133 #if defined(PROFILING)
1134 #define CATCH_STM_FRAME_BITMAP 3
1135 #define CATCH_STM_FRAME_WORDS  3
1136 #else
1137 #define CATCH_STM_FRAME_BITMAP 0
1138 #define CATCH_STM_FRAME_WORDS  1
1139 #endif
1140
1141 /* Catch frames are very similar to update frames, but when entering
1142  * one we just pop the frame off the stack and perform the correct
1143  * kind of return to the activation record underneath us on the stack.
1144  */
1145
1146 INFO_TABLE_RET(stg_catch_stm_frame,
1147                CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
1148                CATCH_STM_FRAME,
1149                stg_catch_stm_frame_0_ret,
1150                stg_catch_stm_frame_1_ret,
1151                stg_catch_stm_frame_2_ret,
1152                stg_catch_stm_frame_3_ret,
1153                stg_catch_stm_frame_4_ret,
1154                stg_catch_stm_frame_5_ret,
1155                stg_catch_stm_frame_6_ret,
1156                stg_catch_stm_frame_7_ret)
1157 CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1158
1159
1160 // Primop definition ------------------------------------------------------------
1161
1162 atomicallyzh_fast
1163 {
1164   W_ frame;
1165   W_ old_trec;
1166   W_ new_trec;
1167   
1168   // stmStartTransaction may allocate
1169   MAYBE_GC (R1_PTR, atomicallyzh_fast); 
1170
1171   /* Args: R1 = m :: STM a */
1172   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1173
1174   /* Set up the atomically frame */
1175   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1176   frame = Sp;
1177
1178   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1179   StgAtomicallyFrame_waiting(frame) = 0 :: CInt; // False
1180   StgAtomicallyFrame_code(frame) = R1;
1181
1182   /* Start the memory transcation */
1183   old_trec = StgTSO_trec(CurrentTSO);
1184   "ptr" new_trec = foreign "C" stmStartTransaction(BaseReg "ptr", old_trec "ptr");
1185   StgTSO_trec(CurrentTSO) = new_trec;
1186
1187   /* Apply R1 to the realworld token */
1188   Sp_adj(-1);
1189   jump RET_LBL(stg_ap_v);
1190 }
1191
1192
1193 catchSTMzh_fast
1194 {
1195   W_ frame;
1196   
1197   /* Args: R1 :: STM a */
1198   /* Args: R2 :: Exception -> STM a */
1199   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1200
1201   /* Set up the catch frame */
1202   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1203   frame = Sp;
1204
1205   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1206   StgCatchSTMFrame_handler(frame) = R2;
1207
1208   /* Apply R1 to the realworld token */
1209   Sp_adj(-1);
1210   jump RET_LBL(stg_ap_v);
1211 }
1212
1213
1214 catchRetryzh_fast
1215 {
1216   W_ frame;
1217   W_ new_trec;
1218   W_ trec;
1219
1220   // stmStartTransaction may allocate
1221   MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 
1222
1223   /* Args: R1 :: STM a */
1224   /* Args: R2 :: STM a */
1225   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1226
1227   /* Start a nested transaction within which to run the first code */
1228   trec = StgTSO_trec(CurrentTSO);
1229   "ptr" new_trec = foreign "C" stmStartTransaction(BaseReg "ptr", trec "ptr");
1230   StgTSO_trec(CurrentTSO) = new_trec;
1231
1232   /* Set up the catch-retry frame */
1233   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1234   frame = Sp;
1235   
1236   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1237   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1238   StgCatchRetryFrame_first_code(frame) = R1;
1239   StgCatchRetryFrame_alt_code(frame) = R2;
1240   StgCatchRetryFrame_first_code_trec(frame) = new_trec;
1241
1242   /* Apply R1 to the realworld token */
1243   Sp_adj(-1);
1244   jump RET_LBL(stg_ap_v);  
1245 }
1246
1247
1248 retryzh_fast
1249 {
1250   W_ frame_type;
1251   W_ frame;
1252   W_ trec;
1253   W_ outer;
1254   W_ r;
1255
1256   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1257
1258   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1259 retry_pop_stack:
1260   trec = StgTSO_trec(CurrentTSO);
1261   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
1262   StgTSO_sp(CurrentTSO) = Sp;
1263   frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr");
1264   Sp = StgTSO_sp(CurrentTSO);
1265   frame = Sp;
1266
1267   if (frame_type == CATCH_RETRY_FRAME) {
1268     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1269     ASSERT(outer != NO_TREC);
1270     if (!StgCatchRetryFrame_running_alt_code(frame)) {
1271       // Retry in the first code: try the alternative
1272       "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
1273       StgTSO_trec(CurrentTSO) = trec;
1274       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1275       R1 = StgCatchRetryFrame_alt_code(frame);
1276       Sp_adj(-1);
1277       jump RET_LBL(stg_ap_v);
1278     } else {
1279       // Retry in the alternative code: propagate
1280       W_ other_trec;
1281       other_trec = StgCatchRetryFrame_first_code_trec(frame);
1282       r = foreign "C" stmCommitNestedTransaction(BaseReg "ptr", other_trec "ptr");
1283       if (r) {
1284         r = foreign "C" stmCommitNestedTransaction(BaseReg "ptr", trec "ptr");
1285       }
1286       if (r) {
1287         // Merge between siblings succeeded: commit it back to enclosing transaction
1288         // and then propagate the retry
1289         StgTSO_trec(CurrentTSO) = outer;
1290         Sp = Sp + SIZEOF_StgCatchRetryFrame;
1291         goto retry_pop_stack;
1292       } else {
1293         // Merge failed: we musn't propagate the retry.  Try both paths again.
1294         "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
1295         StgCatchRetryFrame_first_code_trec(frame) = trec;
1296         StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1297         StgTSO_trec(CurrentTSO) = trec;
1298         R1 = StgCatchRetryFrame_first_code(frame);
1299         Sp_adj(-1);
1300         jump RET_LBL(stg_ap_v);
1301       }
1302     }
1303   }
1304
1305   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1306   ASSERT(frame_type == ATOMICALLY_FRAME);
1307   ASSERT(outer == NO_TREC);
1308   r = foreign "C" stmWait(BaseReg "ptr", CurrentTSO "ptr", trec "ptr");
1309   if (r) {
1310     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1311     StgAtomicallyFrame_waiting(frame) = 1 :: CInt; // true
1312     Sp = frame;
1313     // Fix up the stack in the unregisterised case: the return convention is different.
1314     IF_NOT_REG_R1(Sp_adj(-2); 
1315                   Sp(1) = stg_NO_FINALIZER_closure;
1316                   Sp(0) = stg_ut_1_0_unreg_info;)
1317     jump stg_block_noregs;
1318   } else {
1319     // Transaction was not valid: retry immediately
1320     "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
1321     StgTSO_trec(CurrentTSO) = trec;
1322     R1 = StgAtomicallyFrame_code(frame);
1323     Sp = frame;
1324     Sp_adj(-1);
1325     jump RET_LBL(stg_ap_v);
1326   }
1327 }
1328
1329
1330 newTVarzh_fast
1331 {
1332   W_ tv;
1333   W_ new_value;
1334
1335   /* Args: R1 = initialisation value */
1336
1337   MAYBE_GC (R1_PTR, newTVarzh_fast); 
1338   new_value = R1;
1339   tv = foreign "C" stmNewTVar(BaseReg "ptr", new_value "ptr");
1340   RET_P(tv);
1341 }
1342
1343
1344 readTVarzh_fast
1345 {
1346   W_ trec;
1347   W_ tvar;
1348   W_ result;
1349
1350   /* Args: R1 = TVar closure */
1351
1352   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1353   trec = StgTSO_trec(CurrentTSO);
1354   tvar = R1;
1355   "ptr" result = foreign "C" stmReadTVar(BaseReg "ptr", trec "ptr", tvar "ptr");
1356
1357   RET_P(result);
1358 }
1359
1360
1361 writeTVarzh_fast
1362 {
1363   W_ trec;
1364   W_ tvar;
1365   W_ new_value;
1366   
1367   /* Args: R1 = TVar closure */
1368   /*       R2 = New value    */
1369
1370   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1371   trec = StgTSO_trec(CurrentTSO);
1372   tvar = R1;
1373   new_value = R2;
1374   foreign "C" stmWriteTVar(BaseReg "ptr", trec "ptr", tvar "ptr", new_value "ptr");
1375
1376   jump %ENTRY_CODE(Sp(0));
1377 }
1378
1379
1380 /* -----------------------------------------------------------------------------
1381  * MVar primitives
1382  *
1383  * take & putMVar work as follows.  Firstly, an important invariant:
1384  *
1385  *    If the MVar is full, then the blocking queue contains only
1386  *    threads blocked on putMVar, and if the MVar is empty then the
1387  *    blocking queue contains only threads blocked on takeMVar.
1388  *
1389  * takeMvar:
1390  *    MVar empty : then add ourselves to the blocking queue
1391  *    MVar full  : remove the value from the MVar, and
1392  *                 blocking queue empty     : return
1393  *                 blocking queue non-empty : perform the first blocked putMVar
1394  *                                            from the queue, and wake up the
1395  *                                            thread (MVar is now full again)
1396  *
1397  * putMVar is just the dual of the above algorithm.
1398  *
1399  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1400  * the stack of the thread waiting to do the putMVar.  See
1401  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1402  * the stack layout, and the PerformPut and PerformTake macros below.
1403  *
1404  * It is important that a blocked take or put is woken up with the
1405  * take/put already performed, because otherwise there would be a
1406  * small window of vulnerability where the thread could receive an
1407  * exception and never perform its take or put, and we'd end up with a
1408  * deadlock.
1409  *
1410  * -------------------------------------------------------------------------- */
1411
1412 isEmptyMVarzh_fast
1413 {
1414     /* args: R1 = MVar closure */
1415
1416     if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
1417         RET_N(1);
1418     } else {
1419         RET_N(0);
1420     }
1421 }
1422
1423 newMVarzh_fast
1424 {
1425     /* args: none */
1426     W_ mvar;
1427
1428     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1429   
1430     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1431     SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
1432     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1433     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1434     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1435     RET_P(mvar);
1436 }
1437
1438
1439 /* If R1 isn't available, pass it on the stack */
1440 #ifdef REG_R1
1441 #define PerformTake(tso, value)                         \
1442     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1443     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1444 #else
1445 #define PerformTake(tso, value)                                 \
1446     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1447     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1448 #endif
1449
1450 #define PerformPut(tso,lval)                    \
1451     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1452     lval = W_[StgTSO_sp(tso) - WDS(1)];
1453
1454 takeMVarzh_fast
1455 {
1456     W_ mvar, val, info, tso;
1457
1458     /* args: R1 = MVar closure */
1459     mvar = R1;
1460
1461 #if defined(SMP)
1462     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1463 #else
1464     info = GET_INFO(mvar);
1465 #endif
1466
1467     /* If the MVar is empty, put ourselves on its blocking queue,
1468      * and wait until we're woken up.
1469      */
1470     if (info == stg_EMPTY_MVAR_info) {
1471         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1472             StgMVar_head(mvar) = CurrentTSO;
1473         } else {
1474             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1475         }
1476         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1477         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1478         StgTSO_block_info(CurrentTSO)  = mvar;
1479         StgMVar_tail(mvar) = CurrentTSO;
1480         
1481         jump stg_block_takemvar;
1482   }
1483
1484   /* we got the value... */
1485   val = StgMVar_value(mvar);
1486
1487   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1488   {
1489       /* There are putMVar(s) waiting... 
1490        * wake up the first thread on the queue
1491        */
1492       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1493
1494       /* actually perform the putMVar for the thread that we just woke up */
1495       tso = StgMVar_head(mvar);
1496       PerformPut(tso,StgMVar_value(mvar));
1497
1498 #if defined(GRAN) || defined(PAR)
1499       /* ToDo: check 2nd arg (mvar) is right */
1500       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar),mvar);
1501       StgMVar_head(mvar) = tso;
1502 #else
1503       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1504       StgMVar_head(mvar) = tso;
1505 #endif
1506
1507       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1508           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1509       }
1510
1511 #if defined(SMP)
1512       foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1513 #endif
1514       RET_P(val);
1515   } 
1516   else
1517   {
1518       /* No further putMVars, MVar is now empty */
1519       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1520  
1521 #if defined(SMP)
1522       foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1523 #else
1524       SET_INFO(mvar,stg_EMPTY_MVAR_info);
1525 #endif
1526
1527       RET_P(val);
1528   }
1529 }
1530
1531
1532 tryTakeMVarzh_fast
1533 {
1534     W_ mvar, val, info, tso;
1535
1536     /* args: R1 = MVar closure */
1537
1538     mvar = R1;
1539
1540 #if defined(SMP)
1541     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1542 #else
1543     info = GET_INFO(mvar);
1544 #endif
1545
1546     if (info == stg_EMPTY_MVAR_info) {
1547 #if defined(SMP)
1548         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1549 #endif
1550         /* HACK: we need a pointer to pass back, 
1551          * so we abuse NO_FINALIZER_closure
1552          */
1553         RET_NP(0, stg_NO_FINALIZER_closure);
1554     }
1555
1556     /* we got the value... */
1557     val = StgMVar_value(mvar);
1558
1559     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1560
1561         /* There are putMVar(s) waiting... 
1562          * wake up the first thread on the queue
1563          */
1564         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1565
1566         /* actually perform the putMVar for the thread that we just woke up */
1567         tso = StgMVar_head(mvar);
1568         PerformPut(tso,StgMVar_value(mvar));
1569
1570 #if defined(GRAN) || defined(PAR)
1571         /* ToDo: check 2nd arg (mvar) is right */
1572         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr");
1573         StgMVar_head(mvar) = tso;
1574 #else
1575         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1576         StgMVar_head(mvar) = tso;
1577 #endif
1578
1579         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1580             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1581         }
1582 #if defined(SMP)
1583         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1584 #endif
1585     }
1586     else 
1587     {
1588         /* No further putMVars, MVar is now empty */
1589         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1590 #if defined(SMP)
1591         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1592 #else
1593         SET_INFO(mvar,stg_EMPTY_MVAR_info);
1594 #endif
1595     }
1596     
1597     RET_NP(1, val);
1598 }
1599
1600
1601 putMVarzh_fast
1602 {
1603     W_ mvar, info, tso;
1604
1605     /* args: R1 = MVar, R2 = value */
1606     mvar = R1;
1607
1608 #if defined(SMP)
1609     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1610 #else
1611     info = GET_INFO(mvar);
1612 #endif
1613
1614     if (info == stg_FULL_MVAR_info) {
1615         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1616             StgMVar_head(mvar) = CurrentTSO;
1617         } else {
1618             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1619         }
1620         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1621         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1622         StgTSO_block_info(CurrentTSO)  = mvar;
1623         StgMVar_tail(mvar) = CurrentTSO;
1624         
1625         jump stg_block_putmvar;
1626     }
1627   
1628     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1629
1630         /* There are takeMVar(s) waiting: wake up the first one
1631          */
1632         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1633
1634         /* actually perform the takeMVar */
1635         tso = StgMVar_head(mvar);
1636         PerformTake(tso, R2);
1637       
1638 #if defined(GRAN) || defined(PAR)
1639         /* ToDo: check 2nd arg (mvar) is right */
1640         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr");
1641         StgMVar_head(mvar) = tso;
1642 #else
1643         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1644         StgMVar_head(mvar) = tso;
1645 #endif
1646
1647         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1648             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1649         }
1650
1651 #if defined(SMP)
1652         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1653 #endif
1654         jump %ENTRY_CODE(Sp(0));
1655     }
1656     else
1657     {
1658         /* No further takes, the MVar is now full. */
1659         StgMVar_value(mvar) = R2;
1660
1661 #if defined(SMP)
1662         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1663 #else
1664         SET_INFO(mvar,stg_FULL_MVAR_info);
1665 #endif
1666         jump %ENTRY_CODE(Sp(0));
1667     }
1668     
1669     /* ToDo: yield afterward for better communication performance? */
1670 }
1671
1672
1673 tryPutMVarzh_fast
1674 {
1675     W_ mvar, info, tso;
1676
1677     /* args: R1 = MVar, R2 = value */
1678     mvar = R1;
1679
1680 #if defined(SMP)
1681     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1682 #else
1683     info = GET_INFO(mvar);
1684 #endif
1685
1686     if (info == stg_FULL_MVAR_info) {
1687 #if defined(SMP)
1688         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1689 #endif
1690         RET_N(0);
1691     }
1692   
1693     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1694
1695         /* There are takeMVar(s) waiting: wake up the first one
1696          */
1697         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1698         
1699         /* actually perform the takeMVar */
1700         tso = StgMVar_head(mvar);
1701         PerformTake(tso, R2);
1702       
1703 #if defined(GRAN) || defined(PAR)
1704         /* ToDo: check 2nd arg (mvar) is right */
1705         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr");
1706         StgMVar_head(mvar) = tso;
1707 #else
1708         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1709         StgMVar_head(mvar) = tso;
1710 #endif
1711
1712         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1713             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1714         }
1715
1716 #if defined(SMP)
1717         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1718 #endif
1719         jump %ENTRY_CODE(Sp(0));
1720     }
1721     else
1722     {
1723         /* No further takes, the MVar is now full. */
1724         StgMVar_value(mvar) = R2;
1725
1726 #if defined(SMP)
1727         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1728 #else
1729         SET_INFO(mvar,stg_FULL_MVAR_info);
1730 #endif
1731         jump %ENTRY_CODE(Sp(0));
1732     }
1733     
1734     /* ToDo: yield afterward for better communication performance? */
1735 }
1736
1737
1738 /* -----------------------------------------------------------------------------
1739    Stable pointer primitives
1740    -------------------------------------------------------------------------  */
1741
1742 makeStableNamezh_fast
1743 {
1744     W_ index, sn_obj;
1745
1746     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1747   
1748     index = foreign "C" lookupStableName(R1 "ptr");
1749
1750     /* Is there already a StableName for this heap object?
1751      *  stable_ptr_table is a pointer to an array of snEntry structs.
1752      */
1753     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1754         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1755         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1756         StgStableName_sn(sn_obj) = index;
1757         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1758     } else {
1759         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1760     }
1761     
1762     RET_P(sn_obj);
1763 }
1764
1765
1766 makeStablePtrzh_fast
1767 {
1768     /* Args: R1 = a */
1769     W_ sp;
1770     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1771     "ptr" sp = foreign "C" getStablePtr(R1 "ptr");
1772     RET_N(sp);
1773 }
1774
1775 deRefStablePtrzh_fast
1776 {
1777     /* Args: R1 = the stable ptr */
1778     W_ r, sp;
1779     sp = R1;
1780     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1781     RET_P(r);
1782 }
1783
1784 /* -----------------------------------------------------------------------------
1785    Bytecode object primitives
1786    -------------------------------------------------------------------------  */
1787
1788 newBCOzh_fast
1789 {
1790     /* R1 = instrs
1791        R2 = literals
1792        R3 = ptrs
1793        R4 = itbls
1794        R5 = arity
1795        R6 = bitmap array
1796     */
1797     W_ bco, bitmap_arr, bytes, words;
1798     
1799     bitmap_arr = R6;
1800     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1801     bytes = WDS(words);
1802
1803     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );
1804
1805     bco = Hp - bytes + WDS(1);
1806     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1807     
1808     StgBCO_instrs(bco)     = R1;
1809     StgBCO_literals(bco)   = R2;
1810     StgBCO_ptrs(bco)       = R3;
1811     StgBCO_itbls(bco)      = R4;
1812     StgBCO_arity(bco)      = HALF_W_(R5);
1813     StgBCO_size(bco)       = HALF_W_(words);
1814     
1815     // Copy the arity/bitmap info into the BCO
1816     W_ i;
1817     i = 0;
1818 for:
1819     if (i < StgArrWords_words(bitmap_arr)) {
1820         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1821         i = i + 1;
1822         goto for;
1823     }
1824     
1825     RET_P(bco);
1826 }
1827
1828
1829 mkApUpd0zh_fast
1830 {
1831     // R1 = the BCO# for the AP
1832     //  
1833     W_ ap;
1834
1835     // This function is *only* used to wrap zero-arity BCOs in an
1836     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1837     // saturated and always points directly to a FUN or BCO.
1838     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1839            StgBCO_arity(R1) == HALF_W_(0));
1840
1841     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1842     TICK_ALLOC_UP_THK(0, 0);
1843     CCCS_ALLOC(SIZEOF_StgAP);
1844
1845     ap = Hp - SIZEOF_StgAP + WDS(1);
1846     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1847     
1848     StgAP_n_args(ap) = HALF_W_(0);
1849     StgAP_fun(ap) = R1;
1850     
1851     RET_P(ap);
1852 }
1853
1854 /* -----------------------------------------------------------------------------
1855    Thread I/O blocking primitives
1856    -------------------------------------------------------------------------- */
1857
1858 /* Add a thread to the end of the blocked queue. (C-- version of the C
1859  * macro in Schedule.h).
1860  */
1861 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1862     ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
1863     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1864       W_[blocked_queue_hd] = tso;                       \
1865     } else {                                            \
1866       StgTSO_link(W_[blocked_queue_tl]) = tso;          \
1867     }                                                   \
1868     W_[blocked_queue_tl] = tso;
1869
1870 waitReadzh_fast
1871 {
1872     /* args: R1 */
1873 #ifdef THREADED_RTS
1874     foreign "C" barf("waitRead# on threaded RTS");
1875 #endif
1876
1877     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1878     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1879     StgTSO_block_info(CurrentTSO) = R1;
1880     // No locking - we're not going to use this interface in the
1881     // threaded RTS anyway.
1882     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1883     jump stg_block_noregs;
1884 }
1885
1886 waitWritezh_fast
1887 {
1888     /* args: R1 */
1889 #ifdef THREADED_RTS
1890     foreign "C" barf("waitWrite# on threaded RTS");
1891 #endif
1892
1893     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1894     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1895     StgTSO_block_info(CurrentTSO) = R1;
1896     // No locking - we're not going to use this interface in the
1897     // threaded RTS anyway.
1898     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1899     jump stg_block_noregs;
1900 }
1901
1902
1903 STRING(stg_delayzh_malloc_str, "delayzh_fast")
1904 delayzh_fast
1905 {
1906 #ifdef mingw32_HOST_OS
1907     W_ ares;
1908     CInt reqID;
1909 #else
1910     W_ t, prev, target;
1911 #endif
1912
1913 #ifdef THREADED_RTS
1914     foreign "C" barf("delay# on threaded RTS");
1915 #endif
1916
1917     /* args: R1 (microsecond delay amount) */
1918     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1919     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
1920
1921 #ifdef mingw32_HOST_OS
1922
1923     /* could probably allocate this on the heap instead */
1924     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1925                                             stg_delayzh_malloc_str);
1926     reqID = foreign "C" addDelayRequest(R1);
1927     StgAsyncIOResult_reqID(ares)   = reqID;
1928     StgAsyncIOResult_len(ares)     = 0;
1929     StgAsyncIOResult_errCode(ares) = 0;
1930     StgTSO_block_info(CurrentTSO)  = ares;
1931
1932     /* Having all async-blocked threads reside on the blocked_queue
1933      * simplifies matters, so change the status to OnDoProc put the
1934      * delayed thread on the blocked_queue.
1935      */
1936     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1937     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1938     jump stg_block_async_void;
1939
1940 #else
1941
1942     W_ time;
1943     time = foreign "C" getourtimeofday();
1944     target = (R1 / (TICK_MILLISECS*1000)) + time;
1945     StgTSO_block_info(CurrentTSO) = target;
1946
1947     /* Insert the new thread in the sleeping queue. */
1948     prev = NULL;
1949     t = W_[sleeping_queue];
1950 while:
1951     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
1952         prev = t;
1953         t = StgTSO_link(t);
1954         goto while;
1955     }
1956
1957     StgTSO_link(CurrentTSO) = t;
1958     if (prev == NULL) {
1959         W_[sleeping_queue] = CurrentTSO;
1960     } else {
1961         StgTSO_link(prev) = CurrentTSO;
1962     }
1963     jump stg_block_noregs;
1964 #endif
1965 }
1966
1967
1968 #ifdef mingw32_HOST_OS
1969 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
1970 asyncReadzh_fast
1971 {
1972     W_ ares;
1973     CInt reqID;
1974
1975 #ifdef THREADED_RTS
1976     foreign "C" barf("asyncRead# on threaded RTS");
1977 #endif
1978
1979     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1980     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1981     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1982
1983     /* could probably allocate this on the heap instead */
1984     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1985                                             stg_asyncReadzh_malloc_str);
1986     reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr");
1987     StgAsyncIOResult_reqID(ares)   = reqID;
1988     StgAsyncIOResult_len(ares)     = 0;
1989     StgAsyncIOResult_errCode(ares) = 0;
1990     StgTSO_block_info(CurrentTSO)  = ares;
1991     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1992     jump stg_block_async;
1993 }
1994
1995 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
1996 asyncWritezh_fast
1997 {
1998     W_ ares;
1999     CInt reqID;
2000
2001 #ifdef THREADED_RTS
2002     foreign "C" barf("asyncWrite# on threaded RTS");
2003 #endif
2004
2005     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2006     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2007     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2008
2009     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2010                                             stg_asyncWritezh_malloc_str);
2011     reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr");
2012
2013     StgAsyncIOResult_reqID(ares)   = reqID;
2014     StgAsyncIOResult_len(ares)     = 0;
2015     StgAsyncIOResult_errCode(ares) = 0;
2016     StgTSO_block_info(CurrentTSO)  = ares;
2017     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2018     jump stg_block_async;
2019 }
2020
2021 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
2022 asyncDoProczh_fast
2023 {
2024     W_ ares;
2025     CInt reqID;
2026
2027     /* args: R1 = proc, R2 = param */
2028     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2029     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2030
2031     /* could probably allocate this on the heap instead */
2032     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2033                                             stg_asyncDoProczh_malloc_str);
2034     reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr");
2035     StgAsyncIOResult_reqID(ares)   = reqID;
2036     StgAsyncIOResult_len(ares)     = 0;
2037     StgAsyncIOResult_errCode(ares) = 0;
2038     StgTSO_block_info(CurrentTSO) = ares;
2039     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2040     jump stg_block_async;
2041 }
2042 #endif
2043
2044 /* -----------------------------------------------------------------------------
2045   ** temporary **
2046
2047    classes CCallable and CReturnable don't really exist, but the
2048    compiler insists on generating dictionaries containing references
2049    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
2050    for these.  Some C compilers can't cope with zero-length static arrays,
2051    so we have to make these one element long.
2052   --------------------------------------------------------------------------- */
2053
2054 section "rodata" {
2055   GHC_ZCCCallable_static_info:   W_ 0;
2056 }
2057
2058 section "rodata" {
2059   GHC_ZCCReturnable_static_info: W_ 0;
2060 }