[project @ 2005-05-27 14:47:08 by tharris]
[ghc-hetmet.git] / ghc / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check, the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *less* than the size of a word, make sure we
42  * round up to the nearest word for the size of the array.
43  */
44
45 newByteArrayzh_fast
46 {
47     W_ words, payload_words, n, p;
48     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
49     n = R1;
50     payload_words = ROUNDUP_BYTES_TO_WDS(n);
51     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
52     "ptr" p = foreign "C" allocateLocal(BaseReg "ptr",words);
53     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
54     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
55     StgArrWords_words(p) = payload_words;
56     RET_P(p);
57 }
58
59 newPinnedByteArrayzh_fast
60 {
61     W_ words, payload_words, n, p;
62
63     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
64     n = R1;
65     payload_words = ROUNDUP_BYTES_TO_WDS(n);
66
67     // We want an 8-byte aligned array.  allocatePinned() gives us
68     // 8-byte aligned memory by default, but we want to align the
69     // *goods* inside the ArrWords object, so we have to check the
70     // size of the ArrWords header and adjust our size accordingly.
71     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
72     if ((SIZEOF_StgArrWords & 7) != 0) {
73         words = words + 1;
74     }
75
76     "ptr" p = foreign "C" allocatePinned(words);
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78
79     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
80     // have to push the object forward one word so that the goods
81     // fall on an 8-byte boundary.
82     if ((SIZEOF_StgArrWords & 7) != 0) {
83         p = p + WDS(1);
84     }
85
86     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
87     StgArrWords_words(p) = payload_words;
88     RET_P(p);
89 }
90
91 newArrayzh_fast
92 {
93     W_ words, n, init, arr, p;
94     /* Args: R1 = words, R2 = initialisation value */
95
96     n = R1;
97     MAYBE_GC(R2_PTR,newArrayzh_fast);
98
99     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
100     "ptr" arr = foreign "C" allocateLocal(BaseReg "ptr",words);
101     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
102
103     SET_HDR(arr, stg_MUT_ARR_PTRS_info, W_[CCCS]);
104     StgMutArrPtrs_ptrs(arr) = n;
105
106     // Initialise all elements of the the array with the value in R2
107     init = R2;
108     p = arr + SIZEOF_StgMutArrPtrs;
109   for:
110     if (p < arr + WDS(words)) {
111         W_[p] = init;
112         p = p + WDS(1);
113         goto for;
114     }
115
116     RET_P(arr);
117 }
118
119 unsafeThawArrayzh_fast
120 {
121   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
122   //
123   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
124   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
125   // it on the mutable list for the GC to remove (removing something from
126   // the mutable list is not easy, because the mut_list is only singly-linked).
127   // 
128   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
129   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0 to indicate
130   // that it is still on the mutable list.
131
132   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
133   // either it is on a mut_list, or it isn't.  We adopt the convention that
134   // the mut_link field is NULL if it isn't on a mut_list, and the GC
135   // maintains this invariant.
136   //
137   if (%INFO_TYPE(%GET_STD_INFO(R1)) != HALF_W_(MUT_ARR_PTRS_FROZEN0)) {
138         foreign "C" recordMutableLock(R1 "ptr");
139   }
140
141   SET_INFO(R1,stg_MUT_ARR_PTRS_info);
142
143   RET_P(R1);
144 }
145
146 /* -----------------------------------------------------------------------------
147    MutVar primitives
148    -------------------------------------------------------------------------- */
149
150 newMutVarzh_fast
151 {
152     W_ mv;
153     /* Args: R1 = initialisation value */
154
155     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
156
157     mv = Hp - SIZEOF_StgMutVar + WDS(1);
158     SET_HDR(mv,stg_MUT_VAR_info,W_[CCCS]);
159     StgMutVar_var(mv) = R1;
160     
161     RET_P(mv);
162 }
163
164 atomicModifyMutVarzh_fast
165 {
166     W_ mv, z, x, y, r;
167     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
168
169     /* If x is the current contents of the MutVar#, then 
170        We want to make the new contents point to
171
172          (sel_0 (f x))
173  
174        and the return value is
175          
176          (sel_1 (f x))
177
178         obviously we can share (f x).
179
180          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
181          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
182          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
183     */
184
185 #if MIN_UPD_SIZE > 1
186 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
187 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
188 #else
189 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
190 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
191 #endif
192
193 #if MIN_UPD_SIZE > 2
194 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
195 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
196 #else
197 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
198 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
199 #endif
200
201 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
202
203    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
204
205 #if defined(SMP)
206     foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
207 #endif
208
209    x = StgMutVar_var(R1);
210
211    TICK_ALLOC_THUNK_2();
212    CCCS_ALLOC(THUNK_2_SIZE);
213    z = Hp - THUNK_2_SIZE + WDS(1);
214    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
215    LDV_RECORD_CREATE(z);
216    StgThunk_payload(z,0) = R2;
217    StgThunk_payload(z,1) = x;
218
219    TICK_ALLOC_THUNK_1();
220    CCCS_ALLOC(THUNK_1_SIZE);
221    y = z - THUNK_1_SIZE;
222    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
223    LDV_RECORD_CREATE(y);
224    StgThunk_payload(y,0) = z;
225
226    StgMutVar_var(R1) = y;
227
228    TICK_ALLOC_THUNK_1();
229    CCCS_ALLOC(THUNK_1_SIZE);
230    r = y - THUNK_1_SIZE;
231    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
232    LDV_RECORD_CREATE(r);
233    StgThunk_payload(r,0) = z;
234
235 #if defined(SMP)
236     foreign "C" RELEASE_LOCK(sm_mutex "ptr");
237 #endif
238
239    RET_P(r);
240 }
241
242 /* -----------------------------------------------------------------------------
243    Foreign Object Primitives
244    -------------------------------------------------------------------------- */
245
246 mkForeignObjzh_fast
247 {
248   /* R1 = ptr to foreign object,
249   */
250   W_ result;
251
252   ALLOC_PRIM( SIZEOF_StgForeignObj, NO_PTRS, mkForeignObjzh_fast);
253
254   result = Hp - SIZEOF_StgForeignObj + WDS(1);
255   SET_HDR(result,stg_FOREIGN_info,W_[CCCS]);
256   StgForeignObj_data(result) = R1;
257
258   /* returns (# s#, ForeignObj# #) */
259   RET_P(result);
260 }
261
262 /* -----------------------------------------------------------------------------
263    Weak Pointer Primitives
264    -------------------------------------------------------------------------- */
265
266 STRING(stg_weak_msg,"New weak pointer at %p\n")
267
268 mkWeakzh_fast
269 {
270   /* R1 = key
271      R2 = value
272      R3 = finalizer (or NULL)
273   */
274   W_ w;
275
276   if (R3 == NULL) {
277     R3 = stg_NO_FINALIZER_closure;
278   }
279
280   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
281
282   w = Hp - SIZEOF_StgWeak + WDS(1);
283   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
284
285   StgWeak_key(w)       = R1;
286   StgWeak_value(w)     = R2;
287   StgWeak_finalizer(w) = R3;
288
289   StgWeak_link(w)       = W_[weak_ptr_list];
290   W_[weak_ptr_list]     = w;
291
292   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w));
293
294   RET_P(w);
295 }
296
297
298 finalizzeWeakzh_fast
299 {
300   /* R1 = weak ptr
301    */
302   W_ w, f;
303
304   w = R1;
305
306   // already dead?
307   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
308       RET_NP(0,stg_NO_FINALIZER_closure);
309   }
310
311   // kill it
312 #ifdef PROFILING
313   // @LDV profiling
314   // A weak pointer is inherently used, so we do not need to call
315   // LDV_recordDead_FILL_SLOP_DYNAMIC():
316   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
317   // or, LDV_recordDead():
318   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
319   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
320   // large as weak pointers, so there is no need to fill the slop, either.
321   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
322 #endif
323
324   //
325   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
326   //
327   SET_INFO(w,stg_DEAD_WEAK_info);
328   LDV_RECORD_CREATE(w);
329
330   f = StgWeak_finalizer(w);
331   StgDeadWeak_link(w) = StgWeak_link(w);
332
333   /* return the finalizer */
334   if (f == stg_NO_FINALIZER_closure) {
335       RET_NP(0,stg_NO_FINALIZER_closure);
336   } else {
337       RET_NP(1,f);
338   }
339 }
340
341 deRefWeakzh_fast
342 {
343   /* R1 = weak ptr */
344   W_ w, code, val;
345
346   w = R1;
347   if (GET_INFO(w) == stg_WEAK_info) {
348     code = 1;
349     val = StgWeak_value(w);
350   } else {
351     code = 0;
352     val = w;
353   }
354   RET_NP(code,val);
355 }
356
357 /* -----------------------------------------------------------------------------
358    Arbitrary-precision Integer operations.
359
360    There are some assumptions in this code that mp_limb_t == W_.  This is
361    the case for all the platforms that GHC supports, currently.
362    -------------------------------------------------------------------------- */
363
364 int2Integerzh_fast
365 {
366    /* arguments: R1 = Int# */
367
368    W_ val, s, p;        /* to avoid aliasing */
369
370    val = R1;
371    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
372
373    p = Hp - SIZEOF_StgArrWords;
374    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
375    StgArrWords_words(p) = 1;
376
377    /* mpz_set_si is inlined here, makes things simpler */
378    if (%lt(val,0)) { 
379         s  = -1;
380         Hp(0) = -val;
381    } else { 
382      if (%gt(val,0)) {
383         s = 1;
384         Hp(0) = val;
385      } else {
386         s = 0;
387      }
388   }
389
390    /* returns (# size  :: Int#, 
391                  data  :: ByteArray# 
392                #)
393    */
394    RET_NP(s,p);
395 }
396
397 word2Integerzh_fast
398 {
399    /* arguments: R1 = Word# */
400
401    W_ val, s, p;        /* to avoid aliasing */
402
403    val = R1;
404
405    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
406
407    p = Hp - SIZEOF_StgArrWords;
408    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
409    StgArrWords_words(p) = 1;
410
411    if (val != 0) {
412         s = 1;
413         W_[Hp] = val;
414    } else {
415         s = 0;
416    }
417
418    /* returns (# size  :: Int#, 
419                  data  :: ByteArray# #)
420    */
421    RET_NP(s,p);
422 }
423
424
425 /*
426  * 'long long' primops for converting to/from Integers.
427  */
428
429 #ifdef SUPPORT_LONG_LONGS
430
431 int64ToIntegerzh_fast
432 {
433    /* arguments: L1 = Int64# */
434
435    L_ val;
436    W_ hi, s, neg, words_needed, p;
437
438    val = L1;
439    neg = 0;
440
441    if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) )  { 
442        words_needed = 2;
443    } else { 
444        // minimum is one word
445        words_needed = 1;
446    }
447
448    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
449                NO_PTRS, int64ToIntegerzh_fast );
450
451    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
452    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
453    StgArrWords_words(p) = words_needed;
454
455    if ( %lt(val,0::L_) ) {
456      neg = 1;
457      val = -val;
458    }
459
460    hi = TO_W_(val >> 32);
461
462    if ( words_needed == 2 )  { 
463       s = 2;
464       Hp(-1) = TO_W_(val);
465       Hp(0) = hi;
466    } else { 
467        if ( val != 0::L_ ) {
468            s = 1;
469            Hp(0) = TO_W_(val);
470        } else /* val==0 */  {
471            s = 0;
472        }
473    }
474    if ( neg != 0 ) {
475         s = -s;
476    }
477
478    /* returns (# size  :: Int#, 
479                  data  :: ByteArray# #)
480    */
481    RET_NP(s,p);
482 }
483
484 word64ToIntegerzh_fast
485 {
486    /* arguments: L1 = Word64# */
487
488    L_ val;
489    W_ hi, s, words_needed, p;
490
491    val = L1;
492    if ( val >= 0x100000000::L_ ) {
493       words_needed = 2;
494    } else {
495       words_needed = 1;
496    }
497
498    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
499                NO_PTRS, word64ToIntegerzh_fast );
500
501    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
502    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
503    StgArrWords_words(p) = words_needed;
504
505    hi = TO_W_(val >> 32);
506    if ( val >= 0x100000000::L_ ) { 
507      s = 2;
508      Hp(-1) = TO_W_(val);
509      Hp(0)  = hi;
510    } else {
511       if ( val != 0::L_ ) {
512         s = 1;
513         Hp(0) = TO_W_(val);
514      } else /* val==0 */  {
515       s = 0;
516      }
517   }
518
519    /* returns (# size  :: Int#, 
520                  data  :: ByteArray# #)
521    */
522    RET_NP(s,p);
523 }
524
525
526 #endif /* SUPPORT_LONG_LONGS */
527
528 /* ToDo: this is shockingly inefficient */
529
530 #ifndef SMP
531 section "bss" {
532   mp_tmp1:
533     bits8 [SIZEOF_MP_INT];
534 }
535
536 section "bss" {
537   mp_tmp2:
538     bits8 [SIZEOF_MP_INT];
539 }
540
541 section "bss" {
542   mp_result1:
543     bits8 [SIZEOF_MP_INT];
544 }
545
546 section "bss" {
547   mp_result2:
548     bits8 [SIZEOF_MP_INT];
549 }
550 #endif
551
552 #ifdef SMP
553 #define FETCH_MP_TEMP(X) \
554 W_ X; \
555 X = BaseReg + (OFFSET_StgRegTable_r ## X);
556 #else
557 #define FETCH_MP_TEMP(X) /* Nothing */
558 #endif
559
560 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
561 name                                                                    \
562 {                                                                       \
563   CInt s1, s2;                                                          \
564   W_ d1, d2;                                                            \
565   FETCH_MP_TEMP(mp_tmp1);                                               \
566   FETCH_MP_TEMP(mp_tmp2);                                               \
567   FETCH_MP_TEMP(mp_result1)                                             \
568   FETCH_MP_TEMP(mp_result2);                                            \
569                                                                         \
570   /* call doYouWantToGC() */                                            \
571   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
572                                                                         \
573   s1 = W_TO_INT(R1);                                                    \
574   d1 = R2;                                                              \
575   s2 = W_TO_INT(R3);                                                    \
576   d2 = R4;                                                              \
577                                                                         \
578   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
579   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
580   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
581   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
582   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
583   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
584                                                                         \
585   foreign "C" mpz_init(mp_result1 "ptr");                               \
586                                                                         \
587   /* Perform the operation */                                           \
588   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr");   \
589                                                                         \
590   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
591          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
592 }
593
594 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
595 name                                                                    \
596 {                                                                       \
597   CInt s1;                                                              \
598   W_ d1;                                                                \
599   FETCH_MP_TEMP(mp_tmp1);                                               \
600   FETCH_MP_TEMP(mp_result1)                                             \
601                                                                         \
602   /* call doYouWantToGC() */                                            \
603   MAYBE_GC(R2_PTR, name);                                               \
604                                                                         \
605   d1 = R2;                                                              \
606   s1 = W_TO_INT(R1);                                                    \
607                                                                         \
608   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
609   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
610   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
611                                                                         \
612   foreign "C" mpz_init(mp_result1 "ptr");                               \
613                                                                         \
614   /* Perform the operation */                                           \
615   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr");                   \
616                                                                         \
617   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
618          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
619 }
620
621 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
622 name                                                                                    \
623 {                                                                                       \
624   CInt s1, s2;                                                                          \
625   W_ d1, d2;                                                                            \
626   FETCH_MP_TEMP(mp_tmp1);                                                               \
627   FETCH_MP_TEMP(mp_tmp2);                                                               \
628   FETCH_MP_TEMP(mp_result1)                                                             \
629   FETCH_MP_TEMP(mp_result2)                                                             \
630                                                                                         \
631   /* call doYouWantToGC() */                                                            \
632   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
633                                                                                         \
634   s1 = W_TO_INT(R1);                                                                    \
635   d1 = R2;                                                                              \
636   s2 = W_TO_INT(R3);                                                                    \
637   d2 = R4;                                                                              \
638                                                                                         \
639   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
640   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
641   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
642   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
643   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
644   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
645                                                                                         \
646   foreign "C" mpz_init(mp_result1 "ptr");                                               \
647   foreign "C" mpz_init(mp_result2 "ptr");                                               \
648                                                                                         \
649   /* Perform the operation */                                                           \
650   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr");    \
651                                                                                         \
652   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
653            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
654            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
655            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
656 }
657
658 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add)
659 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub)
660 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul)
661 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd)
662 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q)
663 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r)
664 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact)
665 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and)
666 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior)
667 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor)
668 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com)
669
670 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr)
671 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr)
672
673 #ifndef SMP
674 section "bss" {
675   mp_tmp_w:  W_; // NB. mp_tmp_w is really an here mp_limb_t
676 }
677 #endif
678
679 gcdIntzh_fast
680 {
681     /* R1 = the first Int#; R2 = the second Int# */
682     W_ r; 
683     FETCH_MP_TEMP(mp_tmp_w);
684
685     W_[mp_tmp_w] = R1;
686     r = foreign "C" mpn_gcd_1(mp_tmp_w "ptr", 1, R2);
687
688     R1 = r;
689     /* Result parked in R1, return via info-pointer at TOS */
690     jump %ENTRY_CODE(Sp(0));
691 }
692
693
694 gcdIntegerIntzh_fast
695 {
696     /* R1 = s1; R2 = d1; R3 = the int */
697     R1 = foreign "C" mpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3);
698     
699     /* Result parked in R1, return via info-pointer at TOS */
700     jump %ENTRY_CODE(Sp(0));
701 }
702
703
704 cmpIntegerIntzh_fast
705 {
706     /* R1 = s1; R2 = d1; R3 = the int */
707     W_ usize, vsize, v_digit, u_digit;
708
709     usize = R1;
710     vsize = 0;
711     v_digit = R3;
712
713     // paraphrased from mpz_cmp_si() in the GMP sources
714     if (%gt(v_digit,0)) {
715         vsize = 1;
716     } else { 
717         if (%lt(v_digit,0)) {
718             vsize = -1;
719             v_digit = -v_digit;
720         }
721     }
722
723     if (usize != vsize) {
724         R1 = usize - vsize; 
725         jump %ENTRY_CODE(Sp(0));
726     }
727
728     if (usize == 0) {
729         R1 = 0; 
730         jump %ENTRY_CODE(Sp(0));
731     }
732
733     u_digit = W_[BYTE_ARR_CTS(R2)];
734
735     if (u_digit == v_digit) {
736         R1 = 0; 
737         jump %ENTRY_CODE(Sp(0));
738     }
739
740     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
741         R1 = usize; 
742     } else {
743         R1 = -usize; 
744     }
745
746     jump %ENTRY_CODE(Sp(0));
747 }
748
749 cmpIntegerzh_fast
750 {
751     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
752     W_ usize, vsize, size, up, vp;
753     CInt cmp;
754
755     // paraphrased from mpz_cmp() in the GMP sources
756     usize = R1;
757     vsize = R3;
758
759     if (usize != vsize) {
760         R1 = usize - vsize; 
761         jump %ENTRY_CODE(Sp(0));
762     }
763
764     if (usize == 0) {
765         R1 = 0; 
766         jump %ENTRY_CODE(Sp(0));
767     }
768
769     if (%lt(usize,0)) { // NB. not <, which is unsigned
770         size = -usize;
771     } else {
772         size = usize;
773     }
774
775     up = BYTE_ARR_CTS(R2);
776     vp = BYTE_ARR_CTS(R4);
777
778     cmp = foreign "C" mpn_cmp(up "ptr", vp "ptr", size);
779
780     if (cmp == 0 :: CInt) {
781         R1 = 0; 
782         jump %ENTRY_CODE(Sp(0));
783     }
784
785     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
786         R1 = 1;
787     } else {
788         R1 = (-1); 
789     }
790     /* Result parked in R1, return via info-pointer at TOS */
791     jump %ENTRY_CODE(Sp(0));
792 }
793
794 integer2Intzh_fast
795 {
796     /* R1 = s; R2 = d */
797     W_ r, s;
798
799     s = R1;
800     if (s == 0) {
801         r = 0;
802     } else {
803         r = W_[R2 + SIZEOF_StgArrWords];
804         if (%lt(s,0)) {
805             r = -r;
806         }
807     }
808     /* Result parked in R1, return via info-pointer at TOS */
809     R1 = r;
810     jump %ENTRY_CODE(Sp(0));
811 }
812
813 integer2Wordzh_fast
814 {
815   /* R1 = s; R2 = d */
816   W_ r, s;
817
818   s = R1;
819   if (s == 0) {
820     r = 0;
821   } else {
822     r = W_[R2 + SIZEOF_StgArrWords];
823     if (%lt(s,0)) {
824         r = -r;
825     }
826   }
827   /* Result parked in R1, return via info-pointer at TOS */
828   R1 = r;
829   jump %ENTRY_CODE(Sp(0));
830 }
831
832 decodeFloatzh_fast
833
834     W_ p;
835     F_ arg;
836     FETCH_MP_TEMP(mp_tmp1);
837     FETCH_MP_TEMP(mp_tmp_w);
838     
839     /* arguments: F1 = Float# */
840     arg = F1;
841     
842     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
843     
844     /* Be prepared to tell Lennart-coded __decodeFloat
845        where mantissa._mp_d can be put (it does not care about the rest) */
846     p = Hp - SIZEOF_StgArrWords;
847     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
848     StgArrWords_words(p) = 1;
849     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
850     
851     /* Perform the operation */
852     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg);
853     
854     /* returns: (Int# (expn), Int#, ByteArray#) */
855     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
856 }
857
858 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
859 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
860
861 decodeDoublezh_fast
862
863     D_ arg;
864     W_ p;
865     FETCH_MP_TEMP(mp_tmp1);
866     FETCH_MP_TEMP(mp_tmp_w);
867
868     /* arguments: D1 = Double# */
869     arg = D1;
870
871     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
872     
873     /* Be prepared to tell Lennart-coded __decodeDouble
874        where mantissa.d can be put (it does not care about the rest) */
875     p = Hp - ARR_SIZE + WDS(1);
876     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
877     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
878     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
879
880     /* Perform the operation */
881     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg);
882     
883     /* returns: (Int# (expn), Int#, ByteArray#) */
884     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
885 }
886
887 /* -----------------------------------------------------------------------------
888  * Concurrency primitives
889  * -------------------------------------------------------------------------- */
890
891 forkzh_fast
892 {
893   /* args: R1 = closure to spark */
894   
895   MAYBE_GC(R1_PTR, forkzh_fast);
896
897   // create it right now, return ThreadID in R1
898   "ptr" R1 = foreign "C" createIOThread( RtsFlags_GcFlags_initialStkSize(RtsFlags), 
899                                    R1 "ptr");
900   foreign "C" scheduleThread(R1 "ptr");
901
902   // switch at the earliest opportunity
903   CInt[context_switch] = 1 :: CInt;
904   
905   RET_P(R1);
906 }
907
908 yieldzh_fast
909 {
910   jump stg_yield_noregs;
911 }
912
913 myThreadIdzh_fast
914 {
915   /* no args. */
916   RET_P(CurrentTSO);
917 }
918
919 labelThreadzh_fast
920 {
921   /* args: 
922         R1 = ThreadId#
923         R2 = Addr# */
924 #ifdef DEBUG
925   foreign "C" labelThread(R1 "ptr", R2 "ptr");
926 #endif
927   jump %ENTRY_CODE(Sp(0));
928 }
929
930 isCurrentThreadBoundzh_fast
931 {
932   /* no args */
933   W_ r;
934   r = foreign "C" isThreadBound(CurrentTSO);
935   RET_N(r);
936 }
937
938
939 /* -----------------------------------------------------------------------------
940  * TVar primitives
941  * -------------------------------------------------------------------------- */
942
943 #ifdef REG_R1
944 #define SP_OFF 0
945 #define IF_NOT_REG_R1(x) 
946 #else
947 #define SP_OFF 1
948 #define IF_NOT_REG_R1(x) x
949 #endif
950
951 // Catch retry frame ------------------------------------------------------------
952
953 #define CATCH_RETRY_FRAME_ERROR(label) \
954   label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }
955
956 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
957 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
958 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
959 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
960 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
961 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
962 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
963 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)
964
965 #if MAX_VECTORED_RTN > 8
966 #error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
967 #endif
968
969 #if defined(PROFILING)
970 #define CATCH_RETRY_FRAME_BITMAP 7
971 #define CATCH_RETRY_FRAME_WORDS  6
972 #else
973 #define CATCH_RETRY_FRAME_BITMAP 1
974 #define CATCH_RETRY_FRAME_WORDS  4
975 #endif
976
977 INFO_TABLE_RET(stg_catch_retry_frame,
978                CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
979                CATCH_RETRY_FRAME,
980                stg_catch_retry_frame_0_ret,
981                stg_catch_retry_frame_1_ret,
982                stg_catch_retry_frame_2_ret,
983                stg_catch_retry_frame_3_ret,
984                stg_catch_retry_frame_4_ret,
985                stg_catch_retry_frame_5_ret,
986                stg_catch_retry_frame_6_ret,
987                stg_catch_retry_frame_7_ret)
988 {
989    W_ r, frame, trec, outer;
990    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
991
992    frame = Sp;
993    trec = StgTSO_trec(CurrentTSO);
994    "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
995    r = foreign "C" stmCommitNestedTransaction(BaseReg "ptr", trec "ptr");
996    if (r) {
997      /* Succeeded (either first branch or second branch) */
998      StgTSO_trec(CurrentTSO) = outer;
999      Sp = Sp + SIZEOF_StgCatchRetryFrame;
1000      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1001      jump %ENTRY_CODE(Sp(SP_OFF));
1002    } else {
1003      /* Did not commit: retry */
1004      W_ new_trec;
1005      "ptr" new_trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
1006      StgTSO_trec(CurrentTSO) = new_trec;
1007      if (StgCatchRetryFrame_running_alt_code(frame)) {
1008        R1 = StgCatchRetryFrame_alt_code(frame);
1009      } else {
1010        R1 = StgCatchRetryFrame_first_code(frame);
1011        StgCatchRetryFrame_first_code_trec(frame) = new_trec;
1012      }
1013      Sp_adj(-1);
1014      jump RET_LBL(stg_ap_v);
1015    }
1016 }
1017
1018
1019 // Atomically frame -------------------------------------------------------------
1020
1021
1022 #define ATOMICALLY_FRAME_ERROR(label) \
1023   label { foreign "C" barf("atomically_frame incorrectly entered!"); }
1024
1025 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
1026 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
1027 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
1028 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
1029 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
1030 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
1031 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
1032 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)
1033
1034 #if MAX_VECTORED_RTN > 8
1035 #error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
1036 #endif
1037
1038 #if defined(PROFILING)
1039 #define ATOMICALLY_FRAME_BITMAP 7
1040 #define ATOMICALLY_FRAME_WORDS  4
1041 #else
1042 #define ATOMICALLY_FRAME_BITMAP 1
1043 #define ATOMICALLY_FRAME_WORDS  2
1044 #endif
1045
1046
1047 INFO_TABLE_RET(stg_atomically_frame,
1048                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1049                ATOMICALLY_FRAME,
1050                stg_atomically_frame_0_ret,
1051                stg_atomically_frame_1_ret,
1052                stg_atomically_frame_2_ret,
1053                stg_atomically_frame_3_ret,
1054                stg_atomically_frame_4_ret,
1055                stg_atomically_frame_5_ret,
1056                stg_atomically_frame_6_ret,
1057                stg_atomically_frame_7_ret)
1058 {
1059    W_ frame, trec, valid;
1060    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1061
1062    frame = Sp;
1063    trec = StgTSO_trec(CurrentTSO);
1064    if (StgAtomicallyFrame_waiting(frame)) {
1065      /* The TSO is currently waiting: should we stop waiting? */
1066      valid = foreign "C" stmReWait(CurrentTSO "ptr");
1067      if (valid) {
1068        /* Previous attempt is still valid: no point trying again yet */
1069           IF_NOT_REG_R1(Sp_adj(-2);
1070                         Sp(1) = stg_NO_FINALIZER_closure;
1071                         Sp(0) = stg_ut_1_0_unreg_info;)
1072        jump stg_block_noregs;
1073      } else {
1074        /* Previous attempt is no longer valid: try again */
1075        "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", NO_TREC "ptr");
1076        StgTSO_trec(CurrentTSO) = trec;
1077        StgAtomicallyFrame_waiting(frame) = 0 :: CInt; /* false; */
1078        R1 = StgAtomicallyFrame_code(frame);
1079        Sp_adj(-1);
1080        jump RET_LBL(stg_ap_v);
1081      }
1082    } else {
1083      /* The TSO is not currently waiting: try to commit the transaction */
1084      valid = foreign "C" stmCommitTransaction(BaseReg "ptr", trec "ptr");
1085      if (valid) {
1086        /* Transaction was valid: commit succeeded */
1087        StgTSO_trec(CurrentTSO) = NO_TREC;
1088        Sp = Sp + SIZEOF_StgAtomicallyFrame;
1089        IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1090        jump %ENTRY_CODE(Sp(SP_OFF));
1091      } else {
1092        /* Transaction was not valid: try again */
1093        "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", NO_TREC "ptr");
1094        StgTSO_trec(CurrentTSO) = trec;
1095        R1 = StgAtomicallyFrame_code(frame);
1096        Sp_adj(-1);
1097        jump RET_LBL(stg_ap_v);
1098      }
1099    }
1100 }
1101
1102
1103 // STM catch frame --------------------------------------------------------------
1104
1105 #define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)          \
1106    label                                                   \
1107    {                                                       \
1108       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )  \
1109       Sp = Sp + SIZEOF_StgCatchSTMFrame;                   \
1110       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)             \
1111       jump ret;                                            \
1112    }
1113
1114 #ifdef REG_R1
1115 #define SP_OFF 0
1116 #else
1117 #define SP_OFF 1
1118 #endif
1119
1120 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
1121 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
1122 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
1123 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
1124 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
1125 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
1126 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
1127 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
1128
1129 #if MAX_VECTORED_RTN > 8
1130 #error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
1131 #endif
1132
1133 #if defined(PROFILING)
1134 #define CATCH_STM_FRAME_BITMAP 3
1135 #define CATCH_STM_FRAME_WORDS  3
1136 #else
1137 #define CATCH_STM_FRAME_BITMAP 0
1138 #define CATCH_STM_FRAME_WORDS  1
1139 #endif
1140
1141 /* Catch frames are very similar to update frames, but when entering
1142  * one we just pop the frame off the stack and perform the correct
1143  * kind of return to the activation record underneath us on the stack.
1144  */
1145
1146 INFO_TABLE_RET(stg_catch_stm_frame,
1147                CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
1148                CATCH_STM_FRAME,
1149                stg_catch_stm_frame_0_ret,
1150                stg_catch_stm_frame_1_ret,
1151                stg_catch_stm_frame_2_ret,
1152                stg_catch_stm_frame_3_ret,
1153                stg_catch_stm_frame_4_ret,
1154                stg_catch_stm_frame_5_ret,
1155                stg_catch_stm_frame_6_ret,
1156                stg_catch_stm_frame_7_ret)
1157 CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1158
1159
1160 // Primop definition ------------------------------------------------------------
1161
1162 atomicallyzh_fast
1163 {
1164   W_ frame;
1165   W_ old_trec;
1166   W_ new_trec;
1167   
1168   // stmStartTransaction may allocate
1169   MAYBE_GC (R1_PTR, atomicallyzh_fast); 
1170
1171   /* Args: R1 = m :: STM a */
1172   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1173
1174   /* Set up the atomically frame */
1175   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1176   frame = Sp;
1177
1178   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1179   StgAtomicallyFrame_waiting(frame) = 0 :: CInt; // False
1180   StgAtomicallyFrame_code(frame) = R1;
1181
1182   /* Start the memory transcation */
1183   old_trec = StgTSO_trec(CurrentTSO);
1184   "ptr" new_trec = foreign "C" stmStartTransaction(BaseReg "ptr", old_trec "ptr");
1185   StgTSO_trec(CurrentTSO) = new_trec;
1186
1187   /* Apply R1 to the realworld token */
1188   Sp_adj(-1);
1189   jump RET_LBL(stg_ap_v);
1190 }
1191
1192
1193 catchSTMzh_fast
1194 {
1195   W_ frame;
1196   
1197   /* Args: R1 :: STM a */
1198   /* Args: R2 :: Exception -> STM a */
1199   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1200
1201   /* Set up the catch frame */
1202   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1203   frame = Sp;
1204
1205   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1206   StgCatchSTMFrame_handler(frame) = R2;
1207
1208   /* Apply R1 to the realworld token */
1209   Sp_adj(-1);
1210   jump RET_LBL(stg_ap_v);
1211 }
1212
1213
1214 catchRetryzh_fast
1215 {
1216   W_ frame;
1217   W_ new_trec;
1218   W_ trec;
1219
1220   // stmStartTransaction may allocate
1221   MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 
1222
1223   /* Args: R1 :: STM a */
1224   /* Args: R2 :: STM a */
1225   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1226
1227   /* Start a nested transaction within which to run the first code */
1228   trec = StgTSO_trec(CurrentTSO);
1229   "ptr" new_trec = foreign "C" stmStartTransaction(BaseReg "ptr", trec "ptr");
1230   StgTSO_trec(CurrentTSO) = new_trec;
1231
1232   /* Set up the catch-retry frame */
1233   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1234   frame = Sp;
1235   
1236   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1237   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1238   StgCatchRetryFrame_first_code(frame) = R1;
1239   StgCatchRetryFrame_alt_code(frame) = R2;
1240   StgCatchRetryFrame_first_code_trec(frame) = new_trec;
1241
1242   /* Apply R1 to the realworld token */
1243   Sp_adj(-1);
1244   jump RET_LBL(stg_ap_v);  
1245 }
1246
1247
1248 retryzh_fast
1249 {
1250   W_ frame_type;
1251   W_ frame;
1252   W_ trec;
1253   W_ outer;
1254   W_ r;
1255
1256   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1257
1258   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1259 retry_pop_stack:
1260   trec = StgTSO_trec(CurrentTSO);
1261   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
1262   StgTSO_sp(CurrentTSO) = Sp;
1263   frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr");
1264   Sp = StgTSO_sp(CurrentTSO);
1265   frame = Sp;
1266
1267   if (frame_type == CATCH_RETRY_FRAME) {
1268     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1269     ASSERT(outer != NO_TREC);
1270     if (!StgCatchRetryFrame_running_alt_code(frame)) {
1271       // Retry in the first code: try the alternative
1272       "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
1273       StgTSO_trec(CurrentTSO) = trec;
1274       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1275       R1 = StgCatchRetryFrame_alt_code(frame);
1276       Sp_adj(-1);
1277       jump RET_LBL(stg_ap_v);
1278     } else {
1279       // Retry in the alternative code: propagate
1280       W_ other_trec;
1281       other_trec = StgCatchRetryFrame_first_code_trec(frame);
1282       r = foreign "C" stmCommitNestedTransaction(BaseReg "ptr", other_trec "ptr");
1283       if (r) {
1284         r = foreign "C" stmCommitNestedTransaction(BaseReg "ptr", trec "ptr");
1285       }
1286       if (r) {
1287         // Merge between siblings succeeded: commit it back to enclosing transaction
1288         // and then propagate the retry
1289         StgTSO_trec(CurrentTSO) = outer;
1290         Sp = Sp + SIZEOF_StgCatchRetryFrame;
1291         goto retry_pop_stack;
1292       } else {
1293         // Merge failed: we musn't propagate the retry.  Try both paths again.
1294         "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
1295         StgCatchRetryFrame_first_code_trec(frame) = trec;
1296         StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1297         StgTSO_trec(CurrentTSO) = trec;
1298         R1 = StgCatchRetryFrame_first_code(frame);
1299         Sp_adj(-1);
1300         jump RET_LBL(stg_ap_v);
1301       }
1302     }
1303   }
1304
1305   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1306   ASSERT(frame_type == ATOMICALLY_FRAME);
1307   ASSERT(outer == NO_TREC);
1308   r = foreign "C" stmWait(BaseReg "ptr", CurrentTSO "ptr", trec "ptr");
1309   if (r) {
1310     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1311     StgAtomicallyFrame_waiting(frame) = 1 :: CInt; // true
1312     Sp = frame;
1313     // Fix up the stack in the unregisterised case: the return convention is different.
1314     IF_NOT_REG_R1(Sp_adj(-2); 
1315                   Sp(1) = stg_NO_FINALIZER_closure;
1316                   Sp(0) = stg_ut_1_0_unreg_info;)
1317     jump stg_block_noregs;
1318   } else {
1319     // Transaction was not valid: retry immediately
1320     "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
1321     StgTSO_trec(CurrentTSO) = trec;
1322     R1 = StgAtomicallyFrame_code(frame);
1323     Sp = frame;
1324     Sp_adj(-1);
1325     jump RET_LBL(stg_ap_v);
1326   }
1327 }
1328
1329
1330 newTVarzh_fast
1331 {
1332   W_ tv;
1333   W_ trec;
1334
1335   /* Args: R1 = initialisation value */
1336
1337   ALLOC_PRIM( SIZEOF_StgTVar, R1_PTR, newTVarzh_fast);
1338   tv = Hp - SIZEOF_StgTVar + WDS(1);
1339   SET_HDR(tv,stg_TVAR_info,W_[CCCS]);
1340   StgTVar_current_value(tv) = R1;
1341   StgTVar_first_wait_queue_entry(tv) = stg_END_STM_WAIT_QUEUE_closure;
1342 #if defined(SMP)
1343   trec = StgTSO_trec(CurrentTSO);
1344   StgTVar_last_update_by(tv) = trec;
1345 #else
1346   StgTVar_last_update_by(tv) = NO_TREC;
1347 #endif
1348     
1349   RET_P(tv);
1350 }
1351
1352
1353 readTVarzh_fast
1354 {
1355   W_ trec;
1356   W_ tvar;
1357   W_ result;
1358
1359   /* Args: R1 = TVar closure */
1360
1361   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1362   trec = StgTSO_trec(CurrentTSO);
1363   tvar = R1;
1364   "ptr" result = foreign "C" stmReadTVar(BaseReg "ptr", trec "ptr", tvar "ptr");
1365
1366   RET_P(result);
1367 }
1368
1369
1370 writeTVarzh_fast
1371 {
1372   W_ trec;
1373   W_ tvar;
1374   W_ new_value;
1375   
1376   /* Args: R1 = TVar closure */
1377   /*       R2 = New value    */
1378
1379   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1380   trec = StgTSO_trec(CurrentTSO);
1381   tvar = R1;
1382   new_value = R2;
1383   foreign "C" stmWriteTVar(BaseReg "ptr", trec "ptr", tvar "ptr", new_value "ptr");
1384
1385   jump %ENTRY_CODE(Sp(0));
1386 }
1387
1388
1389 /* -----------------------------------------------------------------------------
1390  * MVar primitives
1391  *
1392  * take & putMVar work as follows.  Firstly, an important invariant:
1393  *
1394  *    If the MVar is full, then the blocking queue contains only
1395  *    threads blocked on putMVar, and if the MVar is empty then the
1396  *    blocking queue contains only threads blocked on takeMVar.
1397  *
1398  * takeMvar:
1399  *    MVar empty : then add ourselves to the blocking queue
1400  *    MVar full  : remove the value from the MVar, and
1401  *                 blocking queue empty     : return
1402  *                 blocking queue non-empty : perform the first blocked putMVar
1403  *                                            from the queue, and wake up the
1404  *                                            thread (MVar is now full again)
1405  *
1406  * putMVar is just the dual of the above algorithm.
1407  *
1408  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1409  * the stack of the thread waiting to do the putMVar.  See
1410  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1411  * the stack layout, and the PerformPut and PerformTake macros below.
1412  *
1413  * It is important that a blocked take or put is woken up with the
1414  * take/put already performed, because otherwise there would be a
1415  * small window of vulnerability where the thread could receive an
1416  * exception and never perform its take or put, and we'd end up with a
1417  * deadlock.
1418  *
1419  * -------------------------------------------------------------------------- */
1420
1421 isEmptyMVarzh_fast
1422 {
1423     /* args: R1 = MVar closure */
1424
1425     if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
1426         RET_N(1);
1427     } else {
1428         RET_N(0);
1429     }
1430 }
1431
1432 newMVarzh_fast
1433 {
1434     /* args: none */
1435     W_ mvar;
1436
1437     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1438   
1439     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1440     SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
1441     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1442     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1443     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1444     RET_P(mvar);
1445 }
1446
1447
1448 /* If R1 isn't available, pass it on the stack */
1449 #ifdef REG_R1
1450 #define PerformTake(tso, value)                         \
1451     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1452     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1453 #else
1454 #define PerformTake(tso, value)                                 \
1455     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1456     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1457 #endif
1458
1459 #define PerformPut(tso,lval)                    \
1460     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1461     lval = W_[StgTSO_sp(tso) - WDS(1)];
1462
1463 takeMVarzh_fast
1464 {
1465     W_ mvar, val, info, tso;
1466
1467     /* args: R1 = MVar closure */
1468     mvar = R1;
1469
1470 #if defined(SMP)
1471     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1472 #else
1473     info = GET_INFO(mvar);
1474 #endif
1475
1476     /* If the MVar is empty, put ourselves on its blocking queue,
1477      * and wait until we're woken up.
1478      */
1479     if (info == stg_EMPTY_MVAR_info) {
1480         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1481             StgMVar_head(mvar) = CurrentTSO;
1482         } else {
1483             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1484         }
1485         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1486         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1487         StgTSO_block_info(CurrentTSO)  = mvar;
1488         StgMVar_tail(mvar) = CurrentTSO;
1489         
1490         jump stg_block_takemvar;
1491   }
1492
1493   /* we got the value... */
1494   val = StgMVar_value(mvar);
1495
1496   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1497   {
1498       /* There are putMVar(s) waiting... 
1499        * wake up the first thread on the queue
1500        */
1501       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1502
1503       /* actually perform the putMVar for the thread that we just woke up */
1504       tso = StgMVar_head(mvar);
1505       PerformPut(tso,StgMVar_value(mvar));
1506
1507 #if defined(GRAN) || defined(PAR)
1508       /* ToDo: check 2nd arg (mvar) is right */
1509       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar),mvar);
1510       StgMVar_head(mvar) = tso;
1511 #else
1512       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1513       StgMVar_head(mvar) = tso;
1514 #endif
1515
1516       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1517           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1518       }
1519
1520 #if defined(SMP)
1521       foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1522 #endif
1523       RET_P(val);
1524   } 
1525   else
1526   {
1527       /* No further putMVars, MVar is now empty */
1528       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1529  
1530 #if defined(SMP)
1531       foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1532 #else
1533       SET_INFO(mvar,stg_EMPTY_MVAR_info);
1534 #endif
1535
1536       RET_P(val);
1537   }
1538 }
1539
1540
1541 tryTakeMVarzh_fast
1542 {
1543     W_ mvar, val, info, tso;
1544
1545     /* args: R1 = MVar closure */
1546
1547     mvar = R1;
1548
1549 #if defined(SMP)
1550     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1551 #else
1552     info = GET_INFO(mvar);
1553 #endif
1554
1555     if (info == stg_EMPTY_MVAR_info) {
1556 #if defined(SMP)
1557         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1558 #endif
1559         /* HACK: we need a pointer to pass back, 
1560          * so we abuse NO_FINALIZER_closure
1561          */
1562         RET_NP(0, stg_NO_FINALIZER_closure);
1563     }
1564
1565     /* we got the value... */
1566     val = StgMVar_value(mvar);
1567
1568     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1569
1570         /* There are putMVar(s) waiting... 
1571          * wake up the first thread on the queue
1572          */
1573         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1574
1575         /* actually perform the putMVar for the thread that we just woke up */
1576         tso = StgMVar_head(mvar);
1577         PerformPut(tso,StgMVar_value(mvar));
1578
1579 #if defined(GRAN) || defined(PAR)
1580         /* ToDo: check 2nd arg (mvar) is right */
1581         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr");
1582         StgMVar_head(mvar) = tso;
1583 #else
1584         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1585         StgMVar_head(mvar) = tso;
1586 #endif
1587
1588         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1589             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1590         }
1591 #if defined(SMP)
1592         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1593 #endif
1594     }
1595     else 
1596     {
1597         /* No further putMVars, MVar is now empty */
1598         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1599 #if defined(SMP)
1600         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1601 #else
1602         SET_INFO(mvar,stg_EMPTY_MVAR_info);
1603 #endif
1604     }
1605     
1606     RET_NP(1, val);
1607 }
1608
1609
1610 putMVarzh_fast
1611 {
1612     W_ mvar, info, tso;
1613
1614     /* args: R1 = MVar, R2 = value */
1615     mvar = R1;
1616
1617 #if defined(SMP)
1618     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1619 #else
1620     info = GET_INFO(mvar);
1621 #endif
1622
1623     if (info == stg_FULL_MVAR_info) {
1624         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1625             StgMVar_head(mvar) = CurrentTSO;
1626         } else {
1627             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1628         }
1629         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1630         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1631         StgTSO_block_info(CurrentTSO)  = mvar;
1632         StgMVar_tail(mvar) = CurrentTSO;
1633         
1634         jump stg_block_putmvar;
1635     }
1636   
1637     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1638
1639         /* There are takeMVar(s) waiting: wake up the first one
1640          */
1641         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1642
1643         /* actually perform the takeMVar */
1644         tso = StgMVar_head(mvar);
1645         PerformTake(tso, R2);
1646       
1647 #if defined(GRAN) || defined(PAR)
1648         /* ToDo: check 2nd arg (mvar) is right */
1649         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr");
1650         StgMVar_head(mvar) = tso;
1651 #else
1652         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1653         StgMVar_head(mvar) = tso;
1654 #endif
1655
1656         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1657             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1658         }
1659
1660 #if defined(SMP)
1661         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1662 #endif
1663         jump %ENTRY_CODE(Sp(0));
1664     }
1665     else
1666     {
1667         /* No further takes, the MVar is now full. */
1668         StgMVar_value(mvar) = R2;
1669
1670 #if defined(SMP)
1671         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1672 #else
1673         SET_INFO(mvar,stg_FULL_MVAR_info);
1674 #endif
1675         jump %ENTRY_CODE(Sp(0));
1676     }
1677     
1678     /* ToDo: yield afterward for better communication performance? */
1679 }
1680
1681
1682 tryPutMVarzh_fast
1683 {
1684     W_ mvar, info, tso;
1685
1686     /* args: R1 = MVar, R2 = value */
1687     mvar = R1;
1688
1689 #if defined(SMP)
1690     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1691 #else
1692     info = GET_INFO(mvar);
1693 #endif
1694
1695     if (info == stg_FULL_MVAR_info) {
1696 #if defined(SMP)
1697         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1698 #endif
1699         RET_N(0);
1700     }
1701   
1702     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1703
1704         /* There are takeMVar(s) waiting: wake up the first one
1705          */
1706         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1707         
1708         /* actually perform the takeMVar */
1709         tso = StgMVar_head(mvar);
1710         PerformTake(tso, R2);
1711       
1712 #if defined(GRAN) || defined(PAR)
1713         /* ToDo: check 2nd arg (mvar) is right */
1714         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr");
1715         StgMVar_head(mvar) = tso;
1716 #else
1717         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1718         StgMVar_head(mvar) = tso;
1719 #endif
1720
1721         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1722             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1723         }
1724
1725 #if defined(SMP)
1726         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1727 #endif
1728         jump %ENTRY_CODE(Sp(0));
1729     }
1730     else
1731     {
1732         /* No further takes, the MVar is now full. */
1733         StgMVar_value(mvar) = R2;
1734
1735 #if defined(SMP)
1736         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1737 #else
1738         SET_INFO(mvar,stg_FULL_MVAR_info);
1739 #endif
1740         jump %ENTRY_CODE(Sp(0));
1741     }
1742     
1743     /* ToDo: yield afterward for better communication performance? */
1744 }
1745
1746
1747 /* -----------------------------------------------------------------------------
1748    Stable pointer primitives
1749    -------------------------------------------------------------------------  */
1750
1751 makeStableNamezh_fast
1752 {
1753     W_ index, sn_obj;
1754
1755     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1756   
1757     index = foreign "C" lookupStableName(R1 "ptr");
1758
1759     /* Is there already a StableName for this heap object?
1760      *  stable_ptr_table is a pointer to an array of snEntry structs.
1761      */
1762     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1763         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1764         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1765         StgStableName_sn(sn_obj) = index;
1766         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1767     } else {
1768         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1769     }
1770     
1771     RET_P(sn_obj);
1772 }
1773
1774
1775 makeStablePtrzh_fast
1776 {
1777     /* Args: R1 = a */
1778     W_ sp;
1779     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1780     "ptr" sp = foreign "C" getStablePtr(R1 "ptr");
1781     RET_N(sp);
1782 }
1783
1784 deRefStablePtrzh_fast
1785 {
1786     /* Args: R1 = the stable ptr */
1787     W_ r, sp;
1788     sp = R1;
1789     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1790     RET_P(r);
1791 }
1792
1793 /* -----------------------------------------------------------------------------
1794    Bytecode object primitives
1795    -------------------------------------------------------------------------  */
1796
1797 newBCOzh_fast
1798 {
1799     /* R1 = instrs
1800        R2 = literals
1801        R3 = ptrs
1802        R4 = itbls
1803        R5 = arity
1804        R6 = bitmap array
1805     */
1806     W_ bco, bitmap_arr, bytes, words;
1807     
1808     bitmap_arr = R6;
1809     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1810     bytes = WDS(words);
1811
1812     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );
1813
1814     bco = Hp - bytes + WDS(1);
1815     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1816     
1817     StgBCO_instrs(bco)     = R1;
1818     StgBCO_literals(bco)   = R2;
1819     StgBCO_ptrs(bco)       = R3;
1820     StgBCO_itbls(bco)      = R4;
1821     StgBCO_arity(bco)      = HALF_W_(R5);
1822     StgBCO_size(bco)       = HALF_W_(words);
1823     
1824     // Copy the arity/bitmap info into the BCO
1825     W_ i;
1826     i = 0;
1827 for:
1828     if (i < StgArrWords_words(bitmap_arr)) {
1829         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1830         i = i + 1;
1831         goto for;
1832     }
1833     
1834     RET_P(bco);
1835 }
1836
1837
1838 mkApUpd0zh_fast
1839 {
1840     // R1 = the BCO# for the AP
1841     //  
1842     W_ ap;
1843
1844     // This function is *only* used to wrap zero-arity BCOs in an
1845     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1846     // saturated and always points directly to a FUN or BCO.
1847     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1848            StgBCO_arity(R1) == HALF_W_(0));
1849
1850     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1851     TICK_ALLOC_UP_THK(0, 0);
1852     CCCS_ALLOC(SIZEOF_StgAP);
1853
1854     ap = Hp - SIZEOF_StgAP + WDS(1);
1855     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1856     
1857     StgAP_n_args(ap) = HALF_W_(0);
1858     StgAP_fun(ap) = R1;
1859     
1860     RET_P(ap);
1861 }
1862
1863 /* -----------------------------------------------------------------------------
1864    Thread I/O blocking primitives
1865    -------------------------------------------------------------------------- */
1866
1867 /* Add a thread to the end of the blocked queue. (C-- version of the C
1868  * macro in Schedule.h).
1869  */
1870 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1871     ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
1872     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1873       W_[blocked_queue_hd] = tso;                       \
1874     } else {                                            \
1875       StgTSO_link(W_[blocked_queue_tl]) = tso;          \
1876     }                                                   \
1877     W_[blocked_queue_tl] = tso;
1878
1879 waitReadzh_fast
1880 {
1881     /* args: R1 */
1882 #ifdef THREADED_RTS
1883     foreign "C" barf("waitRead# on threaded RTS");
1884 #endif
1885
1886     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1887     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1888     StgTSO_block_info(CurrentTSO) = R1;
1889     // No locking - we're not going to use this interface in the
1890     // threaded RTS anyway.
1891     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1892     jump stg_block_noregs;
1893 }
1894
1895 waitWritezh_fast
1896 {
1897     /* args: R1 */
1898 #ifdef THREADED_RTS
1899     foreign "C" barf("waitWrite# on threaded RTS");
1900 #endif
1901
1902     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1903     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1904     StgTSO_block_info(CurrentTSO) = R1;
1905     // No locking - we're not going to use this interface in the
1906     // threaded RTS anyway.
1907     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1908     jump stg_block_noregs;
1909 }
1910
1911
1912 STRING(stg_delayzh_malloc_str, "delayzh_fast")
1913 delayzh_fast
1914 {
1915 #ifdef mingw32_HOST_OS
1916     W_ ares;
1917     CInt reqID;
1918 #else
1919     W_ t, prev, target;
1920 #endif
1921
1922 #ifdef THREADED_RTS
1923     foreign "C" barf("delay# on threaded RTS");
1924 #endif
1925
1926     /* args: R1 (microsecond delay amount) */
1927     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1928     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
1929
1930 #ifdef mingw32_HOST_OS
1931
1932     /* could probably allocate this on the heap instead */
1933     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1934                                             stg_delayzh_malloc_str);
1935     reqID = foreign "C" addDelayRequest(R1);
1936     StgAsyncIOResult_reqID(ares)   = reqID;
1937     StgAsyncIOResult_len(ares)     = 0;
1938     StgAsyncIOResult_errCode(ares) = 0;
1939     StgTSO_block_info(CurrentTSO)  = ares;
1940
1941     /* Having all async-blocked threads reside on the blocked_queue
1942      * simplifies matters, so change the status to OnDoProc put the
1943      * delayed thread on the blocked_queue.
1944      */
1945     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1946     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1947     jump stg_block_async_void;
1948
1949 #else
1950
1951     W_ time;
1952     time = foreign "C" getourtimeofday();
1953     target = (R1 / (TICK_MILLISECS*1000)) + time;
1954     StgTSO_block_info(CurrentTSO) = target;
1955
1956     /* Insert the new thread in the sleeping queue. */
1957     prev = NULL;
1958     t = W_[sleeping_queue];
1959 while:
1960     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
1961         prev = t;
1962         t = StgTSO_link(t);
1963         goto while;
1964     }
1965
1966     StgTSO_link(CurrentTSO) = t;
1967     if (prev == NULL) {
1968         W_[sleeping_queue] = CurrentTSO;
1969     } else {
1970         StgTSO_link(prev) = CurrentTSO;
1971     }
1972     jump stg_block_noregs;
1973 #endif
1974 }
1975
1976
1977 #ifdef mingw32_HOST_OS
1978 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
1979 asyncReadzh_fast
1980 {
1981     W_ ares;
1982     CInt reqID;
1983
1984 #ifdef THREADED_RTS
1985     foreign "C" barf("asyncRead# on threaded RTS");
1986 #endif
1987
1988     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1989     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1990     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1991
1992     /* could probably allocate this on the heap instead */
1993     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1994                                             stg_asyncReadzh_malloc_str);
1995     reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr");
1996     StgAsyncIOResult_reqID(ares)   = reqID;
1997     StgAsyncIOResult_len(ares)     = 0;
1998     StgAsyncIOResult_errCode(ares) = 0;
1999     StgTSO_block_info(CurrentTSO)  = ares;
2000     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2001     jump stg_block_async;
2002 }
2003
2004 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
2005 asyncWritezh_fast
2006 {
2007     W_ ares;
2008     CInt reqID;
2009
2010 #ifdef THREADED_RTS
2011     foreign "C" barf("asyncWrite# on threaded RTS");
2012 #endif
2013
2014     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2015     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2016     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2017
2018     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2019                                             stg_asyncWritezh_malloc_str);
2020     reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr");
2021
2022     StgAsyncIOResult_reqID(ares)   = reqID;
2023     StgAsyncIOResult_len(ares)     = 0;
2024     StgAsyncIOResult_errCode(ares) = 0;
2025     StgTSO_block_info(CurrentTSO)  = ares;
2026     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2027     jump stg_block_async;
2028 }
2029
2030 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
2031 asyncDoProczh_fast
2032 {
2033     W_ ares;
2034     CInt reqID;
2035
2036     /* args: R1 = proc, R2 = param */
2037     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2038     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2039
2040     /* could probably allocate this on the heap instead */
2041     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2042                                             stg_asyncDoProczh_malloc_str);
2043     reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr");
2044     StgAsyncIOResult_reqID(ares)   = reqID;
2045     StgAsyncIOResult_len(ares)     = 0;
2046     StgAsyncIOResult_errCode(ares) = 0;
2047     StgTSO_block_info(CurrentTSO) = ares;
2048     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2049     jump stg_block_async;
2050 }
2051 #endif
2052
2053 /* -----------------------------------------------------------------------------
2054   ** temporary **
2055
2056    classes CCallable and CReturnable don't really exist, but the
2057    compiler insists on generating dictionaries containing references
2058    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
2059    for these.  Some C compilers can't cope with zero-length static arrays,
2060    so we have to make these one element long.
2061   --------------------------------------------------------------------------- */
2062
2063 section "rodata" {
2064   GHC_ZCCCallable_static_info:   W_ 0;
2065 }
2066
2067 section "rodata" {
2068   GHC_ZCCReturnable_static_info: W_ 0;
2069 }