a3f5144df0628234f1497664401213f5dae6d2e8
[ghc-hetmet.git] / ghc / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check, the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *less* than the size of a word, make sure we
42  * round up to the nearest word for the size of the array.
43  */
44
45 newByteArrayzh_fast
46 {
47     W_ words, payload_words, n, p;
48     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
49     n = R1;
50     payload_words = ROUNDUP_BYTES_TO_WDS(n);
51     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
52     "ptr" p = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
53     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
54     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
55     StgArrWords_words(p) = payload_words;
56     RET_P(p);
57 }
58
59 newPinnedByteArrayzh_fast
60 {
61     W_ words, payload_words, n, p;
62
63     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
64     n = R1;
65     payload_words = ROUNDUP_BYTES_TO_WDS(n);
66
67     // We want an 8-byte aligned array.  allocatePinned() gives us
68     // 8-byte aligned memory by default, but we want to align the
69     // *goods* inside the ArrWords object, so we have to check the
70     // size of the ArrWords header and adjust our size accordingly.
71     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
72     if ((SIZEOF_StgArrWords & 7) != 0) {
73         words = words + 1;
74     }
75
76     "ptr" p = foreign "C" allocatePinned(words) [];
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78
79     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
80     // have to push the object forward one word so that the goods
81     // fall on an 8-byte boundary.
82     if ((SIZEOF_StgArrWords & 7) != 0) {
83         p = p + WDS(1);
84     }
85
86     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
87     StgArrWords_words(p) = payload_words;
88     RET_P(p);
89 }
90
91 newArrayzh_fast
92 {
93     W_ words, n, init, arr, p;
94     /* Args: R1 = words, R2 = initialisation value */
95
96     n = R1;
97     MAYBE_GC(R2_PTR,newArrayzh_fast);
98
99     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
100     "ptr" arr = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
101     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
102
103     SET_HDR(arr, stg_MUT_ARR_PTRS_info, W_[CCCS]);
104     StgMutArrPtrs_ptrs(arr) = n;
105
106     // Initialise all elements of the the array with the value in R2
107     init = R2;
108     p = arr + SIZEOF_StgMutArrPtrs;
109   for:
110     if (p < arr + WDS(words)) {
111         W_[p] = init;
112         p = p + WDS(1);
113         goto for;
114     }
115
116     RET_P(arr);
117 }
118
119 unsafeThawArrayzh_fast
120 {
121   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
122   //
123   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
124   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
125   // it on the mutable list for the GC to remove (removing something from
126   // the mutable list is not easy, because the mut_list is only singly-linked).
127   // 
128   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
129   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
130   // to indicate that it is still on the mutable list.
131   //
132   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
133   // either it is on a mut_list, or it isn't.  We adopt the convention that
134   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
135   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
136   // we put it on the mutable list more than once, but it would get scavenged
137   // multiple times during GC, which would be unnecessarily slow.
138   //
139   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
140         SET_INFO(R1,stg_MUT_ARR_PTRS_info);
141         foreign "C" recordMutableLock(R1 "ptr") [R1];
142         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
143         RET_P(R1);
144   } else {
145         SET_INFO(R1,stg_MUT_ARR_PTRS_info);
146         RET_P(R1);
147   }
148 }
149
150 /* -----------------------------------------------------------------------------
151    MutVar primitives
152    -------------------------------------------------------------------------- */
153
154 newMutVarzh_fast
155 {
156     W_ mv;
157     /* Args: R1 = initialisation value */
158
159     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
160
161     mv = Hp - SIZEOF_StgMutVar + WDS(1);
162     SET_HDR(mv,stg_MUT_VAR_info,W_[CCCS]);
163     StgMutVar_var(mv) = R1;
164     
165     RET_P(mv);
166 }
167
168 atomicModifyMutVarzh_fast
169 {
170     W_ mv, z, x, y, r;
171     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
172
173     /* If x is the current contents of the MutVar#, then 
174        We want to make the new contents point to
175
176          (sel_0 (f x))
177  
178        and the return value is
179          
180          (sel_1 (f x))
181
182         obviously we can share (f x).
183
184          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
185          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
186          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
187     */
188
189 #if MIN_UPD_SIZE > 1
190 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
191 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
192 #else
193 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
194 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
195 #endif
196
197 #if MIN_UPD_SIZE > 2
198 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
199 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
200 #else
201 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
202 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
203 #endif
204
205 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
206
207    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
208
209 #if defined(SMP)
210     foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
211 #endif
212
213    x = StgMutVar_var(R1);
214
215    TICK_ALLOC_THUNK_2();
216    CCCS_ALLOC(THUNK_2_SIZE);
217    z = Hp - THUNK_2_SIZE + WDS(1);
218    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
219    LDV_RECORD_CREATE(z);
220    StgThunk_payload(z,0) = R2;
221    StgThunk_payload(z,1) = x;
222
223    TICK_ALLOC_THUNK_1();
224    CCCS_ALLOC(THUNK_1_SIZE);
225    y = z - THUNK_1_SIZE;
226    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
227    LDV_RECORD_CREATE(y);
228    StgThunk_payload(y,0) = z;
229
230    StgMutVar_var(R1) = y;
231
232    TICK_ALLOC_THUNK_1();
233    CCCS_ALLOC(THUNK_1_SIZE);
234    r = y - THUNK_1_SIZE;
235    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
236    LDV_RECORD_CREATE(r);
237    StgThunk_payload(r,0) = z;
238
239 #if defined(SMP)
240     foreign "C" RELEASE_LOCK(sm_mutex "ptr") [];
241 #endif
242
243    RET_P(r);
244 }
245
246 /* -----------------------------------------------------------------------------
247    Weak Pointer Primitives
248    -------------------------------------------------------------------------- */
249
250 STRING(stg_weak_msg,"New weak pointer at %p\n")
251
252 mkWeakzh_fast
253 {
254   /* R1 = key
255      R2 = value
256      R3 = finalizer (or NULL)
257   */
258   W_ w;
259
260   if (R3 == NULL) {
261     R3 = stg_NO_FINALIZER_closure;
262   }
263
264   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
265
266   w = Hp - SIZEOF_StgWeak + WDS(1);
267   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
268
269   StgWeak_key(w)       = R1;
270   StgWeak_value(w)     = R2;
271   StgWeak_finalizer(w) = R3;
272
273   StgWeak_link(w)       = W_[weak_ptr_list];
274   W_[weak_ptr_list]     = w;
275
276   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
277
278   RET_P(w);
279 }
280
281
282 finalizzeWeakzh_fast
283 {
284   /* R1 = weak ptr
285    */
286   W_ w, f;
287
288   w = R1;
289
290   // already dead?
291   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
292       RET_NP(0,stg_NO_FINALIZER_closure);
293   }
294
295   // kill it
296 #ifdef PROFILING
297   // @LDV profiling
298   // A weak pointer is inherently used, so we do not need to call
299   // LDV_recordDead_FILL_SLOP_DYNAMIC():
300   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
301   // or, LDV_recordDead():
302   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
303   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
304   // large as weak pointers, so there is no need to fill the slop, either.
305   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
306 #endif
307
308   //
309   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
310   //
311   SET_INFO(w,stg_DEAD_WEAK_info);
312   LDV_RECORD_CREATE(w);
313
314   f = StgWeak_finalizer(w);
315   StgDeadWeak_link(w) = StgWeak_link(w);
316
317   /* return the finalizer */
318   if (f == stg_NO_FINALIZER_closure) {
319       RET_NP(0,stg_NO_FINALIZER_closure);
320   } else {
321       RET_NP(1,f);
322   }
323 }
324
325 deRefWeakzh_fast
326 {
327   /* R1 = weak ptr */
328   W_ w, code, val;
329
330   w = R1;
331   if (GET_INFO(w) == stg_WEAK_info) {
332     code = 1;
333     val = StgWeak_value(w);
334   } else {
335     code = 0;
336     val = w;
337   }
338   RET_NP(code,val);
339 }
340
341 /* -----------------------------------------------------------------------------
342    Arbitrary-precision Integer operations.
343
344    There are some assumptions in this code that mp_limb_t == W_.  This is
345    the case for all the platforms that GHC supports, currently.
346    -------------------------------------------------------------------------- */
347
348 int2Integerzh_fast
349 {
350    /* arguments: R1 = Int# */
351
352    W_ val, s, p;        /* to avoid aliasing */
353
354    val = R1;
355    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
356
357    p = Hp - SIZEOF_StgArrWords;
358    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
359    StgArrWords_words(p) = 1;
360
361    /* mpz_set_si is inlined here, makes things simpler */
362    if (%lt(val,0)) { 
363         s  = -1;
364         Hp(0) = -val;
365    } else { 
366      if (%gt(val,0)) {
367         s = 1;
368         Hp(0) = val;
369      } else {
370         s = 0;
371      }
372   }
373
374    /* returns (# size  :: Int#, 
375                  data  :: ByteArray# 
376                #)
377    */
378    RET_NP(s,p);
379 }
380
381 word2Integerzh_fast
382 {
383    /* arguments: R1 = Word# */
384
385    W_ val, s, p;        /* to avoid aliasing */
386
387    val = R1;
388
389    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
390
391    p = Hp - SIZEOF_StgArrWords;
392    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
393    StgArrWords_words(p) = 1;
394
395    if (val != 0) {
396         s = 1;
397         W_[Hp] = val;
398    } else {
399         s = 0;
400    }
401
402    /* returns (# size  :: Int#, 
403                  data  :: ByteArray# #)
404    */
405    RET_NP(s,p);
406 }
407
408
409 /*
410  * 'long long' primops for converting to/from Integers.
411  */
412
413 #ifdef SUPPORT_LONG_LONGS
414
415 int64ToIntegerzh_fast
416 {
417    /* arguments: L1 = Int64# */
418
419    L_ val;
420    W_ hi, s, neg, words_needed, p;
421
422    val = L1;
423    neg = 0;
424
425    if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) )  { 
426        words_needed = 2;
427    } else { 
428        // minimum is one word
429        words_needed = 1;
430    }
431
432    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
433                NO_PTRS, int64ToIntegerzh_fast );
434
435    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
436    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
437    StgArrWords_words(p) = words_needed;
438
439    if ( %lt(val,0::L_) ) {
440      neg = 1;
441      val = -val;
442    }
443
444    hi = TO_W_(val >> 32);
445
446    if ( words_needed == 2 )  { 
447       s = 2;
448       Hp(-1) = TO_W_(val);
449       Hp(0) = hi;
450    } else { 
451        if ( val != 0::L_ ) {
452            s = 1;
453            Hp(0) = TO_W_(val);
454        } else /* val==0 */  {
455            s = 0;
456        }
457    }
458    if ( neg != 0 ) {
459         s = -s;
460    }
461
462    /* returns (# size  :: Int#, 
463                  data  :: ByteArray# #)
464    */
465    RET_NP(s,p);
466 }
467
468 word64ToIntegerzh_fast
469 {
470    /* arguments: L1 = Word64# */
471
472    L_ val;
473    W_ hi, s, words_needed, p;
474
475    val = L1;
476    if ( val >= 0x100000000::L_ ) {
477       words_needed = 2;
478    } else {
479       words_needed = 1;
480    }
481
482    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
483                NO_PTRS, word64ToIntegerzh_fast );
484
485    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
486    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
487    StgArrWords_words(p) = words_needed;
488
489    hi = TO_W_(val >> 32);
490    if ( val >= 0x100000000::L_ ) { 
491      s = 2;
492      Hp(-1) = TO_W_(val);
493      Hp(0)  = hi;
494    } else {
495       if ( val != 0::L_ ) {
496         s = 1;
497         Hp(0) = TO_W_(val);
498      } else /* val==0 */  {
499       s = 0;
500      }
501   }
502
503    /* returns (# size  :: Int#, 
504                  data  :: ByteArray# #)
505    */
506    RET_NP(s,p);
507 }
508
509
510 #endif /* SUPPORT_LONG_LONGS */
511
512 /* ToDo: this is shockingly inefficient */
513
514 #ifndef SMP
515 section "bss" {
516   mp_tmp1:
517     bits8 [SIZEOF_MP_INT];
518 }
519
520 section "bss" {
521   mp_tmp2:
522     bits8 [SIZEOF_MP_INT];
523 }
524
525 section "bss" {
526   mp_result1:
527     bits8 [SIZEOF_MP_INT];
528 }
529
530 section "bss" {
531   mp_result2:
532     bits8 [SIZEOF_MP_INT];
533 }
534 #endif
535
536 #ifdef SMP
537 #define FETCH_MP_TEMP(X) \
538 W_ X; \
539 X = BaseReg + (OFFSET_StgRegTable_r ## X);
540 #else
541 #define FETCH_MP_TEMP(X) /* Nothing */
542 #endif
543
544 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
545 name                                                                    \
546 {                                                                       \
547   CInt s1, s2;                                                          \
548   W_ d1, d2;                                                            \
549   FETCH_MP_TEMP(mp_tmp1);                                               \
550   FETCH_MP_TEMP(mp_tmp2);                                               \
551   FETCH_MP_TEMP(mp_result1)                                             \
552   FETCH_MP_TEMP(mp_result2);                                            \
553                                                                         \
554   /* call doYouWantToGC() */                                            \
555   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
556                                                                         \
557   s1 = W_TO_INT(R1);                                                    \
558   d1 = R2;                                                              \
559   s2 = W_TO_INT(R3);                                                    \
560   d2 = R4;                                                              \
561                                                                         \
562   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
563   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
564   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
565   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
566   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
567   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
568                                                                         \
569   foreign "C" mpz_init(mp_result1 "ptr") [];                            \
570                                                                         \
571   /* Perform the operation */                                           \
572   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
573                                                                         \
574   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
575          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
576 }
577
578 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
579 name                                                                    \
580 {                                                                       \
581   CInt s1;                                                              \
582   W_ d1;                                                                \
583   FETCH_MP_TEMP(mp_tmp1);                                               \
584   FETCH_MP_TEMP(mp_result1)                                             \
585                                                                         \
586   /* call doYouWantToGC() */                                            \
587   MAYBE_GC(R2_PTR, name);                                               \
588                                                                         \
589   d1 = R2;                                                              \
590   s1 = W_TO_INT(R1);                                                    \
591                                                                         \
592   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
593   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
594   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
595                                                                         \
596   foreign "C" mpz_init(mp_result1 "ptr") [];                            \
597                                                                         \
598   /* Perform the operation */                                           \
599   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
600                                                                         \
601   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
602          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
603 }
604
605 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
606 name                                                                                    \
607 {                                                                                       \
608   CInt s1, s2;                                                                          \
609   W_ d1, d2;                                                                            \
610   FETCH_MP_TEMP(mp_tmp1);                                                               \
611   FETCH_MP_TEMP(mp_tmp2);                                                               \
612   FETCH_MP_TEMP(mp_result1)                                                             \
613   FETCH_MP_TEMP(mp_result2)                                                             \
614                                                                                         \
615   /* call doYouWantToGC() */                                                            \
616   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
617                                                                                         \
618   s1 = W_TO_INT(R1);                                                                    \
619   d1 = R2;                                                                              \
620   s2 = W_TO_INT(R3);                                                                    \
621   d2 = R4;                                                                              \
622                                                                                         \
623   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
624   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
625   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
626   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
627   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
628   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
629                                                                                         \
630   foreign "C" mpz_init(mp_result1 "ptr") [];                                               \
631   foreign "C" mpz_init(mp_result2 "ptr") [];                                               \
632                                                                                         \
633   /* Perform the operation */                                                           \
634   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
635                                                                                         \
636   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
637            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
638            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
639            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
640 }
641
642 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add)
643 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub)
644 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul)
645 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd)
646 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q)
647 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r)
648 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact)
649 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and)
650 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior)
651 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor)
652 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com)
653
654 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr)
655 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr)
656
657 #ifndef SMP
658 section "bss" {
659   mp_tmp_w:  W_; // NB. mp_tmp_w is really an here mp_limb_t
660 }
661 #endif
662
663 gcdIntzh_fast
664 {
665     /* R1 = the first Int#; R2 = the second Int# */
666     W_ r; 
667     FETCH_MP_TEMP(mp_tmp_w);
668
669     W_[mp_tmp_w] = R1;
670     r = foreign "C" mpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
671
672     R1 = r;
673     /* Result parked in R1, return via info-pointer at TOS */
674     jump %ENTRY_CODE(Sp(0));
675 }
676
677
678 gcdIntegerIntzh_fast
679 {
680     /* R1 = s1; R2 = d1; R3 = the int */
681     R1 = foreign "C" mpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
682     
683     /* Result parked in R1, return via info-pointer at TOS */
684     jump %ENTRY_CODE(Sp(0));
685 }
686
687
688 cmpIntegerIntzh_fast
689 {
690     /* R1 = s1; R2 = d1; R3 = the int */
691     W_ usize, vsize, v_digit, u_digit;
692
693     usize = R1;
694     vsize = 0;
695     v_digit = R3;
696
697     // paraphrased from mpz_cmp_si() in the GMP sources
698     if (%gt(v_digit,0)) {
699         vsize = 1;
700     } else { 
701         if (%lt(v_digit,0)) {
702             vsize = -1;
703             v_digit = -v_digit;
704         }
705     }
706
707     if (usize != vsize) {
708         R1 = usize - vsize; 
709         jump %ENTRY_CODE(Sp(0));
710     }
711
712     if (usize == 0) {
713         R1 = 0; 
714         jump %ENTRY_CODE(Sp(0));
715     }
716
717     u_digit = W_[BYTE_ARR_CTS(R2)];
718
719     if (u_digit == v_digit) {
720         R1 = 0; 
721         jump %ENTRY_CODE(Sp(0));
722     }
723
724     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
725         R1 = usize; 
726     } else {
727         R1 = -usize; 
728     }
729
730     jump %ENTRY_CODE(Sp(0));
731 }
732
733 cmpIntegerzh_fast
734 {
735     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
736     W_ usize, vsize, size, up, vp;
737     CInt cmp;
738
739     // paraphrased from mpz_cmp() in the GMP sources
740     usize = R1;
741     vsize = R3;
742
743     if (usize != vsize) {
744         R1 = usize - vsize; 
745         jump %ENTRY_CODE(Sp(0));
746     }
747
748     if (usize == 0) {
749         R1 = 0; 
750         jump %ENTRY_CODE(Sp(0));
751     }
752
753     if (%lt(usize,0)) { // NB. not <, which is unsigned
754         size = -usize;
755     } else {
756         size = usize;
757     }
758
759     up = BYTE_ARR_CTS(R2);
760     vp = BYTE_ARR_CTS(R4);
761
762     cmp = foreign "C" mpn_cmp(up "ptr", vp "ptr", size) [];
763
764     if (cmp == 0 :: CInt) {
765         R1 = 0; 
766         jump %ENTRY_CODE(Sp(0));
767     }
768
769     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
770         R1 = 1;
771     } else {
772         R1 = (-1); 
773     }
774     /* Result parked in R1, return via info-pointer at TOS */
775     jump %ENTRY_CODE(Sp(0));
776 }
777
778 integer2Intzh_fast
779 {
780     /* R1 = s; R2 = d */
781     W_ r, s;
782
783     s = R1;
784     if (s == 0) {
785         r = 0;
786     } else {
787         r = W_[R2 + SIZEOF_StgArrWords];
788         if (%lt(s,0)) {
789             r = -r;
790         }
791     }
792     /* Result parked in R1, return via info-pointer at TOS */
793     R1 = r;
794     jump %ENTRY_CODE(Sp(0));
795 }
796
797 integer2Wordzh_fast
798 {
799   /* R1 = s; R2 = d */
800   W_ r, s;
801
802   s = R1;
803   if (s == 0) {
804     r = 0;
805   } else {
806     r = W_[R2 + SIZEOF_StgArrWords];
807     if (%lt(s,0)) {
808         r = -r;
809     }
810   }
811   /* Result parked in R1, return via info-pointer at TOS */
812   R1 = r;
813   jump %ENTRY_CODE(Sp(0));
814 }
815
816 decodeFloatzh_fast
817
818     W_ p;
819     F_ arg;
820     FETCH_MP_TEMP(mp_tmp1);
821     FETCH_MP_TEMP(mp_tmp_w);
822     
823     /* arguments: F1 = Float# */
824     arg = F1;
825     
826     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
827     
828     /* Be prepared to tell Lennart-coded __decodeFloat
829        where mantissa._mp_d can be put (it does not care about the rest) */
830     p = Hp - SIZEOF_StgArrWords;
831     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
832     StgArrWords_words(p) = 1;
833     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
834     
835     /* Perform the operation */
836     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
837     
838     /* returns: (Int# (expn), Int#, ByteArray#) */
839     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
840 }
841
842 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
843 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
844
845 decodeDoublezh_fast
846
847     D_ arg;
848     W_ p;
849     FETCH_MP_TEMP(mp_tmp1);
850     FETCH_MP_TEMP(mp_tmp_w);
851
852     /* arguments: D1 = Double# */
853     arg = D1;
854
855     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
856     
857     /* Be prepared to tell Lennart-coded __decodeDouble
858        where mantissa.d can be put (it does not care about the rest) */
859     p = Hp - ARR_SIZE + WDS(1);
860     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
861     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
862     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
863
864     /* Perform the operation */
865     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
866     
867     /* returns: (Int# (expn), Int#, ByteArray#) */
868     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
869 }
870
871 /* -----------------------------------------------------------------------------
872  * Concurrency primitives
873  * -------------------------------------------------------------------------- */
874
875 forkzh_fast
876 {
877   /* args: R1 = closure to spark */
878   
879   MAYBE_GC(R1_PTR, forkzh_fast);
880
881   // create it right now, return ThreadID in R1
882   "ptr" R1 = foreign "C" createIOThread( MyCapability() "ptr", 
883                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
884                                 R1 "ptr");
885   foreign "C" scheduleThread(MyCapability() "ptr", R1 "ptr");
886
887   // switch at the earliest opportunity
888   CInt[context_switch] = 1 :: CInt;
889   
890   RET_P(R1);
891 }
892
893 yieldzh_fast
894 {
895   jump stg_yield_noregs;
896 }
897
898 myThreadIdzh_fast
899 {
900   /* no args. */
901   RET_P(CurrentTSO);
902 }
903
904 labelThreadzh_fast
905 {
906   /* args: 
907         R1 = ThreadId#
908         R2 = Addr# */
909 #ifdef DEBUG
910   foreign "C" labelThread(R1 "ptr", R2 "ptr");
911 #endif
912   jump %ENTRY_CODE(Sp(0));
913 }
914
915 isCurrentThreadBoundzh_fast
916 {
917   /* no args */
918   W_ r;
919   r = foreign "C" isThreadBound(CurrentTSO) [];
920   RET_N(r);
921 }
922
923
924 /* -----------------------------------------------------------------------------
925  * TVar primitives
926  * -------------------------------------------------------------------------- */
927
928 #ifdef REG_R1
929 #define SP_OFF 0
930 #define IF_NOT_REG_R1(x) 
931 #else
932 #define SP_OFF 1
933 #define IF_NOT_REG_R1(x) x
934 #endif
935
936 // Catch retry frame ------------------------------------------------------------
937
938 #define CATCH_RETRY_FRAME_ERROR(label) \
939   label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }
940
941 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
942 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
943 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
944 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
945 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
946 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
947 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
948 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)
949
950 #if MAX_VECTORED_RTN > 8
951 #error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
952 #endif
953
954 #if defined(PROFILING)
955 #define CATCH_RETRY_FRAME_BITMAP 7
956 #define CATCH_RETRY_FRAME_WORDS  6
957 #else
958 #define CATCH_RETRY_FRAME_BITMAP 1
959 #define CATCH_RETRY_FRAME_WORDS  4
960 #endif
961
962 INFO_TABLE_RET(stg_catch_retry_frame,
963                CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
964                CATCH_RETRY_FRAME,
965                stg_catch_retry_frame_0_ret,
966                stg_catch_retry_frame_1_ret,
967                stg_catch_retry_frame_2_ret,
968                stg_catch_retry_frame_3_ret,
969                stg_catch_retry_frame_4_ret,
970                stg_catch_retry_frame_5_ret,
971                stg_catch_retry_frame_6_ret,
972                stg_catch_retry_frame_7_ret)
973 {
974    W_ r, frame, trec, outer;
975    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
976
977    frame = Sp;
978    trec = StgTSO_trec(CurrentTSO);
979    "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
980    r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
981    if (r) {
982      /* Succeeded (either first branch or second branch) */
983      StgTSO_trec(CurrentTSO) = outer;
984      Sp = Sp + SIZEOF_StgCatchRetryFrame;
985      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
986      jump %ENTRY_CODE(Sp(SP_OFF));
987    } else {
988      /* Did not commit: retry */
989      W_ new_trec;
990      "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
991      StgTSO_trec(CurrentTSO) = new_trec;
992      if (StgCatchRetryFrame_running_alt_code(frame)) {
993        R1 = StgCatchRetryFrame_alt_code(frame);
994      } else {
995        R1 = StgCatchRetryFrame_first_code(frame);
996        StgCatchRetryFrame_first_code_trec(frame) = new_trec;
997      }
998      Sp_adj(-1);
999      jump RET_LBL(stg_ap_v);
1000    }
1001 }
1002
1003
1004 // Atomically frame -------------------------------------------------------------
1005
1006
1007 #define ATOMICALLY_FRAME_ERROR(label) \
1008   label { foreign "C" barf("atomically_frame incorrectly entered!"); }
1009
1010 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
1011 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
1012 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
1013 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
1014 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
1015 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
1016 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
1017 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)
1018
1019 #if MAX_VECTORED_RTN > 8
1020 #error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
1021 #endif
1022
1023 #if defined(PROFILING)
1024 #define ATOMICALLY_FRAME_BITMAP 3
1025 #define ATOMICALLY_FRAME_WORDS  3
1026 #else
1027 #define ATOMICALLY_FRAME_BITMAP 0
1028 #define ATOMICALLY_FRAME_WORDS  1
1029 #endif
1030
1031
1032 INFO_TABLE_RET(stg_atomically_frame,
1033                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1034                ATOMICALLY_FRAME,
1035                stg_atomically_frame_0_ret,
1036                stg_atomically_frame_1_ret,
1037                stg_atomically_frame_2_ret,
1038                stg_atomically_frame_3_ret,
1039                stg_atomically_frame_4_ret,
1040                stg_atomically_frame_5_ret,
1041                stg_atomically_frame_6_ret,
1042                stg_atomically_frame_7_ret)
1043 {
1044   W_ frame, trec, valid;
1045   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1046
1047   frame = Sp;
1048   trec = StgTSO_trec(CurrentTSO);
1049
1050   /* The TSO is not currently waiting: try to commit the transaction */
1051   valid = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr");
1052   if (valid) {
1053     /* Transaction was valid: commit succeeded */
1054     StgTSO_trec(CurrentTSO) = NO_TREC;
1055     Sp = Sp + SIZEOF_StgAtomicallyFrame;
1056     IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1057     jump %ENTRY_CODE(Sp(SP_OFF));
1058   } else {
1059     /* Transaction was not valid: try again */
1060     "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr");
1061     StgTSO_trec(CurrentTSO) = trec;
1062     R1 = StgAtomicallyFrame_code(frame);
1063     Sp_adj(-1);
1064     jump RET_LBL(stg_ap_v);
1065   }
1066 }
1067
1068 INFO_TABLE_RET(stg_atomically_waiting_frame,
1069                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1070                ATOMICALLY_FRAME,
1071                stg_atomically_frame_0_ret,
1072                stg_atomically_frame_1_ret,
1073                stg_atomically_frame_2_ret,
1074                stg_atomically_frame_3_ret,
1075                stg_atomically_frame_4_ret,
1076                stg_atomically_frame_5_ret,
1077                stg_atomically_frame_6_ret,
1078                stg_atomically_frame_7_ret)
1079 {
1080   W_ frame, trec, valid;
1081   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1082
1083   frame = Sp;
1084
1085   /* The TSO is currently waiting: should we stop waiting? */
1086   valid = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr");
1087   if (valid) {
1088     /* Previous attempt is still valid: no point trying again yet */
1089           IF_NOT_REG_R1(Sp_adj(-2);
1090                         Sp(1) = stg_NO_FINALIZER_closure;
1091                         Sp(0) = stg_ut_1_0_unreg_info;)
1092     jump stg_block_noregs;
1093   } else {
1094     /* Previous attempt is no longer valid: try again */
1095     "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr");
1096     StgTSO_trec(CurrentTSO) = trec;
1097     StgHeader_info(frame) = stg_atomically_frame_info;
1098     R1 = StgAtomicallyFrame_code(frame);
1099     Sp_adj(-1);
1100     jump RET_LBL(stg_ap_v);
1101   }
1102 }
1103
1104 // STM catch frame --------------------------------------------------------------
1105
1106 #define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)          \
1107    label                                                   \
1108    {                                                       \
1109       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )  \
1110       Sp = Sp + SIZEOF_StgCatchSTMFrame;                   \
1111       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)             \
1112       jump ret;                                            \
1113    }
1114
1115 #ifdef REG_R1
1116 #define SP_OFF 0
1117 #else
1118 #define SP_OFF 1
1119 #endif
1120
1121 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
1122 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
1123 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
1124 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
1125 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
1126 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
1127 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
1128 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
1129
1130 #if MAX_VECTORED_RTN > 8
1131 #error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
1132 #endif
1133
1134 #if defined(PROFILING)
1135 #define CATCH_STM_FRAME_BITMAP 3
1136 #define CATCH_STM_FRAME_WORDS  3
1137 #else
1138 #define CATCH_STM_FRAME_BITMAP 0
1139 #define CATCH_STM_FRAME_WORDS  1
1140 #endif
1141
1142 /* Catch frames are very similar to update frames, but when entering
1143  * one we just pop the frame off the stack and perform the correct
1144  * kind of return to the activation record underneath us on the stack.
1145  */
1146
1147 INFO_TABLE_RET(stg_catch_stm_frame,
1148                CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
1149                CATCH_STM_FRAME,
1150                stg_catch_stm_frame_0_ret,
1151                stg_catch_stm_frame_1_ret,
1152                stg_catch_stm_frame_2_ret,
1153                stg_catch_stm_frame_3_ret,
1154                stg_catch_stm_frame_4_ret,
1155                stg_catch_stm_frame_5_ret,
1156                stg_catch_stm_frame_6_ret,
1157                stg_catch_stm_frame_7_ret)
1158 CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1159
1160
1161 // Primop definition ------------------------------------------------------------
1162
1163 atomicallyzh_fast
1164 {
1165   W_ frame;
1166   W_ old_trec;
1167   W_ new_trec;
1168   
1169   // stmStartTransaction may allocate
1170   MAYBE_GC (R1_PTR, atomicallyzh_fast); 
1171
1172   /* Args: R1 = m :: STM a */
1173   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1174
1175   old_trec = StgTSO_trec(CurrentTSO);
1176
1177   /* Nested transactions are not allowed; raise an exception */
1178   if (old_trec != NO_TREC) {
1179      R1 = GHCziIOBase_NestedAtomically_closure;
1180      jump raisezh_fast;
1181   }
1182
1183   /* Set up the atomically frame */
1184   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1185   frame = Sp;
1186
1187   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1188   StgAtomicallyFrame_code(frame) = R1;
1189
1190   /* Start the memory transcation */
1191   "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr");
1192   StgTSO_trec(CurrentTSO) = new_trec;
1193
1194   /* Apply R1 to the realworld token */
1195   Sp_adj(-1);
1196   jump RET_LBL(stg_ap_v);
1197 }
1198
1199
1200 catchSTMzh_fast
1201 {
1202   W_ frame;
1203   
1204   /* Args: R1 :: STM a */
1205   /* Args: R2 :: Exception -> STM a */
1206   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1207
1208   /* Set up the catch frame */
1209   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1210   frame = Sp;
1211
1212   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1213   StgCatchSTMFrame_handler(frame) = R2;
1214
1215   /* Apply R1 to the realworld token */
1216   Sp_adj(-1);
1217   jump RET_LBL(stg_ap_v);
1218 }
1219
1220
1221 catchRetryzh_fast
1222 {
1223   W_ frame;
1224   W_ new_trec;
1225   W_ trec;
1226
1227   // stmStartTransaction may allocate
1228   MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 
1229
1230   /* Args: R1 :: STM a */
1231   /* Args: R2 :: STM a */
1232   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1233
1234   /* Start a nested transaction within which to run the first code */
1235   trec = StgTSO_trec(CurrentTSO);
1236   "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr");
1237   StgTSO_trec(CurrentTSO) = new_trec;
1238
1239   /* Set up the catch-retry frame */
1240   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1241   frame = Sp;
1242   
1243   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1244   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1245   StgCatchRetryFrame_first_code(frame) = R1;
1246   StgCatchRetryFrame_alt_code(frame) = R2;
1247   StgCatchRetryFrame_first_code_trec(frame) = new_trec;
1248
1249   /* Apply R1 to the realworld token */
1250   Sp_adj(-1);
1251   jump RET_LBL(stg_ap_v);  
1252 }
1253
1254
1255 retryzh_fast
1256 {
1257   W_ frame_type;
1258   W_ frame;
1259   W_ trec;
1260   W_ outer;
1261   W_ r;
1262
1263   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1264
1265   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1266 retry_pop_stack:
1267   trec = StgTSO_trec(CurrentTSO);
1268   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
1269   StgTSO_sp(CurrentTSO) = Sp;
1270   frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr");
1271   Sp = StgTSO_sp(CurrentTSO);
1272   frame = Sp;
1273
1274   if (frame_type == CATCH_RETRY_FRAME) {
1275     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1276     ASSERT(outer != NO_TREC);
1277     if (!StgCatchRetryFrame_running_alt_code(frame)) {
1278       // Retry in the first code: try the alternative
1279       "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr");
1280       StgTSO_trec(CurrentTSO) = trec;
1281       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1282       R1 = StgCatchRetryFrame_alt_code(frame);
1283       Sp_adj(-1);
1284       jump RET_LBL(stg_ap_v);
1285     } else {
1286       // Retry in the alternative code: propagate
1287       W_ other_trec;
1288       other_trec = StgCatchRetryFrame_first_code_trec(frame);
1289       r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", other_trec "ptr");
1290       if (r) {
1291         r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr");
1292       } else {
1293         foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr");
1294       }
1295       if (r) {
1296         // Merge between siblings succeeded: commit it back to enclosing transaction
1297         // and then propagate the retry
1298         StgTSO_trec(CurrentTSO) = outer;
1299         Sp = Sp + SIZEOF_StgCatchRetryFrame;
1300         goto retry_pop_stack;
1301       } else {
1302         // Merge failed: we musn't propagate the retry.  Try both paths again.
1303         "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr");
1304         StgCatchRetryFrame_first_code_trec(frame) = trec;
1305         StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1306         StgTSO_trec(CurrentTSO) = trec;
1307         R1 = StgCatchRetryFrame_first_code(frame);
1308         Sp_adj(-1);
1309         jump RET_LBL(stg_ap_v);
1310       }
1311     }
1312   }
1313
1314   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1315   ASSERT(frame_type == ATOMICALLY_FRAME);
1316   ASSERT(outer == NO_TREC);
1317   r = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr");
1318   if (r) {
1319     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1320     StgHeader_info(frame) = stg_atomically_waiting_frame_info;
1321     Sp = frame;
1322     // Fix up the stack in the unregisterised case: the return convention is different.
1323     IF_NOT_REG_R1(Sp_adj(-2); 
1324                   Sp(1) = stg_NO_FINALIZER_closure;
1325                   Sp(0) = stg_ut_1_0_unreg_info;)
1326     R3 = trec; // passing to stmWaitUnblock()
1327     jump stg_block_stmwait;
1328   } else {
1329     // Transaction was not valid: retry immediately
1330     "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr");
1331     StgTSO_trec(CurrentTSO) = trec;
1332     R1 = StgAtomicallyFrame_code(frame);
1333     Sp = frame;
1334     Sp_adj(-1);
1335     jump RET_LBL(stg_ap_v);
1336   }
1337 }
1338
1339
1340 newTVarzh_fast
1341 {
1342   W_ tv;
1343   W_ new_value;
1344
1345   /* Args: R1 = initialisation value */
1346
1347   MAYBE_GC (R1_PTR, newTVarzh_fast); 
1348   new_value = R1;
1349   "ptr" tv = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr");
1350   RET_P(tv);
1351 }
1352
1353
1354 readTVarzh_fast
1355 {
1356   W_ trec;
1357   W_ tvar;
1358   W_ result;
1359
1360   /* Args: R1 = TVar closure */
1361
1362   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1363   trec = StgTSO_trec(CurrentTSO);
1364   tvar = R1;
1365   "ptr" result = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
1366
1367   RET_P(result);
1368 }
1369
1370
1371 writeTVarzh_fast
1372 {
1373   W_ trec;
1374   W_ tvar;
1375   W_ new_value;
1376   
1377   /* Args: R1 = TVar closure */
1378   /*       R2 = New value    */
1379
1380   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1381   trec = StgTSO_trec(CurrentTSO);
1382   tvar = R1;
1383   new_value = R2;
1384   foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1385
1386   jump %ENTRY_CODE(Sp(0));
1387 }
1388
1389
1390 /* -----------------------------------------------------------------------------
1391  * MVar primitives
1392  *
1393  * take & putMVar work as follows.  Firstly, an important invariant:
1394  *
1395  *    If the MVar is full, then the blocking queue contains only
1396  *    threads blocked on putMVar, and if the MVar is empty then the
1397  *    blocking queue contains only threads blocked on takeMVar.
1398  *
1399  * takeMvar:
1400  *    MVar empty : then add ourselves to the blocking queue
1401  *    MVar full  : remove the value from the MVar, and
1402  *                 blocking queue empty     : return
1403  *                 blocking queue non-empty : perform the first blocked putMVar
1404  *                                            from the queue, and wake up the
1405  *                                            thread (MVar is now full again)
1406  *
1407  * putMVar is just the dual of the above algorithm.
1408  *
1409  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1410  * the stack of the thread waiting to do the putMVar.  See
1411  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1412  * the stack layout, and the PerformPut and PerformTake macros below.
1413  *
1414  * It is important that a blocked take or put is woken up with the
1415  * take/put already performed, because otherwise there would be a
1416  * small window of vulnerability where the thread could receive an
1417  * exception and never perform its take or put, and we'd end up with a
1418  * deadlock.
1419  *
1420  * -------------------------------------------------------------------------- */
1421
1422 isEmptyMVarzh_fast
1423 {
1424     /* args: R1 = MVar closure */
1425
1426     if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
1427         RET_N(1);
1428     } else {
1429         RET_N(0);
1430     }
1431 }
1432
1433 newMVarzh_fast
1434 {
1435     /* args: none */
1436     W_ mvar;
1437
1438     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1439   
1440     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1441     SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
1442     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1443     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1444     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1445     RET_P(mvar);
1446 }
1447
1448
1449 /* If R1 isn't available, pass it on the stack */
1450 #ifdef REG_R1
1451 #define PerformTake(tso, value)                         \
1452     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1453     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1454 #else
1455 #define PerformTake(tso, value)                                 \
1456     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1457     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1458 #endif
1459
1460 #define PerformPut(tso,lval)                    \
1461     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1462     lval = W_[StgTSO_sp(tso) - WDS(1)];
1463
1464 takeMVarzh_fast
1465 {
1466     W_ mvar, val, info, tso;
1467
1468     /* args: R1 = MVar closure */
1469     mvar = R1;
1470
1471 #if defined(SMP)
1472     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1473 #else
1474     info = GET_INFO(mvar);
1475 #endif
1476
1477     /* If the MVar is empty, put ourselves on its blocking queue,
1478      * and wait until we're woken up.
1479      */
1480     if (info == stg_EMPTY_MVAR_info) {
1481         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1482             StgMVar_head(mvar) = CurrentTSO;
1483         } else {
1484             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1485         }
1486         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1487         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1488         StgTSO_block_info(CurrentTSO)  = mvar;
1489         StgMVar_tail(mvar) = CurrentTSO;
1490         
1491         jump stg_block_takemvar;
1492   }
1493
1494   /* we got the value... */
1495   val = StgMVar_value(mvar);
1496
1497   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1498   {
1499       /* There are putMVar(s) waiting... 
1500        * wake up the first thread on the queue
1501        */
1502       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1503
1504       /* actually perform the putMVar for the thread that we just woke up */
1505       tso = StgMVar_head(mvar);
1506       PerformPut(tso,StgMVar_value(mvar));
1507
1508 #if defined(GRAN) || defined(PAR)
1509       /* ToDo: check 2nd arg (mvar) is right */
1510       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
1511       StgMVar_head(mvar) = tso;
1512 #else
1513       "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", 
1514                                          StgMVar_head(mvar) "ptr") [];
1515       StgMVar_head(mvar) = tso;
1516 #endif
1517
1518       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1519           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1520       }
1521
1522 #if defined(SMP)
1523       foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1524 #endif
1525       RET_P(val);
1526   } 
1527   else
1528   {
1529       /* No further putMVars, MVar is now empty */
1530       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1531  
1532 #if defined(SMP)
1533       foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1534 #else
1535       SET_INFO(mvar,stg_EMPTY_MVAR_info);
1536 #endif
1537
1538       RET_P(val);
1539   }
1540 }
1541
1542
1543 tryTakeMVarzh_fast
1544 {
1545     W_ mvar, val, info, tso;
1546
1547     /* args: R1 = MVar closure */
1548
1549     mvar = R1;
1550
1551 #if defined(SMP)
1552     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1553 #else
1554     info = GET_INFO(mvar);
1555 #endif
1556
1557     if (info == stg_EMPTY_MVAR_info) {
1558 #if defined(SMP)
1559         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1560 #endif
1561         /* HACK: we need a pointer to pass back, 
1562          * so we abuse NO_FINALIZER_closure
1563          */
1564         RET_NP(0, stg_NO_FINALIZER_closure);
1565     }
1566
1567     /* we got the value... */
1568     val = StgMVar_value(mvar);
1569
1570     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1571
1572         /* There are putMVar(s) waiting... 
1573          * wake up the first thread on the queue
1574          */
1575         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1576
1577         /* actually perform the putMVar for the thread that we just woke up */
1578         tso = StgMVar_head(mvar);
1579         PerformPut(tso,StgMVar_value(mvar));
1580
1581 #if defined(GRAN) || defined(PAR)
1582         /* ToDo: check 2nd arg (mvar) is right */
1583         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
1584         StgMVar_head(mvar) = tso;
1585 #else
1586         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr",
1587                                            StgMVar_head(mvar) "ptr") [];
1588         StgMVar_head(mvar) = tso;
1589 #endif
1590
1591         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1592             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1593         }
1594 #if defined(SMP)
1595         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1596 #endif
1597     }
1598     else 
1599     {
1600         /* No further putMVars, MVar is now empty */
1601         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1602 #if defined(SMP)
1603         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1604 #else
1605         SET_INFO(mvar,stg_EMPTY_MVAR_info);
1606 #endif
1607     }
1608     
1609     RET_NP(1, val);
1610 }
1611
1612
1613 putMVarzh_fast
1614 {
1615     W_ mvar, info, tso;
1616
1617     /* args: R1 = MVar, R2 = value */
1618     mvar = R1;
1619
1620 #if defined(SMP)
1621     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1622 #else
1623     info = GET_INFO(mvar);
1624 #endif
1625
1626     if (info == stg_FULL_MVAR_info) {
1627         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1628             StgMVar_head(mvar) = CurrentTSO;
1629         } else {
1630             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1631         }
1632         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1633         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1634         StgTSO_block_info(CurrentTSO)  = mvar;
1635         StgMVar_tail(mvar) = CurrentTSO;
1636         
1637         jump stg_block_putmvar;
1638     }
1639   
1640     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1641
1642         /* There are takeMVar(s) waiting: wake up the first one
1643          */
1644         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1645
1646         /* actually perform the takeMVar */
1647         tso = StgMVar_head(mvar);
1648         PerformTake(tso, R2);
1649       
1650 #if defined(GRAN) || defined(PAR)
1651         /* ToDo: check 2nd arg (mvar) is right */
1652         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
1653         StgMVar_head(mvar) = tso;
1654 #else
1655         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
1656         StgMVar_head(mvar) = tso;
1657 #endif
1658
1659         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1660             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1661         }
1662
1663 #if defined(SMP)
1664         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1665 #endif
1666         jump %ENTRY_CODE(Sp(0));
1667     }
1668     else
1669     {
1670         /* No further takes, the MVar is now full. */
1671         StgMVar_value(mvar) = R2;
1672
1673 #if defined(SMP)
1674         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1675 #else
1676         SET_INFO(mvar,stg_FULL_MVAR_info);
1677 #endif
1678         jump %ENTRY_CODE(Sp(0));
1679     }
1680     
1681     /* ToDo: yield afterward for better communication performance? */
1682 }
1683
1684
1685 tryPutMVarzh_fast
1686 {
1687     W_ mvar, info, tso;
1688
1689     /* args: R1 = MVar, R2 = value */
1690     mvar = R1;
1691
1692 #if defined(SMP)
1693     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1694 #else
1695     info = GET_INFO(mvar);
1696 #endif
1697
1698     if (info == stg_FULL_MVAR_info) {
1699 #if defined(SMP)
1700         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1701 #endif
1702         RET_N(0);
1703     }
1704   
1705     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1706
1707         /* There are takeMVar(s) waiting: wake up the first one
1708          */
1709         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1710         
1711         /* actually perform the takeMVar */
1712         tso = StgMVar_head(mvar);
1713         PerformTake(tso, R2);
1714       
1715 #if defined(GRAN) || defined(PAR)
1716         /* ToDo: check 2nd arg (mvar) is right */
1717         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
1718         StgMVar_head(mvar) = tso;
1719 #else
1720         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
1721         StgMVar_head(mvar) = tso;
1722 #endif
1723
1724         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1725             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1726         }
1727
1728 #if defined(SMP)
1729         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1730 #endif
1731         jump %ENTRY_CODE(Sp(0));
1732     }
1733     else
1734     {
1735         /* No further takes, the MVar is now full. */
1736         StgMVar_value(mvar) = R2;
1737
1738 #if defined(SMP)
1739         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1740 #else
1741         SET_INFO(mvar,stg_FULL_MVAR_info);
1742 #endif
1743         jump %ENTRY_CODE(Sp(0));
1744     }
1745     
1746     /* ToDo: yield afterward for better communication performance? */
1747 }
1748
1749
1750 /* -----------------------------------------------------------------------------
1751    Stable pointer primitives
1752    -------------------------------------------------------------------------  */
1753
1754 makeStableNamezh_fast
1755 {
1756     W_ index, sn_obj;
1757
1758     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1759   
1760     index = foreign "C" lookupStableName(R1 "ptr") [];
1761
1762     /* Is there already a StableName for this heap object?
1763      *  stable_ptr_table is a pointer to an array of snEntry structs.
1764      */
1765     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1766         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1767         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1768         StgStableName_sn(sn_obj) = index;
1769         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1770     } else {
1771         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1772     }
1773     
1774     RET_P(sn_obj);
1775 }
1776
1777
1778 makeStablePtrzh_fast
1779 {
1780     /* Args: R1 = a */
1781     W_ sp;
1782     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1783     "ptr" sp = foreign "C" getStablePtr(R1 "ptr") [];
1784     RET_N(sp);
1785 }
1786
1787 deRefStablePtrzh_fast
1788 {
1789     /* Args: R1 = the stable ptr */
1790     W_ r, sp;
1791     sp = R1;
1792     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1793     RET_P(r);
1794 }
1795
1796 /* -----------------------------------------------------------------------------
1797    Bytecode object primitives
1798    -------------------------------------------------------------------------  */
1799
1800 newBCOzh_fast
1801 {
1802     /* R1 = instrs
1803        R2 = literals
1804        R3 = ptrs
1805        R4 = itbls
1806        R5 = arity
1807        R6 = bitmap array
1808     */
1809     W_ bco, bitmap_arr, bytes, words;
1810     
1811     bitmap_arr = R6;
1812     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1813     bytes = WDS(words);
1814
1815     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );
1816
1817     bco = Hp - bytes + WDS(1);
1818     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1819     
1820     StgBCO_instrs(bco)     = R1;
1821     StgBCO_literals(bco)   = R2;
1822     StgBCO_ptrs(bco)       = R3;
1823     StgBCO_itbls(bco)      = R4;
1824     StgBCO_arity(bco)      = HALF_W_(R5);
1825     StgBCO_size(bco)       = HALF_W_(words);
1826     
1827     // Copy the arity/bitmap info into the BCO
1828     W_ i;
1829     i = 0;
1830 for:
1831     if (i < StgArrWords_words(bitmap_arr)) {
1832         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1833         i = i + 1;
1834         goto for;
1835     }
1836     
1837     RET_P(bco);
1838 }
1839
1840
1841 mkApUpd0zh_fast
1842 {
1843     // R1 = the BCO# for the AP
1844     //  
1845     W_ ap;
1846
1847     // This function is *only* used to wrap zero-arity BCOs in an
1848     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1849     // saturated and always points directly to a FUN or BCO.
1850     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1851            StgBCO_arity(R1) == HALF_W_(0));
1852
1853     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1854     TICK_ALLOC_UP_THK(0, 0);
1855     CCCS_ALLOC(SIZEOF_StgAP);
1856
1857     ap = Hp - SIZEOF_StgAP + WDS(1);
1858     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1859     
1860     StgAP_n_args(ap) = HALF_W_(0);
1861     StgAP_fun(ap) = R1;
1862     
1863     RET_P(ap);
1864 }
1865
1866 /* -----------------------------------------------------------------------------
1867    Thread I/O blocking primitives
1868    -------------------------------------------------------------------------- */
1869
1870 /* Add a thread to the end of the blocked queue. (C-- version of the C
1871  * macro in Schedule.h).
1872  */
1873 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1874     ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
1875     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1876       W_[blocked_queue_hd] = tso;                       \
1877     } else {                                            \
1878       StgTSO_link(W_[blocked_queue_tl]) = tso;          \
1879     }                                                   \
1880     W_[blocked_queue_tl] = tso;
1881
1882 waitReadzh_fast
1883 {
1884     /* args: R1 */
1885 #ifdef THREADED_RTS
1886     foreign "C" barf("waitRead# on threaded RTS");
1887 #else
1888
1889     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1890     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1891     StgTSO_block_info(CurrentTSO) = R1;
1892     // No locking - we're not going to use this interface in the
1893     // threaded RTS anyway.
1894     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1895     jump stg_block_noregs;
1896 #endif
1897 }
1898
1899 waitWritezh_fast
1900 {
1901     /* args: R1 */
1902 #ifdef THREADED_RTS
1903     foreign "C" barf("waitWrite# on threaded RTS");
1904 #else
1905
1906     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1907     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1908     StgTSO_block_info(CurrentTSO) = R1;
1909     // No locking - we're not going to use this interface in the
1910     // threaded RTS anyway.
1911     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1912     jump stg_block_noregs;
1913 #endif
1914 }
1915
1916
1917 STRING(stg_delayzh_malloc_str, "delayzh_fast")
1918 delayzh_fast
1919 {
1920 #ifdef mingw32_HOST_OS
1921     W_ ares;
1922     CInt reqID;
1923 #else
1924     W_ t, prev, target;
1925 #endif
1926
1927 #ifdef THREADED_RTS
1928     foreign "C" barf("delay# on threaded RTS");
1929 #else
1930
1931     /* args: R1 (microsecond delay amount) */
1932     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1933     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
1934
1935 #ifdef mingw32_HOST_OS
1936
1937     /* could probably allocate this on the heap instead */
1938     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1939                                             stg_delayzh_malloc_str);
1940     reqID = foreign "C" addDelayRequest(R1);
1941     StgAsyncIOResult_reqID(ares)   = reqID;
1942     StgAsyncIOResult_len(ares)     = 0;
1943     StgAsyncIOResult_errCode(ares) = 0;
1944     StgTSO_block_info(CurrentTSO)  = ares;
1945
1946     /* Having all async-blocked threads reside on the blocked_queue
1947      * simplifies matters, so change the status to OnDoProc put the
1948      * delayed thread on the blocked_queue.
1949      */
1950     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1951     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1952     jump stg_block_async_void;
1953
1954 #else
1955
1956     W_ time;
1957     time = foreign "C" getourtimeofday();
1958     target = (R1 / (TICK_MILLISECS*1000)) + time;
1959     StgTSO_block_info(CurrentTSO) = target;
1960
1961     /* Insert the new thread in the sleeping queue. */
1962     prev = NULL;
1963     t = W_[sleeping_queue];
1964 while:
1965     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
1966         prev = t;
1967         t = StgTSO_link(t);
1968         goto while;
1969     }
1970
1971     StgTSO_link(CurrentTSO) = t;
1972     if (prev == NULL) {
1973         W_[sleeping_queue] = CurrentTSO;
1974     } else {
1975         StgTSO_link(prev) = CurrentTSO;
1976     }
1977     jump stg_block_noregs;
1978 #endif
1979 #endif /* !THREADED_RTS */
1980 }
1981
1982
1983 #ifdef mingw32_HOST_OS
1984 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
1985 asyncReadzh_fast
1986 {
1987     W_ ares;
1988     CInt reqID;
1989
1990 #ifdef THREADED_RTS
1991     foreign "C" barf("asyncRead# on threaded RTS");
1992 #else
1993
1994     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1995     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1996     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1997
1998     /* could probably allocate this on the heap instead */
1999     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2000                                             stg_asyncReadzh_malloc_str);
2001     reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr");
2002     StgAsyncIOResult_reqID(ares)   = reqID;
2003     StgAsyncIOResult_len(ares)     = 0;
2004     StgAsyncIOResult_errCode(ares) = 0;
2005     StgTSO_block_info(CurrentTSO)  = ares;
2006     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2007     jump stg_block_async;
2008 #endif
2009 }
2010
2011 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
2012 asyncWritezh_fast
2013 {
2014     W_ ares;
2015     CInt reqID;
2016
2017 #ifdef THREADED_RTS
2018     foreign "C" barf("asyncWrite# on threaded RTS");
2019 #else
2020
2021     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2022     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2023     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2024
2025     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2026                                             stg_asyncWritezh_malloc_str);
2027     reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr");
2028
2029     StgAsyncIOResult_reqID(ares)   = reqID;
2030     StgAsyncIOResult_len(ares)     = 0;
2031     StgAsyncIOResult_errCode(ares) = 0;
2032     StgTSO_block_info(CurrentTSO)  = ares;
2033     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2034     jump stg_block_async;
2035 #endif
2036 }
2037
2038 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
2039 asyncDoProczh_fast
2040 {
2041     W_ ares;
2042     CInt reqID;
2043
2044 #ifdef THREADED_RTS
2045     foreign "C" barf("asyncDoProc# on threaded RTS");
2046 #else
2047
2048     /* args: R1 = proc, R2 = param */
2049     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2050     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2051
2052     /* could probably allocate this on the heap instead */
2053     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2054                                             stg_asyncDoProczh_malloc_str);
2055     reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr");
2056     StgAsyncIOResult_reqID(ares)   = reqID;
2057     StgAsyncIOResult_len(ares)     = 0;
2058     StgAsyncIOResult_errCode(ares) = 0;
2059     StgTSO_block_info(CurrentTSO) = ares;
2060     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2061     jump stg_block_async;
2062 #endif
2063 }
2064 #endif
2065
2066 /* -----------------------------------------------------------------------------
2067   ** temporary **
2068
2069    classes CCallable and CReturnable don't really exist, but the
2070    compiler insists on generating dictionaries containing references
2071    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
2072    for these.  Some C compilers can't cope with zero-length static arrays,
2073    so we have to make these one element long.
2074   --------------------------------------------------------------------------- */
2075
2076 section "rodata" {
2077   GHC_ZCCCallable_static_info:   W_ 0;
2078 }
2079
2080 section "rodata" {
2081   GHC_ZCCReturnable_static_info: W_ 0;
2082 }