[ghc-hetmet.git] / ghc / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
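/* For example, newByteArrayzh_fast below receives its Int# byte count in R1
 * and returns the new ByteArray# with RET_P(p); primops that return several
 * results use RET_NP, RET_NNP, RET_NPNP, and so on.
 */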
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check; the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *smaller* than a word, make sure we
42  * round up to the nearest word for the size of the array.
43  */
44
45 newByteArrayzh_fast
46 {
47     W_ words, payload_words, n, p;
48     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
49     n = R1;
50     payload_words = ROUNDUP_BYTES_TO_WDS(n);
51     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
52     "ptr" p = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
53     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
54     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
55     StgArrWords_words(p) = payload_words;
56     RET_P(p);
57 }
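/* Worked example for newByteArrayzh_fast above (assuming a 64-bit platform,
 * i.e. 8-byte words): a request for n = 20 bytes gives
 * payload_words = ROUNDUP_BYTES_TO_WDS(20) = 3, so the allocation is
 * BYTES_TO_WDS(SIZEOF_StgArrWords) header words plus 3 payload words, and
 * StgArrWords_words(p) records the 3 payload words.
 */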
58
59 newPinnedByteArrayzh_fast
60 {
61     W_ words, payload_words, n, p;
62
63     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
64     n = R1;
65     payload_words = ROUNDUP_BYTES_TO_WDS(n);
66
67     // We want an 8-byte aligned array.  allocatePinned() gives us
68     // 8-byte aligned memory by default, but we want to align the
69     // *goods* inside the ArrWords object, so we have to check the
70     // size of the ArrWords header and adjust our size accordingly.
71     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
72     if ((SIZEOF_StgArrWords & 7) != 0) {
73         words = words + 1;
74     }
75
76     "ptr" p = foreign "C" allocatePinned(words) [];
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78
79     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
80     // have to push the object forward one word so that the goods
81     // fall on an 8-byte boundary.
82     if ((SIZEOF_StgArrWords & 7) != 0) {
83         p = p + WDS(1);
84     }
85
86     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
87     StgArrWords_words(p) = payload_words;
88     RET_P(p);
89 }
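/* Worked example of the alignment adjustment above (hypothetical layout): if
 * SIZEOF_StgArrWords were 12 bytes on a 32-bit platform, then
 * (SIZEOF_StgArrWords & 7) != 0, so one extra word is allocated and p is
 * advanced by WDS(1); the payload then starts 16 bytes into the 8-byte
 * aligned block returned by allocatePinned(), i.e. on an 8-byte boundary.
 */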
90
91 newArrayzh_fast
92 {
93     W_ words, n, init, arr, p;
94     /* Args: R1 = words, R2 = initialisation value */
95
96     n = R1;
97     MAYBE_GC(R2_PTR,newArrayzh_fast);
98
99     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
100     "ptr" arr = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
101     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
102
103     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
104     StgMutArrPtrs_ptrs(arr) = n;
105
106     // Initialise all elements of the array with the value in R2
107     init = R2;
108     p = arr + SIZEOF_StgMutArrPtrs;
109   for:
110     if (p < arr + WDS(words)) {
111         W_[p] = init;
112         p = p + WDS(1);
113         goto for;
114     }
115
116     RET_P(arr);
117 }
118
119 unsafeThawArrayzh_fast
120 {
121   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
122   //
123   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
124   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
125   // it on the mutable list for the GC to remove (removing something from
126   // the mutable list is not easy, because the mut_list is only singly-linked).
127   // 
128   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
129   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
130   // to indicate that it is still on the mutable list.
131   //
132   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
133   // either it is on a mut_list, or it isn't.  We adopt the convention that
134   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
135   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
136   // we put it on the mutable list more than once, but it would get scavenged
137   // multiple times during GC, which would be unnecessarily slow.
138   //
139   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
140         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
141         foreign "C" recordMutableLock(R1 "ptr") [R1];
142         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
143         RET_P(R1);
144   } else {
145         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
146         RET_P(R1);
147   }
148 }
149
150 /* -----------------------------------------------------------------------------
151    MutVar primitives
152    -------------------------------------------------------------------------- */
153
154 newMutVarzh_fast
155 {
156     W_ mv;
157     /* Args: R1 = initialisation value */
158
159     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
160
161     mv = Hp - SIZEOF_StgMutVar + WDS(1);
162     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
163     StgMutVar_var(mv) = R1;
164     
165     RET_P(mv);
166 }
167
168 atomicModifyMutVarzh_fast
169 {
170     W_ mv, z, x, y, r;
171     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
172
173     /* If x is the current contents of the MutVar#, then 
174        we want to make the new contents point to
175
176          (sel_0 (f x))
177  
178        and the return value is
179          
180          (sel_1 (f x))
181
182         obviously we can share (f x).
183
184          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
185          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
186          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
187     */
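/* A rough Haskell-level model of the thunks built below (a sketch only: the
 * real primop is atomic, this naive version is not, and IORef is simply a
 * boxed MutVar#):
 *
 *   atomicModifyModel :: IORef a -> (a -> (a,b)) -> IO b
 *   atomicModifyModel ref f = do
 *       x <- readIORef ref
 *       let z = f x              -- the [stg_ap_2 f x] thunk
 *       writeIORef ref (fst z)   -- the [stg_sel_0 z] thunk
 *       return (snd z)           -- the [stg_sel_1 z] thunk
 */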
188
189 #if MIN_UPD_SIZE > 1
190 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
191 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
192 #else
193 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
194 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
195 #endif
196
197 #if MIN_UPD_SIZE > 2
198 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
199 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
200 #else
201 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
202 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
203 #endif
204
205 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
206
207    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
208
209 #if defined(THREADED_RTS)
210     foreign "C" ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
211 #endif
212
213    x = StgMutVar_var(R1);
214
215    TICK_ALLOC_THUNK_2();
216    CCCS_ALLOC(THUNK_2_SIZE);
217    z = Hp - THUNK_2_SIZE + WDS(1);
218    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
219    LDV_RECORD_CREATE(z);
220    StgThunk_payload(z,0) = R2;
221    StgThunk_payload(z,1) = x;
222
223    TICK_ALLOC_THUNK_1();
224    CCCS_ALLOC(THUNK_1_SIZE);
225    y = z - THUNK_1_SIZE;
226    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
227    LDV_RECORD_CREATE(y);
228    StgThunk_payload(y,0) = z;
229
230    StgMutVar_var(R1) = y;
231    foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];
232
233    TICK_ALLOC_THUNK_1();
234    CCCS_ALLOC(THUNK_1_SIZE);
235    r = y - THUNK_1_SIZE;
236    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
237    LDV_RECORD_CREATE(r);
238    StgThunk_payload(r,0) = z;
239
240 #if defined(THREADED_RTS)
241     foreign "C" RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
242 #endif
243
244    RET_P(r);
245 }
246
247 /* -----------------------------------------------------------------------------
248    Weak Pointer Primitives
249    -------------------------------------------------------------------------- */
250
251 STRING(stg_weak_msg,"New weak pointer at %p\n")
252
253 mkWeakzh_fast
254 {
255   /* R1 = key
256      R2 = value
257      R3 = finalizer (or NULL)
258   */
259   W_ w;
260
261   if (R3 == NULL) {
262     R3 = stg_NO_FINALIZER_closure;
263   }
264
265   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
266
267   w = Hp - SIZEOF_StgWeak + WDS(1);
268   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
269
270   StgWeak_key(w)       = R1;
271   StgWeak_value(w)     = R2;
272   StgWeak_finalizer(w) = R3;
273
274   StgWeak_link(w)       = W_[weak_ptr_list];
275   W_[weak_ptr_list]     = w;
276
277   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
278
279   RET_P(w);
280 }
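/* Haskell-level usage sketch of the primop above (System.Mem.Weak is the
 * library wrapper):
 *
 *   import System.Mem.Weak (mkWeak, deRefWeak)
 *
 *   demo key val = do
 *       w <- mkWeak key val (Just (putStrLn "finalizer ran"))
 *       deRefWeak w   -- Just val while key is reachable, Nothing afterwards
 */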
281
282
283 finalizzeWeakzh_fast
284 {
285   /* R1 = weak ptr
286    */
287   W_ w, f;
288
289   w = R1;
290
291   // already dead?
292   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
293       RET_NP(0,stg_NO_FINALIZER_closure);
294   }
295
296   // kill it
297 #ifdef PROFILING
298   // @LDV profiling
299   // A weak pointer is inherently used, so we do not need to call
300   // LDV_recordDead_FILL_SLOP_DYNAMIC():
301   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
302   // or, LDV_recordDead():
303   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
304   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
305   // large as weak pointers, so there is no need to fill the slop, either.
306   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
307 #endif
308
309   //
310   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
311   //
312   SET_INFO(w,stg_DEAD_WEAK_info);
313   LDV_RECORD_CREATE(w);
314
315   f = StgWeak_finalizer(w);
316   StgDeadWeak_link(w) = StgWeak_link(w);
317
318   /* return the finalizer */
319   if (f == stg_NO_FINALIZER_closure) {
320       RET_NP(0,stg_NO_FINALIZER_closure);
321   } else {
322       RET_NP(1,f);
323   }
324 }
325
326 deRefWeakzh_fast
327 {
328   /* R1 = weak ptr */
329   W_ w, code, val;
330
331   w = R1;
332   if (GET_INFO(w) == stg_WEAK_info) {
333     code = 1;
334     val = StgWeak_value(w);
335   } else {
336     code = 0;
337     val = w;
338   }
339   RET_NP(code,val);
340 }
341
342 /* -----------------------------------------------------------------------------
343    Arbitrary-precision Integer operations.
344
345    There are some assumptions in this code that mp_limb_t == W_.  This is
346    the case for all the platforms that GHC supports, currently.
347    -------------------------------------------------------------------------- */
348
349 int2Integerzh_fast
350 {
351    /* arguments: R1 = Int# */
352
353    W_ val, s, p;        /* to avoid aliasing */
354
355    val = R1;
356    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
357
358    p = Hp - SIZEOF_StgArrWords;
359    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
360    StgArrWords_words(p) = 1;
361
362    /* mpz_set_si is inlined here, makes things simpler */
363    if (%lt(val,0)) { 
364         s  = -1;
365         Hp(0) = -val;
366    } else { 
367      if (%gt(val,0)) {
368         s = 1;
369         Hp(0) = val;
370      } else {
371         s = 0;
372      }
373   }
374
375    /* returns (# size  :: Int#, 
376                  data  :: ByteArray# 
377                #)
378    */
379    RET_NP(s,p);
380 }
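/* Worked example for int2Integerzh_fast above: for R1 = -5 it returns s = -1
 * and a one-word ByteArray# whose single limb is 5 (GMP's sign-and-magnitude
 * form); for R1 = 0 it returns s = 0 and leaves the limb uninitialised.
 */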
381
382 word2Integerzh_fast
383 {
384    /* arguments: R1 = Word# */
385
386    W_ val, s, p;        /* to avoid aliasing */
387
388    val = R1;
389
390    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
391
392    p = Hp - SIZEOF_StgArrWords;
393    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
394    StgArrWords_words(p) = 1;
395
396    if (val != 0) {
397         s = 1;
398         W_[Hp] = val;
399    } else {
400         s = 0;
401    }
402
403    /* returns (# size  :: Int#, 
404                  data  :: ByteArray# #)
405    */
406    RET_NP(s,p);
407 }
408
409
410 /*
411  * 'long long' primops for converting to/from Integers.
412  */
413
414 #ifdef SUPPORT_LONG_LONGS
415
416 int64ToIntegerzh_fast
417 {
418    /* arguments: L1 = Int64# */
419
420    L_ val;
421    W_ hi, s, neg, words_needed, p;
422
423    val = L1;
424    neg = 0;
425
426    if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) )  { 
427        words_needed = 2;
428    } else { 
429        // minimum is one word
430        words_needed = 1;
431    }
432
433    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
434                NO_PTRS, int64ToIntegerzh_fast );
435
436    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
437    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
438    StgArrWords_words(p) = words_needed;
439
440    if ( %lt(val,0::L_) ) {
441      neg = 1;
442      val = -val;
443    }
444
445    hi = TO_W_(val >> 32);
446
447    if ( words_needed == 2 )  { 
448       s = 2;
449       Hp(-1) = TO_W_(val);
450       Hp(0) = hi;
451    } else { 
452        if ( val != 0::L_ ) {
453            s = 1;
454            Hp(0) = TO_W_(val);
455        } else /* val==0 */  {
456            s = 0;
457        }
458    }
459    if ( neg != 0 ) {
460         s = -s;
461    }
462
463    /* returns (# size  :: Int#, 
464                  data  :: ByteArray# #)
465    */
466    RET_NP(s,p);
467 }
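/* Worked example for int64ToIntegerzh_fast above (32-bit platform, where this
 * code is compiled): for val = 0x100000001 the magnitude needs two limbs, so
 * s = 2 with Hp(-1) = 0x1 (low word) and Hp(0) = 0x1 (high word); for
 * val = -0x100000001 the same limbs are stored and s is negated to -2.
 */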
468
469 word64ToIntegerzh_fast
470 {
471    /* arguments: L1 = Word64# */
472
473    L_ val;
474    W_ hi, s, words_needed, p;
475
476    val = L1;
477    if ( val >= 0x100000000::L_ ) {
478       words_needed = 2;
479    } else {
480       words_needed = 1;
481    }
482
483    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
484                NO_PTRS, word64ToIntegerzh_fast );
485
486    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
487    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
488    StgArrWords_words(p) = words_needed;
489
490    hi = TO_W_(val >> 32);
491    if ( val >= 0x100000000::L_ ) { 
492      s = 2;
493      Hp(-1) = TO_W_(val);
494      Hp(0)  = hi;
495    } else {
496       if ( val != 0::L_ ) {
497         s = 1;
498         Hp(0) = TO_W_(val);
499      } else /* val==0 */  {
500       s = 0;
501      }
502   }
503
504    /* returns (# size  :: Int#, 
505                  data  :: ByteArray# #)
506    */
507    RET_NP(s,p);
508 }
509
510
511 #endif /* SUPPORT_LONG_LONGS */
512
513 /* ToDo: this is shockingly inefficient */
514
515 #ifndef THREADED_RTS
516 section "bss" {
517   mp_tmp1:
518     bits8 [SIZEOF_MP_INT];
519 }
520
521 section "bss" {
522   mp_tmp2:
523     bits8 [SIZEOF_MP_INT];
524 }
525
526 section "bss" {
527   mp_result1:
528     bits8 [SIZEOF_MP_INT];
529 }
530
531 section "bss" {
532   mp_result2:
533     bits8 [SIZEOF_MP_INT];
534 }
535 #endif
536
537 #ifdef THREADED_RTS
538 #define FETCH_MP_TEMP(X) \
539 W_ X; \
540 X = BaseReg + (OFFSET_StgRegTable_r ## X);
541 #else
542 #define FETCH_MP_TEMP(X) /* Nothing */
543 #endif
544
545 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
546 name                                                                    \
547 {                                                                       \
548   CInt s1, s2;                                                          \
549   W_ d1, d2;                                                            \
550   FETCH_MP_TEMP(mp_tmp1);                                               \
551   FETCH_MP_TEMP(mp_tmp2);                                               \
552   FETCH_MP_TEMP(mp_result1)                                             \
553   FETCH_MP_TEMP(mp_result2);                                            \
554                                                                         \
555   /* call doYouWantToGC() */                                            \
556   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
557                                                                         \
558   s1 = W_TO_INT(R1);                                                    \
559   d1 = R2;                                                              \
560   s2 = W_TO_INT(R3);                                                    \
561   d2 = R4;                                                              \
562                                                                         \
563   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
564   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
565   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
566   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
567   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
568   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
569                                                                         \
570   foreign "C" mpz_init(mp_result1 "ptr") [];                            \
571                                                                         \
572   /* Perform the operation */                                           \
573   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
574                                                                         \
575   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
576          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
577 }
578
579 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
580 name                                                                    \
581 {                                                                       \
582   CInt s1;                                                              \
583   W_ d1;                                                                \
584   FETCH_MP_TEMP(mp_tmp1);                                               \
585   FETCH_MP_TEMP(mp_result1)                                             \
586                                                                         \
587   /* call doYouWantToGC() */                                            \
588   MAYBE_GC(R2_PTR, name);                                               \
589                                                                         \
590   d1 = R2;                                                              \
591   s1 = W_TO_INT(R1);                                                    \
592                                                                         \
593   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
594   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
595   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
596                                                                         \
597   foreign "C" mpz_init(mp_result1 "ptr") [];                            \
598                                                                         \
599   /* Perform the operation */                                           \
600   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
601                                                                         \
602   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
603          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
604 }
605
606 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
607 name                                                                                    \
608 {                                                                                       \
609   CInt s1, s2;                                                                          \
610   W_ d1, d2;                                                                            \
611   FETCH_MP_TEMP(mp_tmp1);                                                               \
612   FETCH_MP_TEMP(mp_tmp2);                                                               \
613   FETCH_MP_TEMP(mp_result1)                                                             \
614   FETCH_MP_TEMP(mp_result2)                                                             \
615                                                                                         \
616   /* call doYouWantToGC() */                                                            \
617   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
618                                                                                         \
619   s1 = W_TO_INT(R1);                                                                    \
620   d1 = R2;                                                                              \
621   s2 = W_TO_INT(R3);                                                                    \
622   d2 = R4;                                                                              \
623                                                                                         \
624   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
625   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
626   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
627   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
628   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
629   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
630                                                                                         \
631   foreign "C" mpz_init(mp_result1 "ptr") [];                                               \
632   foreign "C" mpz_init(mp_result2 "ptr") [];                                               \
633                                                                                         \
634   /* Perform the operation */                                                           \
635   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
636                                                                                         \
637   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
638            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
639            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
640            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
641 }
642
643 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add)
644 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub)
645 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul)
646 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd)
647 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q)
648 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r)
649 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact)
650 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and)
651 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior)
652 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor)
653 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com)
654
655 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr)
656 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr)
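/* At the Haskell level these surface as primops over the (size, limbs) pair
 * used by the J# constructor of Integer; a sketch of the shape involved:
 *
 *   plusInteger# :: Int# -> ByteArray# -> Int# -> ByteArray#
 *                -> (# Int#, ByteArray# #)
 *
 *   -- plusJ (J# s1 d1) (J# s2 d2) = case plusInteger# s1 d1 s2 d2 of
 *   --                                 (# s, d #) -> J# s d
 */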
657
658 #ifndef THREADED_RTS
659 section "bss" {
660   mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
661 }
662 #endif
663
664 gcdIntzh_fast
665 {
666     /* R1 = the first Int#; R2 = the second Int# */
667     W_ r; 
668     FETCH_MP_TEMP(mp_tmp_w);
669
670     W_[mp_tmp_w] = R1;
671     r = foreign "C" mpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
672
673     R1 = r;
674     /* Result parked in R1, return via info-pointer at TOS */
675     jump %ENTRY_CODE(Sp(0));
676 }
677
678
679 gcdIntegerIntzh_fast
680 {
681     /* R1 = s1; R2 = d1; R3 = the int */
682     R1 = foreign "C" mpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
683     
684     /* Result parked in R1, return via info-pointer at TOS */
685     jump %ENTRY_CODE(Sp(0));
686 }
687
688
689 cmpIntegerIntzh_fast
690 {
691     /* R1 = s1; R2 = d1; R3 = the int */
692     W_ usize, vsize, v_digit, u_digit;
693
694     usize = R1;
695     vsize = 0;
696     v_digit = R3;
697
698     // paraphrased from mpz_cmp_si() in the GMP sources
699     if (%gt(v_digit,0)) {
700         vsize = 1;
701     } else { 
702         if (%lt(v_digit,0)) {
703             vsize = -1;
704             v_digit = -v_digit;
705         }
706     }
707
708     if (usize != vsize) {
709         R1 = usize - vsize; 
710         jump %ENTRY_CODE(Sp(0));
711     }
712
713     if (usize == 0) {
714         R1 = 0; 
715         jump %ENTRY_CODE(Sp(0));
716     }
717
718     u_digit = W_[BYTE_ARR_CTS(R2)];
719
720     if (u_digit == v_digit) {
721         R1 = 0; 
722         jump %ENTRY_CODE(Sp(0));
723     }
724
725     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
726         R1 = usize; 
727     } else {
728         R1 = -usize; 
729     }
730
731     jump %ENTRY_CODE(Sp(0));
732 }
733
734 cmpIntegerzh_fast
735 {
736     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
737     W_ usize, vsize, size, up, vp;
738     CInt cmp;
739
740     // paraphrased from mpz_cmp() in the GMP sources
741     usize = R1;
742     vsize = R3;
743
744     if (usize != vsize) {
745         R1 = usize - vsize; 
746         jump %ENTRY_CODE(Sp(0));
747     }
748
749     if (usize == 0) {
750         R1 = 0; 
751         jump %ENTRY_CODE(Sp(0));
752     }
753
754     if (%lt(usize,0)) { // NB. not <, which is unsigned
755         size = -usize;
756     } else {
757         size = usize;
758     }
759
760     up = BYTE_ARR_CTS(R2);
761     vp = BYTE_ARR_CTS(R4);
762
763     cmp = foreign "C" mpn_cmp(up "ptr", vp "ptr", size) [];
764
765     if (cmp == 0 :: CInt) {
766         R1 = 0; 
767         jump %ENTRY_CODE(Sp(0));
768     }
769
770     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
771         R1 = 1;
772     } else {
773         R1 = (-1); 
774     }
775     /* Result parked in R1, return via info-pointer at TOS */
776     jump %ENTRY_CODE(Sp(0));
777 }
778
779 integer2Intzh_fast
780 {
781     /* R1 = s; R2 = d */
782     W_ r, s;
783
784     s = R1;
785     if (s == 0) {
786         r = 0;
787     } else {
788         r = W_[R2 + SIZEOF_StgArrWords];
789         if (%lt(s,0)) {
790             r = -r;
791         }
792     }
793     /* Result parked in R1, return via info-pointer at TOS */
794     R1 = r;
795     jump %ENTRY_CODE(Sp(0));
796 }
797
798 integer2Wordzh_fast
799 {
800   /* R1 = s; R2 = d */
801   W_ r, s;
802
803   s = R1;
804   if (s == 0) {
805     r = 0;
806   } else {
807     r = W_[R2 + SIZEOF_StgArrWords];
808     if (%lt(s,0)) {
809         r = -r;
810     }
811   }
812   /* Result parked in R1, return via info-pointer at TOS */
813   R1 = r;
814   jump %ENTRY_CODE(Sp(0));
815 }
816
817 decodeFloatzh_fast
818 {
819     W_ p;
820     F_ arg;
821     FETCH_MP_TEMP(mp_tmp1);
822     FETCH_MP_TEMP(mp_tmp_w);
823     
824     /* arguments: F1 = Float# */
825     arg = F1;
826     
827     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
828     
829     /* Be prepared to tell Lennart-coded __decodeFloat
830        where mantissa._mp_d can be put (it does not care about the rest) */
831     p = Hp - SIZEOF_StgArrWords;
832     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
833     StgArrWords_words(p) = 1;
834     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
835     
836     /* Perform the operation */
837     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
838     
839     /* returns: (Int# (expn), Int#, ByteArray#) */
840     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
841 }
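/* Haskell-level illustration: the RealFloat method decodeFloat is built on
 * the primop above; e.g. decodeFloat (0.5 :: Float) == (8388608, -24), i.e.
 * 0.5 = 8388608 * 2^(-24) with a 24-bit mantissa.
 */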
842
843 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
844 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
845
846 decodeDoublezh_fast
847 {
848     D_ arg;
849     W_ p;
850     FETCH_MP_TEMP(mp_tmp1);
851     FETCH_MP_TEMP(mp_tmp_w);
852
853     /* arguments: D1 = Double# */
854     arg = D1;
855
856     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
857     
858     /* Be prepared to tell Lennart-coded __decodeDouble
859        where mantissa.d can be put (it does not care about the rest) */
860     p = Hp - ARR_SIZE + WDS(1);
861     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
862     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
863     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
864
865     /* Perform the operation */
866     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
867     
868     /* returns: (Int# (expn), Int#, ByteArray#) */
869     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
870 }
871
872 /* -----------------------------------------------------------------------------
873  * Concurrency primitives
874  * -------------------------------------------------------------------------- */
875
876 forkzh_fast
877 {
878   /* args: R1 = IO closure to run in the new thread */
879   
880   MAYBE_GC(R1_PTR, forkzh_fast);
881
882   // create it right now, return ThreadID in R1
883   "ptr" R1 = foreign "C" createIOThread( MyCapability() "ptr", 
884                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
885                                 R1 "ptr") [R1];
886   foreign "C" scheduleThread(MyCapability() "ptr", R1 "ptr") [R1];
887
888   // switch at the earliest opportunity
889   CInt[context_switch] = 1 :: CInt;
890   
891   RET_P(R1);
892 }
893
894 yieldzh_fast
895 {
896   jump stg_yield_noregs;
897 }
898
899 myThreadIdzh_fast
900 {
901   /* no args. */
902   RET_P(CurrentTSO);
903 }
904
905 labelThreadzh_fast
906 {
907   /* args: 
908         R1 = ThreadId#
909         R2 = Addr# */
910 #ifdef DEBUG
911   foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
912 #endif
913   jump %ENTRY_CODE(Sp(0));
914 }
915
916 isCurrentThreadBoundzh_fast
917 {
918   /* no args */
919   W_ r;
920   r = foreign "C" isThreadBound(CurrentTSO) [];
921   RET_N(r);
922 }
923
924
925 /* -----------------------------------------------------------------------------
926  * TVar primitives
927  * -------------------------------------------------------------------------- */
928
929 #ifdef REG_R1
930 #define SP_OFF 0
931 #define IF_NOT_REG_R1(x) 
932 #else
933 #define SP_OFF 1
934 #define IF_NOT_REG_R1(x) x
935 #endif
936
937 // Catch retry frame ------------------------------------------------------------
938
939 #define CATCH_RETRY_FRAME_ERROR(label) \
940   label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }
941
942 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
943 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
944 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
945 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
946 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
947 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
948 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
949 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)
950
951 #if MAX_VECTORED_RTN > 8
952 #error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
953 #endif
954
955 #if defined(PROFILING)
956 #define CATCH_RETRY_FRAME_BITMAP 7
957 #define CATCH_RETRY_FRAME_WORDS  6
958 #else
959 #define CATCH_RETRY_FRAME_BITMAP 1
960 #define CATCH_RETRY_FRAME_WORDS  4
961 #endif
962
963 INFO_TABLE_RET(stg_catch_retry_frame,
964                CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
965                CATCH_RETRY_FRAME,
966                stg_catch_retry_frame_0_ret,
967                stg_catch_retry_frame_1_ret,
968                stg_catch_retry_frame_2_ret,
969                stg_catch_retry_frame_3_ret,
970                stg_catch_retry_frame_4_ret,
971                stg_catch_retry_frame_5_ret,
972                stg_catch_retry_frame_6_ret,
973                stg_catch_retry_frame_7_ret)
974 {
975    W_ r, frame, trec, outer;
976    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
977
978    frame = Sp;
979    trec = StgTSO_trec(CurrentTSO);
980    "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
981    r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
982    if (r) {
983      /* Succeeded (either first branch or second branch) */
984      StgTSO_trec(CurrentTSO) = outer;
985      Sp = Sp + SIZEOF_StgCatchRetryFrame;
986      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
987      jump %ENTRY_CODE(Sp(SP_OFF));
988    } else {
989      /* Did not commit: retry */
990      W_ new_trec;
991      "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
992      StgTSO_trec(CurrentTSO) = new_trec;
993      if (StgCatchRetryFrame_running_alt_code(frame)) {
994        R1 = StgCatchRetryFrame_alt_code(frame);
995      } else {
996        R1 = StgCatchRetryFrame_first_code(frame);
997        StgCatchRetryFrame_first_code_trec(frame) = new_trec;
998      }
999      Sp_adj(-1);
1000      jump RET_LBL(stg_ap_v);
1001    }
1002 }
1003
1004
1005 // Atomically frame -------------------------------------------------------------
1006
1007
1008 #define ATOMICALLY_FRAME_ERROR(label) \
1009   label { foreign "C" barf("atomically_frame incorrectly entered!"); }
1010
1011 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
1012 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
1013 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
1014 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
1015 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
1016 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
1017 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
1018 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)
1019
1020 #if MAX_VECTORED_RTN > 8
1021 #error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
1022 #endif
1023
1024 #if defined(PROFILING)
1025 #define ATOMICALLY_FRAME_BITMAP 3
1026 #define ATOMICALLY_FRAME_WORDS  3
1027 #else
1028 #define ATOMICALLY_FRAME_BITMAP 0
1029 #define ATOMICALLY_FRAME_WORDS  1
1030 #endif
1031
1032
1033 INFO_TABLE_RET(stg_atomically_frame,
1034                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1035                ATOMICALLY_FRAME,
1036                stg_atomically_frame_0_ret,
1037                stg_atomically_frame_1_ret,
1038                stg_atomically_frame_2_ret,
1039                stg_atomically_frame_3_ret,
1040                stg_atomically_frame_4_ret,
1041                stg_atomically_frame_5_ret,
1042                stg_atomically_frame_6_ret,
1043                stg_atomically_frame_7_ret)
1044 {
1045   W_ frame, trec, valid;
1046   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1047
1048   frame = Sp;
1049   trec = StgTSO_trec(CurrentTSO);
1050
1051   /* The TSO is not currently waiting: try to commit the transaction */
1052   valid = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
1053   if (valid) {
1054     /* Transaction was valid: commit succeeded */
1055     StgTSO_trec(CurrentTSO) = NO_TREC;
1056     Sp = Sp + SIZEOF_StgAtomicallyFrame;
1057     IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1058     jump %ENTRY_CODE(Sp(SP_OFF));
1059   } else {
1060     /* Transaction was not valid: try again */
1061     "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1062     StgTSO_trec(CurrentTSO) = trec;
1063     R1 = StgAtomicallyFrame_code(frame);
1064     Sp_adj(-1);
1065     jump RET_LBL(stg_ap_v);
1066   }
1067 }
1068
1069 INFO_TABLE_RET(stg_atomically_waiting_frame,
1070                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1071                ATOMICALLY_FRAME,
1072                stg_atomically_frame_0_ret,
1073                stg_atomically_frame_1_ret,
1074                stg_atomically_frame_2_ret,
1075                stg_atomically_frame_3_ret,
1076                stg_atomically_frame_4_ret,
1077                stg_atomically_frame_5_ret,
1078                stg_atomically_frame_6_ret,
1079                stg_atomically_frame_7_ret)
1080 {
1081   W_ frame, trec, valid;
1082   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1083
1084   frame = Sp;
1085
1086   /* The TSO is currently waiting: should we stop waiting? */
1087   valid = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
1088   if (valid) {
1089     /* Previous attempt is still valid: no point trying again yet */
1090           IF_NOT_REG_R1(Sp_adj(-2);
1091                         Sp(1) = stg_NO_FINALIZER_closure;
1092                         Sp(0) = stg_ut_1_0_unreg_info;)
1093     jump stg_block_noregs;
1094   } else {
1095     /* Previous attempt is no longer valid: try again */
1096     "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1097     StgTSO_trec(CurrentTSO) = trec;
1098     StgHeader_info(frame) = stg_atomically_frame_info;
1099     R1 = StgAtomicallyFrame_code(frame);
1100     Sp_adj(-1);
1101     jump RET_LBL(stg_ap_v);
1102   }
1103 }
1104
1105 // STM catch frame --------------------------------------------------------------
1106
1107 #define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)          \
1108    label                                                   \
1109    {                                                       \
1110       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )  \
1111       Sp = Sp + SIZEOF_StgCatchSTMFrame;                   \
1112       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)             \
1113       jump ret;                                            \
1114    }
1115
1116 #ifdef REG_R1
1117 #define SP_OFF 0
1118 #else
1119 #define SP_OFF 1
1120 #endif
1121
1122 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
1123 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
1124 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
1125 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
1126 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
1127 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
1128 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
1129 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
1130
1131 #if MAX_VECTORED_RTN > 8
1132 #error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
1133 #endif
1134
1135 #if defined(PROFILING)
1136 #define CATCH_STM_FRAME_BITMAP 3
1137 #define CATCH_STM_FRAME_WORDS  3
1138 #else
1139 #define CATCH_STM_FRAME_BITMAP 0
1140 #define CATCH_STM_FRAME_WORDS  1
1141 #endif
1142
1143 /* Catch frames are very similar to update frames, but when entering
1144  * one we just pop the frame off the stack and perform the correct
1145  * kind of return to the activation record underneath us on the stack.
1146  */
1147
1148 INFO_TABLE_RET(stg_catch_stm_frame,
1149                CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
1150                CATCH_STM_FRAME,
1151                stg_catch_stm_frame_0_ret,
1152                stg_catch_stm_frame_1_ret,
1153                stg_catch_stm_frame_2_ret,
1154                stg_catch_stm_frame_3_ret,
1155                stg_catch_stm_frame_4_ret,
1156                stg_catch_stm_frame_5_ret,
1157                stg_catch_stm_frame_6_ret,
1158                stg_catch_stm_frame_7_ret)
1159 CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1160
1161
1162 // Primop definition ------------------------------------------------------------
1163
1164 atomicallyzh_fast
1165 {
1166   W_ frame;
1167   W_ old_trec;
1168   W_ new_trec;
1169   
1170   // stmStartTransaction may allocate
1171   MAYBE_GC (R1_PTR, atomicallyzh_fast); 
1172
1173   /* Args: R1 = m :: STM a */
1174   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1175
1176   old_trec = StgTSO_trec(CurrentTSO);
1177
1178   /* Nested transactions are not allowed; raise an exception */
1179   if (old_trec != NO_TREC) {
1180      R1 = GHCziIOBase_NestedAtomically_closure;
1181      jump raisezh_fast;
1182   }
1183
1184   /* Set up the atomically frame */
1185   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1186   frame = Sp;
1187
1188   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1189   StgAtomicallyFrame_code(frame) = R1;
1190
1191   /* Start the memory transaction */
1192   "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
1193   StgTSO_trec(CurrentTSO) = new_trec;
1194
1195   /* Apply R1 to the realworld token */
1196   Sp_adj(-1);
1197   jump RET_LBL(stg_ap_v);
1198 }
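/* Haskell-level illustration (sketch): nesting one transaction inside
 * another, e.g.
 *
 *   atomically (unsafeIOToSTM (atomically (return ())))   -- unsafeIOToSTM
 *                                                         -- is from GHC.Conc
 *
 * reaches atomicallyzh_fast above with old_trec != NO_TREC and raises the
 * NestedAtomically exception via the branch above.
 */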
1199
1200
1201 catchSTMzh_fast
1202 {
1203   W_ frame;
1204   
1205   /* Args: R1 :: STM a */
1206   /* Args: R2 :: Exception -> STM a */
1207   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1208
1209   /* Set up the catch frame */
1210   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1211   frame = Sp;
1212
1213   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1214   StgCatchSTMFrame_handler(frame) = R2;
1215
1216   /* Apply R1 to the realworld token */
1217   Sp_adj(-1);
1218   jump RET_LBL(stg_ap_v);
1219 }
1220
1221
1222 catchRetryzh_fast
1223 {
1224   W_ frame;
1225   W_ new_trec;
1226   W_ trec;
1227
1228   // stmStartTransaction may allocate
1229   MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 
1230
1231   /* Args: R1 :: STM a */
1232   /* Args: R2 :: STM a */
1233   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1234
1235   /* Start a nested transaction within which to run the first code */
1236   trec = StgTSO_trec(CurrentTSO);
1237   "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
1238   StgTSO_trec(CurrentTSO) = new_trec;
1239
1240   /* Set up the catch-retry frame */
1241   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1242   frame = Sp;
1243   
1244   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1245   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1246   StgCatchRetryFrame_first_code(frame) = R1;
1247   StgCatchRetryFrame_alt_code(frame) = R2;
1248   StgCatchRetryFrame_first_code_trec(frame) = new_trec;
1249
1250   /* Apply R1 to the realworld token */
1251   Sp_adj(-1);
1252   jump RET_LBL(stg_ap_v);  
1253 }
1254
1255
1256 retryzh_fast
1257 {
1258   W_ frame_type;
1259   W_ frame;
1260   W_ trec;
1261   W_ outer;
1262   W_ r;
1263
1264   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1265
1266   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1267 retry_pop_stack:
1268   trec = StgTSO_trec(CurrentTSO);
1269   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1270   StgTSO_sp(CurrentTSO) = Sp;
1271   frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
1272   Sp = StgTSO_sp(CurrentTSO);
1273   frame = Sp;
1274
1275   if (frame_type == CATCH_RETRY_FRAME) {
1276     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1277     ASSERT(outer != NO_TREC);
1278     if (!StgCatchRetryFrame_running_alt_code(frame)) {
1279       // Retry in the first code: try the alternative
1280       "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1281       StgTSO_trec(CurrentTSO) = trec;
1282       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1283       R1 = StgCatchRetryFrame_alt_code(frame);
1284       Sp_adj(-1);
1285       jump RET_LBL(stg_ap_v);
1286     } else {
1287       // Retry in the alternative code: propagate
1288       W_ other_trec;
1289       other_trec = StgCatchRetryFrame_first_code_trec(frame);
1290       r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", other_trec "ptr") [];
1291       if (r) {
1292         r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
1293       } else {
1294         foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1295       }
1296       if (r) {
1297         // Merge between siblings succeeded: commit it back to enclosing transaction
1298         // and then propagate the retry
1299         StgTSO_trec(CurrentTSO) = outer;
1300         Sp = Sp + SIZEOF_StgCatchRetryFrame;
1301         goto retry_pop_stack;
1302       } else {
1303         // Merge failed: we mustn't propagate the retry.  Try both paths again.
1304         "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1305         StgCatchRetryFrame_first_code_trec(frame) = trec;
1306         StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1307         StgTSO_trec(CurrentTSO) = trec;
1308         R1 = StgCatchRetryFrame_first_code(frame);
1309         Sp_adj(-1);
1310         jump RET_LBL(stg_ap_v);
1311       }
1312     }
1313   }
1314
1315   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1316   ASSERT(frame_type == ATOMICALLY_FRAME);
1317   ASSERT(outer == NO_TREC);
1318   r = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
1319   if (r) {
1320     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1321     StgHeader_info(frame) = stg_atomically_waiting_frame_info;
1322     Sp = frame;
1323     // Fix up the stack in the unregisterised case: the return convention is different.
1324     IF_NOT_REG_R1(Sp_adj(-2); 
1325                   Sp(1) = stg_NO_FINALIZER_closure;
1326                   Sp(0) = stg_ut_1_0_unreg_info;)
1327     R3 = trec; // passing to stmWaitUnblock()
1328     jump stg_block_stmwait;
1329   } else {
1330     // Transaction was not valid: retry immediately
1331     "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1332     StgTSO_trec(CurrentTSO) = trec;
1333     R1 = StgAtomicallyFrame_code(frame);
1334     Sp = frame;
1335     Sp_adj(-1);
1336     jump RET_LBL(stg_ap_v);
1337   }
1338 }
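/* Haskell-level illustration (sketch): catchRetry# is the primop behind STM's
 * orElse, and retry# aborts to the nearest enclosing alternative:
 *
 *   import Control.Concurrent.STM
 *
 *   takeEither a b = atomically (takeTMVar a `orElse` takeTMVar b)
 *
 * If takeTMVar a retries, the CATCH_RETRY_FRAME switches to takeTMVar b; if
 * that retries too, the retry propagates to the ATOMICALLY_FRAME, which calls
 * stmWait and blocks the thread on the TVars it has read.
 */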
1339
1340
1341 newTVarzh_fast
1342 {
1343   W_ tv;
1344   W_ new_value;
1345
1346   /* Args: R1 = initialisation value */
1347
1348   MAYBE_GC (R1_PTR, newTVarzh_fast); 
1349   new_value = R1;
1350   "ptr" tv = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
1351   RET_P(tv);
1352 }
1353
1354
1355 readTVarzh_fast
1356 {
1357   W_ trec;
1358   W_ tvar;
1359   W_ result;
1360
1361   /* Args: R1 = TVar closure */
1362
1363   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1364   trec = StgTSO_trec(CurrentTSO);
1365   tvar = R1;
1366   "ptr" result = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
1367
1368   RET_P(result);
1369 }
1370
1371
1372 writeTVarzh_fast
1373 {
1374   W_ trec;
1375   W_ tvar;
1376   W_ new_value;
1377   
1378   /* Args: R1 = TVar closure */
1379   /*       R2 = New value    */
1380
1381   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1382   trec = StgTSO_trec(CurrentTSO);
1383   tvar = R1;
1384   new_value = R2;
1385   foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1386
1387   jump %ENTRY_CODE(Sp(0));
1388 }
1389
1390
1391 /* -----------------------------------------------------------------------------
1392  * MVar primitives
1393  *
1394  * take & putMVar work as follows.  Firstly, an important invariant:
1395  *
1396  *    If the MVar is full, then the blocking queue contains only
1397  *    threads blocked on putMVar, and if the MVar is empty then the
1398  *    blocking queue contains only threads blocked on takeMVar.
1399  *
1400  * takeMVar:
1401  *    MVar empty : then add ourselves to the blocking queue
1402  *    MVar full  : remove the value from the MVar, and
1403  *                 blocking queue empty     : return
1404  *                 blocking queue non-empty : perform the first blocked putMVar
1405  *                                            from the queue, and wake up the
1406  *                                            thread (MVar is now full again)
1407  *
1408  * putMVar is just the dual of the above algorithm.
1409  *
1410  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1411  * the stack of the thread waiting to do the putMVar.  See
1412  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1413  * the stack layout, and the PerformPut and PerformTake macros below.
1414  *
1415  * It is important that a blocked take or put is woken up with the
1416  * take/put already performed, because otherwise there would be a
1417  * small window of vulnerability where the thread could receive an
1418  * exception and never perform its take or put, and we'd end up with a
1419  * deadlock.
1420  *
1421  * -------------------------------------------------------------------------- */
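/* Haskell-level illustration of the hand-off described above (sketch; the
 * code below also deals with locking and the blocked-thread queues):
 *
 *   import Control.Concurrent
 *
 *   demo = do
 *       m <- newEmptyMVar
 *       forkIO (takeMVar m >>= print)   -- blocks, joins m's queue
 *       putMVar m (42 :: Int)           -- finds a blocked taker: performs
 *                                       -- the take on its behalf, wakes it
 *
 * The value is handed directly from the putter to the blocked taker, so the
 * taker cannot be interrupted between the MVar becoming full and its take
 * being performed.
 */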
1422
1423 isEmptyMVarzh_fast
1424 {
1425     /* args: R1 = MVar closure */
1426
1427     if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
1428         RET_N(1);
1429     } else {
1430         RET_N(0);
1431     }
1432 }
1433
1434 newMVarzh_fast
1435 {
1436     /* args: none */
1437     W_ mvar;
1438
1439     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1440   
1441     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1442     SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
1443     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1444     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1445     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1446     RET_P(mvar);
1447 }
1448
1449
1450 /* If R1 isn't available, pass it on the stack */
1451 #ifdef REG_R1
1452 #define PerformTake(tso, value)                         \
1453     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1454     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1455 #else
1456 #define PerformTake(tso, value)                                 \
1457     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1458     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1459 #endif
1460
1461 #define PerformPut(tso,lval)                    \
1462     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1463     lval = W_[StgTSO_sp(tso) - WDS(1)];
1464
1465 takeMVarzh_fast
1466 {
1467     W_ mvar, val, info, tso;
1468
1469     /* args: R1 = MVar closure */
1470     mvar = R1;
1471
1472 #if defined(THREADED_RTS)
1473     "ptr" info = foreign "C" lockClosure(mvar "ptr") [];
1474 #else
1475     info = GET_INFO(mvar);
1476 #endif
1477
1478     /* If the MVar is empty, put ourselves on its blocking queue,
1479      * and wait until we're woken up.
1480      */
1481     if (info == stg_EMPTY_MVAR_info) {
1482         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1483             StgMVar_head(mvar) = CurrentTSO;
1484         } else {
1485             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1486         }
1487         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1488         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1489         StgTSO_block_info(CurrentTSO)  = mvar;
1490         StgMVar_tail(mvar) = CurrentTSO;
1491         
1492         jump stg_block_takemvar;
1493   }
1494
1495   /* we got the value... */
1496   val = StgMVar_value(mvar);
1497
1498   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1499   {
1500       /* There are putMVar(s) waiting... 
1501        * wake up the first thread on the queue
1502        */
1503       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1504
1505       /* actually perform the putMVar for the thread that we just woke up */
1506       tso = StgMVar_head(mvar);
1507       PerformPut(tso,StgMVar_value(mvar));
1508
1509 #if defined(GRAN) || defined(PAR)
1510       /* ToDo: check 2nd arg (mvar) is right */
1511       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
1512       StgMVar_head(mvar) = tso;
1513 #else
1514       "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", 
1515                                          StgMVar_head(mvar) "ptr") [];
1516       StgMVar_head(mvar) = tso;
1517 #endif
1518
1519       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1520           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1521       }
1522
1523 #if defined(THREADED_RTS)
1524       foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
1525 #endif
1526       RET_P(val);
1527   } 
1528   else
1529   {
1530       /* No further putMVars, MVar is now empty */
1531       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1532  
1533 #if defined(THREADED_RTS)
1534       foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
1535 #else
1536       SET_INFO(mvar,stg_EMPTY_MVAR_info);
1537 #endif
1538
1539       RET_P(val);
1540   }
1541 }
1542
1543
1544 tryTakeMVarzh_fast
1545 {
1546     W_ mvar, val, info, tso;
1547
1548     /* args: R1 = MVar closure */
1549
1550     mvar = R1;
1551
1552 #if defined(THREADED_RTS)
1553     "ptr" info = foreign "C" lockClosure(mvar "ptr") [];
1554 #else
1555     info = GET_INFO(mvar);
1556 #endif
1557
1558     if (info == stg_EMPTY_MVAR_info) {
1559 #if defined(THREADED_RTS)
1560         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
1561 #endif
1562         /* HACK: we need a pointer to pass back, 
1563          * so we abuse NO_FINALIZER_closure
1564          */
1565         RET_NP(0, stg_NO_FINALIZER_closure);
1566     }
1567
1568     /* we got the value... */
1569     val = StgMVar_value(mvar);
1570
1571     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1572
1573         /* There are putMVar(s) waiting... 
1574          * wake up the first thread on the queue
1575          */
1576         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1577
1578         /* actually perform the putMVar for the thread that we just woke up */
1579         tso = StgMVar_head(mvar);
1580         PerformPut(tso,StgMVar_value(mvar));
1581
1582 #if defined(GRAN) || defined(PAR)
1583         /* ToDo: check 2nd arg (mvar) is right */
1584         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
1585         StgMVar_head(mvar) = tso;
1586 #else
1587         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr",
1588                                            StgMVar_head(mvar) "ptr") [];
1589         StgMVar_head(mvar) = tso;
1590 #endif
1591
1592         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1593             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1594         }
1595 #if defined(THREADED_RTS)
1596         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
1597 #endif
1598     }
1599     else 
1600     {
1601         /* No further putMVars, MVar is now empty */
1602         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1603 #if defined(THREADED_RTS)
1604         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
1605 #else
1606         SET_INFO(mvar,stg_EMPTY_MVAR_info);
1607 #endif
1608     }
1609     
1610     RET_NP(1, val);
1611 }
1612
1613
1614 putMVarzh_fast
1615 {
1616     W_ mvar, info, tso;
1617
1618     /* args: R1 = MVar, R2 = value */
1619     mvar = R1;
1620
1621 #if defined(THREADED_RTS)
1622     "ptr" info = foreign "C" lockClosure(mvar "ptr") [R2];
1623 #else
1624     info = GET_INFO(mvar);
1625 #endif
1626
1627     if (info == stg_FULL_MVAR_info) {
1628         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1629             StgMVar_head(mvar) = CurrentTSO;
1630         } else {
1631             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1632         }
1633         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1634         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1635         StgTSO_block_info(CurrentTSO)  = mvar;
1636         StgMVar_tail(mvar) = CurrentTSO;
1637         
1638         jump stg_block_putmvar;
1639     }
1640   
1641     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1642
1643         /* There are takeMVar(s) waiting: wake up the first one
1644          */
1645         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1646
1647         /* actually perform the takeMVar */
1648         tso = StgMVar_head(mvar);
1649         PerformTake(tso, R2);
1650       
1651 #if defined(GRAN) || defined(PAR)
1652         /* ToDo: check 2nd arg (mvar) is right */
1653         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr", mvar "ptr") [];
1654         StgMVar_head(mvar) = tso;
1655 #else
1656         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
1657         StgMVar_head(mvar) = tso;
1658 #endif
1659
1660         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1661             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1662         }
1663
1664 #if defined(THREADED_RTS)
1665         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
1666 #endif
1667         jump %ENTRY_CODE(Sp(0));
1668     }
1669     else
1670     {
1671         /* No further takes, the MVar is now full. */
1672         StgMVar_value(mvar) = R2;
1673
1674 #if defined(THREADED_RTS)
1675         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
1676 #else
1677         SET_INFO(mvar,stg_FULL_MVAR_info);
1678 #endif
1679         jump %ENTRY_CODE(Sp(0));
1680     }
1681     
1682     /* ToDo: yield afterward for better communication performance? */
1683 }
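
/* For reference, a minimal Haskell-level sketch (not RTS code) of the
 * take/put handoff implemented by takeMVarzh_fast and putMVarzh_fast: a
 * takeMVar on an empty MVar blocks the thread on the MVar's queue, and the
 * next putMVar performs the take on its behalf (PerformTake) and wakes it
 * with unblockOne.
 *
 *   import Control.Concurrent
 *
 *   main :: IO ()
 *   main = do
 *     m <- newEmptyMVar
 *     _ <- forkIO (putMVar m "hello")   -- fills the empty MVar
 *     s <- takeMVar m                   -- blocks until the value arrives
 *     putStrLn s
 */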
1684
1685
1686 tryPutMVarzh_fast
1687 {
1688     W_ mvar, info, tso;
1689
1690     /* args: R1 = MVar, R2 = value */
1691     mvar = R1;
1692
1693 #if defined(THREADED_RTS)
1694     "ptr" info = foreign "C" lockClosure(mvar "ptr") [R2];
1695 #else
1696     info = GET_INFO(mvar);
1697 #endif
1698
1699     if (info == stg_FULL_MVAR_info) {
1700 #if defined(THREADED_RTS)
1701         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
1702 #endif
1703         RET_N(0);
1704     }
1705   
1706     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1707
1708         /* There are takeMVar(s) waiting: wake up the first one
1709          */
1710         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1711         
1712         /* actually perform the takeMVar */
1713         tso = StgMVar_head(mvar);
1714         PerformTake(tso, R2);
1715       
1716 #if defined(GRAN) || defined(PAR)
1717         /* ToDo: check 2nd arg (mvar) is right */
1718         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr", mvar "ptr") [];
1719         StgMVar_head(mvar) = tso;
1720 #else
1721         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
1722         StgMVar_head(mvar) = tso;
1723 #endif
1724
1725         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1726             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1727         }
1728
1729 #if defined(THREADED_RTS)
1730         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
1731 #endif
1732     }
1733     else
1734     {
1735         /* No further takes, the MVar is now full. */
1736         StgMVar_value(mvar) = R2;
1737
1738 #if defined(THREADED_RTS)
1739         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
1740 #else
1741         SET_INFO(mvar,stg_FULL_MVAR_info);
1742 #endif
1743     }
1744     
1745     RET_N(1);
1746     /* ToDo: yield afterward for better communication performance? */
1747 }
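
/* For reference, a minimal Haskell-level sketch (not RTS code):
 * tryPutMVar# returns 0# when the MVar is already full and 1# on success,
 * which Control.Concurrent.MVar exposes as a Bool:
 *
 *   import Control.Concurrent.MVar
 *
 *   main :: IO ()
 *   main = do
 *     m   <- newEmptyMVar
 *     ok1 <- tryPutMVar m 'x'    -- True:  the MVar was empty, RET_N(1)
 *     ok2 <- tryPutMVar m 'y'    -- False: already full, RET_N(0)
 *     print (ok1, ok2)
 */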
1748
1749
1750 /* -----------------------------------------------------------------------------
1751    Stable pointer primitives
1752    -------------------------------------------------------------------------  */
1753
1754 makeStableNamezh_fast
1755 {
1756     W_ index, sn_obj;
1757
1758     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1759   
1760     index = foreign "C" lookupStableName(R1 "ptr") [];
1761
1762     /* Is there already a StableName for this heap object?
1763      *  stable_ptr_table is a pointer to an array of snEntry structs.
1764      */
1765     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1766         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1767         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1768         StgStableName_sn(sn_obj) = index;
1769         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1770     } else {
1771         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1772     }
1773     
1774     RET_P(sn_obj);
1775 }
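
/* For reference, a minimal Haskell-level sketch (not RTS code) of the
 * caching done above: a second makeStableName on the same closure finds the
 * existing sn_obj in its stable_ptr_table entry, so the two StableNames
 * compare equal.
 *
 *   import System.Mem.StableName
 *
 *   main :: IO ()
 *   main = do
 *     let xs = [1, 2, 3] :: [Int]
 *     n1 <- makeStableName xs
 *     n2 <- makeStableName xs
 *     print (n1 == n2)             -- True: same closure, same table entry
 *     print (hashStableName n1)
 */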
1776
1777
1778 makeStablePtrzh_fast
1779 {
1780     /* Args: R1 = a */
1781     W_ sp;
1782     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1783     "ptr" sp = foreign "C" getStablePtr(R1 "ptr") [];
1784     RET_N(sp);
1785 }
1786
1787 deRefStablePtrzh_fast
1788 {
1789     /* Args: R1 = the stable ptr */
1790     W_ r, sp;
1791     sp = R1;
1792     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1793     RET_P(r);
1794 }
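
/* For reference, a minimal Haskell-level sketch (not RTS code) of the
 * StablePtr round trip: makeStablePtrzh_fast underlies newStablePtr and
 * deRefStablePtrzh_fast underlies deRefStablePtr; the table entry keeps the
 * value alive until the pointer is freed.
 *
 *   import Foreign.StablePtr
 *
 *   main :: IO ()
 *   main = do
 *     sp <- newStablePtr "kept alive for foreign code"
 *     s  <- deRefStablePtr sp     -- reads snEntry_addr, as above
 *     putStrLn s
 *     freeStablePtr sp
 */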
1795
1796 /* -----------------------------------------------------------------------------
1797    Bytecode object primitives
1798    -------------------------------------------------------------------------  */
1799
1800 newBCOzh_fast
1801 {
1802     /* R1 = instrs
1803        R2 = literals
1804        R3 = ptrs
1805        R4 = itbls
1806        R5 = arity
1807        R6 = bitmap array
1808     */
1809     W_ bco, bitmap_arr, bytes, words;
1810     
1811     bitmap_arr = R6;
1812     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1813     bytes = WDS(words);
1814
1815     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );
1816
1817     bco = Hp - bytes + WDS(1);
1818     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1819     
1820     StgBCO_instrs(bco)     = R1;
1821     StgBCO_literals(bco)   = R2;
1822     StgBCO_ptrs(bco)       = R3;
1823     StgBCO_itbls(bco)      = R4;
1824     StgBCO_arity(bco)      = HALF_W_(R5);
1825     StgBCO_size(bco)       = HALF_W_(words);
1826     
1827     // Copy the arity/bitmap info into the BCO
1828     W_ i;
1829     i = 0;
1830 for:
1831     if (i < StgArrWords_words(bitmap_arr)) {
1832         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1833         i = i + 1;
1834         goto for;
1835     }
1836     
1837     RET_P(bco);
1838 }
1839
1840
1841 mkApUpd0zh_fast
1842 {
1843     // R1 = the BCO# for the AP
1844     //  
1845     W_ ap;
1846
1847     // This function is *only* used to wrap zero-arity BCOs in an
1848     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1849     // saturated and always points directly to a FUN or BCO.
1850     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1851            StgBCO_arity(R1) == HALF_W_(0));
1852
1853     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1854     TICK_ALLOC_UP_THK(0, 0);
1855     CCCS_ALLOC(SIZEOF_StgAP);
1856
1857     ap = Hp - SIZEOF_StgAP + WDS(1);
1858     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1859     
1860     StgAP_n_args(ap) = HALF_W_(0);
1861     StgAP_fun(ap) = R1;
1862     
1863     RET_P(ap);
1864 }
1865
1866 /* -----------------------------------------------------------------------------
1867    Thread I/O blocking primitives
1868    -------------------------------------------------------------------------- */
1869
1870 /* Add a thread to the end of the blocked queue. (C-- version of the C
1871  * macro in Schedule.h).
1872  */
1873 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1874     ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
1875     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1876       W_[blocked_queue_hd] = tso;                       \
1877     } else {                                            \
1878       StgTSO_link(W_[blocked_queue_tl]) = tso;          \
1879     }                                                   \
1880     W_[blocked_queue_tl] = tso;
1881
1882 waitReadzh_fast
1883 {
1884     /* args: R1 */
1885 #ifdef THREADED_RTS
1886     foreign "C" barf("waitRead# on threaded RTS");
1887 #else
1888
1889     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1890     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1891     StgTSO_block_info(CurrentTSO) = R1;
1892     // No locking - we're not going to use this interface in the
1893     // threaded RTS anyway.
1894     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1895     jump stg_block_noregs;
1896 #endif
1897 }
1898
1899 waitWritezh_fast
1900 {
1901     /* args: R1 */
1902 #ifdef THREADED_RTS
1903     foreign "C" barf("waitWrite# on threaded RTS");
1904 #else
1905
1906     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1907     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1908     StgTSO_block_info(CurrentTSO) = R1;
1909     // No locking - we're not going to use this interface in the
1910     // threaded RTS anyway.
1911     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1912     jump stg_block_noregs;
1913 #endif
1914 }
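
/* For reference, a minimal Haskell-level sketch (not RTS code): on the
 * non-threaded RTS these primops sit underneath threadWaitRead and
 * threadWaitWrite from Control.Concurrent, which block the calling thread
 * until the given file descriptor is ready.
 *
 *   import Control.Concurrent (threadWaitRead)
 *   import System.Posix.Types (Fd (..))
 *
 *   main :: IO ()
 *   main = do
 *     threadWaitRead (Fd 0)       -- block until stdin (fd 0) is readable
 *     line <- getLine
 *     putStrLn ("got: " ++ line)
 */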
1915
1916
1917 STRING(stg_delayzh_malloc_str, "delayzh_fast")
1918 delayzh_fast
1919 {
1920 #ifdef mingw32_HOST_OS
1921     W_ ares;
1922     CInt reqID;
1923 #else
1924     W_ t, prev, target;
1925 #endif
1926
1927 #ifdef THREADED_RTS
1928     foreign "C" barf("delay# on threaded RTS");
1929 #else
1930
1931     /* args: R1 (microsecond delay amount) */
1932     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1933     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
1934
1935 #ifdef mingw32_HOST_OS
1936
1937     /* could probably allocate this on the heap instead */
1938     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1939                                             stg_delayzh_malloc_str);
1940     reqID = foreign "C" addDelayRequest(R1);
1941     StgAsyncIOResult_reqID(ares)   = reqID;
1942     StgAsyncIOResult_len(ares)     = 0;
1943     StgAsyncIOResult_errCode(ares) = 0;
1944     StgTSO_block_info(CurrentTSO)  = ares;
1945
1946     /* Having all async-blocked threads reside on the blocked_queue
1947      * simplifies matters, so change the status to BlockedOnDoProc and
1948      * put the delayed thread on the blocked_queue.
1949      */
1950     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1951     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1952     jump stg_block_async_void;
1953
1954 #else
1955
1956     W_ time;
1957     time = foreign "C" getourtimeofday();
1958     target = (R1 / (TICK_MILLISECS*1000)) + time;
1959     StgTSO_block_info(CurrentTSO) = target;
1960
1961     /* Insert the new thread in the sleeping queue. */
1962     prev = NULL;
1963     t = W_[sleeping_queue];
1964 while:
1965     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
1966         prev = t;
1967         t = StgTSO_link(t);
1968         goto while;
1969     }
1970
1971     StgTSO_link(CurrentTSO) = t;
1972     if (prev == NULL) {
1973         W_[sleeping_queue] = CurrentTSO;
1974     } else {
1975         StgTSO_link(prev) = CurrentTSO;
1976     }
1977     jump stg_block_noregs;
1978 #endif
1979 #endif /* !THREADED_RTS */
1980 }
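
/* For reference, a minimal Haskell-level sketch (not RTS code): on the
 * non-threaded RTS delay# is what threadDelay ultimately calls; the argument
 * is a delay in microseconds, matching R1 above.
 *
 *   import Control.Concurrent (threadDelay)
 *
 *   main :: IO ()
 *   main = do
 *     putStrLn "sleeping..."
 *     threadDelay 500000          -- 500000 us = half a second
 *     putStrLn "done"
 */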
1981
1982
1983 #ifdef mingw32_HOST_OS
1984 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
1985 asyncReadzh_fast
1986 {
1987     W_ ares;
1988     CInt reqID;
1989
1990 #ifdef THREADED_RTS
1991     foreign "C" barf("asyncRead# on threaded RTS");
1992 #else
1993
1994     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1995     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1996     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1997
1998     /* could probably allocate this on the heap instead */
1999     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2000                                             stg_asyncReadzh_malloc_str)
2001                         [R1,R2,R3,R4];
2002     reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
2003     StgAsyncIOResult_reqID(ares)   = reqID;
2004     StgAsyncIOResult_len(ares)     = 0;
2005     StgAsyncIOResult_errCode(ares) = 0;
2006     StgTSO_block_info(CurrentTSO)  = ares;
2007     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2008     jump stg_block_async;
2009 #endif
2010 }
2011
2012 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
2013 asyncWritezh_fast
2014 {
2015     W_ ares;
2016     CInt reqID;
2017
2018 #ifdef THREADED_RTS
2019     foreign "C" barf("asyncWrite# on threaded RTS");
2020 #else
2021
2022     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2023     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2024     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2025
2026     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2027                                             stg_asyncWritezh_malloc_str)
2028                         [R1,R2,R3,R4];
2029     reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
2030
2031     StgAsyncIOResult_reqID(ares)   = reqID;
2032     StgAsyncIOResult_len(ares)     = 0;
2033     StgAsyncIOResult_errCode(ares) = 0;
2034     StgTSO_block_info(CurrentTSO)  = ares;
2035     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2036     jump stg_block_async;
2037 #endif
2038 }
2039
2040 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
2041 asyncDoProczh_fast
2042 {
2043     W_ ares;
2044     CInt reqID;
2045
2046 #ifdef THREADED_RTS
2047     foreign "C" barf("asyncDoProc# on threaded RTS");
2048 #else
2049
2050     /* args: R1 = proc, R2 = param */
2051     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2052     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2053
2054     /* could probably allocate this on the heap instead */
2055     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2056                                             stg_asyncDoProczh_malloc_str) 
2057                                 [R1,R2];
2058     reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
2059     StgAsyncIOResult_reqID(ares)   = reqID;
2060     StgAsyncIOResult_len(ares)     = 0;
2061     StgAsyncIOResult_errCode(ares) = 0;
2062     StgTSO_block_info(CurrentTSO) = ares;
2063     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2064     jump stg_block_async;
2065 #endif
2066 }
2067 #endif
2068
2069 /* -----------------------------------------------------------------------------
2070   ** temporary **
2071
2072    classes CCallable and CReturnable don't really exist, but the
2073    compiler insists on generating dictionaries containing references
2074    to GHC_ZCCCallable_static_info etc., so we provide dummy symbols
2075    for these.  Some C compilers can't cope with zero-length static arrays,
2076    so we have to make these one element long.
2077   --------------------------------------------------------------------------- */
2078
2079 section "rodata" {
2080   GHC_ZCCCallable_static_info:   W_ 0;
2081 }
2082
2083 section "rodata" {
2084   GHC_ZCCReturnable_static_info: W_ 0;
2085 }