[project @ 2005-06-14 10:02:11 by tharris]
[ghc-hetmet.git] / ghc / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check, the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *less* than the size of a word, make sure we
42  * round up to the nearest word for the size of the array.
43  */
44
45 newByteArrayzh_fast
46 {
47     W_ words, payload_words, n, p;
48     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
49     n = R1;
50     payload_words = ROUNDUP_BYTES_TO_WDS(n);
51     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
52     "ptr" p = foreign "C" allocateLocal(BaseReg "ptr",words);
53     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
54     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
55     StgArrWords_words(p) = payload_words;
56     RET_P(p);
57 }
58
59 newPinnedByteArrayzh_fast
60 {
61     W_ words, payload_words, n, p;
62
63     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
64     n = R1;
65     payload_words = ROUNDUP_BYTES_TO_WDS(n);
66
67     // We want an 8-byte aligned array.  allocatePinned() gives us
68     // 8-byte aligned memory by default, but we want to align the
69     // *goods* inside the ArrWords object, so we have to check the
70     // size of the ArrWords header and adjust our size accordingly.
71     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
72     if ((SIZEOF_StgArrWords & 7) != 0) {
73         words = words + 1;
74     }
75
76     "ptr" p = foreign "C" allocatePinned(words);
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78
79     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
80     // have to push the object forward one word so that the goods
81     // fall on an 8-byte boundary.
82     if ((SIZEOF_StgArrWords & 7) != 0) {
83         p = p + WDS(1);
84     }
85
86     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
87     StgArrWords_words(p) = payload_words;
88     RET_P(p);
89 }
90
91 newArrayzh_fast
92 {
93     W_ words, n, init, arr, p;
94     /* Args: R1 = words, R2 = initialisation value */
95
96     n = R1;
97     MAYBE_GC(R2_PTR,newArrayzh_fast);
98
99     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
100     "ptr" arr = foreign "C" allocateLocal(BaseReg "ptr",words);
101     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
102
103     SET_HDR(arr, stg_MUT_ARR_PTRS_info, W_[CCCS]);
104     StgMutArrPtrs_ptrs(arr) = n;
105
106     // Initialise all elements of the the array with the value in R2
107     init = R2;
108     p = arr + SIZEOF_StgMutArrPtrs;
109   for:
110     if (p < arr + WDS(words)) {
111         W_[p] = init;
112         p = p + WDS(1);
113         goto for;
114     }
115
116     RET_P(arr);
117 }
118
119 unsafeThawArrayzh_fast
120 {
121   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
122   //
123   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
124   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
125   // it on the mutable list for the GC to remove (removing something from
126   // the mutable list is not easy, because the mut_list is only singly-linked).
127   // 
128   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
129   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0 to indicate
130   // that it is still on the mutable list.
131
132   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
133   // either it is on a mut_list, or it isn't.  We adopt the convention that
134   // the mut_link field is NULL if it isn't on a mut_list, and the GC
135   // maintains this invariant.
136   //
137   if (%INFO_TYPE(%GET_STD_INFO(R1)) != HALF_W_(MUT_ARR_PTRS_FROZEN0)) {
138         foreign "C" recordMutableLock(R1 "ptr");
139   }
140
141   SET_INFO(R1,stg_MUT_ARR_PTRS_info);
142
143   RET_P(R1);
144 }
145
146 /* -----------------------------------------------------------------------------
147    MutVar primitives
148    -------------------------------------------------------------------------- */
149
150 newMutVarzh_fast
151 {
152     W_ mv;
153     /* Args: R1 = initialisation value */
154
155     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
156
157     mv = Hp - SIZEOF_StgMutVar + WDS(1);
158     SET_HDR(mv,stg_MUT_VAR_info,W_[CCCS]);
159     StgMutVar_var(mv) = R1;
160     
161     RET_P(mv);
162 }
163
164 atomicModifyMutVarzh_fast
165 {
166     W_ mv, z, x, y, r;
167     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
168
169     /* If x is the current contents of the MutVar#, then 
170        We want to make the new contents point to
171
172          (sel_0 (f x))
173  
174        and the return value is
175          
176          (sel_1 (f x))
177
178         obviously we can share (f x).
179
180          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
181          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
182          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
183     */
184
185 #if MIN_UPD_SIZE > 1
186 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
187 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
188 #else
189 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
190 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
191 #endif
192
193 #if MIN_UPD_SIZE > 2
194 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
195 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
196 #else
197 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
198 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
199 #endif
200
201 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
202
203    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
204
205 #if defined(SMP)
206     foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
207 #endif
208
209    x = StgMutVar_var(R1);
210
211    TICK_ALLOC_THUNK_2();
212    CCCS_ALLOC(THUNK_2_SIZE);
213    z = Hp - THUNK_2_SIZE + WDS(1);
214    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
215    LDV_RECORD_CREATE(z);
216    StgThunk_payload(z,0) = R2;
217    StgThunk_payload(z,1) = x;
218
219    TICK_ALLOC_THUNK_1();
220    CCCS_ALLOC(THUNK_1_SIZE);
221    y = z - THUNK_1_SIZE;
222    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
223    LDV_RECORD_CREATE(y);
224    StgThunk_payload(y,0) = z;
225
226    StgMutVar_var(R1) = y;
227
228    TICK_ALLOC_THUNK_1();
229    CCCS_ALLOC(THUNK_1_SIZE);
230    r = y - THUNK_1_SIZE;
231    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
232    LDV_RECORD_CREATE(r);
233    StgThunk_payload(r,0) = z;
234
235 #if defined(SMP)
236     foreign "C" RELEASE_LOCK(sm_mutex "ptr");
237 #endif
238
239    RET_P(r);
240 }
241
242 /* -----------------------------------------------------------------------------
243    Foreign Object Primitives
244    -------------------------------------------------------------------------- */
245
246 mkForeignObjzh_fast
247 {
248   /* R1 = ptr to foreign object,
249   */
250   W_ result;
251
252   ALLOC_PRIM( SIZEOF_StgForeignObj, NO_PTRS, mkForeignObjzh_fast);
253
254   result = Hp - SIZEOF_StgForeignObj + WDS(1);
255   SET_HDR(result,stg_FOREIGN_info,W_[CCCS]);
256   StgForeignObj_data(result) = R1;
257
258   /* returns (# s#, ForeignObj# #) */
259   RET_P(result);
260 }
261
262 /* -----------------------------------------------------------------------------
263    Weak Pointer Primitives
264    -------------------------------------------------------------------------- */
265
266 STRING(stg_weak_msg,"New weak pointer at %p\n")
267
268 mkWeakzh_fast
269 {
270   /* R1 = key
271      R2 = value
272      R3 = finalizer (or NULL)
273   */
274   W_ w;
275
276   if (R3 == NULL) {
277     R3 = stg_NO_FINALIZER_closure;
278   }
279
280   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
281
282   w = Hp - SIZEOF_StgWeak + WDS(1);
283   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
284
285   StgWeak_key(w)       = R1;
286   StgWeak_value(w)     = R2;
287   StgWeak_finalizer(w) = R3;
288
289   StgWeak_link(w)       = W_[weak_ptr_list];
290   W_[weak_ptr_list]     = w;
291
292   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w));
293
294   RET_P(w);
295 }
296
297
298 finalizzeWeakzh_fast
299 {
300   /* R1 = weak ptr
301    */
302   W_ w, f;
303
304   w = R1;
305
306   // already dead?
307   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
308       RET_NP(0,stg_NO_FINALIZER_closure);
309   }
310
311   // kill it
312 #ifdef PROFILING
313   // @LDV profiling
314   // A weak pointer is inherently used, so we do not need to call
315   // LDV_recordDead_FILL_SLOP_DYNAMIC():
316   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
317   // or, LDV_recordDead():
318   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
319   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
320   // large as weak pointers, so there is no need to fill the slop, either.
321   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
322 #endif
323
324   //
325   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
326   //
327   SET_INFO(w,stg_DEAD_WEAK_info);
328   LDV_RECORD_CREATE(w);
329
330   f = StgWeak_finalizer(w);
331   StgDeadWeak_link(w) = StgWeak_link(w);
332
333   /* return the finalizer */
334   if (f == stg_NO_FINALIZER_closure) {
335       RET_NP(0,stg_NO_FINALIZER_closure);
336   } else {
337       RET_NP(1,f);
338   }
339 }
340
341 deRefWeakzh_fast
342 {
343   /* R1 = weak ptr */
344   W_ w, code, val;
345
346   w = R1;
347   if (GET_INFO(w) == stg_WEAK_info) {
348     code = 1;
349     val = StgWeak_value(w);
350   } else {
351     code = 0;
352     val = w;
353   }
354   RET_NP(code,val);
355 }
356
357 /* -----------------------------------------------------------------------------
358    Arbitrary-precision Integer operations.
359
360    There are some assumptions in this code that mp_limb_t == W_.  This is
361    the case for all the platforms that GHC supports, currently.
362    -------------------------------------------------------------------------- */
363
364 int2Integerzh_fast
365 {
366    /* arguments: R1 = Int# */
367
368    W_ val, s, p;        /* to avoid aliasing */
369
370    val = R1;
371    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
372
373    p = Hp - SIZEOF_StgArrWords;
374    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
375    StgArrWords_words(p) = 1;
376
377    /* mpz_set_si is inlined here, makes things simpler */
378    if (%lt(val,0)) { 
379         s  = -1;
380         Hp(0) = -val;
381    } else { 
382      if (%gt(val,0)) {
383         s = 1;
384         Hp(0) = val;
385      } else {
386         s = 0;
387      }
388   }
389
390    /* returns (# size  :: Int#, 
391                  data  :: ByteArray# 
392                #)
393    */
394    RET_NP(s,p);
395 }
396
397 word2Integerzh_fast
398 {
399    /* arguments: R1 = Word# */
400
401    W_ val, s, p;        /* to avoid aliasing */
402
403    val = R1;
404
405    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
406
407    p = Hp - SIZEOF_StgArrWords;
408    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
409    StgArrWords_words(p) = 1;
410
411    if (val != 0) {
412         s = 1;
413         W_[Hp] = val;
414    } else {
415         s = 0;
416    }
417
418    /* returns (# size  :: Int#, 
419                  data  :: ByteArray# #)
420    */
421    RET_NP(s,p);
422 }
423
424
425 /*
426  * 'long long' primops for converting to/from Integers.
427  */
428
429 #ifdef SUPPORT_LONG_LONGS
430
431 int64ToIntegerzh_fast
432 {
433    /* arguments: L1 = Int64# */
434
435    L_ val;
436    W_ hi, s, neg, words_needed, p;
437
438    val = L1;
439    neg = 0;
440
441    if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) )  { 
442        words_needed = 2;
443    } else { 
444        // minimum is one word
445        words_needed = 1;
446    }
447
448    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
449                NO_PTRS, int64ToIntegerzh_fast );
450
451    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
452    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
453    StgArrWords_words(p) = words_needed;
454
455    if ( %lt(val,0::L_) ) {
456      neg = 1;
457      val = -val;
458    }
459
460    hi = TO_W_(val >> 32);
461
462    if ( words_needed == 2 )  { 
463       s = 2;
464       Hp(-1) = TO_W_(val);
465       Hp(0) = hi;
466    } else { 
467        if ( val != 0::L_ ) {
468            s = 1;
469            Hp(0) = TO_W_(val);
470        } else /* val==0 */  {
471            s = 0;
472        }
473    }
474    if ( neg != 0 ) {
475         s = -s;
476    }
477
478    /* returns (# size  :: Int#, 
479                  data  :: ByteArray# #)
480    */
481    RET_NP(s,p);
482 }
483
484 word64ToIntegerzh_fast
485 {
486    /* arguments: L1 = Word64# */
487
488    L_ val;
489    W_ hi, s, words_needed, p;
490
491    val = L1;
492    if ( val >= 0x100000000::L_ ) {
493       words_needed = 2;
494    } else {
495       words_needed = 1;
496    }
497
498    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
499                NO_PTRS, word64ToIntegerzh_fast );
500
501    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
502    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
503    StgArrWords_words(p) = words_needed;
504
505    hi = TO_W_(val >> 32);
506    if ( val >= 0x100000000::L_ ) { 
507      s = 2;
508      Hp(-1) = TO_W_(val);
509      Hp(0)  = hi;
510    } else {
511       if ( val != 0::L_ ) {
512         s = 1;
513         Hp(0) = TO_W_(val);
514      } else /* val==0 */  {
515       s = 0;
516      }
517   }
518
519    /* returns (# size  :: Int#, 
520                  data  :: ByteArray# #)
521    */
522    RET_NP(s,p);
523 }
524
525
526 #endif /* SUPPORT_LONG_LONGS */
527
528 /* ToDo: this is shockingly inefficient */
529
530 #ifndef SMP
531 section "bss" {
532   mp_tmp1:
533     bits8 [SIZEOF_MP_INT];
534 }
535
536 section "bss" {
537   mp_tmp2:
538     bits8 [SIZEOF_MP_INT];
539 }
540
541 section "bss" {
542   mp_result1:
543     bits8 [SIZEOF_MP_INT];
544 }
545
546 section "bss" {
547   mp_result2:
548     bits8 [SIZEOF_MP_INT];
549 }
550 #endif
551
552 #ifdef SMP
553 #define FETCH_MP_TEMP(X) \
554 W_ X; \
555 X = BaseReg + (OFFSET_StgRegTable_r ## X);
556 #else
557 #define FETCH_MP_TEMP(X) /* Nothing */
558 #endif
559
560 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
561 name                                                                    \
562 {                                                                       \
563   CInt s1, s2;                                                          \
564   W_ d1, d2;                                                            \
565   FETCH_MP_TEMP(mp_tmp1);                                               \
566   FETCH_MP_TEMP(mp_tmp2);                                               \
567   FETCH_MP_TEMP(mp_result1)                                             \
568   FETCH_MP_TEMP(mp_result2);                                            \
569                                                                         \
570   /* call doYouWantToGC() */                                            \
571   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
572                                                                         \
573   s1 = W_TO_INT(R1);                                                    \
574   d1 = R2;                                                              \
575   s2 = W_TO_INT(R3);                                                    \
576   d2 = R4;                                                              \
577                                                                         \
578   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
579   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
580   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
581   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
582   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
583   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
584                                                                         \
585   foreign "C" mpz_init(mp_result1 "ptr");                               \
586                                                                         \
587   /* Perform the operation */                                           \
588   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr");   \
589                                                                         \
590   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
591          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
592 }
593
594 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
595 name                                                                    \
596 {                                                                       \
597   CInt s1;                                                              \
598   W_ d1;                                                                \
599   FETCH_MP_TEMP(mp_tmp1);                                               \
600   FETCH_MP_TEMP(mp_result1)                                             \
601                                                                         \
602   /* call doYouWantToGC() */                                            \
603   MAYBE_GC(R2_PTR, name);                                               \
604                                                                         \
605   d1 = R2;                                                              \
606   s1 = W_TO_INT(R1);                                                    \
607                                                                         \
608   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
609   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
610   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
611                                                                         \
612   foreign "C" mpz_init(mp_result1 "ptr");                               \
613                                                                         \
614   /* Perform the operation */                                           \
615   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr");                   \
616                                                                         \
617   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
618          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
619 }
620
621 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
622 name                                                                                    \
623 {                                                                                       \
624   CInt s1, s2;                                                                          \
625   W_ d1, d2;                                                                            \
626   FETCH_MP_TEMP(mp_tmp1);                                                               \
627   FETCH_MP_TEMP(mp_tmp2);                                                               \
628   FETCH_MP_TEMP(mp_result1)                                                             \
629   FETCH_MP_TEMP(mp_result2)                                                             \
630                                                                                         \
631   /* call doYouWantToGC() */                                                            \
632   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
633                                                                                         \
634   s1 = W_TO_INT(R1);                                                                    \
635   d1 = R2;                                                                              \
636   s2 = W_TO_INT(R3);                                                                    \
637   d2 = R4;                                                                              \
638                                                                                         \
639   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
640   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
641   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
642   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
643   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
644   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
645                                                                                         \
646   foreign "C" mpz_init(mp_result1 "ptr");                                               \
647   foreign "C" mpz_init(mp_result2 "ptr");                                               \
648                                                                                         \
649   /* Perform the operation */                                                           \
650   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr");    \
651                                                                                         \
652   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
653            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
654            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
655            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
656 }
657
658 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add)
659 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub)
660 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul)
661 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd)
662 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q)
663 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r)
664 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact)
665 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and)
666 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior)
667 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor)
668 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com)
669
670 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr)
671 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr)
672
673 #ifndef SMP
674 section "bss" {
675   mp_tmp_w:  W_; // NB. mp_tmp_w is really an here mp_limb_t
676 }
677 #endif
678
679 gcdIntzh_fast
680 {
681     /* R1 = the first Int#; R2 = the second Int# */
682     W_ r; 
683     FETCH_MP_TEMP(mp_tmp_w);
684
685     W_[mp_tmp_w] = R1;
686     r = foreign "C" mpn_gcd_1(mp_tmp_w "ptr", 1, R2);
687
688     R1 = r;
689     /* Result parked in R1, return via info-pointer at TOS */
690     jump %ENTRY_CODE(Sp(0));
691 }
692
693
694 gcdIntegerIntzh_fast
695 {
696     /* R1 = s1; R2 = d1; R3 = the int */
697     R1 = foreign "C" mpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3);
698     
699     /* Result parked in R1, return via info-pointer at TOS */
700     jump %ENTRY_CODE(Sp(0));
701 }
702
703
704 cmpIntegerIntzh_fast
705 {
706     /* R1 = s1; R2 = d1; R3 = the int */
707     W_ usize, vsize, v_digit, u_digit;
708
709     usize = R1;
710     vsize = 0;
711     v_digit = R3;
712
713     // paraphrased from mpz_cmp_si() in the GMP sources
714     if (%gt(v_digit,0)) {
715         vsize = 1;
716     } else { 
717         if (%lt(v_digit,0)) {
718             vsize = -1;
719             v_digit = -v_digit;
720         }
721     }
722
723     if (usize != vsize) {
724         R1 = usize - vsize; 
725         jump %ENTRY_CODE(Sp(0));
726     }
727
728     if (usize == 0) {
729         R1 = 0; 
730         jump %ENTRY_CODE(Sp(0));
731     }
732
733     u_digit = W_[BYTE_ARR_CTS(R2)];
734
735     if (u_digit == v_digit) {
736         R1 = 0; 
737         jump %ENTRY_CODE(Sp(0));
738     }
739
740     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
741         R1 = usize; 
742     } else {
743         R1 = -usize; 
744     }
745
746     jump %ENTRY_CODE(Sp(0));
747 }
748
749 cmpIntegerzh_fast
750 {
751     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
752     W_ usize, vsize, size, up, vp;
753     CInt cmp;
754
755     // paraphrased from mpz_cmp() in the GMP sources
756     usize = R1;
757     vsize = R3;
758
759     if (usize != vsize) {
760         R1 = usize - vsize; 
761         jump %ENTRY_CODE(Sp(0));
762     }
763
764     if (usize == 0) {
765         R1 = 0; 
766         jump %ENTRY_CODE(Sp(0));
767     }
768
769     if (%lt(usize,0)) { // NB. not <, which is unsigned
770         size = -usize;
771     } else {
772         size = usize;
773     }
774
775     up = BYTE_ARR_CTS(R2);
776     vp = BYTE_ARR_CTS(R4);
777
778     cmp = foreign "C" mpn_cmp(up "ptr", vp "ptr", size);
779
780     if (cmp == 0 :: CInt) {
781         R1 = 0; 
782         jump %ENTRY_CODE(Sp(0));
783     }
784
785     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
786         R1 = 1;
787     } else {
788         R1 = (-1); 
789     }
790     /* Result parked in R1, return via info-pointer at TOS */
791     jump %ENTRY_CODE(Sp(0));
792 }
793
794 integer2Intzh_fast
795 {
796     /* R1 = s; R2 = d */
797     W_ r, s;
798
799     s = R1;
800     if (s == 0) {
801         r = 0;
802     } else {
803         r = W_[R2 + SIZEOF_StgArrWords];
804         if (%lt(s,0)) {
805             r = -r;
806         }
807     }
808     /* Result parked in R1, return via info-pointer at TOS */
809     R1 = r;
810     jump %ENTRY_CODE(Sp(0));
811 }
812
813 integer2Wordzh_fast
814 {
815   /* R1 = s; R2 = d */
816   W_ r, s;
817
818   s = R1;
819   if (s == 0) {
820     r = 0;
821   } else {
822     r = W_[R2 + SIZEOF_StgArrWords];
823     if (%lt(s,0)) {
824         r = -r;
825     }
826   }
827   /* Result parked in R1, return via info-pointer at TOS */
828   R1 = r;
829   jump %ENTRY_CODE(Sp(0));
830 }
831
832 decodeFloatzh_fast
833
834     W_ p;
835     F_ arg;
836     FETCH_MP_TEMP(mp_tmp1);
837     FETCH_MP_TEMP(mp_tmp_w);
838     
839     /* arguments: F1 = Float# */
840     arg = F1;
841     
842     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
843     
844     /* Be prepared to tell Lennart-coded __decodeFloat
845        where mantissa._mp_d can be put (it does not care about the rest) */
846     p = Hp - SIZEOF_StgArrWords;
847     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
848     StgArrWords_words(p) = 1;
849     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
850     
851     /* Perform the operation */
852     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg);
853     
854     /* returns: (Int# (expn), Int#, ByteArray#) */
855     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
856 }
857
858 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
859 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
860
861 decodeDoublezh_fast
862
863     D_ arg;
864     W_ p;
865     FETCH_MP_TEMP(mp_tmp1);
866     FETCH_MP_TEMP(mp_tmp_w);
867
868     /* arguments: D1 = Double# */
869     arg = D1;
870
871     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
872     
873     /* Be prepared to tell Lennart-coded __decodeDouble
874        where mantissa.d can be put (it does not care about the rest) */
875     p = Hp - ARR_SIZE + WDS(1);
876     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
877     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
878     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
879
880     /* Perform the operation */
881     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg);
882     
883     /* returns: (Int# (expn), Int#, ByteArray#) */
884     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
885 }
886
887 /* -----------------------------------------------------------------------------
888  * Concurrency primitives
889  * -------------------------------------------------------------------------- */
890
891 forkzh_fast
892 {
893   /* args: R1 = closure to spark */
894   
895   MAYBE_GC(R1_PTR, forkzh_fast);
896
897   // create it right now, return ThreadID in R1
898   "ptr" R1 = foreign "C" createIOThread( RtsFlags_GcFlags_initialStkSize(RtsFlags), 
899                                    R1 "ptr");
900   foreign "C" scheduleThread(R1 "ptr");
901
902   // switch at the earliest opportunity
903   CInt[context_switch] = 1 :: CInt;
904   
905   RET_P(R1);
906 }
907
908 yieldzh_fast
909 {
910   jump stg_yield_noregs;
911 }
912
913 myThreadIdzh_fast
914 {
915   /* no args. */
916   RET_P(CurrentTSO);
917 }
918
919 labelThreadzh_fast
920 {
921   /* args: 
922         R1 = ThreadId#
923         R2 = Addr# */
924 #ifdef DEBUG
925   foreign "C" labelThread(R1 "ptr", R2 "ptr");
926 #endif
927   jump %ENTRY_CODE(Sp(0));
928 }
929
930 isCurrentThreadBoundzh_fast
931 {
932   /* no args */
933   W_ r;
934   r = foreign "C" isThreadBound(CurrentTSO);
935   RET_N(r);
936 }
937
938
939 /* -----------------------------------------------------------------------------
940  * TVar primitives
941  * -------------------------------------------------------------------------- */
942
943 #ifdef REG_R1
944 #define SP_OFF 0
945 #define IF_NOT_REG_R1(x) 
946 #else
947 #define SP_OFF 1
948 #define IF_NOT_REG_R1(x) x
949 #endif
950
951 // Catch retry frame ------------------------------------------------------------
952
953 #define CATCH_RETRY_FRAME_ERROR(label) \
954   label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }
955
956 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
957 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
958 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
959 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
960 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
961 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
962 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
963 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)
964
965 #if MAX_VECTORED_RTN > 8
966 #error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
967 #endif
968
969 #if defined(PROFILING)
970 #define CATCH_RETRY_FRAME_BITMAP 7
971 #define CATCH_RETRY_FRAME_WORDS  6
972 #else
973 #define CATCH_RETRY_FRAME_BITMAP 1
974 #define CATCH_RETRY_FRAME_WORDS  4
975 #endif
976
977 INFO_TABLE_RET(stg_catch_retry_frame,
978                CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
979                CATCH_RETRY_FRAME,
980                stg_catch_retry_frame_0_ret,
981                stg_catch_retry_frame_1_ret,
982                stg_catch_retry_frame_2_ret,
983                stg_catch_retry_frame_3_ret,
984                stg_catch_retry_frame_4_ret,
985                stg_catch_retry_frame_5_ret,
986                stg_catch_retry_frame_6_ret,
987                stg_catch_retry_frame_7_ret)
988 {
989    W_ r, frame, trec, outer;
990    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
991
992    frame = Sp;
993    trec = StgTSO_trec(CurrentTSO);
994    "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
995    r = foreign "C" stmCommitNestedTransaction(BaseReg "ptr", trec "ptr");
996    if (r) {
997      /* Succeeded (either first branch or second branch) */
998      StgTSO_trec(CurrentTSO) = outer;
999      Sp = Sp + SIZEOF_StgCatchRetryFrame;
1000      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1001      jump %ENTRY_CODE(Sp(SP_OFF));
1002    } else {
1003      /* Did not commit: retry */
1004      W_ new_trec;
1005      "ptr" new_trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
1006      StgTSO_trec(CurrentTSO) = new_trec;
1007      if (StgCatchRetryFrame_running_alt_code(frame)) {
1008        R1 = StgCatchRetryFrame_alt_code(frame);
1009      } else {
1010        R1 = StgCatchRetryFrame_first_code(frame);
1011        StgCatchRetryFrame_first_code_trec(frame) = new_trec;
1012      }
1013      Sp_adj(-1);
1014      jump RET_LBL(stg_ap_v);
1015    }
1016 }
1017
1018
1019 // Atomically frame -------------------------------------------------------------
1020
1021
1022 #define ATOMICALLY_FRAME_ERROR(label) \
1023   label { foreign "C" barf("atomically_frame incorrectly entered!"); }
1024
1025 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
1026 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
1027 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
1028 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
1029 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
1030 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
1031 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
1032 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)
1033
1034 #if MAX_VECTORED_RTN > 8
1035 #error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
1036 #endif
1037
1038 #if defined(PROFILING)
1039 #define ATOMICALLY_FRAME_BITMAP 7
1040 #define ATOMICALLY_FRAME_WORDS  4
1041 #else
1042 #define ATOMICALLY_FRAME_BITMAP 1
1043 #define ATOMICALLY_FRAME_WORDS  2
1044 #endif
1045
1046
1047 INFO_TABLE_RET(stg_atomically_frame,
1048                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1049                ATOMICALLY_FRAME,
1050                stg_atomically_frame_0_ret,
1051                stg_atomically_frame_1_ret,
1052                stg_atomically_frame_2_ret,
1053                stg_atomically_frame_3_ret,
1054                stg_atomically_frame_4_ret,
1055                stg_atomically_frame_5_ret,
1056                stg_atomically_frame_6_ret,
1057                stg_atomically_frame_7_ret)
1058 {
1059    W_ frame, trec, valid;
1060    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1061
1062    frame = Sp;
1063    trec = StgTSO_trec(CurrentTSO);
1064    if (StgAtomicallyFrame_waiting(frame)) {
1065      /* The TSO is currently waiting: should we stop waiting? */
1066      valid = foreign "C" stmReWait(CurrentTSO "ptr");
1067      if (valid) {
1068        /* Previous attempt is still valid: no point trying again yet */
1069           IF_NOT_REG_R1(Sp_adj(-2);
1070                         Sp(1) = stg_NO_FINALIZER_closure;
1071                         Sp(0) = stg_ut_1_0_unreg_info;)
1072        jump stg_block_noregs;
1073      } else {
1074        /* Previous attempt is no longer valid: try again */
1075        "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", NO_TREC "ptr");
1076        StgTSO_trec(CurrentTSO) = trec;
1077        StgAtomicallyFrame_waiting(frame) = 0 :: CInt; /* false; */
1078        R1 = StgAtomicallyFrame_code(frame);
1079        Sp_adj(-1);
1080        jump RET_LBL(stg_ap_v);
1081      }
1082    } else {
1083      /* The TSO is not currently waiting: try to commit the transaction */
1084      valid = foreign "C" stmCommitTransaction(BaseReg "ptr", trec "ptr");
1085      if (valid) {
1086        /* Transaction was valid: commit succeeded */
1087        StgTSO_trec(CurrentTSO) = NO_TREC;
1088        Sp = Sp + SIZEOF_StgAtomicallyFrame;
1089        IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1090        jump %ENTRY_CODE(Sp(SP_OFF));
1091      } else {
1092        /* Transaction was not valid: try again */
1093        "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", NO_TREC "ptr");
1094        StgTSO_trec(CurrentTSO) = trec;
1095        R1 = StgAtomicallyFrame_code(frame);
1096        Sp_adj(-1);
1097        jump RET_LBL(stg_ap_v);
1098      }
1099    }
1100 }
1101
1102
1103 // STM catch frame --------------------------------------------------------------
1104
1105 #define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)          \
1106    label                                                   \
1107    {                                                       \
1108       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )  \
1109       Sp = Sp + SIZEOF_StgCatchSTMFrame;                   \
1110       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)             \
1111       jump ret;                                            \
1112    }
1113
1114 #ifdef REG_R1
1115 #define SP_OFF 0
1116 #else
1117 #define SP_OFF 1
1118 #endif
1119
1120 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
1121 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
1122 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
1123 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
1124 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
1125 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
1126 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
1127 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
1128
1129 #if MAX_VECTORED_RTN > 8
1130 #error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
1131 #endif
1132
1133 #if defined(PROFILING)
1134 #define CATCH_STM_FRAME_BITMAP 3
1135 #define CATCH_STM_FRAME_WORDS  3
1136 #else
1137 #define CATCH_STM_FRAME_BITMAP 0
1138 #define CATCH_STM_FRAME_WORDS  1
1139 #endif
1140
1141 /* Catch frames are very similar to update frames, but when entering
1142  * one we just pop the frame off the stack and perform the correct
1143  * kind of return to the activation record underneath us on the stack.
1144  */
1145
1146 INFO_TABLE_RET(stg_catch_stm_frame,
1147                CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
1148                CATCH_STM_FRAME,
1149                stg_catch_stm_frame_0_ret,
1150                stg_catch_stm_frame_1_ret,
1151                stg_catch_stm_frame_2_ret,
1152                stg_catch_stm_frame_3_ret,
1153                stg_catch_stm_frame_4_ret,
1154                stg_catch_stm_frame_5_ret,
1155                stg_catch_stm_frame_6_ret,
1156                stg_catch_stm_frame_7_ret)
1157 CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1158
1159
1160 // Primop definition ------------------------------------------------------------
1161
1162 atomicallyzh_fast
1163 {
1164   W_ frame;
1165   W_ old_trec;
1166   W_ new_trec;
1167   
1168   // stmStartTransaction may allocate
1169   MAYBE_GC (R1_PTR, atomicallyzh_fast); 
1170
1171   /* Args: R1 = m :: STM a */
1172   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1173
1174   /* Set up the atomically frame */
1175   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1176   frame = Sp;
1177
1178   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1179   StgAtomicallyFrame_waiting(frame) = 0 :: CInt; // False
1180   StgAtomicallyFrame_code(frame) = R1;
1181
1182   /* Start the memory transcation */
1183   old_trec = StgTSO_trec(CurrentTSO);
1184   ASSERT(old_trec == NO_TREC);
1185   "ptr" new_trec = foreign "C" stmStartTransaction(BaseReg "ptr", old_trec "ptr");
1186   StgTSO_trec(CurrentTSO) = new_trec;
1187
1188   /* Apply R1 to the realworld token */
1189   Sp_adj(-1);
1190   jump RET_LBL(stg_ap_v);
1191 }
1192
1193
1194 catchSTMzh_fast
1195 {
1196   W_ frame;
1197   
1198   /* Args: R1 :: STM a */
1199   /* Args: R2 :: Exception -> STM a */
1200   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1201
1202   /* Set up the catch frame */
1203   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1204   frame = Sp;
1205
1206   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1207   StgCatchSTMFrame_handler(frame) = R2;
1208
1209   /* Apply R1 to the realworld token */
1210   Sp_adj(-1);
1211   jump RET_LBL(stg_ap_v);
1212 }
1213
1214
1215 catchRetryzh_fast
1216 {
1217   W_ frame;
1218   W_ new_trec;
1219   W_ trec;
1220
1221   // stmStartTransaction may allocate
1222   MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 
1223
1224   /* Args: R1 :: STM a */
1225   /* Args: R2 :: STM a */
1226   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1227
1228   /* Start a nested transaction within which to run the first code */
1229   trec = StgTSO_trec(CurrentTSO);
1230   "ptr" new_trec = foreign "C" stmStartTransaction(BaseReg "ptr", trec "ptr");
1231   StgTSO_trec(CurrentTSO) = new_trec;
1232
1233   /* Set up the catch-retry frame */
1234   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1235   frame = Sp;
1236   
1237   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1238   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1239   StgCatchRetryFrame_first_code(frame) = R1;
1240   StgCatchRetryFrame_alt_code(frame) = R2;
1241   StgCatchRetryFrame_first_code_trec(frame) = new_trec;
1242
1243   /* Apply R1 to the realworld token */
1244   Sp_adj(-1);
1245   jump RET_LBL(stg_ap_v);  
1246 }
1247
1248
1249 retryzh_fast
1250 {
1251   W_ frame_type;
1252   W_ frame;
1253   W_ trec;
1254   W_ outer;
1255   W_ r;
1256
1257   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1258
1259   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1260 retry_pop_stack:
1261   trec = StgTSO_trec(CurrentTSO);
1262   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
1263   StgTSO_sp(CurrentTSO) = Sp;
1264   frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr");
1265   Sp = StgTSO_sp(CurrentTSO);
1266   frame = Sp;
1267
1268   if (frame_type == CATCH_RETRY_FRAME) {
1269     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1270     ASSERT(outer != NO_TREC);
1271     if (!StgCatchRetryFrame_running_alt_code(frame)) {
1272       // Retry in the first code: try the alternative
1273       "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
1274       StgTSO_trec(CurrentTSO) = trec;
1275       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1276       R1 = StgCatchRetryFrame_alt_code(frame);
1277       Sp_adj(-1);
1278       jump RET_LBL(stg_ap_v);
1279     } else {
1280       // Retry in the alternative code: propagate
1281       W_ other_trec;
1282       other_trec = StgCatchRetryFrame_first_code_trec(frame);
1283       r = foreign "C" stmCommitNestedTransaction(BaseReg "ptr", other_trec "ptr");
1284       if (r) {
1285         r = foreign "C" stmCommitNestedTransaction(BaseReg "ptr", trec "ptr");
1286       }
1287       if (r) {
1288         // Merge between siblings succeeded: commit it back to enclosing transaction
1289         // and then propagate the retry
1290         StgTSO_trec(CurrentTSO) = outer;
1291         Sp = Sp + SIZEOF_StgCatchRetryFrame;
1292         goto retry_pop_stack;
1293       } else {
1294         // Merge failed: we musn't propagate the retry.  Try both paths again.
1295         "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
1296         StgCatchRetryFrame_first_code_trec(frame) = trec;
1297         StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1298         StgTSO_trec(CurrentTSO) = trec;
1299         R1 = StgCatchRetryFrame_first_code(frame);
1300         Sp_adj(-1);
1301         jump RET_LBL(stg_ap_v);
1302       }
1303     }
1304   }
1305
1306   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1307   ASSERT(frame_type == ATOMICALLY_FRAME);
1308   ASSERT(outer == NO_TREC);
1309   r = foreign "C" stmWait(BaseReg "ptr", CurrentTSO "ptr", trec "ptr");
1310   if (r) {
1311     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1312     StgAtomicallyFrame_waiting(frame) = 1 :: CInt; // true
1313     Sp = frame;
1314     // Fix up the stack in the unregisterised case: the return convention is different.
1315     IF_NOT_REG_R1(Sp_adj(-2); 
1316                   Sp(1) = stg_NO_FINALIZER_closure;
1317                   Sp(0) = stg_ut_1_0_unreg_info;)
1318     jump stg_block_noregs;
1319   } else {
1320     // Transaction was not valid: retry immediately
1321     "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
1322     StgTSO_trec(CurrentTSO) = trec;
1323     R1 = StgAtomicallyFrame_code(frame);
1324     Sp = frame;
1325     Sp_adj(-1);
1326     jump RET_LBL(stg_ap_v);
1327   }
1328 }
1329
1330
1331 newTVarzh_fast
1332 {
1333   W_ tv;
1334   W_ new_value;
1335
1336   /* Args: R1 = initialisation value */
1337
1338   MAYBE_GC (R1_PTR, newTVarzh_fast); 
1339   new_value = R1;
1340   tv = foreign "C" stmNewTVar(BaseReg "ptr", new_value "ptr");
1341   RET_P(tv);
1342 }
1343
1344
1345 readTVarzh_fast
1346 {
1347   W_ trec;
1348   W_ tvar;
1349   W_ result;
1350
1351   /* Args: R1 = TVar closure */
1352
1353   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1354   trec = StgTSO_trec(CurrentTSO);
1355   tvar = R1;
1356   "ptr" result = foreign "C" stmReadTVar(BaseReg "ptr", trec "ptr", tvar "ptr");
1357
1358   RET_P(result);
1359 }
1360
1361
1362 writeTVarzh_fast
1363 {
1364   W_ trec;
1365   W_ tvar;
1366   W_ new_value;
1367   
1368   /* Args: R1 = TVar closure */
1369   /*       R2 = New value    */
1370
1371   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1372   trec = StgTSO_trec(CurrentTSO);
1373   tvar = R1;
1374   new_value = R2;
1375   foreign "C" stmWriteTVar(BaseReg "ptr", trec "ptr", tvar "ptr", new_value "ptr");
1376
1377   jump %ENTRY_CODE(Sp(0));
1378 }
1379
1380
1381 /* -----------------------------------------------------------------------------
1382  * MVar primitives
1383  *
1384  * take & putMVar work as follows.  Firstly, an important invariant:
1385  *
1386  *    If the MVar is full, then the blocking queue contains only
1387  *    threads blocked on putMVar, and if the MVar is empty then the
1388  *    blocking queue contains only threads blocked on takeMVar.
1389  *
1390  * takeMvar:
1391  *    MVar empty : then add ourselves to the blocking queue
1392  *    MVar full  : remove the value from the MVar, and
1393  *                 blocking queue empty     : return
1394  *                 blocking queue non-empty : perform the first blocked putMVar
1395  *                                            from the queue, and wake up the
1396  *                                            thread (MVar is now full again)
1397  *
1398  * putMVar is just the dual of the above algorithm.
1399  *
1400  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1401  * the stack of the thread waiting to do the putMVar.  See
1402  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1403  * the stack layout, and the PerformPut and PerformTake macros below.
1404  *
1405  * It is important that a blocked take or put is woken up with the
1406  * take/put already performed, because otherwise there would be a
1407  * small window of vulnerability where the thread could receive an
1408  * exception and never perform its take or put, and we'd end up with a
1409  * deadlock.
1410  *
1411  * -------------------------------------------------------------------------- */
1412
1413 isEmptyMVarzh_fast
1414 {
1415     /* args: R1 = MVar closure */
1416
1417     if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
1418         RET_N(1);
1419     } else {
1420         RET_N(0);
1421     }
1422 }
1423
1424 newMVarzh_fast
1425 {
1426     /* args: none */
1427     W_ mvar;
1428
1429     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1430   
1431     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1432     SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
1433     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1434     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1435     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1436     RET_P(mvar);
1437 }
1438
1439
1440 /* If R1 isn't available, pass it on the stack */
1441 #ifdef REG_R1
1442 #define PerformTake(tso, value)                         \
1443     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1444     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1445 #else
1446 #define PerformTake(tso, value)                                 \
1447     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1448     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1449 #endif
1450
1451 #define PerformPut(tso,lval)                    \
1452     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1453     lval = W_[StgTSO_sp(tso) - WDS(1)];
1454
1455 takeMVarzh_fast
1456 {
1457     W_ mvar, val, info, tso;
1458
1459     /* args: R1 = MVar closure */
1460     mvar = R1;
1461
1462 #if defined(SMP)
1463     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1464 #else
1465     info = GET_INFO(mvar);
1466 #endif
1467
1468     /* If the MVar is empty, put ourselves on its blocking queue,
1469      * and wait until we're woken up.
1470      */
1471     if (info == stg_EMPTY_MVAR_info) {
1472         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1473             StgMVar_head(mvar) = CurrentTSO;
1474         } else {
1475             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1476         }
1477         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1478         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1479         StgTSO_block_info(CurrentTSO)  = mvar;
1480         StgMVar_tail(mvar) = CurrentTSO;
1481         
1482         jump stg_block_takemvar;
1483   }
1484
1485   /* we got the value... */
1486   val = StgMVar_value(mvar);
1487
1488   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1489   {
1490       /* There are putMVar(s) waiting... 
1491        * wake up the first thread on the queue
1492        */
1493       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1494
1495       /* actually perform the putMVar for the thread that we just woke up */
1496       tso = StgMVar_head(mvar);
1497       PerformPut(tso,StgMVar_value(mvar));
1498
1499 #if defined(GRAN) || defined(PAR)
1500       /* ToDo: check 2nd arg (mvar) is right */
1501       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar),mvar);
1502       StgMVar_head(mvar) = tso;
1503 #else
1504       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1505       StgMVar_head(mvar) = tso;
1506 #endif
1507
1508       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1509           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1510       }
1511
1512 #if defined(SMP)
1513       foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1514 #endif
1515       RET_P(val);
1516   } 
1517   else
1518   {
1519       /* No further putMVars, MVar is now empty */
1520       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1521  
1522 #if defined(SMP)
1523       foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1524 #else
1525       SET_INFO(mvar,stg_EMPTY_MVAR_info);
1526 #endif
1527
1528       RET_P(val);
1529   }
1530 }
1531
1532
1533 tryTakeMVarzh_fast
1534 {
1535     W_ mvar, val, info, tso;
1536
1537     /* args: R1 = MVar closure */
1538
1539     mvar = R1;
1540
1541 #if defined(SMP)
1542     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1543 #else
1544     info = GET_INFO(mvar);
1545 #endif
1546
1547     if (info == stg_EMPTY_MVAR_info) {
1548 #if defined(SMP)
1549         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1550 #endif
1551         /* HACK: we need a pointer to pass back, 
1552          * so we abuse NO_FINALIZER_closure
1553          */
1554         RET_NP(0, stg_NO_FINALIZER_closure);
1555     }
1556
1557     /* we got the value... */
1558     val = StgMVar_value(mvar);
1559
1560     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1561
1562         /* There are putMVar(s) waiting... 
1563          * wake up the first thread on the queue
1564          */
1565         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1566
1567         /* actually perform the putMVar for the thread that we just woke up */
1568         tso = StgMVar_head(mvar);
1569         PerformPut(tso,StgMVar_value(mvar));
1570
1571 #if defined(GRAN) || defined(PAR)
1572         /* ToDo: check 2nd arg (mvar) is right */
1573         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr");
1574         StgMVar_head(mvar) = tso;
1575 #else
1576         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1577         StgMVar_head(mvar) = tso;
1578 #endif
1579
1580         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1581             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1582         }
1583 #if defined(SMP)
1584         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1585 #endif
1586     }
1587     else 
1588     {
1589         /* No further putMVars, MVar is now empty */
1590         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1591 #if defined(SMP)
1592         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1593 #else
1594         SET_INFO(mvar,stg_EMPTY_MVAR_info);
1595 #endif
1596     }
1597     
1598     RET_NP(1, val);
1599 }
1600
1601
1602 putMVarzh_fast
1603 {
1604     W_ mvar, info, tso;
1605
1606     /* args: R1 = MVar, R2 = value */
1607     mvar = R1;
1608
1609 #if defined(SMP)
1610     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1611 #else
1612     info = GET_INFO(mvar);
1613 #endif
1614
1615     if (info == stg_FULL_MVAR_info) {
1616         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1617             StgMVar_head(mvar) = CurrentTSO;
1618         } else {
1619             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1620         }
1621         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1622         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1623         StgTSO_block_info(CurrentTSO)  = mvar;
1624         StgMVar_tail(mvar) = CurrentTSO;
1625         
1626         jump stg_block_putmvar;
1627     }
1628   
1629     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1630
1631         /* There are takeMVar(s) waiting: wake up the first one
1632          */
1633         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1634
1635         /* actually perform the takeMVar */
1636         tso = StgMVar_head(mvar);
1637         PerformTake(tso, R2);
1638       
1639 #if defined(GRAN) || defined(PAR)
1640         /* ToDo: check 2nd arg (mvar) is right */
1641         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr");
1642         StgMVar_head(mvar) = tso;
1643 #else
1644         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1645         StgMVar_head(mvar) = tso;
1646 #endif
1647
1648         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1649             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1650         }
1651
1652 #if defined(SMP)
1653         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1654 #endif
1655         jump %ENTRY_CODE(Sp(0));
1656     }
1657     else
1658     {
1659         /* No further takes, the MVar is now full. */
1660         StgMVar_value(mvar) = R2;
1661
1662 #if defined(SMP)
1663         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1664 #else
1665         SET_INFO(mvar,stg_FULL_MVAR_info);
1666 #endif
1667         jump %ENTRY_CODE(Sp(0));
1668     }
1669     
1670     /* ToDo: yield afterward for better communication performance? */
1671 }
1672
1673
1674 tryPutMVarzh_fast
1675 {
1676     W_ mvar, info, tso;
1677
1678     /* args: R1 = MVar, R2 = value */
1679     mvar = R1;
1680
1681 #if defined(SMP)
1682     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1683 #else
1684     info = GET_INFO(mvar);
1685 #endif
1686
1687     if (info == stg_FULL_MVAR_info) {
1688 #if defined(SMP)
1689         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1690 #endif
1691         RET_N(0);
1692     }
1693   
1694     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1695
1696         /* There are takeMVar(s) waiting: wake up the first one
1697          */
1698         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1699         
1700         /* actually perform the takeMVar */
1701         tso = StgMVar_head(mvar);
1702         PerformTake(tso, R2);
1703       
1704 #if defined(GRAN) || defined(PAR)
1705         /* ToDo: check 2nd arg (mvar) is right */
1706         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr");
1707         StgMVar_head(mvar) = tso;
1708 #else
1709         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1710         StgMVar_head(mvar) = tso;
1711 #endif
1712
1713         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1714             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1715         }
1716
1717 #if defined(SMP)
1718         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1719 #endif
1720         jump %ENTRY_CODE(Sp(0));
1721     }
1722     else
1723     {
1724         /* No further takes, the MVar is now full. */
1725         StgMVar_value(mvar) = R2;
1726
1727 #if defined(SMP)
1728         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1729 #else
1730         SET_INFO(mvar,stg_FULL_MVAR_info);
1731 #endif
1732         jump %ENTRY_CODE(Sp(0));
1733     }
1734     
1735     /* ToDo: yield afterward for better communication performance? */
1736 }
1737
1738
1739 /* -----------------------------------------------------------------------------
1740    Stable pointer primitives
1741    -------------------------------------------------------------------------  */
1742
1743 makeStableNamezh_fast
1744 {
1745     W_ index, sn_obj;
1746
1747     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1748   
1749     index = foreign "C" lookupStableName(R1 "ptr");
1750
1751     /* Is there already a StableName for this heap object?
1752      *  stable_ptr_table is a pointer to an array of snEntry structs.
1753      */
1754     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1755         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1756         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1757         StgStableName_sn(sn_obj) = index;
1758         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1759     } else {
1760         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1761     }
1762     
1763     RET_P(sn_obj);
1764 }
1765
1766
1767 makeStablePtrzh_fast
1768 {
1769     /* Args: R1 = a */
1770     W_ sp;
1771     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1772     "ptr" sp = foreign "C" getStablePtr(R1 "ptr");
1773     RET_N(sp);
1774 }
1775
1776 deRefStablePtrzh_fast
1777 {
1778     /* Args: R1 = the stable ptr */
1779     W_ r, sp;
1780     sp = R1;
1781     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1782     RET_P(r);
1783 }
1784
1785 /* -----------------------------------------------------------------------------
1786    Bytecode object primitives
1787    -------------------------------------------------------------------------  */
1788
1789 newBCOzh_fast
1790 {
1791     /* R1 = instrs
1792        R2 = literals
1793        R3 = ptrs
1794        R4 = itbls
1795        R5 = arity
1796        R6 = bitmap array
1797     */
1798     W_ bco, bitmap_arr, bytes, words;
1799     
1800     bitmap_arr = R6;
1801     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1802     bytes = WDS(words);
1803
1804     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );
1805
1806     bco = Hp - bytes + WDS(1);
1807     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1808     
1809     StgBCO_instrs(bco)     = R1;
1810     StgBCO_literals(bco)   = R2;
1811     StgBCO_ptrs(bco)       = R3;
1812     StgBCO_itbls(bco)      = R4;
1813     StgBCO_arity(bco)      = HALF_W_(R5);
1814     StgBCO_size(bco)       = HALF_W_(words);
1815     
1816     // Copy the arity/bitmap info into the BCO
1817     W_ i;
1818     i = 0;
1819 for:
1820     if (i < StgArrWords_words(bitmap_arr)) {
1821         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1822         i = i + 1;
1823         goto for;
1824     }
1825     
1826     RET_P(bco);
1827 }
1828
1829
1830 mkApUpd0zh_fast
1831 {
1832     // R1 = the BCO# for the AP
1833     //  
1834     W_ ap;
1835
1836     // This function is *only* used to wrap zero-arity BCOs in an
1837     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1838     // saturated and always points directly to a FUN or BCO.
1839     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1840            StgBCO_arity(R1) == HALF_W_(0));
1841
1842     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1843     TICK_ALLOC_UP_THK(0, 0);
1844     CCCS_ALLOC(SIZEOF_StgAP);
1845
1846     ap = Hp - SIZEOF_StgAP + WDS(1);
1847     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1848     
1849     StgAP_n_args(ap) = HALF_W_(0);
1850     StgAP_fun(ap) = R1;
1851     
1852     RET_P(ap);
1853 }
1854
1855 /* -----------------------------------------------------------------------------
1856    Thread I/O blocking primitives
1857    -------------------------------------------------------------------------- */
1858
1859 /* Add a thread to the end of the blocked queue. (C-- version of the C
1860  * macro in Schedule.h).
1861  */
1862 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1863     ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
1864     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1865       W_[blocked_queue_hd] = tso;                       \
1866     } else {                                            \
1867       StgTSO_link(W_[blocked_queue_tl]) = tso;          \
1868     }                                                   \
1869     W_[blocked_queue_tl] = tso;
1870
1871 waitReadzh_fast
1872 {
1873     /* args: R1 */
1874 #ifdef THREADED_RTS
1875     foreign "C" barf("waitRead# on threaded RTS");
1876 #endif
1877
1878     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1879     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1880     StgTSO_block_info(CurrentTSO) = R1;
1881     // No locking - we're not going to use this interface in the
1882     // threaded RTS anyway.
1883     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1884     jump stg_block_noregs;
1885 }
1886
1887 waitWritezh_fast
1888 {
1889     /* args: R1 */
1890 #ifdef THREADED_RTS
1891     foreign "C" barf("waitWrite# on threaded RTS");
1892 #endif
1893
1894     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1895     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1896     StgTSO_block_info(CurrentTSO) = R1;
1897     // No locking - we're not going to use this interface in the
1898     // threaded RTS anyway.
1899     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1900     jump stg_block_noregs;
1901 }
1902
1903
1904 STRING(stg_delayzh_malloc_str, "delayzh_fast")
1905 delayzh_fast
1906 {
1907 #ifdef mingw32_HOST_OS
1908     W_ ares;
1909     CInt reqID;
1910 #else
1911     W_ t, prev, target;
1912 #endif
1913
1914 #ifdef THREADED_RTS
1915     foreign "C" barf("delay# on threaded RTS");
1916 #endif
1917
1918     /* args: R1 (microsecond delay amount) */
1919     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1920     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
1921
1922 #ifdef mingw32_HOST_OS
1923
1924     /* could probably allocate this on the heap instead */
1925     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1926                                             stg_delayzh_malloc_str);
1927     reqID = foreign "C" addDelayRequest(R1);
1928     StgAsyncIOResult_reqID(ares)   = reqID;
1929     StgAsyncIOResult_len(ares)     = 0;
1930     StgAsyncIOResult_errCode(ares) = 0;
1931     StgTSO_block_info(CurrentTSO)  = ares;
1932
1933     /* Having all async-blocked threads reside on the blocked_queue
1934      * simplifies matters, so change the status to OnDoProc put the
1935      * delayed thread on the blocked_queue.
1936      */
1937     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1938     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1939     jump stg_block_async_void;
1940
1941 #else
1942
1943     W_ time;
1944     time = foreign "C" getourtimeofday();
1945     target = (R1 / (TICK_MILLISECS*1000)) + time;
1946     StgTSO_block_info(CurrentTSO) = target;
1947
1948     /* Insert the new thread in the sleeping queue. */
1949     prev = NULL;
1950     t = W_[sleeping_queue];
1951 while:
1952     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
1953         prev = t;
1954         t = StgTSO_link(t);
1955         goto while;
1956     }
1957
1958     StgTSO_link(CurrentTSO) = t;
1959     if (prev == NULL) {
1960         W_[sleeping_queue] = CurrentTSO;
1961     } else {
1962         StgTSO_link(prev) = CurrentTSO;
1963     }
1964     jump stg_block_noregs;
1965 #endif
1966 }
1967
1968
1969 #ifdef mingw32_HOST_OS
1970 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
1971 asyncReadzh_fast
1972 {
1973     W_ ares;
1974     CInt reqID;
1975
1976 #ifdef THREADED_RTS
1977     foreign "C" barf("asyncRead# on threaded RTS");
1978 #endif
1979
1980     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1981     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1982     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1983
1984     /* could probably allocate this on the heap instead */
1985     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1986                                             stg_asyncReadzh_malloc_str);
1987     reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr");
1988     StgAsyncIOResult_reqID(ares)   = reqID;
1989     StgAsyncIOResult_len(ares)     = 0;
1990     StgAsyncIOResult_errCode(ares) = 0;
1991     StgTSO_block_info(CurrentTSO)  = ares;
1992     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1993     jump stg_block_async;
1994 }
1995
1996 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
1997 asyncWritezh_fast
1998 {
1999     W_ ares;
2000     CInt reqID;
2001
2002 #ifdef THREADED_RTS
2003     foreign "C" barf("asyncWrite# on threaded RTS");
2004 #endif
2005
2006     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2007     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2008     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2009
2010     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2011                                             stg_asyncWritezh_malloc_str);
2012     reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr");
2013
2014     StgAsyncIOResult_reqID(ares)   = reqID;
2015     StgAsyncIOResult_len(ares)     = 0;
2016     StgAsyncIOResult_errCode(ares) = 0;
2017     StgTSO_block_info(CurrentTSO)  = ares;
2018     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2019     jump stg_block_async;
2020 }
2021
2022 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
2023 asyncDoProczh_fast
2024 {
2025     W_ ares;
2026     CInt reqID;
2027
2028     /* args: R1 = proc, R2 = param */
2029     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2030     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2031
2032     /* could probably allocate this on the heap instead */
2033     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2034                                             stg_asyncDoProczh_malloc_str);
2035     reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr");
2036     StgAsyncIOResult_reqID(ares)   = reqID;
2037     StgAsyncIOResult_len(ares)     = 0;
2038     StgAsyncIOResult_errCode(ares) = 0;
2039     StgTSO_block_info(CurrentTSO) = ares;
2040     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2041     jump stg_block_async;
2042 }
2043 #endif
2044
2045 /* -----------------------------------------------------------------------------
2046   ** temporary **
2047
2048    classes CCallable and CReturnable don't really exist, but the
2049    compiler insists on generating dictionaries containing references
2050    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
2051    for these.  Some C compilers can't cope with zero-length static arrays,
2052    so we have to make these one element long.
2053   --------------------------------------------------------------------------- */
2054
2055 section "rodata" {
2056   GHC_ZCCCallable_static_info:   W_ 0;
2057 }
2058
2059 section "rodata" {
2060   GHC_ZCCReturnable_static_info: W_ 0;
2061 }