1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check; the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *less* than the size of a word, make sure we
42  * round up to the nearest word for the size of the array.
43  */
44
45 newByteArrayzh_fast
46 {
47     W_ words, payload_words, n, p;
48     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
49     n = R1;
50     payload_words = ROUNDUP_BYTES_TO_WDS(n);
51     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
52     "ptr" p = foreign "C" allocateLocal(BaseReg "ptr",words);
53     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
54     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
55     StgArrWords_words(p) = payload_words;
56     RET_P(p);
57 }
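
/* Illustrative sketch (a comment only, not compiled as part of the RTS): a
 * small Haskell model of the byte-to-word rounding that ROUNDUP_BYTES_TO_WDS
 * performs above.  The function name and the 4-byte word size in the example
 * are assumptions used purely for illustration.
 *
 *   -- Round a byte count up to a whole number of machine words.
 *   roundUpBytesToWords :: Int -> Int -> Int
 *   roundUpBytesToWords wordSizeBytes n =
 *       (n + wordSizeBytes - 1) `div` wordSizeBytes
 *
 *   -- e.g. with 4-byte words, a 13-byte request occupies 4 words:
 *   --   roundUpBytesToWords 4 13 == 4
 */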
58
59 newPinnedByteArrayzh_fast
60 {
61     W_ words, payload_words, n, p;
62
63     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
64     n = R1;
65     payload_words = ROUNDUP_BYTES_TO_WDS(n);
66
67     // We want an 8-byte aligned array.  allocatePinned() gives us
68     // 8-byte aligned memory by default, but we want to align the
69     // *goods* inside the ArrWords object, so we have to check the
70     // size of the ArrWords header and adjust our size accordingly.
71     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
72     if ((SIZEOF_StgArrWords & 7) != 0) {
73         words = words + 1;
74     }
75
76     "ptr" p = foreign "C" allocatePinned(words);
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78
79     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
80     // have to push the object forward one word so that the goods
81     // fall on an 8-byte boundary.
82     if ((SIZEOF_StgArrWords & 7) != 0) {
83         p = p + WDS(1);
84     }
85
86     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
87     StgArrWords_words(p) = payload_words;
88     RET_P(p);
89 }
90
91 newArrayzh_fast
92 {
93     W_ words, n, init, arr, p;
94     /* Args: R1 = words, R2 = initialisation value */
95
96     n = R1;
97     MAYBE_GC(R2_PTR,newArrayzh_fast);
98
99     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
100     "ptr" arr = foreign "C" allocateLocal(BaseReg "ptr",words);
101     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
102
103     SET_HDR(arr, stg_MUT_ARR_PTRS_info, W_[CCCS]);
104     StgMutArrPtrs_ptrs(arr) = n;
105
106     // Initialise all elements of the array with the value in R2
107     init = R2;
108     p = arr + SIZEOF_StgMutArrPtrs;
109   for:
110     if (p < arr + WDS(words)) {
111         W_[p] = init;
112         p = p + WDS(1);
113         goto for;
114     }
115
116     RET_P(arr);
117 }
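
/* Illustrative sketch (a comment only, not compiled as part of the RTS): one
 * user-level view of the mutable boxed arrays that newArrayzh_fast builds,
 * here via the Data.Array.IO wrappers.  Choosing IOArray for the example is
 * an assumption; any MArray instance behaves the same way.
 *
 *   import Data.Array.IO
 *
 *   main :: IO ()
 *   main = do
 *     arr <- newArray (0, 9) 'x' :: IO (IOArray Int Char)  -- every slot starts as 'x'
 *     writeArray arr 3 'y'
 *     readArray arr 3 >>= print                            -- 'y'
 */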
118
119 unsafeThawArrayzh_fast
120 {
121   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
122   //
123   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
124   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
125   // it on the mutable list for the GC to remove (removing something from
126   // the mutable list is not easy, because the mut_list is only singly-linked).
127   // 
128   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
129   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0 to indicate
130   // that it is still on the mutable list.
131
132   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
133   // either it is on a mut_list, or it isn't.  We adopt the convention that
134   // the mut_link field is NULL if it isn't on a mut_list, and the GC
135   // maintains this invariant.
136   //
137   if (%INFO_TYPE(%GET_STD_INFO(R1)) != HALF_W_(MUT_ARR_PTRS_FROZEN0)) {
138         foreign "C" recordMutableLock(R1 "ptr");
139   }
140
141   SET_INFO(R1,stg_MUT_ARR_PTRS_info);
142
143   RET_P(R1);
144 }
145
146 /* -----------------------------------------------------------------------------
147    MutVar primitives
148    -------------------------------------------------------------------------- */
149
150 newMutVarzh_fast
151 {
152     W_ mv;
153     /* Args: R1 = initialisation value */
154
155     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
156
157     mv = Hp - SIZEOF_StgMutVar + WDS(1);
158     SET_HDR(mv,stg_MUT_VAR_info,W_[CCCS]);
159     StgMutVar_var(mv) = R1;
160     
161     RET_P(mv);
162 }
163
164 atomicModifyMutVarzh_fast
165 {
166     W_ mv, z, x, y, r;
167     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
168
169    /* If x is the current contents of the MutVar#, then
170       we want to make the new contents point to
171
172         (sel_0 (f x))
173
174       and the return value is
175
176         (sel_1 (f x))
177
178       Obviously we can share (f x).
179
180          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
181          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
182          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
183     */
184
185 #if MIN_UPD_SIZE > 1
186 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
187 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
188 #else
189 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
190 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
191 #endif
192
193 #if MIN_UPD_SIZE > 2
194 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
195 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
196 #else
197 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
198 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
199 #endif
200
201 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
202
203    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
204
205 #if defined(SMP)
206     foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
207 #endif
208
209    x = StgMutVar_var(R1);
210
211    TICK_ALLOC_THUNK_2();
212    CCCS_ALLOC(THUNK_2_SIZE);
213    z = Hp - THUNK_2_SIZE + WDS(1);
214    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
215    LDV_RECORD_CREATE(z);
216    StgThunk_payload(z,0) = R2;
217    StgThunk_payload(z,1) = x;
218
219    TICK_ALLOC_THUNK_1();
220    CCCS_ALLOC(THUNK_1_SIZE);
221    y = z - THUNK_1_SIZE;
222    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
223    LDV_RECORD_CREATE(y);
224    StgThunk_payload(y,0) = z;
225
226    StgMutVar_var(R1) = y;
227
228    TICK_ALLOC_THUNK_1();
229    CCCS_ALLOC(THUNK_1_SIZE);
230    r = y - THUNK_1_SIZE;
231    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
232    LDV_RECORD_CREATE(r);
233    StgThunk_payload(r,0) = z;
234
235 #if defined(SMP)
236     foreign "C" RELEASE_LOCK(sm_mutex "ptr");
237 #endif
238
239    RET_P(r);
240 }
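
/* Illustrative sketch (a comment only, not compiled as part of the RTS): the
 * behaviour implemented above, seen through Data.IORef's atomicModifyIORef.
 * The function argument has type a -> (a, b): its first component becomes
 * the new contents (sel_0 above) and its second component is the result
 * returned to the caller (sel_1 above), with (f x) shared between the two.
 *
 *   import Data.IORef
 *
 *   main :: IO ()
 *   main = do
 *     r <- newIORef (41 :: Int)
 *     b <- atomicModifyIORef r (\x -> (x + 1, x * 2))
 *     readIORef r >>= print   -- 42  (new contents)
 *     print b                 -- 82  (returned result)
 */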
241
242 /* -----------------------------------------------------------------------------
243    Weak Pointer Primitives
244    -------------------------------------------------------------------------- */
245
246 STRING(stg_weak_msg,"New weak pointer at %p\n")
247
248 mkWeakzh_fast
249 {
250   /* R1 = key
251      R2 = value
252      R3 = finalizer (or NULL)
253   */
254   W_ w;
255
256   if (R3 == NULL) {
257     R3 = stg_NO_FINALIZER_closure;
258   }
259
260   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
261
262   w = Hp - SIZEOF_StgWeak + WDS(1);
263   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
264
265   StgWeak_key(w)       = R1;
266   StgWeak_value(w)     = R2;
267   StgWeak_finalizer(w) = R3;
268
269   StgWeak_link(w)       = W_[weak_ptr_list];
270   W_[weak_ptr_list]     = w;
271
272   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w));
273
274   RET_P(w);
275 }
276
277
278 finalizzeWeakzh_fast
279 {
280   /* R1 = weak ptr
281    */
282   W_ w, f;
283
284   w = R1;
285
286   // already dead?
287   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
288       RET_NP(0,stg_NO_FINALIZER_closure);
289   }
290
291   // kill it
292 #ifdef PROFILING
293   // @LDV profiling
294   // A weak pointer is inherently used, so we do not need to call
295   // LDV_recordDead_FILL_SLOP_DYNAMIC():
296   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
297   // or, LDV_recordDead():
298   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
299   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
300   // large as weak pointers, so there is no need to fill the slop, either.
301   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
302 #endif
303
304   //
305   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
306   //
307   SET_INFO(w,stg_DEAD_WEAK_info);
308   LDV_RECORD_CREATE(w);
309
310   f = StgWeak_finalizer(w);
311   StgDeadWeak_link(w) = StgWeak_link(w);
312
313   /* return the finalizer */
314   if (f == stg_NO_FINALIZER_closure) {
315       RET_NP(0,stg_NO_FINALIZER_closure);
316   } else {
317       RET_NP(1,f);
318   }
319 }
320
321 deRefWeakzh_fast
322 {
323   /* R1 = weak ptr */
324   W_ w, code, val;
325
326   w = R1;
327   if (GET_INFO(w) == stg_WEAK_info) {
328     code = 1;
329     val = StgWeak_value(w);
330   } else {
331     code = 0;
332     val = w;
333   }
334   RET_NP(code,val);
335 }
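
/* Illustrative sketch (a comment only, not compiled as part of the RTS): the
 * System.Mem.Weak wrappers over mkWeak#, finalizeWeak# and deRefWeak#.
 *
 *   import System.Mem.Weak
 *
 *   main :: IO ()
 *   main = do
 *     let key = "some value"
 *     w <- mkWeakPtr key (Just (putStrLn "finalizer ran"))
 *     deRefWeak w >>= print   -- Just "some value" while the key is alive
 *     finalize w              -- run the finalizer and drop the key
 *     deRefWeak w >>= print   -- Nothing
 */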
336
337 /* -----------------------------------------------------------------------------
338    Arbitrary-precision Integer operations.
339
340    There are some assumptions in this code that mp_limb_t == W_.  This is
341    the case for all the platforms that GHC supports, currently.
342    -------------------------------------------------------------------------- */
343
344 int2Integerzh_fast
345 {
346    /* arguments: R1 = Int# */
347
348    W_ val, s, p;        /* to avoid aliasing */
349
350    val = R1;
351    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
352
353    p = Hp - SIZEOF_StgArrWords;
354    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
355    StgArrWords_words(p) = 1;
356
357    /* mpz_set_si is inlined here, makes things simpler */
358    if (%lt(val,0)) { 
359         s  = -1;
360         Hp(0) = -val;
361    } else { 
362      if (%gt(val,0)) {
363         s = 1;
364         Hp(0) = val;
365      } else {
366         s = 0;
367      }
368   }
369
370    /* returns (# size  :: Int#, 
371                  data  :: ByteArray# 
372                #)
373    */
374    RET_NP(s,p);
375 }
376
377 word2Integerzh_fast
378 {
379    /* arguments: R1 = Word# */
380
381    W_ val, s, p;        /* to avoid aliasing */
382
383    val = R1;
384
385    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
386
387    p = Hp - SIZEOF_StgArrWords;
388    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
389    StgArrWords_words(p) = 1;
390
391    if (val != 0) {
392         s = 1;
393         W_[Hp] = val;
394    } else {
395         s = 0;
396    }
397
398    /* returns (# size  :: Int#, 
399                  data  :: ByteArray# #)
400    */
401    RET_NP(s,p);
402 }
403
404
405 /*
406  * 'long long' primops for converting to/from Integers.
407  */
408
409 #ifdef SUPPORT_LONG_LONGS
410
411 int64ToIntegerzh_fast
412 {
413    /* arguments: L1 = Int64# */
414
415    L_ val;
416    W_ hi, s, neg, words_needed, p;
417
418    val = L1;
419    neg = 0;
420
421    if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) )  { 
422        words_needed = 2;
423    } else { 
424        // minimum is one word
425        words_needed = 1;
426    }
427
428    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
429                NO_PTRS, int64ToIntegerzh_fast );
430
431    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
432    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
433    StgArrWords_words(p) = words_needed;
434
435    if ( %lt(val,0::L_) ) {
436      neg = 1;
437      val = -val;
438    }
439
440    hi = TO_W_(val >> 32);
441
442    if ( words_needed == 2 )  { 
443       s = 2;
444       Hp(-1) = TO_W_(val);
445       Hp(0) = hi;
446    } else { 
447        if ( val != 0::L_ ) {
448            s = 1;
449            Hp(0) = TO_W_(val);
450        } else /* val==0 */  {
451            s = 0;
452        }
453    }
454    if ( neg != 0 ) {
455         s = -s;
456    }
457
458    /* returns (# size  :: Int#, 
459                  data  :: ByteArray# #)
460    */
461    RET_NP(s,p);
462 }
463
464 word64ToIntegerzh_fast
465 {
466    /* arguments: L1 = Word64# */
467
468    L_ val;
469    W_ hi, s, words_needed, p;
470
471    val = L1;
472    if ( val >= 0x100000000::L_ ) {
473       words_needed = 2;
474    } else {
475       words_needed = 1;
476    }
477
478    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
479                NO_PTRS, word64ToIntegerzh_fast );
480
481    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
482    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
483    StgArrWords_words(p) = words_needed;
484
485    hi = TO_W_(val >> 32);
486    if ( val >= 0x100000000::L_ ) { 
487      s = 2;
488      Hp(-1) = TO_W_(val);
489      Hp(0)  = hi;
490    } else {
491       if ( val != 0::L_ ) {
492          s = 1;
493          Hp(0) = TO_W_(val);
494       } else /* val==0 */  {
495          s = 0;
496       }
497    }
498
499    /* returns (# size  :: Int#, 
500                  data  :: ByteArray# #)
501    */
502    RET_NP(s,p);
503 }
504
505
506 #endif /* SUPPORT_LONG_LONGS */
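
/* Illustrative sketch (a comment only, not compiled as part of the RTS): on
 * platforms where SUPPORT_LONG_LONGS is defined, the primops above back the
 * conversion of 64-bit integral types to Integer, reachable from Haskell as
 * toInteger / fromIntegral.
 *
 *   import Data.Int  (Int64)
 *   import Data.Word (Word64)
 *
 *   main :: IO ()
 *   main = do
 *     print (toInteger (maxBound :: Int64))    -- 9223372036854775807
 *     print (toInteger (maxBound :: Word64))   -- 18446744073709551615
 */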
507
508 /* ToDo: this is shockingly inefficient */
509
510 #ifndef SMP
511 section "bss" {
512   mp_tmp1:
513     bits8 [SIZEOF_MP_INT];
514 }
515
516 section "bss" {
517   mp_tmp2:
518     bits8 [SIZEOF_MP_INT];
519 }
520
521 section "bss" {
522   mp_result1:
523     bits8 [SIZEOF_MP_INT];
524 }
525
526 section "bss" {
527   mp_result2:
528     bits8 [SIZEOF_MP_INT];
529 }
530 #endif
531
532 #ifdef SMP
533 #define FETCH_MP_TEMP(X) \
534 W_ X; \
535 X = BaseReg + (OFFSET_StgRegTable_r ## X);
536 #else
537 #define FETCH_MP_TEMP(X) /* Nothing */
538 #endif
539
540 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
541 name                                                                    \
542 {                                                                       \
543   CInt s1, s2;                                                          \
544   W_ d1, d2;                                                            \
545   FETCH_MP_TEMP(mp_tmp1);                                               \
546   FETCH_MP_TEMP(mp_tmp2);                                               \
547   FETCH_MP_TEMP(mp_result1)                                             \
548   FETCH_MP_TEMP(mp_result2);                                            \
549                                                                         \
550   /* call doYouWantToGC() */                                            \
551   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
552                                                                         \
553   s1 = W_TO_INT(R1);                                                    \
554   d1 = R2;                                                              \
555   s2 = W_TO_INT(R3);                                                    \
556   d2 = R4;                                                              \
557                                                                         \
558   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
559   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
560   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
561   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
562   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
563   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
564                                                                         \
565   foreign "C" mpz_init(mp_result1 "ptr");                               \
566                                                                         \
567   /* Perform the operation */                                           \
568   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr");   \
569                                                                         \
570   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
571          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
572 }
573
574 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
575 name                                                                    \
576 {                                                                       \
577   CInt s1;                                                              \
578   W_ d1;                                                                \
579   FETCH_MP_TEMP(mp_tmp1);                                               \
580   FETCH_MP_TEMP(mp_result1)                                             \
581                                                                         \
582   /* call doYouWantToGC() */                                            \
583   MAYBE_GC(R2_PTR, name);                                               \
584                                                                         \
585   d1 = R2;                                                              \
586   s1 = W_TO_INT(R1);                                                    \
587                                                                         \
588   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
589   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
590   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
591                                                                         \
592   foreign "C" mpz_init(mp_result1 "ptr");                               \
593                                                                         \
594   /* Perform the operation */                                           \
595   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr");                   \
596                                                                         \
597   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
598          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
599 }
600
601 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
602 name                                                                                    \
603 {                                                                                       \
604   CInt s1, s2;                                                                          \
605   W_ d1, d2;                                                                            \
606   FETCH_MP_TEMP(mp_tmp1);                                                               \
607   FETCH_MP_TEMP(mp_tmp2);                                                               \
608   FETCH_MP_TEMP(mp_result1)                                                             \
609   FETCH_MP_TEMP(mp_result2)                                                             \
610                                                                                         \
611   /* call doYouWantToGC() */                                                            \
612   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
613                                                                                         \
614   s1 = W_TO_INT(R1);                                                                    \
615   d1 = R2;                                                                              \
616   s2 = W_TO_INT(R3);                                                                    \
617   d2 = R4;                                                                              \
618                                                                                         \
619   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
620   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
621   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
622   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
623   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
624   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
625                                                                                         \
626   foreign "C" mpz_init(mp_result1 "ptr");                                               \
627   foreign "C" mpz_init(mp_result2 "ptr");                                               \
628                                                                                         \
629   /* Perform the operation */                                                           \
630   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr");    \
631                                                                                         \
632   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
633            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
634            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
635            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
636 }
637
638 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add)
639 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub)
640 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul)
641 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd)
642 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q)
643 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r)
644 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact)
645 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and)
646 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior)
647 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor)
648 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com)
649
650 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr)
651 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr)
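
/* Illustrative sketch (a comment only, not compiled as part of the RTS): the
 * Prelude operations on Integer that the GMP-backed primops above implement.
 * Note how the quot/rem vs div/mod split mirrors mpz_tdiv_qr vs mpz_fdiv_qr.
 *
 *   main :: IO ()
 *   main = do
 *     let a = 2 ^ 80 :: Integer
 *         b = 3 ^ 50 :: Integer
 *     print (a + b)
 *     print ((-a) `quotRem` b)   -- quotient truncated towards zero
 *     print ((-a) `divMod`  b)   -- quotient rounded towards negative infinity
 *     print (gcd a b)
 */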
652
653 #ifndef SMP
654 section "bss" {
655   mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
656 }
657 #endif
658
659 gcdIntzh_fast
660 {
661     /* R1 = the first Int#; R2 = the second Int# */
662     W_ r; 
663     FETCH_MP_TEMP(mp_tmp_w);
664
665     W_[mp_tmp_w] = R1;
666     r = foreign "C" mpn_gcd_1(mp_tmp_w "ptr", 1, R2);
667
668     R1 = r;
669     /* Result parked in R1, return via info-pointer at TOS */
670     jump %ENTRY_CODE(Sp(0));
671 }
672
673
674 gcdIntegerIntzh_fast
675 {
676     /* R1 = s1; R2 = d1; R3 = the int */
677     R1 = foreign "C" mpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3);
678     
679     /* Result parked in R1, return via info-pointer at TOS */
680     jump %ENTRY_CODE(Sp(0));
681 }
682
683
684 cmpIntegerIntzh_fast
685 {
686     /* R1 = s1; R2 = d1; R3 = the int */
687     W_ usize, vsize, v_digit, u_digit;
688
689     usize = R1;
690     vsize = 0;
691     v_digit = R3;
692
693     // paraphrased from mpz_cmp_si() in the GMP sources
694     if (%gt(v_digit,0)) {
695         vsize = 1;
696     } else { 
697         if (%lt(v_digit,0)) {
698             vsize = -1;
699             v_digit = -v_digit;
700         }
701     }
702
703     if (usize != vsize) {
704         R1 = usize - vsize; 
705         jump %ENTRY_CODE(Sp(0));
706     }
707
708     if (usize == 0) {
709         R1 = 0; 
710         jump %ENTRY_CODE(Sp(0));
711     }
712
713     u_digit = W_[BYTE_ARR_CTS(R2)];
714
715     if (u_digit == v_digit) {
716         R1 = 0; 
717         jump %ENTRY_CODE(Sp(0));
718     }
719
720     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
721         R1 = usize; 
722     } else {
723         R1 = -usize; 
724     }
725
726     jump %ENTRY_CODE(Sp(0));
727 }
728
729 cmpIntegerzh_fast
730 {
731     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
732     W_ usize, vsize, size, up, vp;
733     CInt cmp;
734
735     // paraphrased from mpz_cmp() in the GMP sources
736     usize = R1;
737     vsize = R3;
738
739     if (usize != vsize) {
740         R1 = usize - vsize; 
741         jump %ENTRY_CODE(Sp(0));
742     }
743
744     if (usize == 0) {
745         R1 = 0; 
746         jump %ENTRY_CODE(Sp(0));
747     }
748
749     if (%lt(usize,0)) { // NB. not <, which is unsigned
750         size = -usize;
751     } else {
752         size = usize;
753     }
754
755     up = BYTE_ARR_CTS(R2);
756     vp = BYTE_ARR_CTS(R4);
757
758     cmp = foreign "C" mpn_cmp(up "ptr", vp "ptr", size);
759
760     if (cmp == 0 :: CInt) {
761         R1 = 0; 
762         jump %ENTRY_CODE(Sp(0));
763     }
764
765     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
766         R1 = 1;
767     } else {
768         R1 = (-1); 
769     }
770     /* Result parked in R1, return via info-pointer at TOS */
771     jump %ENTRY_CODE(Sp(0));
772 }
773
774 integer2Intzh_fast
775 {
776     /* R1 = s; R2 = d */
777     W_ r, s;
778
779     s = R1;
780     if (s == 0) {
781         r = 0;
782     } else {
783         r = W_[R2 + SIZEOF_StgArrWords];
784         if (%lt(s,0)) {
785             r = -r;
786         }
787     }
788     /* Result parked in R1, return via info-pointer at TOS */
789     R1 = r;
790     jump %ENTRY_CODE(Sp(0));
791 }
792
793 integer2Wordzh_fast
794 {
795   /* R1 = s; R2 = d */
796   W_ r, s;
797
798   s = R1;
799   if (s == 0) {
800     r = 0;
801   } else {
802     r = W_[R2 + SIZEOF_StgArrWords];
803     if (%lt(s,0)) {
804         r = -r;
805     }
806   }
807   /* Result parked in R1, return via info-pointer at TOS */
808   R1 = r;
809   jump %ENTRY_CODE(Sp(0));
810 }
811
812 decodeFloatzh_fast
813 {
814     W_ p;
815     F_ arg;
816     FETCH_MP_TEMP(mp_tmp1);
817     FETCH_MP_TEMP(mp_tmp_w);
818     
819     /* arguments: F1 = Float# */
820     arg = F1;
821     
822     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
823     
824     /* Be prepared to tell Lennart-coded __decodeFloat
825        where mantissa._mp_d can be put (it does not care about the rest) */
826     p = Hp - SIZEOF_StgArrWords;
827     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
828     StgArrWords_words(p) = 1;
829     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
830     
831     /* Perform the operation */
832     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg);
833     
834     /* returns: (Int# (expn), Int#, ByteArray#) */
835     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
836 }
837
838 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
839 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
840
841 decodeDoublezh_fast
842 {
843     D_ arg;
844     W_ p;
845     FETCH_MP_TEMP(mp_tmp1);
846     FETCH_MP_TEMP(mp_tmp_w);
847
848     /* arguments: D1 = Double# */
849     arg = D1;
850
851     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
852     
853     /* Be prepared to tell Lennart-coded __decodeDouble
854        where mantissa.d can be put (it does not care about the rest) */
855     p = Hp - ARR_SIZE + WDS(1);
856     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
857     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
858     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
859
860     /* Perform the operation */
861     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg);
862     
863     /* returns: (Int# (expn), Int#, ByteArray#) */
864     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
865 }
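
/* Illustrative sketch (a comment only, not compiled as part of the RTS):
 * decodeFloat# / decodeDouble# surface in Haskell as the RealFloat method
 * decodeFloat, which splits a value into its mantissa and exponent.
 *
 *   main :: IO ()
 *   main = do
 *     let x = 0.5 :: Double
 *         (m, e) = decodeFloat x
 *     print (m, e)
 *     print (encodeFloat m e == x)   -- True: the decomposition round-trips
 */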
866
867 /* -----------------------------------------------------------------------------
868  * Concurrency primitives
869  * -------------------------------------------------------------------------- */
870
871 forkzh_fast
872 {
873   /* args: R1 = closure to spark */
874   
875   MAYBE_GC(R1_PTR, forkzh_fast);
876
877   // create it right now, return ThreadID in R1
878   "ptr" R1 = foreign "C" createIOThread( RtsFlags_GcFlags_initialStkSize(RtsFlags), 
879                                    R1 "ptr");
880   foreign "C" scheduleThread(R1 "ptr");
881
882   // switch at the earliest opportunity
883   CInt[context_switch] = 1 :: CInt;
884   
885   RET_P(R1);
886 }
887
888 yieldzh_fast
889 {
890   jump stg_yield_noregs;
891 }
892
893 myThreadIdzh_fast
894 {
895   /* no args. */
896   RET_P(CurrentTSO);
897 }
898
899 labelThreadzh_fast
900 {
901   /* args: 
902         R1 = ThreadId#
903         R2 = Addr# */
904 #ifdef DEBUG
905   foreign "C" labelThread(R1 "ptr", R2 "ptr");
906 #endif
907   jump %ENTRY_CODE(Sp(0));
908 }
909
910 isCurrentThreadBoundzh_fast
911 {
912   /* no args */
913   W_ r;
914   r = foreign "C" isThreadBound(CurrentTSO);
915   RET_N(r);
916 }
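
/* Illustrative sketch (a comment only, not compiled as part of the RTS): the
 * Control.Concurrent wrappers over fork#, yield# and myThreadId#.
 *
 *   import Control.Concurrent
 *
 *   main :: IO ()
 *   main = do
 *     myThreadId >>= print
 *     done <- newEmptyMVar
 *     _ <- forkIO (putStrLn "child running" >> putMVar done ())
 *     yield            -- offer the scheduler a context switch
 *     takeMVar done    -- wait for the child to finish
 */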
917
918
919 /* -----------------------------------------------------------------------------
920  * TVar primitives
921  * -------------------------------------------------------------------------- */
922
923 #ifdef REG_R1
924 #define SP_OFF 0
925 #define IF_NOT_REG_R1(x) 
926 #else
927 #define SP_OFF 1
928 #define IF_NOT_REG_R1(x) x
929 #endif
930
931 // Catch retry frame ------------------------------------------------------------
932
933 #define CATCH_RETRY_FRAME_ERROR(label) \
934   label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }
935
936 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
937 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
938 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
939 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
940 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
941 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
942 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
943 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)
944
945 #if MAX_VECTORED_RTN > 8
946 #error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
947 #endif
948
949 #if defined(PROFILING)
950 #define CATCH_RETRY_FRAME_BITMAP 7
951 #define CATCH_RETRY_FRAME_WORDS  6
952 #else
953 #define CATCH_RETRY_FRAME_BITMAP 1
954 #define CATCH_RETRY_FRAME_WORDS  4
955 #endif
956
957 INFO_TABLE_RET(stg_catch_retry_frame,
958                CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
959                CATCH_RETRY_FRAME,
960                stg_catch_retry_frame_0_ret,
961                stg_catch_retry_frame_1_ret,
962                stg_catch_retry_frame_2_ret,
963                stg_catch_retry_frame_3_ret,
964                stg_catch_retry_frame_4_ret,
965                stg_catch_retry_frame_5_ret,
966                stg_catch_retry_frame_6_ret,
967                stg_catch_retry_frame_7_ret)
968 {
969    W_ r, frame, trec, outer;
970    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
971
972    frame = Sp;
973    trec = StgTSO_trec(CurrentTSO);
974    "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
975    r = foreign "C" stmCommitNestedTransaction(BaseReg "ptr", trec "ptr");
976    if (r) {
977      /* Succeeded (either first branch or second branch) */
978      StgTSO_trec(CurrentTSO) = outer;
979      Sp = Sp + SIZEOF_StgCatchRetryFrame;
980      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
981      jump %ENTRY_CODE(Sp(SP_OFF));
982    } else {
983      /* Did not commit: retry */
984      W_ new_trec;
985      "ptr" new_trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
986      StgTSO_trec(CurrentTSO) = new_trec;
987      if (StgCatchRetryFrame_running_alt_code(frame)) {
988        R1 = StgCatchRetryFrame_alt_code(frame);
989      } else {
990        R1 = StgCatchRetryFrame_first_code(frame);
991        StgCatchRetryFrame_first_code_trec(frame) = new_trec;
992      }
993      Sp_adj(-1);
994      jump RET_LBL(stg_ap_v);
995    }
996 }
997
998
999 // Atomically frame -------------------------------------------------------------
1000
1001
1002 #define ATOMICALLY_FRAME_ERROR(label) \
1003   label { foreign "C" barf("atomically_frame incorrectly entered!"); }
1004
1005 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
1006 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
1007 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
1008 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
1009 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
1010 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
1011 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
1012 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)
1013
1014 #if MAX_VECTORED_RTN > 8
1015 #error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
1016 #endif
1017
1018 #if defined(PROFILING)
1019 #define ATOMICALLY_FRAME_BITMAP 7
1020 #define ATOMICALLY_FRAME_WORDS  4
1021 #else
1022 #define ATOMICALLY_FRAME_BITMAP 1
1023 #define ATOMICALLY_FRAME_WORDS  2
1024 #endif
1025
1026
1027 INFO_TABLE_RET(stg_atomically_frame,
1028                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1029                ATOMICALLY_FRAME,
1030                stg_atomically_frame_0_ret,
1031                stg_atomically_frame_1_ret,
1032                stg_atomically_frame_2_ret,
1033                stg_atomically_frame_3_ret,
1034                stg_atomically_frame_4_ret,
1035                stg_atomically_frame_5_ret,
1036                stg_atomically_frame_6_ret,
1037                stg_atomically_frame_7_ret)
1038 {
1039    W_ frame, trec, valid;
1040    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1041
1042    frame = Sp;
1043    trec = StgTSO_trec(CurrentTSO);
1044    if (StgAtomicallyFrame_waiting(frame)) {
1045      /* The TSO is currently waiting: should we stop waiting? */
1046      valid = foreign "C" stmReWait(CurrentTSO "ptr");
1047      if (valid) {
1048        /* Previous attempt is still valid: no point trying again yet */
1049           IF_NOT_REG_R1(Sp_adj(-2);
1050                         Sp(1) = stg_NO_FINALIZER_closure;
1051                         Sp(0) = stg_ut_1_0_unreg_info;)
1052        jump stg_block_noregs;
1053      } else {
1054        /* Previous attempt is no longer valid: try again */
1055        "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", NO_TREC "ptr");
1056        StgTSO_trec(CurrentTSO) = trec;
1057        StgAtomicallyFrame_waiting(frame) = 0 :: CInt; /* false; */
1058        R1 = StgAtomicallyFrame_code(frame);
1059        Sp_adj(-1);
1060        jump RET_LBL(stg_ap_v);
1061      }
1062    } else {
1063      /* The TSO is not currently waiting: try to commit the transaction */
1064      valid = foreign "C" stmCommitTransaction(BaseReg "ptr", trec "ptr");
1065      if (valid) {
1066        /* Transaction was valid: commit succeeded */
1067        StgTSO_trec(CurrentTSO) = NO_TREC;
1068        Sp = Sp + SIZEOF_StgAtomicallyFrame;
1069        IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1070        jump %ENTRY_CODE(Sp(SP_OFF));
1071      } else {
1072        /* Transaction was not valid: try again */
1073        "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", NO_TREC "ptr");
1074        StgTSO_trec(CurrentTSO) = trec;
1075        R1 = StgAtomicallyFrame_code(frame);
1076        Sp_adj(-1);
1077        jump RET_LBL(stg_ap_v);
1078      }
1079    }
1080 }
1081
1082
1083 // STM catch frame --------------------------------------------------------------
1084
1085 #define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)          \
1086    label                                                   \
1087    {                                                       \
1088       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )  \
1089       Sp = Sp + SIZEOF_StgCatchSTMFrame;                   \
1090       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)             \
1091       jump ret;                                            \
1092    }
1093
1094 #ifdef REG_R1
1095 #define SP_OFF 0
1096 #else
1097 #define SP_OFF 1
1098 #endif
1099
1100 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
1101 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
1102 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
1103 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
1104 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
1105 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
1106 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
1107 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
1108
1109 #if MAX_VECTORED_RTN > 8
1110 #error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
1111 #endif
1112
1113 #if defined(PROFILING)
1114 #define CATCH_STM_FRAME_BITMAP 3
1115 #define CATCH_STM_FRAME_WORDS  3
1116 #else
1117 #define CATCH_STM_FRAME_BITMAP 0
1118 #define CATCH_STM_FRAME_WORDS  1
1119 #endif
1120
1121 /* Catch frames are very similar to update frames, but when entering
1122  * one we just pop the frame off the stack and perform the correct
1123  * kind of return to the activation record underneath us on the stack.
1124  */
1125
1126 INFO_TABLE_RET(stg_catch_stm_frame,
1127                CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
1128                CATCH_STM_FRAME,
1129                stg_catch_stm_frame_0_ret,
1130                stg_catch_stm_frame_1_ret,
1131                stg_catch_stm_frame_2_ret,
1132                stg_catch_stm_frame_3_ret,
1133                stg_catch_stm_frame_4_ret,
1134                stg_catch_stm_frame_5_ret,
1135                stg_catch_stm_frame_6_ret,
1136                stg_catch_stm_frame_7_ret)
1137 CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1138
1139
1140 // Primop definition ------------------------------------------------------------
1141
1142 atomicallyzh_fast
1143 {
1144   W_ frame;
1145   W_ old_trec;
1146   W_ new_trec;
1147   
1148   // stmStartTransaction may allocate
1149   MAYBE_GC (R1_PTR, atomicallyzh_fast); 
1150
1151   /* Args: R1 = m :: STM a */
1152   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1153
1154   /* Set up the atomically frame */
1155   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1156   frame = Sp;
1157
1158   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1159   StgAtomicallyFrame_waiting(frame) = 0 :: CInt; // False
1160   StgAtomicallyFrame_code(frame) = R1;
1161
1162   /* Start the memory transaction */
1163   old_trec = StgTSO_trec(CurrentTSO);
1164   ASSERT(old_trec == NO_TREC);
1165   "ptr" new_trec = foreign "C" stmStartTransaction(BaseReg "ptr", old_trec "ptr");
1166   StgTSO_trec(CurrentTSO) = new_trec;
1167
1168   /* Apply R1 to the realworld token */
1169   Sp_adj(-1);
1170   jump RET_LBL(stg_ap_v);
1171 }
1172
1173
1174 catchSTMzh_fast
1175 {
1176   W_ frame;
1177   
1178   /* Args: R1 :: STM a */
1179   /* Args: R2 :: Exception -> STM a */
1180   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1181
1182   /* Set up the catch frame */
1183   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1184   frame = Sp;
1185
1186   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1187   StgCatchSTMFrame_handler(frame) = R2;
1188
1189   /* Apply R1 to the realworld token */
1190   Sp_adj(-1);
1191   jump RET_LBL(stg_ap_v);
1192 }
1193
1194
1195 catchRetryzh_fast
1196 {
1197   W_ frame;
1198   W_ new_trec;
1199   W_ trec;
1200
1201   // stmStartTransaction may allocate
1202   MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 
1203
1204   /* Args: R1 :: STM a */
1205   /* Args: R2 :: STM a */
1206   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1207
1208   /* Start a nested transaction within which to run the first code */
1209   trec = StgTSO_trec(CurrentTSO);
1210   "ptr" new_trec = foreign "C" stmStartTransaction(BaseReg "ptr", trec "ptr");
1211   StgTSO_trec(CurrentTSO) = new_trec;
1212
1213   /* Set up the catch-retry frame */
1214   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1215   frame = Sp;
1216   
1217   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1218   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1219   StgCatchRetryFrame_first_code(frame) = R1;
1220   StgCatchRetryFrame_alt_code(frame) = R2;
1221   StgCatchRetryFrame_first_code_trec(frame) = new_trec;
1222
1223   /* Apply R1 to the realworld token */
1224   Sp_adj(-1);
1225   jump RET_LBL(stg_ap_v);  
1226 }
1227
1228
1229 retryzh_fast
1230 {
1231   W_ frame_type;
1232   W_ frame;
1233   W_ trec;
1234   W_ outer;
1235   W_ r;
1236
1237   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1238
1239   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1240 retry_pop_stack:
1241   trec = StgTSO_trec(CurrentTSO);
1242   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
1243   StgTSO_sp(CurrentTSO) = Sp;
1244   frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr");
1245   Sp = StgTSO_sp(CurrentTSO);
1246   frame = Sp;
1247
1248   if (frame_type == CATCH_RETRY_FRAME) {
1249     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1250     ASSERT(outer != NO_TREC);
1251     if (!StgCatchRetryFrame_running_alt_code(frame)) {
1252       // Retry in the first code: try the alternative
1253       "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
1254       StgTSO_trec(CurrentTSO) = trec;
1255       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1256       R1 = StgCatchRetryFrame_alt_code(frame);
1257       Sp_adj(-1);
1258       jump RET_LBL(stg_ap_v);
1259     } else {
1260       // Retry in the alternative code: propagate
1261       W_ other_trec;
1262       other_trec = StgCatchRetryFrame_first_code_trec(frame);
1263       r = foreign "C" stmCommitNestedTransaction(BaseReg "ptr", other_trec "ptr");
1264       if (r) {
1265         r = foreign "C" stmCommitNestedTransaction(BaseReg "ptr", trec "ptr");
1266       }
1267       if (r) {
1268         // Merge between siblings succeeded: commit it back to enclosing transaction
1269         // and then propagate the retry
1270         StgTSO_trec(CurrentTSO) = outer;
1271         Sp = Sp + SIZEOF_StgCatchRetryFrame;
1272         goto retry_pop_stack;
1273       } else {
1274         // Merge failed: we mustn't propagate the retry.  Try both paths again.
1275         "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
1276         StgCatchRetryFrame_first_code_trec(frame) = trec;
1277         StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1278         StgTSO_trec(CurrentTSO) = trec;
1279         R1 = StgCatchRetryFrame_first_code(frame);
1280         Sp_adj(-1);
1281         jump RET_LBL(stg_ap_v);
1282       }
1283     }
1284   }
1285
1286   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1287   ASSERT(frame_type == ATOMICALLY_FRAME);
1288   ASSERT(outer == NO_TREC);
1289   r = foreign "C" stmWait(BaseReg "ptr", CurrentTSO "ptr", trec "ptr");
1290   if (r) {
1291     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1292     StgAtomicallyFrame_waiting(frame) = 1 :: CInt; // true
1293     Sp = frame;
1294     // Fix up the stack in the unregisterised case: the return convention is different.
1295     IF_NOT_REG_R1(Sp_adj(-2); 
1296                   Sp(1) = stg_NO_FINALIZER_closure;
1297                   Sp(0) = stg_ut_1_0_unreg_info;)
1298     jump stg_block_noregs;
1299   } else {
1300     // Transaction was not valid: retry immediately
1301     "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
1302     StgTSO_trec(CurrentTSO) = trec;
1303     R1 = StgAtomicallyFrame_code(frame);
1304     Sp = frame;
1305     Sp_adj(-1);
1306     jump RET_LBL(stg_ap_v);
1307   }
1308 }
1309
1310
1311 newTVarzh_fast
1312 {
1313   W_ tv;
1314   W_ new_value;
1315
1316   /* Args: R1 = initialisation value */
1317
1318   MAYBE_GC (R1_PTR, newTVarzh_fast); 
1319   new_value = R1;
1320   tv = foreign "C" stmNewTVar(BaseReg "ptr", new_value "ptr");
1321   RET_P(tv);
1322 }
1323
1324
1325 readTVarzh_fast
1326 {
1327   W_ trec;
1328   W_ tvar;
1329   W_ result;
1330
1331   /* Args: R1 = TVar closure */
1332
1333   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1334   trec = StgTSO_trec(CurrentTSO);
1335   tvar = R1;
1336   "ptr" result = foreign "C" stmReadTVar(BaseReg "ptr", trec "ptr", tvar "ptr");
1337
1338   RET_P(result);
1339 }
1340
1341
1342 writeTVarzh_fast
1343 {
1344   W_ trec;
1345   W_ tvar;
1346   W_ new_value;
1347   
1348   /* Args: R1 = TVar closure */
1349   /*       R2 = New value    */
1350
1351   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1352   trec = StgTSO_trec(CurrentTSO);
1353   tvar = R1;
1354   new_value = R2;
1355   foreign "C" stmWriteTVar(BaseReg "ptr", trec "ptr", tvar "ptr", new_value "ptr");
1356
1357   jump %ENTRY_CODE(Sp(0));
1358 }
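
/* Illustrative sketch (a comment only, not compiled as part of the RTS): the
 * STM primops as seen from Haskell.  The module name below is an assumption
 * (the operations have also been exported from GHC.Conc); retry# implements
 * retry and catchRetry# implements orElse.
 *
 *   import Control.Concurrent.STM
 *
 *   main :: IO ()
 *   main = do
 *     tv <- atomically (newTVar (0 :: Int))
 *     atomically (writeTVar tv 1)
 *     x <- atomically ((do
 *            v <- readTVar tv
 *            if v > 0 then return v else retry)
 *          `orElse` return 0)
 *     print x   -- 1
 */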
1359
1360
1361 /* -----------------------------------------------------------------------------
1362  * MVar primitives
1363  *
1364  * take & putMVar work as follows.  Firstly, an important invariant:
1365  *
1366  *    If the MVar is full, then the blocking queue contains only
1367  *    threads blocked on putMVar, and if the MVar is empty then the
1368  *    blocking queue contains only threads blocked on takeMVar.
1369  *
1370  * takeMVar:
1371  *    MVar empty : then add ourselves to the blocking queue
1372  *    MVar full  : remove the value from the MVar, and
1373  *                 blocking queue empty     : return
1374  *                 blocking queue non-empty : perform the first blocked putMVar
1375  *                                            from the queue, and wake up the
1376  *                                            thread (MVar is now full again)
1377  *
1378  * putMVar is just the dual of the above algorithm.
1379  *
1380  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1381  * the stack of the thread waiting to do the putMVar.  See
1382  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1383  * the stack layout, and the PerformPut and PerformTake macros below.
1384  *
1385  * It is important that a blocked take or put is woken up with the
1386  * take/put already performed, because otherwise there would be a
1387  * small window of vulnerability where the thread could receive an
1388  * exception and never perform its take or put, and we'd end up with a
1389  * deadlock.
1390  *
1391  * -------------------------------------------------------------------------- */
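
/* Illustrative sketch (a comment only, not compiled as part of the RTS): the
 * blocking protocol described above, seen through Control.Concurrent.MVar.
 * The child thread blocks in takeMVar on the empty MVar and is woken when
 * the parent performs the putMVar.
 *
 *   import Control.Concurrent
 *   import Control.Concurrent.MVar
 *
 *   main :: IO ()
 *   main = do
 *     m    <- newEmptyMVar
 *     done <- newEmptyMVar
 *     _ <- forkIO (do
 *            s <- takeMVar m           -- blocks: m is empty
 *            putStrLn s
 *            putMVar done ())
 *     putMVar m "woken by putMVar"     -- wakes the blocked takeMVar
 *     takeMVar done
 */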
1392
1393 isEmptyMVarzh_fast
1394 {
1395     /* args: R1 = MVar closure */
1396
1397     if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
1398         RET_N(1);
1399     } else {
1400         RET_N(0);
1401     }
1402 }
1403
1404 newMVarzh_fast
1405 {
1406     /* args: none */
1407     W_ mvar;
1408
1409     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1410   
1411     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1412     SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
1413     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1414     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1415     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1416     RET_P(mvar);
1417 }
1418
1419
1420 /* If R1 isn't available, pass it on the stack */
1421 #ifdef REG_R1
1422 #define PerformTake(tso, value)                         \
1423     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1424     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1425 #else
1426 #define PerformTake(tso, value)                                 \
1427     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1428     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1429 #endif
1430
1431 #define PerformPut(tso,lval)                    \
1432     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1433     lval = W_[StgTSO_sp(tso) - WDS(1)];
1434
1435 takeMVarzh_fast
1436 {
1437     W_ mvar, val, info, tso;
1438
1439     /* args: R1 = MVar closure */
1440     mvar = R1;
1441
1442 #if defined(SMP)
1443     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1444 #else
1445     info = GET_INFO(mvar);
1446 #endif
1447
1448     /* If the MVar is empty, put ourselves on its blocking queue,
1449      * and wait until we're woken up.
1450      */
1451     if (info == stg_EMPTY_MVAR_info) {
1452         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1453             StgMVar_head(mvar) = CurrentTSO;
1454         } else {
1455             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1456         }
1457         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1458         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1459         StgTSO_block_info(CurrentTSO)  = mvar;
1460         StgMVar_tail(mvar) = CurrentTSO;
1461         
1462         jump stg_block_takemvar;
1463   }
1464
1465   /* we got the value... */
1466   val = StgMVar_value(mvar);
1467
1468   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1469   {
1470       /* There are putMVar(s) waiting... 
1471        * wake up the first thread on the queue
1472        */
1473       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1474
1475       /* actually perform the putMVar for the thread that we just woke up */
1476       tso = StgMVar_head(mvar);
1477       PerformPut(tso,StgMVar_value(mvar));
1478
1479 #if defined(GRAN) || defined(PAR)
1480       /* ToDo: check 2nd arg (mvar) is right */
1481       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar),mvar);
1482       StgMVar_head(mvar) = tso;
1483 #else
1484       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1485       StgMVar_head(mvar) = tso;
1486 #endif
1487
1488       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1489           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1490       }
1491
1492 #if defined(SMP)
1493       foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1494 #endif
1495       RET_P(val);
1496   } 
1497   else
1498   {
1499       /* No further putMVars, MVar is now empty */
1500       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1501  
1502 #if defined(SMP)
1503       foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1504 #else
1505       SET_INFO(mvar,stg_EMPTY_MVAR_info);
1506 #endif
1507
1508       RET_P(val);
1509   }
1510 }
1511
1512
1513 tryTakeMVarzh_fast
1514 {
1515     W_ mvar, val, info, tso;
1516
1517     /* args: R1 = MVar closure */
1518
1519     mvar = R1;
1520
1521 #if defined(SMP)
1522     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1523 #else
1524     info = GET_INFO(mvar);
1525 #endif
1526
1527     if (info == stg_EMPTY_MVAR_info) {
1528 #if defined(SMP)
1529         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1530 #endif
1531         /* HACK: we need a pointer to pass back, 
1532          * so we abuse NO_FINALIZER_closure
1533          */
1534         RET_NP(0, stg_NO_FINALIZER_closure);
1535     }
1536
1537     /* we got the value... */
1538     val = StgMVar_value(mvar);
1539
1540     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1541
1542         /* There are putMVar(s) waiting... 
1543          * wake up the first thread on the queue
1544          */
1545         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1546
1547         /* actually perform the putMVar for the thread that we just woke up */
1548         tso = StgMVar_head(mvar);
1549         PerformPut(tso,StgMVar_value(mvar));
1550
1551 #if defined(GRAN) || defined(PAR)
1552         /* ToDo: check 2nd arg (mvar) is right */
1553         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr");
1554         StgMVar_head(mvar) = tso;
1555 #else
1556         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1557         StgMVar_head(mvar) = tso;
1558 #endif
1559
1560         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1561             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1562         }
1563 #if defined(SMP)
1564         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1565 #endif
1566     }
1567     else 
1568     {
1569         /* No further putMVars, MVar is now empty */
1570         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1571 #if defined(SMP)
1572         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1573 #else
1574         SET_INFO(mvar,stg_EMPTY_MVAR_info);
1575 #endif
1576     }
1577     
1578     RET_NP(1, val);
1579 }
1580
1581
1582 putMVarzh_fast
1583 {
1584     W_ mvar, info, tso;
1585
1586     /* args: R1 = MVar, R2 = value */
1587     mvar = R1;
1588
1589 #if defined(SMP)
1590     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1591 #else
1592     info = GET_INFO(mvar);
1593 #endif
1594
1595     if (info == stg_FULL_MVAR_info) {
1596         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1597             StgMVar_head(mvar) = CurrentTSO;
1598         } else {
1599             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1600         }
1601         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1602         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1603         StgTSO_block_info(CurrentTSO)  = mvar;
1604         StgMVar_tail(mvar) = CurrentTSO;
1605         
1606         jump stg_block_putmvar;
1607     }
1608   
1609     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1610
1611         /* There are takeMVar(s) waiting: wake up the first one
1612          */
1613         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1614
1615         /* actually perform the takeMVar */
1616         tso = StgMVar_head(mvar);
1617         PerformTake(tso, R2);
1618       
1619 #if defined(GRAN) || defined(PAR)
1620         /* ToDo: check 2nd arg (mvar) is right */
1621         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr");
1622         StgMVar_head(mvar) = tso;
1623 #else
1624         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1625         StgMVar_head(mvar) = tso;
1626 #endif
1627
1628         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1629             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1630         }
1631
1632 #if defined(SMP)
1633         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1634 #endif
1635         jump %ENTRY_CODE(Sp(0));
1636     }
1637     else
1638     {
1639         /* No further takes, the MVar is now full. */
1640         StgMVar_value(mvar) = R2;
1641
1642 #if defined(SMP)
1643         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1644 #else
1645         SET_INFO(mvar,stg_FULL_MVAR_info);
1646 #endif
1647         jump %ENTRY_CODE(Sp(0));
1648     }
1649     
1650     /* ToDo: yield afterward for better communication performance? */
1651 }
1652
1653
1654 tryPutMVarzh_fast
1655 {
1656     W_ mvar, info, tso;
1657
1658     /* args: R1 = MVar, R2 = value */
1659     mvar = R1;
1660
1661 #if defined(SMP)
1662     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1663 #else
1664     info = GET_INFO(mvar);
1665 #endif
1666
1667     if (info == stg_FULL_MVAR_info) {
1668 #if defined(SMP)
1669         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1670 #endif
1671         RET_N(0);
1672     }
1673   
1674     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1675
1676         /* There are takeMVar(s) waiting: wake up the first one
1677          */
1678         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1679         
1680         /* actually perform the takeMVar */
1681         tso = StgMVar_head(mvar);
1682         PerformTake(tso, R2);
1683       
1684 #if defined(GRAN) || defined(PAR)
1685         /* ToDo: check 2nd arg (mvar) is right */
1686         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr");
1687         StgMVar_head(mvar) = tso;
1688 #else
1689         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1690         StgMVar_head(mvar) = tso;
1691 #endif
1692
1693         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1694             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1695         }
1696
1697 #if defined(SMP)
1698         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1699 #endif
1700         jump %ENTRY_CODE(Sp(0));
1701     }
1702     else
1703     {
1704         /* No further takes, the MVar is now full. */
1705         StgMVar_value(mvar) = R2;
1706
1707 #if defined(SMP)
1708         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1709 #else
1710         SET_INFO(mvar,stg_FULL_MVAR_info);
1711 #endif
1712         jump %ENTRY_CODE(Sp(0));
1713     }
1714     
1715     /* ToDo: yield afterward for better communication performance? */
1716 }
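
/* tryPutMVar# is the non-blocking variant: where putMVar# above would
 * block on a full MVar, this code returns 0 instead, and 1 on success.
 * A rough Haskell illustration via Control.Concurrent.MVar.tryPutMVar:
 *
 *   import Control.Concurrent.MVar
 *
 *   main :: IO ()
 *   main = do
 *     box <- newMVar (42 :: Int)
 *     tryPutMVar box 7 >>= print   -- expected: False (MVar already full)
 *     _ <- takeMVar box
 *     tryPutMVar box 7 >>= print   -- expected: True  (MVar was empty)
 */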
1717
1718
1719 /* -----------------------------------------------------------------------------
1720    Stable pointer primitives
1721    -------------------------------------------------------------------------  */
1722
1723 makeStableNamezh_fast
1724 {
1725     W_ index, sn_obj;
1726
1727     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1728   
1729     index = foreign "C" lookupStableName(R1 "ptr");
1730
1731     /* Is there already a StableName for this heap object?
1732      *  stable_ptr_table is a pointer to an array of snEntry structs.
1733      */
1734     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1735         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1736         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1737         StgStableName_sn(sn_obj) = index;
1738         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1739     } else {
1740         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1741     }
1742     
1743     RET_P(sn_obj);
1744 }
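
/* The table lookup above means repeated calls on the same heap object get
 * the same StgStableName closure back, which is the property that
 * System.Mem.StableName exposes.  A small Haskell sketch, for illustration:
 *
 *   import System.Mem.StableName
 *
 *   main :: IO ()
 *   main = do
 *     let xs = [1, 2, 3 :: Int]
 *     sn1 <- makeStableName xs
 *     sn2 <- makeStableName xs
 *     print (sn1 == sn2)               -- expected: True (same object)
 *     print (hashStableName sn1)       -- a stable, hashable key
 */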
1745
1746
1747 makeStablePtrzh_fast
1748 {
1749     /* Args: R1 = a */
1750     W_ sp;
1751     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1752     "ptr" sp = foreign "C" getStablePtr(R1 "ptr");
1753     RET_N(sp);
1754 }
1755
1756 deRefStablePtrzh_fast
1757 {
1758     /* Args: R1 = the stable ptr */
1759     W_ r, sp;
1760     sp = R1;
1761     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1762     RET_P(r);
1763 }
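
/* These two primops back Foreign.StablePtr: makeStablePtr# registers the
 * object in stable_ptr_table, and deRefStablePtr# is just the table lookup
 * above.  A round-trip sketch at the Haskell level, for illustration:
 *
 *   import Foreign.StablePtr
 *
 *   main :: IO ()
 *   main = do
 *     sp <- newStablePtr "kept alive for foreign code"
 *     deRefStablePtr sp >>= putStrLn   -- index back into the table
 *     freeStablePtr sp                 -- release the table entry
 */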
1764
1765 /* -----------------------------------------------------------------------------
1766    Bytecode object primitives
1767    -------------------------------------------------------------------------  */
1768
1769 newBCOzh_fast
1770 {
1771     /* R1 = instrs
1772        R2 = literals
1773        R3 = ptrs
1774        R4 = itbls
1775        R5 = arity
1776        R6 = bitmap array
1777     */
1778     W_ bco, bitmap_arr, bytes, words;
1779     
1780     bitmap_arr = R6;
1781     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1782     bytes = WDS(words);
1783
1784     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );
1785
1786     bco = Hp - bytes + WDS(1);
1787     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1788     
1789     StgBCO_instrs(bco)     = R1;
1790     StgBCO_literals(bco)   = R2;
1791     StgBCO_ptrs(bco)       = R3;
1792     StgBCO_itbls(bco)      = R4;
1793     StgBCO_arity(bco)      = HALF_W_(R5);
1794     StgBCO_size(bco)       = HALF_W_(words);
1795     
1796     // Copy the arity/bitmap info into the BCO
1797     W_ i;
1798     i = 0;
1799 for:
1800     if (i < StgArrWords_words(bitmap_arr)) {
1801         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1802         i = i + 1;
1803         goto for;
1804     }
1805     
1806     RET_P(bco);
1807 }
1808
1809
1810 mkApUpd0zh_fast
1811 {
1812     // R1 = the BCO# for the AP
1813     //  
1814     W_ ap;
1815
1816     // This function is *only* used to wrap zero-arity BCOs in an
1817     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1818     // saturated and always points directly to a FUN or BCO.
1819     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1820            StgBCO_arity(R1) == HALF_W_(0));
1821
1822     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1823     TICK_ALLOC_UP_THK(0, 0);
1824     CCCS_ALLOC(SIZEOF_StgAP);
1825
1826     ap = Hp - SIZEOF_StgAP + WDS(1);
1827     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1828     
1829     StgAP_n_args(ap) = HALF_W_(0);
1830     StgAP_fun(ap) = R1;
1831     
1832     RET_P(ap);
1833 }
1834
1835 /* -----------------------------------------------------------------------------
1836    Thread I/O blocking primitives
1837    -------------------------------------------------------------------------- */
1838
1839 /* Add a thread to the end of the blocked queue. (C-- version of the C
1840  * macro in Schedule.h).
1841  */
1842 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1843     ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
1844     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1845       W_[blocked_queue_hd] = tso;                       \
1846     } else {                                            \
1847       StgTSO_link(W_[blocked_queue_tl]) = tso;          \
1848     }                                                   \
1849     W_[blocked_queue_tl] = tso;
1850
1851 waitReadzh_fast
1852 {
1853     /* args: R1 */
1854 #ifdef THREADED_RTS
1855     foreign "C" barf("waitRead# on threaded RTS");
1856 #endif
1857
1858     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1859     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1860     StgTSO_block_info(CurrentTSO) = R1;
1861     // No locking - we're not going to use this interface in the
1862     // threaded RTS anyway.
1863     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1864     jump stg_block_noregs;
1865 }
1866
1867 waitWritezh_fast
1868 {
1869     /* args: R1 */
1870 #ifdef THREADED_RTS
1871     foreign "C" barf("waitWrite# on threaded RTS");
1872 #endif
1873
1874     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1875     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1876     StgTSO_block_info(CurrentTSO) = R1;
1877     // No locking - we're not going to use this interface in the
1878     // threaded RTS anyway.
1879     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1880     jump stg_block_noregs;
1881 }
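
/* On the non-threaded RTS these primops are what threadWaitRead and
 * threadWaitWrite in Control.Concurrent bottom out to: the current thread
 * is parked on blocked_queue until the scheduler sees the file descriptor
 * become ready.  A rough usage sketch (fd 0, i.e. stdin, chosen purely for
 * illustration; Fd has a Num instance, so a literal works):
 *
 *   import Control.Concurrent (threadWaitRead)
 *
 *   main :: IO ()
 *   main = do
 *     threadWaitRead 0     -- block until stdin is readable
 *     getLine >>= putStrLn
 */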
1882
1883
1884 STRING(stg_delayzh_malloc_str, "delayzh_fast")
1885 delayzh_fast
1886 {
1887 #ifdef mingw32_HOST_OS
1888     W_ ares;
1889     CInt reqID;
1890 #else
1891     W_ t, prev, target;
1892 #endif
1893
1894 #ifdef THREADED_RTS
1895     foreign "C" barf("delay# on threaded RTS");
1896 #endif
1897
1898     /* args: R1 (microsecond delay amount) */
1899     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1900     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
1901
1902 #ifdef mingw32_HOST_OS
1903
1904     /* could probably allocate this on the heap instead */
1905     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1906                                             stg_delayzh_malloc_str);
1907     reqID = foreign "C" addDelayRequest(R1);
1908     StgAsyncIOResult_reqID(ares)   = reqID;
1909     StgAsyncIOResult_len(ares)     = 0;
1910     StgAsyncIOResult_errCode(ares) = 0;
1911     StgTSO_block_info(CurrentTSO)  = ares;
1912
1913     /* Having all async-blocked threads reside on the blocked_queue
1914      * simplifies matters, so change the status to BlockedOnDoProc and
1915      * put the delayed thread on the blocked_queue.
1916      */
1917     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1918     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1919     jump stg_block_async_void;
1920
1921 #else
1922
1923     W_ time;
1924     time = foreign "C" getourtimeofday();
1925     target = (R1 / (TICK_MILLISECS*1000)) + time;
1926     StgTSO_block_info(CurrentTSO) = target;
1927
1928     /* Insert the new thread in the sleeping queue. */
1929     prev = NULL;
1930     t = W_[sleeping_queue];
1931 while:
1932     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
1933         prev = t;
1934         t = StgTSO_link(t);
1935         goto while;
1936     }
1937
1938     StgTSO_link(CurrentTSO) = t;
1939     if (prev == NULL) {
1940         W_[sleeping_queue] = CurrentTSO;
1941     } else {
1942         StgTSO_link(prev) = CurrentTSO;
1943     }
1944     jump stg_block_noregs;
1945 #endif
1946 }
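
/* delay# is what Control.Concurrent.threadDelay reaches on the
 * non-threaded RTS; the loop above keeps sleeping_queue ordered by
 * wake-up time so the scheduler only has to examine the front.  A minimal
 * Haskell-level example, for illustration:
 *
 *   import Control.Concurrent (threadDelay)
 *
 *   main :: IO ()
 *   main = do
 *     putStrLn "sleeping for roughly one second"
 *     threadDelay 1000000          -- the argument is in microseconds
 *     putStrLn "awake again"
 */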
1947
1948
1949 #ifdef mingw32_HOST_OS
1950 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
1951 asyncReadzh_fast
1952 {
1953     W_ ares;
1954     CInt reqID;
1955
1956 #ifdef THREADED_RTS
1957     foreign "C" barf("asyncRead# on threaded RTS");
1958 #endif
1959
1960     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1961     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1962     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1963
1964     /* could probably allocate this on the heap instead */
1965     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1966                                             stg_asyncReadzh_malloc_str);
1967     reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr");
1968     StgAsyncIOResult_reqID(ares)   = reqID;
1969     StgAsyncIOResult_len(ares)     = 0;
1970     StgAsyncIOResult_errCode(ares) = 0;
1971     StgTSO_block_info(CurrentTSO)  = ares;
1972     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1973     jump stg_block_async;
1974 }
1975
1976 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
1977 asyncWritezh_fast
1978 {
1979     W_ ares;
1980     CInt reqID;
1981
1982 #ifdef THREADED_RTS
1983     foreign "C" barf("asyncWrite# on threaded RTS");
1984 #endif
1985
1986     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1987     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1988     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1989
1990     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1991                                             stg_asyncWritezh_malloc_str);
1992     reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr");
1993
1994     StgAsyncIOResult_reqID(ares)   = reqID;
1995     StgAsyncIOResult_len(ares)     = 0;
1996     StgAsyncIOResult_errCode(ares) = 0;
1997     StgTSO_block_info(CurrentTSO)  = ares;
1998     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1999     jump stg_block_async;
2000 }
2001
2002 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
2003 asyncDoProczh_fast
2004 {
2005     W_ ares;
2006     CInt reqID;
2007
2008     /* args: R1 = proc, R2 = param */
2009     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2010     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2011
2012     /* could probably allocate this on the heap instead */
2013     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2014                                             stg_asyncDoProczh_malloc_str);
2015     reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr");
2016     StgAsyncIOResult_reqID(ares)   = reqID;
2017     StgAsyncIOResult_len(ares)     = 0;
2018     StgAsyncIOResult_errCode(ares) = 0;
2019     StgTSO_block_info(CurrentTSO) = ares;
2020     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2021     jump stg_block_async;
2022 }
2023 #endif
2024
2025 /* -----------------------------------------------------------------------------
2026   ** temporary **
2027
2028    classes CCallable and CReturnable don't really exist, but the
2029    compiler insists on generating dictionaries containing references
2030    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
2031    to GHC_ZCCCallable_static_info etc., so we provide dummy symbols
2032    so we have to make these one element long.
2033   --------------------------------------------------------------------------- */
2034
2035 section "rodata" {
2036   GHC_ZCCCallable_static_info:   W_ 0;
2037 }
2038
2039 section "rodata" {
2040   GHC_ZCCReturnable_static_info: W_ 0;
2041 }