1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
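/* For orientation: these out-of-line primops are visible from Haskell via
 * GHC.Exts (which re-exports GHC.Prim), where the unboxed-tuple result types
 * mirror the return convention described above.  A hedged illustration in
 * Haskell, assuming GHC.Exts exports newByteArray# with its usual type:
 *
 *   {-# LANGUAGE MagicHash, UnboxedTuples #-}
 *   import GHC.Exts (Int#, MutableByteArray#, State#, newByteArray#)
 *
 *   -- Arguments arrive as ordinary function arguments at this level (the
 *   -- code generator maps them to R1, R2, ...), and the (# ... #) result
 *   -- corresponds to the unboxed-tuple return performed by RET_xxxx().
 *   new :: Int# -> State# s -> (# State# s, MutableByteArray# s #)
 *   new = newByteArray#
 */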
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check; the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *less* than the size of a word, make sure we
42  * round up to the nearest word for the size of the array.
43  */
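/* A hedged illustration of the rounding, written in Haskell rather than C--
 * and assuming an 8-byte word (this plays the role of ROUNDUP_BYTES_TO_WDS):
 *
 *   roundupBytesToWords :: Int -> Int
 *   roundupBytesToWords n = (n + 7) `div` 8
 *
 *   -- roundupBytesToWords 10 == 2: a 10-byte request gets a two-word
 *   -- (16-byte) payload following the StgArrWords header.
 */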
44
45 newByteArrayzh_fast
46 {
47     W_ words, payload_words, n, p;
48     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
49     n = R1;
50     payload_words = ROUNDUP_BYTES_TO_WDS(n);
51     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
52     "ptr" p = foreign "C" allocateLocal(BaseReg "ptr",words) [];
53     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
54     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
55     StgArrWords_words(p) = payload_words;
56     RET_P(p);
57 }
58
59 newPinnedByteArrayzh_fast
60 {
61     W_ words, payload_words, n, p;
62
63     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
64     n = R1;
65     payload_words = ROUNDUP_BYTES_TO_WDS(n);
66
67     // We want an 8-byte aligned array.  allocatePinned() gives us
68     // 8-byte aligned memory by default, but we want to align the
69     // *goods* inside the ArrWords object, so we have to check the
70     // size of the ArrWords header and adjust our size accordingly.
71     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
72     if ((SIZEOF_StgArrWords & 7) != 0) {
73         words = words + 1;
74     }
75
76     "ptr" p = foreign "C" allocatePinned(words) [];
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78
79     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
80     // have to push the object forward one word so that the goods
81     // fall on an 8-byte boundary.
82     if ((SIZEOF_StgArrWords & 7) != 0) {
83         p = p + WDS(1);
84     }
85
86     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
87     StgArrWords_words(p) = payload_words;
88     RET_P(p);
89 }
90
91 newArrayzh_fast
92 {
93     W_ words, n, init, arr, p;
94     /* Args: R1 = words, R2 = initialisation value */
95
96     n = R1;
97     MAYBE_GC(R2_PTR,newArrayzh_fast);
98
99     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
100     "ptr" arr = foreign "C" allocateLocal(BaseReg "ptr",words) [];
101     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
102
103     SET_HDR(arr, stg_MUT_ARR_PTRS_info, W_[CCCS]);
104     StgMutArrPtrs_ptrs(arr) = n;
105
106     // Initialise all elements of the array with the value in R2
107     init = R2;
108     p = arr + SIZEOF_StgMutArrPtrs;
109   for:
110     if (p < arr + WDS(words)) {
111         W_[p] = init;
112         p = p + WDS(1);
113         goto for;
114     }
115
116     RET_P(arr);
117 }
118
119 unsafeThawArrayzh_fast
120 {
121   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
122   //
123   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
124   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
125   // it on the mutable list for the GC to remove (removing something from
126   // the mutable list is not easy, because the mut_list is only singly-linked).
127   // 
128   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
129   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0 to indicate
130   // that it is still on the mutable list.
131
132   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
133   // either it is on a mut_list, or it isn't.  We adopt the convention that
134   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
135   // and MUT_ARR_PTRS_FROZEN otherwise.
136   //
137   if (%INFO_TYPE(%GET_STD_INFO(R1)) != HALF_W_(MUT_ARR_PTRS_FROZEN0)) {
138         foreign "C" recordMutableLock(R1 "ptr") [R1];
139   }
140
141   SET_INFO(R1,stg_MUT_ARR_PTRS_info);
142
143   RET_P(R1);
144 }
145
146 /* -----------------------------------------------------------------------------
147    MutVar primitives
148    -------------------------------------------------------------------------- */
149
150 newMutVarzh_fast
151 {
152     W_ mv;
153     /* Args: R1 = initialisation value */
154
155     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
156
157     mv = Hp - SIZEOF_StgMutVar + WDS(1);
158     SET_HDR(mv,stg_MUT_VAR_info,W_[CCCS]);
159     StgMutVar_var(mv) = R1;
160     
161     RET_P(mv);
162 }
163
164 atomicModifyMutVarzh_fast
165 {
166     W_ mv, z, x, y, r;
167     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
168
169     /* If x is the current contents of the MutVar#, then
170        we want to make the new contents point to
171
172          (sel_0 (f x))
173  
174        and the return value is
175          
176          (sel_1 (f x))
177
178        Obviously we can share (f x).
179
180          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
181          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
182          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
183     */
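/* A hedged reference sketch of the behaviour constructed below, at the level
 * of Data.IORef (whose IORef wraps a MutVar#): the contents x are replaced by
 * a thunk for (sel_0 (f x)) and a thunk for (sel_1 (f x)) is returned, with
 * the application (f x) shared between them, just like z, y and r here.
 *
 *   import Data.IORef (IORef, atomicModifyIORef)
 *
 *   -- post-increment: the new contents and the returned value both refer
 *   -- to the shared application of the function to the old contents
 *   bump :: IORef Int -> IO Int
 *   bump ref = atomicModifyIORef ref (\x -> (x + 1, x))
 */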
184
185 #if MIN_UPD_SIZE > 1
186 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
187 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
188 #else
189 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
190 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
191 #endif
192
193 #if MIN_UPD_SIZE > 2
194 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
195 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
196 #else
197 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
198 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
199 #endif
200
201 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
202
203    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
204
205 #if defined(SMP)
206     foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
207 #endif
208
209    x = StgMutVar_var(R1);
210
211    TICK_ALLOC_THUNK_2();
212    CCCS_ALLOC(THUNK_2_SIZE);
213    z = Hp - THUNK_2_SIZE + WDS(1);
214    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
215    LDV_RECORD_CREATE(z);
216    StgThunk_payload(z,0) = R2;
217    StgThunk_payload(z,1) = x;
218
219    TICK_ALLOC_THUNK_1();
220    CCCS_ALLOC(THUNK_1_SIZE);
221    y = z - THUNK_1_SIZE;
222    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
223    LDV_RECORD_CREATE(y);
224    StgThunk_payload(y,0) = z;
225
226    StgMutVar_var(R1) = y;
227
228    TICK_ALLOC_THUNK_1();
229    CCCS_ALLOC(THUNK_1_SIZE);
230    r = y - THUNK_1_SIZE;
231    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
232    LDV_RECORD_CREATE(r);
233    StgThunk_payload(r,0) = z;
234
235 #if defined(SMP)
236     foreign "C" RELEASE_LOCK(sm_mutex "ptr") [];
237 #endif
238
239    RET_P(r);
240 }
241
242 /* -----------------------------------------------------------------------------
243    Weak Pointer Primitives
244    -------------------------------------------------------------------------- */
245
246 STRING(stg_weak_msg,"New weak pointer at %p\n")
247
248 mkWeakzh_fast
249 {
250   /* R1 = key
251      R2 = value
252      R3 = finalizer (or NULL)
253   */
254   W_ w;
255
256   if (R3 == NULL) {
257     R3 = stg_NO_FINALIZER_closure;
258   }
259
260   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
261
262   w = Hp - SIZEOF_StgWeak + WDS(1);
263   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
264
265   StgWeak_key(w)       = R1;
266   StgWeak_value(w)     = R2;
267   StgWeak_finalizer(w) = R3;
268
269   StgWeak_link(w)       = W_[weak_ptr_list];
270   W_[weak_ptr_list]     = w;
271
272   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
273
274   RET_P(w);
275 }
276
277
278 finalizzeWeakzh_fast
279 {
280   /* R1 = weak ptr
281    */
282   W_ w, f;
283
284   w = R1;
285
286   // already dead?
287   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
288       RET_NP(0,stg_NO_FINALIZER_closure);
289   }
290
291   // kill it
292 #ifdef PROFILING
293   // @LDV profiling
294   // A weak pointer is inherently used, so we do not need to call
295   // LDV_recordDead_FILL_SLOP_DYNAMIC():
296   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
297   // or, LDV_recordDead():
298   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
299   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
300   // large as weak pointers, so there is no need to fill the slop, either.
301   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
302 #endif
303
304   //
305   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
306   //
307   SET_INFO(w,stg_DEAD_WEAK_info);
308   LDV_RECORD_CREATE(w);
309
310   f = StgWeak_finalizer(w);
311   StgDeadWeak_link(w) = StgWeak_link(w);
312
313   /* return the finalizer */
314   if (f == stg_NO_FINALIZER_closure) {
315       RET_NP(0,stg_NO_FINALIZER_closure);
316   } else {
317       RET_NP(1,f);
318   }
319 }
320
321 deRefWeakzh_fast
322 {
323   /* R1 = weak ptr */
324   W_ w, code, val;
325
326   w = R1;
327   if (GET_INFO(w) == stg_WEAK_info) {
328     code = 1;
329     val = StgWeak_value(w);
330   } else {
331     code = 0;
332     val = w;
333   }
334   RET_NP(code,val);
335 }
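/* The user-level view of these primops, as a hedged sketch in terms of
 * System.Mem.Weak from the base library (mkWeak, deRefWeak and finalize are
 * thin wrappers over mkWeak#, deRefWeak# and finalizeWeak#):
 *
 *   import System.Mem.Weak (mkWeak, deRefWeak, finalize)
 *
 *   demo :: IO ()
 *   demo = do
 *     let key = [1, 2, 3 :: Int]           -- any heap object can be the key
 *     w <- mkWeak key "payload" (Just (putStrLn "finalizer ran"))
 *     deRefWeak w >>= print                -- Just "payload" while key is alive
 *     finalize w                           -- run the finalizer eagerly
 *     deRefWeak w >>= print                -- Nothing from now on
 */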
336
337 /* -----------------------------------------------------------------------------
338    Arbitrary-precision Integer operations.
339
340    There are some assumptions in this code that mp_limb_t == W_.  This is
341    the case for all the platforms that GHC supports, currently.
342    -------------------------------------------------------------------------- */
343
344 int2Integerzh_fast
345 {
346    /* arguments: R1 = Int# */
347
348    W_ val, s, p;        /* to avoid aliasing */
349
350    val = R1;
351    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
352
353    p = Hp - SIZEOF_StgArrWords;
354    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
355    StgArrWords_words(p) = 1;
356
357    /* mpz_set_si is inlined here, makes things simpler */
358    if (%lt(val,0)) { 
359         s  = -1;
360         Hp(0) = -val;
361    } else { 
362      if (%gt(val,0)) {
363         s = 1;
364         Hp(0) = val;
365      } else {
366         s = 0;
367      }
368   }
369
370    /* returns (# size  :: Int#, 
371                  data  :: ByteArray# 
372                #)
373    */
374    RET_NP(s,p);
375 }
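/* A hedged model of the (size, data) convention used by the Integer primops,
 * in plain Haskell: |s| is the number of limbs in use, s is negated for
 * negative values, and s == 0 means zero (the payload is then unused).
 *
 *   import Data.Word (Word)
 *
 *   int2IntegerModel :: Int -> (Int, [Word])
 *   int2IntegerModel 0             = ( 0, [])
 *   int2IntegerModel n | n < 0     = (-1, [fromIntegral (negate n)])
 *                      | otherwise = ( 1, [fromIntegral n])
 */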
376
377 word2Integerzh_fast
378 {
379    /* arguments: R1 = Word# */
380
381    W_ val, s, p;        /* to avoid aliasing */
382
383    val = R1;
384
385    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
386
387    p = Hp - SIZEOF_StgArrWords;
388    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
389    StgArrWords_words(p) = 1;
390
391    if (val != 0) {
392         s = 1;
393         W_[Hp] = val;
394    } else {
395         s = 0;
396    }
397
398    /* returns (# size  :: Int#, 
399                  data  :: ByteArray# #)
400    */
401    RET_NP(s,p);
402 }
403
404
405 /*
406  * 'long long' primops for converting to/from Integers.
407  */
408
409 #ifdef SUPPORT_LONG_LONGS
410
411 int64ToIntegerzh_fast
412 {
413    /* arguments: L1 = Int64# */
414
415    L_ val;
416    W_ hi, s, neg, words_needed, p;
417
418    val = L1;
419    neg = 0;
420
421    if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) )  { 
422        words_needed = 2;
423    } else { 
424        // minimum is one word
425        words_needed = 1;
426    }
427
428    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
429                NO_PTRS, int64ToIntegerzh_fast );
430
431    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
432    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
433    StgArrWords_words(p) = words_needed;
434
435    if ( %lt(val,0::L_) ) {
436      neg = 1;
437      val = -val;
438    }
439
440    hi = TO_W_(val >> 32);
441
442    if ( words_needed == 2 )  { 
443       s = 2;
444       Hp(-1) = TO_W_(val);
445       Hp(0) = hi;
446    } else { 
447        if ( val != 0::L_ ) {
448            s = 1;
449            Hp(0) = TO_W_(val);
450        } else /* val==0 */  {
451            s = 0;
452        }
453    }
454    if ( neg != 0 ) {
455         s = -s;
456    }
457
458    /* returns (# size  :: Int#, 
459                  data  :: ByteArray# #)
460    */
461    RET_NP(s,p);
462 }
463
464 word64ToIntegerzh_fast
465 {
466    /* arguments: L1 = Word64# */
467
468    L_ val;
469    W_ hi, s, words_needed, p;
470
471    val = L1;
472    if ( val >= 0x100000000::L_ ) {
473       words_needed = 2;
474    } else {
475       words_needed = 1;
476    }
477
478    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
479                NO_PTRS, word64ToIntegerzh_fast );
480
481    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
482    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
483    StgArrWords_words(p) = words_needed;
484
485    hi = TO_W_(val >> 32);
486    if ( val >= 0x100000000::L_ ) { 
487      s = 2;
488      Hp(-1) = TO_W_(val);
489      Hp(0)  = hi;
490    } else {
491        if ( val != 0::L_ ) {
492            s = 1;
493            Hp(0) = TO_W_(val);
494        } else /* val==0 */  {
495            s = 0;
496        }
497    }
498
499    /* returns (# size  :: Int#, 
500                  data  :: ByteArray# #)
501    */
502    RET_NP(s,p);
503 }
504
505
506 #endif /* SUPPORT_LONG_LONGS */
507
508 /* ToDo: this is shockingly inefficient */
509
510 #ifndef SMP
511 section "bss" {
512   mp_tmp1:
513     bits8 [SIZEOF_MP_INT];
514 }
515
516 section "bss" {
517   mp_tmp2:
518     bits8 [SIZEOF_MP_INT];
519 }
520
521 section "bss" {
522   mp_result1:
523     bits8 [SIZEOF_MP_INT];
524 }
525
526 section "bss" {
527   mp_result2:
528     bits8 [SIZEOF_MP_INT];
529 }
530 #endif
531
532 #ifdef SMP
533 #define FETCH_MP_TEMP(X) \
534 W_ X; \
535 X = BaseReg + (OFFSET_StgRegTable_r ## X);
536 #else
537 #define FETCH_MP_TEMP(X) /* Nothing */
538 #endif
539
540 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
541 name                                                                    \
542 {                                                                       \
543   CInt s1, s2;                                                          \
544   W_ d1, d2;                                                            \
545   FETCH_MP_TEMP(mp_tmp1);                                               \
546   FETCH_MP_TEMP(mp_tmp2);                                               \
547   FETCH_MP_TEMP(mp_result1)                                             \
548   FETCH_MP_TEMP(mp_result2);                                            \
549                                                                         \
550   /* call doYouWantToGC() */                                            \
551   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
552                                                                         \
553   s1 = W_TO_INT(R1);                                                    \
554   d1 = R2;                                                              \
555   s2 = W_TO_INT(R3);                                                    \
556   d2 = R4;                                                              \
557                                                                         \
558   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
559   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
560   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
561   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
562   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
563   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
564                                                                         \
565   foreign "C" mpz_init(mp_result1 "ptr") [];                            \
566                                                                         \
567   /* Perform the operation */                                           \
568   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
569                                                                         \
570   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
571          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
572 }
573
574 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
575 name                                                                    \
576 {                                                                       \
577   CInt s1;                                                              \
578   W_ d1;                                                                \
579   FETCH_MP_TEMP(mp_tmp1);                                               \
580   FETCH_MP_TEMP(mp_result1)                                             \
581                                                                         \
582   /* call doYouWantToGC() */                                            \
583   MAYBE_GC(R2_PTR, name);                                               \
584                                                                         \
585   d1 = R2;                                                              \
586   s1 = W_TO_INT(R1);                                                    \
587                                                                         \
588   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
589   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
590   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
591                                                                         \
592   foreign "C" mpz_init(mp_result1 "ptr") [];                            \
593                                                                         \
594   /* Perform the operation */                                           \
595   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
596                                                                         \
597   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
598          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
599 }
600
601 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
602 name                                                                                    \
603 {                                                                                       \
604   CInt s1, s2;                                                                          \
605   W_ d1, d2;                                                                            \
606   FETCH_MP_TEMP(mp_tmp1);                                                               \
607   FETCH_MP_TEMP(mp_tmp2);                                                               \
608   FETCH_MP_TEMP(mp_result1)                                                             \
609   FETCH_MP_TEMP(mp_result2)                                                             \
610                                                                                         \
611   /* call doYouWantToGC() */                                                            \
612   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
613                                                                                         \
614   s1 = W_TO_INT(R1);                                                                    \
615   d1 = R2;                                                                              \
616   s2 = W_TO_INT(R3);                                                                    \
617   d2 = R4;                                                                              \
618                                                                                         \
619   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
620   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
621   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
622   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
623   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
624   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
625                                                                                         \
626   foreign "C" mpz_init(mp_result1 "ptr") [];                                               \
627   foreign "C" mpz_init(mp_result2 "ptr") [];                                               \
628                                                                                         \
629   /* Perform the operation */                                                           \
630   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
631                                                                                         \
632   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
633            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
634            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
635            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
636 }
637
638 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add)
639 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub)
640 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul)
641 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd)
642 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q)
643 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r)
644 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact)
645 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and)
646 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior)
647 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor)
648 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com)
649
650 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr)
651 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr)
652
653 #ifndef SMP
654 section "bss" {
655   mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
656 }
657 #endif
658
659 gcdIntzh_fast
660 {
661     /* R1 = the first Int#; R2 = the second Int# */
662     W_ r; 
663     FETCH_MP_TEMP(mp_tmp_w);
664
665     W_[mp_tmp_w] = R1;
666     r = foreign "C" mpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
667
668     R1 = r;
669     /* Result parked in R1, return via info-pointer at TOS */
670     jump %ENTRY_CODE(Sp(0));
671 }
672
673
674 gcdIntegerIntzh_fast
675 {
676     /* R1 = s1; R2 = d1; R3 = the int */
677     R1 = foreign "C" mpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
678     
679     /* Result parked in R1, return via info-pointer at TOS */
680     jump %ENTRY_CODE(Sp(0));
681 }
682
683
684 cmpIntegerIntzh_fast
685 {
686     /* R1 = s1; R2 = d1; R3 = the int */
687     W_ usize, vsize, v_digit, u_digit;
688
689     usize = R1;
690     vsize = 0;
691     v_digit = R3;
692
693     // paraphrased from mpz_cmp_si() in the GMP sources
694     if (%gt(v_digit,0)) {
695         vsize = 1;
696     } else { 
697         if (%lt(v_digit,0)) {
698             vsize = -1;
699             v_digit = -v_digit;
700         }
701     }
702
703     if (usize != vsize) {
704         R1 = usize - vsize; 
705         jump %ENTRY_CODE(Sp(0));
706     }
707
708     if (usize == 0) {
709         R1 = 0; 
710         jump %ENTRY_CODE(Sp(0));
711     }
712
713     u_digit = W_[BYTE_ARR_CTS(R2)];
714
715     if (u_digit == v_digit) {
716         R1 = 0; 
717         jump %ENTRY_CODE(Sp(0));
718     }
719
720     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
721         R1 = usize; 
722     } else {
723         R1 = -usize; 
724     }
725
726     jump %ENTRY_CODE(Sp(0));
727 }
728
729 cmpIntegerzh_fast
730 {
731     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
732     W_ usize, vsize, size, up, vp;
733     CInt cmp;
734
735     // paraphrased from mpz_cmp() in the GMP sources
736     usize = R1;
737     vsize = R3;
738
739     if (usize != vsize) {
740         R1 = usize - vsize; 
741         jump %ENTRY_CODE(Sp(0));
742     }
743
744     if (usize == 0) {
745         R1 = 0; 
746         jump %ENTRY_CODE(Sp(0));
747     }
748
749     if (%lt(usize,0)) { // NB. not <, which is unsigned
750         size = -usize;
751     } else {
752         size = usize;
753     }
754
755     up = BYTE_ARR_CTS(R2);
756     vp = BYTE_ARR_CTS(R4);
757
758     cmp = foreign "C" mpn_cmp(up "ptr", vp "ptr", size) [];
759
760     if (cmp == 0 :: CInt) {
761         R1 = 0; 
762         jump %ENTRY_CODE(Sp(0));
763     }
764
765     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
766         R1 = 1;
767     } else {
768         R1 = (-1); 
769     }
770     /* Result parked in R1, return via info-pointer at TOS */
771     jump %ENTRY_CODE(Sp(0));
772 }
773
774 integer2Intzh_fast
775 {
776     /* R1 = s; R2 = d */
777     W_ r, s;
778
779     s = R1;
780     if (s == 0) {
781         r = 0;
782     } else {
783         r = W_[R2 + SIZEOF_StgArrWords];
784         if (%lt(s,0)) {
785             r = -r;
786         }
787     }
788     /* Result parked in R1, return via info-pointer at TOS */
789     R1 = r;
790     jump %ENTRY_CODE(Sp(0));
791 }
792
793 integer2Wordzh_fast
794 {
795   /* R1 = s; R2 = d */
796   W_ r, s;
797
798   s = R1;
799   if (s == 0) {
800     r = 0;
801   } else {
802     r = W_[R2 + SIZEOF_StgArrWords];
803     if (%lt(s,0)) {
804         r = -r;
805     }
806   }
807   /* Result parked in R1, return via info-pointer at TOS */
808   R1 = r;
809   jump %ENTRY_CODE(Sp(0));
810 }
811
812 decodeFloatzh_fast
813 {
814     W_ p;
815     F_ arg;
816     FETCH_MP_TEMP(mp_tmp1);
817     FETCH_MP_TEMP(mp_tmp_w);
818     
819     /* arguments: F1 = Float# */
820     arg = F1;
821     
822     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
823     
824     /* Be prepared to tell Lennart-coded __decodeFloat
825        where mantissa._mp_d can be put (it does not care about the rest) */
826     p = Hp - SIZEOF_StgArrWords;
827     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
828     StgArrWords_words(p) = 1;
829     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
830     
831     /* Perform the operation */
832     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
833     
834     /* returns: (Int# (expn), Int#, ByteArray#) */
835     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
836 }
837
838 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
839 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
840
841 decodeDoublezh_fast
842 {
843     D_ arg;
844     W_ p;
845     FETCH_MP_TEMP(mp_tmp1);
846     FETCH_MP_TEMP(mp_tmp_w);
847
848     /* arguments: D1 = Double# */
849     arg = D1;
850
851     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
852     
853     /* Be prepared to tell Lennart-coded __decodeDouble
854        where mantissa.d can be put (it does not care about the rest) */
855     p = Hp - ARR_SIZE + WDS(1);
856     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
857     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
858     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
859
860     /* Perform the operation */
861     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
862     
863     /* returns: (Int# (expn), Int#, ByteArray#) */
864     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
865 }
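/* At the library level these two primops sit underneath decodeFloat from the
 * RealFloat class.  A hedged Haskell example (values assume IEEE double):
 *
 *   main :: IO ()
 *   main = print (decodeFloat (1.0 :: Double))
 *   -- prints (4503599627370496,-52), i.e. 1.0 == 4503599627370496 * 2^(-52)
 */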
866
867 /* -----------------------------------------------------------------------------
868  * Concurrency primitives
869  * -------------------------------------------------------------------------- */
870
871 forkzh_fast
872 {
873   /* args: R1 = closure to run in the new thread */
874   
875   MAYBE_GC(R1_PTR, forkzh_fast);
876
877   foreign "C" ACQUIRE_LOCK(sched_mutex "ptr");
878
879   // create it right now, return ThreadID in R1
880   "ptr" R1 = foreign "C" createIOThread( RtsFlags_GcFlags_initialStkSize(RtsFlags), 
881                                    R1 "ptr");
882   foreign "C" scheduleThreadLocked(R1 "ptr");
883
884   foreign "C" RELEASE_LOCK(sched_mutex "ptr");
885
886   // switch at the earliest opportunity
887   CInt[context_switch] = 1 :: CInt;
888   
889   RET_P(R1);
890 }
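/* At the source level this primop is reached through forkIO.  A hedged usage
 * sketch using Control.Concurrent from the base library:
 *
 *   import Control.Concurrent (forkIO, myThreadId, threadDelay)
 *
 *   main :: IO ()
 *   main = do
 *     tid <- forkIO (myThreadId >>= print)  -- child thread starts immediately
 *     print tid                             -- ThreadId returned to the parent
 *     threadDelay 100000                    -- crude: give the child time to run
 */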
891
892 yieldzh_fast
893 {
894   jump stg_yield_noregs;
895 }
896
897 myThreadIdzh_fast
898 {
899   /* no args. */
900   RET_P(CurrentTSO);
901 }
902
903 labelThreadzh_fast
904 {
905   /* args: 
906         R1 = ThreadId#
907         R2 = Addr# */
908 #ifdef DEBUG
909   foreign "C" labelThread(R1 "ptr", R2 "ptr");
910 #endif
911   jump %ENTRY_CODE(Sp(0));
912 }
913
914 isCurrentThreadBoundzh_fast
915 {
916   /* no args */
917   W_ r;
918   r = foreign "C" isThreadBound(CurrentTSO) [];
919   RET_N(r);
920 }
921
922
923 /* -----------------------------------------------------------------------------
924  * TVar primitives
925  * -------------------------------------------------------------------------- */
926
927 #ifdef REG_R1
928 #define SP_OFF 0
929 #define IF_NOT_REG_R1(x) 
930 #else
931 #define SP_OFF 1
932 #define IF_NOT_REG_R1(x) x
933 #endif
934
935 // Catch retry frame ------------------------------------------------------------
936
937 #define CATCH_RETRY_FRAME_ERROR(label) \
938   label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }
939
940 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
941 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
942 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
943 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
944 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
945 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
946 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
947 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)
948
949 #if MAX_VECTORED_RTN > 8
950 #error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
951 #endif
952
953 #if defined(PROFILING)
954 #define CATCH_RETRY_FRAME_BITMAP 7
955 #define CATCH_RETRY_FRAME_WORDS  6
956 #else
957 #define CATCH_RETRY_FRAME_BITMAP 1
958 #define CATCH_RETRY_FRAME_WORDS  4
959 #endif
960
961 INFO_TABLE_RET(stg_catch_retry_frame,
962                CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
963                CATCH_RETRY_FRAME,
964                stg_catch_retry_frame_0_ret,
965                stg_catch_retry_frame_1_ret,
966                stg_catch_retry_frame_2_ret,
967                stg_catch_retry_frame_3_ret,
968                stg_catch_retry_frame_4_ret,
969                stg_catch_retry_frame_5_ret,
970                stg_catch_retry_frame_6_ret,
971                stg_catch_retry_frame_7_ret)
972 {
973    W_ r, frame, trec, outer;
974    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
975
976    frame = Sp;
977    trec = StgTSO_trec(CurrentTSO);
978    "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
979    r = foreign "C" stmCommitNestedTransaction(BaseReg "ptr", trec "ptr") [];
980    if (r) {
981      /* Succeeded (either first branch or second branch) */
982      StgTSO_trec(CurrentTSO) = outer;
983      Sp = Sp + SIZEOF_StgCatchRetryFrame;
984      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
985      jump %ENTRY_CODE(Sp(SP_OFF));
986    } else {
987      /* Did not commit: retry */
988      W_ new_trec;
989      "ptr" new_trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr") [];
990      StgTSO_trec(CurrentTSO) = new_trec;
991      if (StgCatchRetryFrame_running_alt_code(frame)) {
992        R1 = StgCatchRetryFrame_alt_code(frame);
993      } else {
994        R1 = StgCatchRetryFrame_first_code(frame);
995        StgCatchRetryFrame_first_code_trec(frame) = new_trec;
996      }
997      Sp_adj(-1);
998      jump RET_LBL(stg_ap_v);
999    }
1000 }
1001
1002
1003 // Atomically frame -------------------------------------------------------------
1004
1005
1006 #define ATOMICALLY_FRAME_ERROR(label) \
1007   label { foreign "C" barf("atomically_frame incorrectly entered!"); }
1008
1009 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
1010 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
1011 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
1012 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
1013 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
1014 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
1015 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
1016 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)
1017
1018 #if MAX_VECTORED_RTN > 8
1019 #error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
1020 #endif
1021
1022 #if defined(PROFILING)
1023 #define ATOMICALLY_FRAME_BITMAP 7
1024 #define ATOMICALLY_FRAME_WORDS  4
1025 #else
1026 #define ATOMICALLY_FRAME_BITMAP 1
1027 #define ATOMICALLY_FRAME_WORDS  2
1028 #endif
1029
1030
1031 INFO_TABLE_RET(stg_atomically_frame,
1032                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1033                ATOMICALLY_FRAME,
1034                stg_atomically_frame_0_ret,
1035                stg_atomically_frame_1_ret,
1036                stg_atomically_frame_2_ret,
1037                stg_atomically_frame_3_ret,
1038                stg_atomically_frame_4_ret,
1039                stg_atomically_frame_5_ret,
1040                stg_atomically_frame_6_ret,
1041                stg_atomically_frame_7_ret)
1042 {
1043    W_ frame, trec, valid;
1044    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1045
1046    frame = Sp;
1047    trec = StgTSO_trec(CurrentTSO);
1048    if (StgAtomicallyFrame_waiting(frame)) {
1049      /* The TSO is currently waiting: should we stop waiting? */
1050      valid = foreign "C" stmReWait(CurrentTSO "ptr");
1051      if (valid) {
1052        /* Previous attempt is still valid: no point trying again yet */
1053           IF_NOT_REG_R1(Sp_adj(-2);
1054                         Sp(1) = stg_NO_FINALIZER_closure;
1055                         Sp(0) = stg_ut_1_0_unreg_info;)
1056        jump stg_block_noregs;
1057      } else {
1058        /* Previous attempt is no longer valid: try again */
1059        "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", NO_TREC "ptr");
1060        StgTSO_trec(CurrentTSO) = trec;
1061        StgAtomicallyFrame_waiting(frame) = 0 :: CInt; /* false; */
1062        R1 = StgAtomicallyFrame_code(frame);
1063        Sp_adj(-1);
1064        jump RET_LBL(stg_ap_v);
1065      }
1066    } else {
1067      /* The TSO is not currently waiting: try to commit the transaction */
1068      valid = foreign "C" stmCommitTransaction(BaseReg "ptr", trec "ptr");
1069      if (valid) {
1070        /* Transaction was valid: commit succeeded */
1071        StgTSO_trec(CurrentTSO) = NO_TREC;
1072        Sp = Sp + SIZEOF_StgAtomicallyFrame;
1073        IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1074        jump %ENTRY_CODE(Sp(SP_OFF));
1075      } else {
1076        /* Transaction was not valid: try again */
1077        "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", NO_TREC "ptr");
1078        StgTSO_trec(CurrentTSO) = trec;
1079        R1 = StgAtomicallyFrame_code(frame);
1080        Sp_adj(-1);
1081        jump RET_LBL(stg_ap_v);
1082      }
1083    }
1084 }
1085
1086
1087 // STM catch frame --------------------------------------------------------------
1088
1089 #define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)          \
1090    label                                                   \
1091    {                                                       \
1092       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )  \
1093       Sp = Sp + SIZEOF_StgCatchSTMFrame;                   \
1094       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)             \
1095       jump ret;                                            \
1096    }
1097
1098 #ifdef REG_R1
1099 #define SP_OFF 0
1100 #else
1101 #define SP_OFF 1
1102 #endif
1103
1104 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
1105 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
1106 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
1107 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
1108 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
1109 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
1110 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
1111 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
1112
1113 #if MAX_VECTORED_RTN > 8
1114 #error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
1115 #endif
1116
1117 #if defined(PROFILING)
1118 #define CATCH_STM_FRAME_BITMAP 3
1119 #define CATCH_STM_FRAME_WORDS  3
1120 #else
1121 #define CATCH_STM_FRAME_BITMAP 0
1122 #define CATCH_STM_FRAME_WORDS  1
1123 #endif
1124
1125 /* Catch frames are very similar to update frames, but when entering
1126  * one we just pop the frame off the stack and perform the correct
1127  * kind of return to the activation record underneath us on the stack.
1128  */
1129
1130 INFO_TABLE_RET(stg_catch_stm_frame,
1131                CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
1132                CATCH_STM_FRAME,
1133                stg_catch_stm_frame_0_ret,
1134                stg_catch_stm_frame_1_ret,
1135                stg_catch_stm_frame_2_ret,
1136                stg_catch_stm_frame_3_ret,
1137                stg_catch_stm_frame_4_ret,
1138                stg_catch_stm_frame_5_ret,
1139                stg_catch_stm_frame_6_ret,
1140                stg_catch_stm_frame_7_ret)
1141 CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1142
1143
1144 // Primop definition ------------------------------------------------------------
1145
1146 atomicallyzh_fast
1147 {
1148   W_ frame;
1149   W_ old_trec;
1150   W_ new_trec;
1151   
1152   // stmStartTransaction may allocate
1153   MAYBE_GC (R1_PTR, atomicallyzh_fast); 
1154
1155   /* Args: R1 = m :: STM a */
1156   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1157
1158   /* Set up the atomically frame */
1159   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1160   frame = Sp;
1161
1162   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1163   StgAtomicallyFrame_waiting(frame) = 0 :: CInt; // False
1164   StgAtomicallyFrame_code(frame) = R1;
1165
1166   /* Start the memory transaction */
1167   old_trec = StgTSO_trec(CurrentTSO);
1168   ASSERT(old_trec == NO_TREC);
1169   "ptr" new_trec = foreign "C" stmStartTransaction(BaseReg "ptr", old_trec "ptr");
1170   StgTSO_trec(CurrentTSO) = new_trec;
1171
1172   /* Apply R1 to the realworld token */
1173   Sp_adj(-1);
1174   jump RET_LBL(stg_ap_v);
1175 }
1176
1177
1178 catchSTMzh_fast
1179 {
1180   W_ frame;
1181   
1182   /* Args: R1 :: STM a */
1183   /* Args: R2 :: Exception -> STM a */
1184   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1185
1186   /* Set up the catch frame */
1187   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1188   frame = Sp;
1189
1190   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1191   StgCatchSTMFrame_handler(frame) = R2;
1192
1193   /* Apply R1 to the realworld token */
1194   Sp_adj(-1);
1195   jump RET_LBL(stg_ap_v);
1196 }
1197
1198
1199 catchRetryzh_fast
1200 {
1201   W_ frame;
1202   W_ new_trec;
1203   W_ trec;
1204
1205   // stmStartTransaction may allocate
1206   MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 
1207
1208   /* Args: R1 :: STM a */
1209   /* Args: R2 :: STM a */
1210   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1211
1212   /* Start a nested transaction within which to run the first code */
1213   trec = StgTSO_trec(CurrentTSO);
1214   "ptr" new_trec = foreign "C" stmStartTransaction(BaseReg "ptr", trec "ptr");
1215   StgTSO_trec(CurrentTSO) = new_trec;
1216
1217   /* Set up the catch-retry frame */
1218   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1219   frame = Sp;
1220   
1221   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1222   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1223   StgCatchRetryFrame_first_code(frame) = R1;
1224   StgCatchRetryFrame_alt_code(frame) = R2;
1225   StgCatchRetryFrame_first_code_trec(frame) = new_trec;
1226
1227   /* Apply R1 to the realworld token */
1228   Sp_adj(-1);
1229   jump RET_LBL(stg_ap_v);  
1230 }
1231
1232
1233 retryzh_fast
1234 {
1235   W_ frame_type;
1236   W_ frame;
1237   W_ trec;
1238   W_ outer;
1239   W_ r;
1240
1241   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1242
1243   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1244 retry_pop_stack:
1245   trec = StgTSO_trec(CurrentTSO);
1246   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
1247   StgTSO_sp(CurrentTSO) = Sp;
1248   frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr");
1249   Sp = StgTSO_sp(CurrentTSO);
1250   frame = Sp;
1251
1252   if (frame_type == CATCH_RETRY_FRAME) {
1253     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1254     ASSERT(outer != NO_TREC);
1255     if (!StgCatchRetryFrame_running_alt_code(frame)) {
1256       // Retry in the first code: try the alternative
1257       "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
1258       StgTSO_trec(CurrentTSO) = trec;
1259       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1260       R1 = StgCatchRetryFrame_alt_code(frame);
1261       Sp_adj(-1);
1262       jump RET_LBL(stg_ap_v);
1263     } else {
1264       // Retry in the alternative code: propagate
1265       W_ other_trec;
1266       other_trec = StgCatchRetryFrame_first_code_trec(frame);
1267       r = foreign "C" stmCommitNestedTransaction(BaseReg "ptr", other_trec "ptr");
1268       if (r) {
1269         r = foreign "C" stmCommitNestedTransaction(BaseReg "ptr", trec "ptr");
1270       }
1271       if (r) {
1272         // Merge between siblings succeeded: commit it back to enclosing transaction
1273         // and then propagate the retry
1274         StgTSO_trec(CurrentTSO) = outer;
1275         Sp = Sp + SIZEOF_StgCatchRetryFrame;
1276         goto retry_pop_stack;
1277       } else {
1278         // Merge failed: we mustn't propagate the retry.  Try both paths again.
1279         "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
1280         StgCatchRetryFrame_first_code_trec(frame) = trec;
1281         StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1282         StgTSO_trec(CurrentTSO) = trec;
1283         R1 = StgCatchRetryFrame_first_code(frame);
1284         Sp_adj(-1);
1285         jump RET_LBL(stg_ap_v);
1286       }
1287     }
1288   }
1289
1290   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1291   ASSERT(frame_type == ATOMICALLY_FRAME);
1292   ASSERT(outer == NO_TREC);
1293   r = foreign "C" stmWait(BaseReg "ptr", CurrentTSO "ptr", trec "ptr");
1294   if (r) {
1295     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1296     StgAtomicallyFrame_waiting(frame) = 1 :: CInt; // true
1297     Sp = frame;
1298     // Fix up the stack in the unregisterised case: the return convention is different.
1299     IF_NOT_REG_R1(Sp_adj(-2); 
1300                   Sp(1) = stg_NO_FINALIZER_closure;
1301                   Sp(0) = stg_ut_1_0_unreg_info;)
1302     jump stg_block_noregs;
1303   } else {
1304     // Transaction was not valid: retry immediately
1305     "ptr" trec = foreign "C" stmStartTransaction(BaseReg "ptr", outer "ptr");
1306     StgTSO_trec(CurrentTSO) = trec;
1307     R1 = StgAtomicallyFrame_code(frame);
1308     Sp = frame;
1309     Sp_adj(-1);
1310     jump RET_LBL(stg_ap_v);
1311   }
1312 }
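/* How the STM frames and primops above appear at the source level: atomically
 * pushes the ATOMICALLY_FRAME, orElse pushes a CATCH_RETRY_FRAME, and retry
 * walks the stack back to the nearest of the two.  A hedged sketch using the
 * wrappers in GHC.Conc:
 *
 *   import GHC.Conc (STM, TVar, atomically, retry, orElse,
 *                    newTVar, readTVar, writeTVar)
 *
 *   -- block until the counter is positive, then decrement and return it
 *   takePositive :: TVar Int -> STM Int
 *   takePositive tv = do
 *     n <- readTVar tv
 *     if n > 0 then writeTVar tv (n - 1) >> return n
 *              else retry
 *
 *   demo :: IO Int
 *   demo = do
 *     tv <- atomically (newTVar 3)
 *     atomically (takePositive tv `orElse` return 0)
 */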
1313
1314
1315 newTVarzh_fast
1316 {
1317   W_ tv;
1318   W_ new_value;
1319
1320   /* Args: R1 = initialisation value */
1321
1322   MAYBE_GC (R1_PTR, newTVarzh_fast); 
1323   new_value = R1;
1324   tv = foreign "C" stmNewTVar(BaseReg "ptr", new_value "ptr");
1325   RET_P(tv);
1326 }
1327
1328
1329 readTVarzh_fast
1330 {
1331   W_ trec;
1332   W_ tvar;
1333   W_ result;
1334
1335   /* Args: R1 = TVar closure */
1336
1337   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1338   trec = StgTSO_trec(CurrentTSO);
1339   tvar = R1;
1340   "ptr" result = foreign "C" stmReadTVar(BaseReg "ptr", trec "ptr", tvar "ptr") [];
1341
1342   RET_P(result);
1343 }
1344
1345
1346 writeTVarzh_fast
1347 {
1348   W_ trec;
1349   W_ tvar;
1350   W_ new_value;
1351   
1352   /* Args: R1 = TVar closure */
1353   /*       R2 = New value    */
1354
1355   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1356   trec = StgTSO_trec(CurrentTSO);
1357   tvar = R1;
1358   new_value = R2;
1359   foreign "C" stmWriteTVar(BaseReg "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1360
1361   jump %ENTRY_CODE(Sp(0));
1362 }
1363
1364
1365 /* -----------------------------------------------------------------------------
1366  * MVar primitives
1367  *
1368  * take & putMVar work as follows.  Firstly, an important invariant:
1369  *
1370  *    If the MVar is full, then the blocking queue contains only
1371  *    threads blocked on putMVar, and if the MVar is empty then the
1372  *    blocking queue contains only threads blocked on takeMVar.
1373  *
1374  * takeMVar:
1375  *    MVar empty : then add ourselves to the blocking queue
1376  *    MVar full  : remove the value from the MVar, and
1377  *                 blocking queue empty     : return
1378  *                 blocking queue non-empty : perform the first blocked putMVar
1379  *                                            from the queue, and wake up the
1380  *                                            thread (MVar is now full again)
1381  *
1382  * putMVar is just the dual of the above algorithm.
1383  *
1384  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1385  * the stack of the thread waiting to do the putMVar.  See
1386  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1387  * the stack layout, and the PerformPut and PerformTake macros below.
1388  *
1389  * It is important that a blocked take or put is woken up with the
1390  * take/put already performed, because otherwise there would be a
1391  * small window of vulnerability where the thread could receive an
1392  * exception and never perform its take or put, and we'd end up with a
1393  * deadlock.
1394  *
1395  * -------------------------------------------------------------------------- */
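/* A hedged usage sketch in terms of Control.Concurrent.MVar, the thin library
 * wrapper over these primops:
 *
 *   import Control.Concurrent (forkIO)
 *   import Control.Concurrent.MVar (newEmptyMVar, takeMVar, putMVar)
 *
 *   main :: IO ()
 *   main = do
 *     box <- newEmptyMVar
 *     _ <- forkIO (putMVar box "hello")  -- putMVar fills the empty MVar
 *     takeMVar box >>= putStrLn          -- takeMVar blocks until it is full
 */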
1396
1397 isEmptyMVarzh_fast
1398 {
1399     /* args: R1 = MVar closure */
1400
1401     if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
1402         RET_N(1);
1403     } else {
1404         RET_N(0);
1405     }
1406 }
1407
1408 newMVarzh_fast
1409 {
1410     /* args: none */
1411     W_ mvar;
1412
1413     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1414   
1415     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1416     SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
1417     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1418     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1419     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1420     RET_P(mvar);
1421 }
1422
1423
1424 /* If R1 isn't available, pass it on the stack */
1425 #ifdef REG_R1
1426 #define PerformTake(tso, value)                         \
1427     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1428     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1429 #else
1430 #define PerformTake(tso, value)                                 \
1431     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1432     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1433 #endif
1434
1435 #define PerformPut(tso,lval)                    \
1436     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1437     lval = W_[StgTSO_sp(tso) - WDS(1)];
1438
1439 takeMVarzh_fast
1440 {
1441     W_ mvar, val, info, tso;
1442
1443     /* args: R1 = MVar closure */
1444     mvar = R1;
1445
1446 #if defined(SMP)
1447     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1448 #else
1449     info = GET_INFO(mvar);
1450 #endif
1451
1452     /* If the MVar is empty, put ourselves on its blocking queue,
1453      * and wait until we're woken up.
1454      */
1455     if (info == stg_EMPTY_MVAR_info) {
1456         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1457             StgMVar_head(mvar) = CurrentTSO;
1458         } else {
1459             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1460         }
1461         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1462         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1463         StgTSO_block_info(CurrentTSO)  = mvar;
1464         StgMVar_tail(mvar) = CurrentTSO;
1465         
1466         jump stg_block_takemvar;
1467   }
1468
1469   /* we got the value... */
1470   val = StgMVar_value(mvar);
1471
1472   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1473   {
1474       /* There are putMVar(s) waiting... 
1475        * wake up the first thread on the queue
1476        */
1477       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1478
1479       /* actually perform the putMVar for the thread that we just woke up */
1480       tso = StgMVar_head(mvar);
1481       PerformPut(tso,StgMVar_value(mvar));
1482
1483 #if defined(GRAN) || defined(PAR)
1484       /* ToDo: check 2nd arg (mvar) is right */
1485       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
1486       StgMVar_head(mvar) = tso;
1487 #else
1488       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr") [];
1489       StgMVar_head(mvar) = tso;
1490 #endif
1491
1492       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1493           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1494       }
1495
1496 #if defined(SMP)
1497       foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1498 #endif
1499       RET_P(val);
1500   } 
1501   else
1502   {
1503       /* No further putMVars, MVar is now empty */
1504       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1505  
1506 #if defined(SMP)
1507       foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1508 #else
1509       SET_INFO(mvar,stg_EMPTY_MVAR_info);
1510 #endif
1511
1512       RET_P(val);
1513   }
1514 }
1515
1516
1517 tryTakeMVarzh_fast
1518 {
1519     W_ mvar, val, info, tso;
1520
1521     /* args: R1 = MVar closure */
1522
1523     mvar = R1;
1524
1525 #if defined(SMP)
1526     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1527 #else
1528     info = GET_INFO(mvar);
1529 #endif
1530
1531     if (info == stg_EMPTY_MVAR_info) {
1532 #if defined(SMP)
1533         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1534 #endif
1535         /* HACK: we need a pointer to pass back, 
1536          * so we abuse NO_FINALIZER_closure
1537          */
1538         RET_NP(0, stg_NO_FINALIZER_closure);
1539     }
1540
1541     /* we got the value... */
1542     val = StgMVar_value(mvar);
1543
1544     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1545
1546         /* There are putMVar(s) waiting... 
1547          * wake up the first thread on the queue
1548          */
1549         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1550
1551         /* actually perform the putMVar for the thread that we just woke up */
1552         tso = StgMVar_head(mvar);
1553         PerformPut(tso,StgMVar_value(mvar));
1554
1555 #if defined(GRAN) || defined(PAR)
1556         /* ToDo: check 2nd arg (mvar) is right */
1557         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
1558         StgMVar_head(mvar) = tso;
1559 #else
1560         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr") [];
1561         StgMVar_head(mvar) = tso;
1562 #endif
1563
1564         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1565             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1566         }
1567 #if defined(SMP)
1568         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1569 #endif
1570     }
1571     else 
1572     {
1573         /* No further putMVars, MVar is now empty */
1574         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1575 #if defined(SMP)
1576         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1577 #else
1578         SET_INFO(mvar,stg_EMPTY_MVAR_info);
1579 #endif
1580     }
1581     
1582     RET_NP(1, val);
1583 }
1584
1585
1586 putMVarzh_fast
1587 {
1588     W_ mvar, info, tso;
1589
1590     /* args: R1 = MVar, R2 = value */
1591     mvar = R1;
1592
1593 #if defined(SMP)
1594     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1595 #else
1596     info = GET_INFO(mvar);
1597 #endif
1598
1599     if (info == stg_FULL_MVAR_info) {
1600         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1601             StgMVar_head(mvar) = CurrentTSO;
1602         } else {
1603             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1604         }
1605         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1606         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1607         StgTSO_block_info(CurrentTSO)  = mvar;
1608         StgMVar_tail(mvar) = CurrentTSO;
1609         
1610         jump stg_block_putmvar;
1611     }
1612   
1613     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1614
1615         /* There are takeMVar(s) waiting: wake up the first one
1616          */
1617         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1618
1619         /* actually perform the takeMVar */
1620         tso = StgMVar_head(mvar);
1621         PerformTake(tso, R2);
1622       
1623 #if defined(GRAN) || defined(PAR)
1624         /* ToDo: check 2nd arg (mvar) is right */
1625         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr") [];
1626         StgMVar_head(mvar) = tso;
1627 #else
1628         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr") [];
1629         StgMVar_head(mvar) = tso;
1630 #endif
1631
1632         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1633             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1634         }
1635
1636 #if defined(SMP)
1637         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1638 #endif
1639         jump %ENTRY_CODE(Sp(0));
1640     }
1641     else
1642     {
1643         /* No further takes, the MVar is now full. */
1644         StgMVar_value(mvar) = R2;
1645
1646 #if defined(SMP)
1647         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1648 #else
1649         SET_INFO(mvar,stg_FULL_MVAR_info);
1650 #endif
1651         jump %ENTRY_CODE(Sp(0));
1652     }
1653     
1654     /* ToDo: yield afterward for better communication performance? */
1655 }
1656
1657
1658 tryPutMVarzh_fast
1659 {
1660     W_ mvar, info, tso;
1661
1662     /* args: R1 = MVar, R2 = value */
1663     mvar = R1;
1664
1665 #if defined(SMP)
1666     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1667 #else
1668     info = GET_INFO(mvar);
1669 #endif
1670
1671     if (info == stg_FULL_MVAR_info) {
1672 #if defined(SMP)
1673         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1674 #endif
1675         RET_N(0);
1676     }
1677   
1678     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1679
1680         /* There are takeMVar(s) waiting: wake up the first one
1681          */
1682         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1683         
1684         /* actually perform the takeMVar */
1685         tso = StgMVar_head(mvar);
1686         PerformTake(tso, R2);
1687       
1688 #if defined(GRAN) || defined(PAR)
1689         /* ToDo: check 2nd arg (mvar) is right */
1690         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr") [];
1691         StgMVar_head(mvar) = tso;
1692 #else
1693         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr") [];
1694         StgMVar_head(mvar) = tso;
1695 #endif
1696
1697         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1698             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1699         }
1700
1701 #if defined(SMP)
1702         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1703 #endif
1704         jump %ENTRY_CODE(Sp(0));
1705     }
1706     else
1707     {
1708         /* No further takes, the MVar is now full. */
1709         StgMVar_value(mvar) = R2;
1710
1711 #if defined(SMP)
1712         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1713 #else
1714         SET_INFO(mvar,stg_FULL_MVAR_info);
1715 #endif
1716         jump %ENTRY_CODE(Sp(0));
1717     }
1718     
1719     /* ToDo: yield afterward for better communication performance? */
1720 }
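/* Illustrative note: tryPutMVar# is the non-blocking variant; it returns 0
 * when the MVar is already full instead of queueing the caller.  At the
 * library level this is Control.Concurrent.MVar.tryPutMVar
 * :: MVar a -> a -> IO Bool, e.g.
 *
 *   ok <- tryPutMVar m (42 :: Int)  -- True iff the MVar was empty and is now full
 */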
1721
1722
1723 /* -----------------------------------------------------------------------------
1724    Stable name and stable pointer primitives
1725    -------------------------------------------------------------------------  */
1726
1727 makeStableNamezh_fast
1728 {
1729     W_ index, sn_obj;
1730
1731     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1732   
1733     index = foreign "C" lookupStableName(R1 "ptr") [];
1734
1735     /* Is there already a StableName for this heap object?
1736      *  stable_ptr_table is a pointer to an array of snEntry structs.
1737      */
1738     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1739         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1740         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1741         StgStableName_sn(sn_obj) = index;
1742         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1743     } else {
1744         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1745     }
1746     
1747     RET_P(sn_obj);
1748 }
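/* Illustrative sketch, not part of the RTS: makeStableName# is exposed
 * through System.Mem.StableName.  Equal StableNames imply the same object;
 * here both are made from the same evaluated value, so they will normally
 * compare equal:
 *
 *   import System.Mem.StableName
 *
 *   main :: IO ()
 *   main = do
 *     let x = [1,2,3] :: [Int]
 *     sn1 <- makeStableName x
 *     sn2 <- makeStableName x
 *     print (sn1 == sn2, hashStableName sn1)
 */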
1749
1750
1751 makeStablePtrzh_fast
1752 {
1753     /* Args: R1 = a */
1754     W_ sp;
1755     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1756     "ptr" sp = foreign "C" getStablePtr(R1 "ptr") [];
1757     RET_N(sp);
1758 }
1759
1760 deRefStablePtrzh_fast
1761 {
1762     /* Args: R1 = the stable ptr */
1763     W_ r, sp;
1764     sp = R1;
1765     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1766     RET_P(r);
1767 }
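/* Illustrative sketch, not part of the RTS: the stable-pointer primops back
 * Foreign.StablePtr.  A StablePtr keeps its target alive across GCs and can
 * be handed to foreign code and dereferenced later:
 *
 *   import Foreign.StablePtr
 *
 *   main :: IO ()
 *   main = do
 *     sp <- newStablePtr "kept alive"
 *     s  <- deRefStablePtr sp      -- looks the object up in stable_ptr_table
 *     putStrLn s
 *     freeStablePtr sp             -- release the table entry
 */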
1768
1769 /* -----------------------------------------------------------------------------
1770    Bytecode object primitives
1771    -------------------------------------------------------------------------  */
1772
1773 newBCOzh_fast
1774 {
1775     /* R1 = instrs
1776        R2 = literals
1777        R3 = ptrs
1778        R4 = itbls
1779        R5 = arity
1780        R6 = bitmap array
1781     */
1782     W_ bco, bitmap_arr, bytes, words;
1783     
1784     bitmap_arr = R6;
1785     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1786     bytes = WDS(words);
1787
1788     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );
1789
1790     bco = Hp - bytes + WDS(1);
1791     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1792     
1793     StgBCO_instrs(bco)     = R1;
1794     StgBCO_literals(bco)   = R2;
1795     StgBCO_ptrs(bco)       = R3;
1796     StgBCO_itbls(bco)      = R4;
1797     StgBCO_arity(bco)      = HALF_W_(R5);
1798     StgBCO_size(bco)       = HALF_W_(words);
1799     
1800     // Copy the arity/bitmap info into the BCO
1801     W_ i;
1802     i = 0;
1803 for:
1804     if (i < StgArrWords_words(bitmap_arr)) {
1805         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1806         i = i + 1;
1807         goto for;
1808     }
1809     
1810     RET_P(bco);
1811 }
1812
1813
1814 mkApUpd0zh_fast
1815 {
1816     // R1 = the BCO# for the AP
1817     //  
1818     W_ ap;
1819
1820     // This function is *only* used to wrap zero-arity BCOs in an
1821     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1822     // saturated and always points directly to a FUN or BCO.
1823     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1824            StgBCO_arity(R1) == HALF_W_(0));
1825
1826     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1827     TICK_ALLOC_UP_THK(0, 0);
1828     CCCS_ALLOC(SIZEOF_StgAP);
1829
1830     ap = Hp - SIZEOF_StgAP + WDS(1);
1831     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1832     
1833     StgAP_n_args(ap) = HALF_W_(0);
1834     StgAP_fun(ap) = R1;
1835     
1836     RET_P(ap);
1837 }
1838
1839 /* -----------------------------------------------------------------------------
1840    Thread I/O blocking primitives
1841    -------------------------------------------------------------------------- */
1842
1843 /* Add a thread to the end of the blocked queue. (C-- version of the C
1844  * macro in Schedule.h).
1845  */
1846 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1847     ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
1848     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1849       W_[blocked_queue_hd] = tso;                       \
1850     } else {                                            \
1851       StgTSO_link(W_[blocked_queue_tl]) = tso;          \
1852     }                                                   \
1853     W_[blocked_queue_tl] = tso;
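/* Note (descriptive, not part of the original): the macro appends at
 * blocked_queue_tl while the scheduler (Schedule.c) picks threads up from
 * blocked_queue_hd, so blocked threads are woken in FIFO order;
 * blocked_queue_tl is only meaningful while the queue is non-empty.
 */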
1854
1855 waitReadzh_fast
1856 {
1857     /* args: R1 */
1858 #ifdef THREADED_RTS
1859     foreign "C" barf("waitRead# on threaded RTS");
1860 #endif
1861
1862     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1863     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1864     StgTSO_block_info(CurrentTSO) = R1;
1865     // No locking - we're not going to use this interface in the
1866     // threaded RTS anyway.
1867     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1868     jump stg_block_noregs;
1869 }
1870
1871 waitWritezh_fast
1872 {
1873     /* args: R1 */
1874 #ifdef THREADED_RTS
1875     foreign "C" barf("waitWrite# on threaded RTS");
1876 #endif
1877
1878     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1879     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1880     StgTSO_block_info(CurrentTSO) = R1;
1881     // No locking - we're not going to use this interface in the
1882     // threaded RTS anyway.
1883     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1884     jump stg_block_noregs;
1885 }
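/* Illustrative note: waitRead# / waitWrite# are the non-threaded-RTS back
 * ends for Control.Concurrent (GHC.Conc) threadWaitRead / threadWaitWrite,
 * which block the calling Haskell thread until the given file descriptor is
 * readable/writable.  A hedged Haskell-level sketch, assuming fd is an open
 * descriptor:
 *
 *   threadWaitRead fd     -- returns once fd has data available
 *   threadWaitWrite fd    -- returns once fd can be written without blocking
 */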
1886
1887
1888 STRING(stg_delayzh_malloc_str, "delayzh_fast")
1889 delayzh_fast
1890 {
1891 #ifdef mingw32_HOST_OS
1892     W_ ares;
1893     CInt reqID;
1894 #else
1895     W_ t, prev, target;
1896 #endif
1897
1898 #ifdef THREADED_RTS
1899     foreign "C" barf("delay# on threaded RTS");
1900 #endif
1901
1902     /* args: R1 (microsecond delay amount) */
1903     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1904     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
1905
1906 #ifdef mingw32_HOST_OS
1907
1908     /* could probably allocate this on the heap instead */
1909     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1910                                             stg_delayzh_malloc_str);
1911     reqID = foreign "C" addDelayRequest(R1);
1912     StgAsyncIOResult_reqID(ares)   = reqID;
1913     StgAsyncIOResult_len(ares)     = 0;
1914     StgAsyncIOResult_errCode(ares) = 0;
1915     StgTSO_block_info(CurrentTSO)  = ares;
1916
1917     /* Having all async-blocked threads reside on the blocked_queue
1918      * simplifies matters, so change the status to BlockedOnDoProc and put
1919      * the delayed thread on the blocked_queue.
1920      */
1921     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1922     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1923     jump stg_block_async_void;
1924
1925 #else
1926
1927     W_ time;
1928     time = foreign "C" getourtimeofday();
1929     target = (R1 / (TICK_MILLISECS*1000)) + time;
1930     StgTSO_block_info(CurrentTSO) = target;
1931
1932     /* Insert the new thread in the sleeping queue. */
1933     prev = NULL;
1934     t = W_[sleeping_queue];
1935 while:
1936     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
1937         prev = t;
1938         t = StgTSO_link(t);
1939         goto while;
1940     }
1941
1942     StgTSO_link(CurrentTSO) = t;
1943     if (prev == NULL) {
1944         W_[sleeping_queue] = CurrentTSO;
1945     } else {
1946         StgTSO_link(prev) = CurrentTSO;
1947     }
1948     jump stg_block_noregs;
1949 #endif
1950 }
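/* Illustrative note: delay# underlies Control.Concurrent.threadDelay, which
 * takes the delay in microseconds:
 *
 *   threadDelay 1000000   -- suspend the calling thread for ~1 second
 */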
1951
1952
1953 #ifdef mingw32_HOST_OS
1954 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
1955 asyncReadzh_fast
1956 {
1957     W_ ares;
1958     CInt reqID;
1959
1960 #ifdef THREADED_RTS
1961     foreign "C" barf("asyncRead# on threaded RTS");
1962 #endif
1963
1964     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1965     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1966     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1967
1968     /* could probably allocate this on the heap instead */
1969     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1970                                             stg_asyncReadzh_malloc_str);
1971     reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr");
1972     StgAsyncIOResult_reqID(ares)   = reqID;
1973     StgAsyncIOResult_len(ares)     = 0;
1974     StgAsyncIOResult_errCode(ares) = 0;
1975     StgTSO_block_info(CurrentTSO)  = ares;
1976     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1977     jump stg_block_async;
1978 }
1979
1980 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
1981 asyncWritezh_fast
1982 {
1983     W_ ares;
1984     CInt reqID;
1985
1986 #ifdef THREADED_RTS
1987     foreign "C" barf("asyncWrite# on threaded RTS");
1988 #endif
1989
1990     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1991     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1992     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1993
1994     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1995                                             stg_asyncWritezh_malloc_str);
1996     reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr");
1997
1998     StgAsyncIOResult_reqID(ares)   = reqID;
1999     StgAsyncIOResult_len(ares)     = 0;
2000     StgAsyncIOResult_errCode(ares) = 0;
2001     StgTSO_block_info(CurrentTSO)  = ares;
2002     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2003     jump stg_block_async;
2004 }
2005
2006 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
2007 asyncDoProczh_fast
2008 {
2009     W_ ares;
2010     CInt reqID;
2011
2012     /* args: R1 = proc, R2 = param */
2013     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2014     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2015
2016     /* could probably allocate this on the heap instead */
2017     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2018                                             stg_asyncDoProczh_malloc_str);
2019     reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr");
2020     StgAsyncIOResult_reqID(ares)   = reqID;
2021     StgAsyncIOResult_len(ares)     = 0;
2022     StgAsyncIOResult_errCode(ares) = 0;
2023     StgTSO_block_info(CurrentTSO) = ares;
2024     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2025     jump stg_block_async;
2026 }
2027 #endif
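/* Illustrative note (an assumption about the library layer, not stated in
 * this file): these Windows-only primops are presumed to be exposed from
 * GHC.Conc as asyncRead, asyncWrite and asyncDoProc.  Each queues a request
 * for the RTS's asynchronous I/O worker threads and blocks the calling
 * Haskell thread until the request completes; exact signatures are omitted
 * here.
 */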
2028
2029 /* -----------------------------------------------------------------------------
2030   ** temporary **
2031
2032    classes CCallable and CReturnable don't really exist, but the
2033    compiler insists on generating dictionaries containing references
2034    to GHC_ZCCCallable_static_info etc., so we provide dummy symbols
2035    for these.  Some C compilers can't cope with zero-length static arrays,
2036    so we have to make these one element long.
2037   --------------------------------------------------------------------------- */
2038
2039 section "rodata" {
2040   GHC_ZCCCallable_static_info:   W_ 0;
2041 }
2042
2043 section "rodata" {
2044   GHC_ZCCReturnable_static_info: W_ 0;
2045 }