Massive patch for the first months work adding System FC to GHC #35
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check, the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *less* than the size of a word, make sure we
42  * round up to the nearest word for the size of the array.
43  */
44
45 newByteArrayzh_fast
46 {
47     W_ words, payload_words, n, p;
48     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
49     n = R1;
50     payload_words = ROUNDUP_BYTES_TO_WDS(n);
51     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
52     "ptr" p = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
53     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
54     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
55     StgArrWords_words(p) = payload_words;
56     RET_P(p);
57 }
58
59 newPinnedByteArrayzh_fast
60 {
61     W_ words, payload_words, n, p;
62
63     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
64     n = R1;
65     payload_words = ROUNDUP_BYTES_TO_WDS(n);
66
67     // We want an 8-byte aligned array.  allocatePinned() gives us
68     // 8-byte aligned memory by default, but we want to align the
69     // *goods* inside the ArrWords object, so we have to check the
70     // size of the ArrWords header and adjust our size accordingly.
71     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
72     if ((SIZEOF_StgArrWords & 7) != 0) {
73         words = words + 1;
74     }
75
76     "ptr" p = foreign "C" allocatePinned(words) [];
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78
79     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
80     // have to push the object forward one word so that the goods
81     // fall on an 8-byte boundary.
82     if ((SIZEOF_StgArrWords & 7) != 0) {
83         p = p + WDS(1);
84     }
85
86     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
87     StgArrWords_words(p) = payload_words;
88     RET_P(p);
89 }
90
91 newArrayzh_fast
92 {
93     W_ words, n, init, arr, p;
94     /* Args: R1 = words, R2 = initialisation value */
95
96     n = R1;
97     MAYBE_GC(R2_PTR,newArrayzh_fast);
98
99     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
100     "ptr" arr = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
101     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
102
103     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
104     StgMutArrPtrs_ptrs(arr) = n;
105
106     // Initialise all elements of the the array with the value in R2
107     init = R2;
108     p = arr + SIZEOF_StgMutArrPtrs;
109   for:
110     if (p < arr + WDS(words)) {
111         W_[p] = init;
112         p = p + WDS(1);
113         goto for;
114     }
115
116     RET_P(arr);
117 }
118
119 unsafeThawArrayzh_fast
120 {
121   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
122   //
123   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
124   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
125   // it on the mutable list for the GC to remove (removing something from
126   // the mutable list is not easy, because the mut_list is only singly-linked).
127   // 
128   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
129   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
130   // to indicate that it is still on the mutable list.
131   //
132   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
133   // either it is on a mut_list, or it isn't.  We adopt the convention that
134   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
135   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
136   // we put it on the mutable list more than once, but it would get scavenged
137   // multiple times during GC, which would be unnecessarily slow.
138   //
139   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
140         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
141         recordMutable(R1, R1);
142         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
143         RET_P(R1);
144   } else {
145         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
146         RET_P(R1);
147   }
148 }
149
150 /* -----------------------------------------------------------------------------
151    MutVar primitives
152    -------------------------------------------------------------------------- */
153
154 newMutVarzh_fast
155 {
156     W_ mv;
157     /* Args: R1 = initialisation value */
158
159     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
160
161     mv = Hp - SIZEOF_StgMutVar + WDS(1);
162     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
163     StgMutVar_var(mv) = R1;
164     
165     RET_P(mv);
166 }
167
168 atomicModifyMutVarzh_fast
169 {
170     W_ mv, z, x, y, r;
171     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
172
173     /* If x is the current contents of the MutVar#, then 
174        We want to make the new contents point to
175
176          (sel_0 (f x))
177  
178        and the return value is
179          
180          (sel_1 (f x))
181
182         obviously we can share (f x).
183
184          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
185          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
186          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
187     */
188
189 #if MIN_UPD_SIZE > 1
190 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
191 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
192 #else
193 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
194 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
195 #endif
196
197 #if MIN_UPD_SIZE > 2
198 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
199 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
200 #else
201 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
202 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
203 #endif
204
205 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
206
207    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
208
209 #if defined(THREADED_RTS)
210     foreign "C" ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
211 #endif
212
213    x = StgMutVar_var(R1);
214
215    TICK_ALLOC_THUNK_2();
216    CCCS_ALLOC(THUNK_2_SIZE);
217    z = Hp - THUNK_2_SIZE + WDS(1);
218    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
219    LDV_RECORD_CREATE(z);
220    StgThunk_payload(z,0) = R2;
221    StgThunk_payload(z,1) = x;
222
223    TICK_ALLOC_THUNK_1();
224    CCCS_ALLOC(THUNK_1_SIZE);
225    y = z - THUNK_1_SIZE;
226    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
227    LDV_RECORD_CREATE(y);
228    StgThunk_payload(y,0) = z;
229
230    StgMutVar_var(R1) = y;
231    foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];
232
233    TICK_ALLOC_THUNK_1();
234    CCCS_ALLOC(THUNK_1_SIZE);
235    r = y - THUNK_1_SIZE;
236    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
237    LDV_RECORD_CREATE(r);
238    StgThunk_payload(r,0) = z;
239
240 #if defined(THREADED_RTS)
241     foreign "C" RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
242 #endif
243
244    RET_P(r);
245 }
246
247 /* -----------------------------------------------------------------------------
248    Weak Pointer Primitives
249    -------------------------------------------------------------------------- */
250
251 STRING(stg_weak_msg,"New weak pointer at %p\n")
252
253 mkWeakzh_fast
254 {
255   /* R1 = key
256      R2 = value
257      R3 = finalizer (or NULL)
258   */
259   W_ w;
260
261   if (R3 == NULL) {
262     R3 = stg_NO_FINALIZER_closure;
263   }
264
265   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
266
267   w = Hp - SIZEOF_StgWeak + WDS(1);
268   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
269
270   StgWeak_key(w)       = R1;
271   StgWeak_value(w)     = R2;
272   StgWeak_finalizer(w) = R3;
273
274   StgWeak_link(w)       = W_[weak_ptr_list];
275   W_[weak_ptr_list]     = w;
276
277   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
278
279   RET_P(w);
280 }
281
282
283 finalizzeWeakzh_fast
284 {
285   /* R1 = weak ptr
286    */
287   W_ w, f;
288
289   w = R1;
290
291   // already dead?
292   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
293       RET_NP(0,stg_NO_FINALIZER_closure);
294   }
295
296   // kill it
297 #ifdef PROFILING
298   // @LDV profiling
299   // A weak pointer is inherently used, so we do not need to call
300   // LDV_recordDead_FILL_SLOP_DYNAMIC():
301   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
302   // or, LDV_recordDead():
303   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
304   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
305   // large as weak pointers, so there is no need to fill the slop, either.
306   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
307 #endif
308
309   //
310   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
311   //
312   SET_INFO(w,stg_DEAD_WEAK_info);
313   LDV_RECORD_CREATE(w);
314
315   f = StgWeak_finalizer(w);
316   StgDeadWeak_link(w) = StgWeak_link(w);
317
318   /* return the finalizer */
319   if (f == stg_NO_FINALIZER_closure) {
320       RET_NP(0,stg_NO_FINALIZER_closure);
321   } else {
322       RET_NP(1,f);
323   }
324 }
325
326 deRefWeakzh_fast
327 {
328   /* R1 = weak ptr */
329   W_ w, code, val;
330
331   w = R1;
332   if (GET_INFO(w) == stg_WEAK_info) {
333     code = 1;
334     val = StgWeak_value(w);
335   } else {
336     code = 0;
337     val = w;
338   }
339   RET_NP(code,val);
340 }
341
342 /* -----------------------------------------------------------------------------
343    Arbitrary-precision Integer operations.
344
345    There are some assumptions in this code that mp_limb_t == W_.  This is
346    the case for all the platforms that GHC supports, currently.
347    -------------------------------------------------------------------------- */
348
349 int2Integerzh_fast
350 {
351    /* arguments: R1 = Int# */
352
353    W_ val, s, p;        /* to avoid aliasing */
354
355    val = R1;
356    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
357
358    p = Hp - SIZEOF_StgArrWords;
359    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
360    StgArrWords_words(p) = 1;
361
362    /* mpz_set_si is inlined here, makes things simpler */
363    if (%lt(val,0)) { 
364         s  = -1;
365         Hp(0) = -val;
366    } else { 
367      if (%gt(val,0)) {
368         s = 1;
369         Hp(0) = val;
370      } else {
371         s = 0;
372      }
373   }
374
375    /* returns (# size  :: Int#, 
376                  data  :: ByteArray# 
377                #)
378    */
379    RET_NP(s,p);
380 }
381
382 word2Integerzh_fast
383 {
384    /* arguments: R1 = Word# */
385
386    W_ val, s, p;        /* to avoid aliasing */
387
388    val = R1;
389
390    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
391
392    p = Hp - SIZEOF_StgArrWords;
393    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
394    StgArrWords_words(p) = 1;
395
396    if (val != 0) {
397         s = 1;
398         W_[Hp] = val;
399    } else {
400         s = 0;
401    }
402
403    /* returns (# size  :: Int#, 
404                  data  :: ByteArray# #)
405    */
406    RET_NP(s,p);
407 }
408
409
410 /*
411  * 'long long' primops for converting to/from Integers.
412  */
413
414 #ifdef SUPPORT_LONG_LONGS
415
416 int64ToIntegerzh_fast
417 {
418    /* arguments: L1 = Int64# */
419
420    L_ val;
421    W_ hi, s, neg, words_needed, p;
422
423    val = L1;
424    neg = 0;
425
426    if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) )  { 
427        words_needed = 2;
428    } else { 
429        // minimum is one word
430        words_needed = 1;
431    }
432
433    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
434                NO_PTRS, int64ToIntegerzh_fast );
435
436    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
437    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
438    StgArrWords_words(p) = words_needed;
439
440    if ( %lt(val,0::L_) ) {
441      neg = 1;
442      val = -val;
443    }
444
445    hi = TO_W_(val >> 32);
446
447    if ( words_needed == 2 )  { 
448       s = 2;
449       Hp(-1) = TO_W_(val);
450       Hp(0) = hi;
451    } else { 
452        if ( val != 0::L_ ) {
453            s = 1;
454            Hp(0) = TO_W_(val);
455        } else /* val==0 */  {
456            s = 0;
457        }
458    }
459    if ( neg != 0 ) {
460         s = -s;
461    }
462
463    /* returns (# size  :: Int#, 
464                  data  :: ByteArray# #)
465    */
466    RET_NP(s,p);
467 }
468
469 word64ToIntegerzh_fast
470 {
471    /* arguments: L1 = Word64# */
472
473    L_ val;
474    W_ hi, s, words_needed, p;
475
476    val = L1;
477    if ( val >= 0x100000000::L_ ) {
478       words_needed = 2;
479    } else {
480       words_needed = 1;
481    }
482
483    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
484                NO_PTRS, word64ToIntegerzh_fast );
485
486    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
487    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
488    StgArrWords_words(p) = words_needed;
489
490    hi = TO_W_(val >> 32);
491    if ( val >= 0x100000000::L_ ) { 
492      s = 2;
493      Hp(-1) = TO_W_(val);
494      Hp(0)  = hi;
495    } else {
496       if ( val != 0::L_ ) {
497         s = 1;
498         Hp(0) = TO_W_(val);
499      } else /* val==0 */  {
500       s = 0;
501      }
502   }
503
504    /* returns (# size  :: Int#, 
505                  data  :: ByteArray# #)
506    */
507    RET_NP(s,p);
508 }
509
510
511 #endif /* SUPPORT_LONG_LONGS */
512
513 /* ToDo: this is shockingly inefficient */
514
515 #ifndef THREADED_RTS
516 section "bss" {
517   mp_tmp1:
518     bits8 [SIZEOF_MP_INT];
519 }
520
521 section "bss" {
522   mp_tmp2:
523     bits8 [SIZEOF_MP_INT];
524 }
525
526 section "bss" {
527   mp_result1:
528     bits8 [SIZEOF_MP_INT];
529 }
530
531 section "bss" {
532   mp_result2:
533     bits8 [SIZEOF_MP_INT];
534 }
535 #endif
536
537 #ifdef THREADED_RTS
538 #define FETCH_MP_TEMP(X) \
539 W_ X; \
540 X = BaseReg + (OFFSET_StgRegTable_r ## X);
541 #else
542 #define FETCH_MP_TEMP(X) /* Nothing */
543 #endif
544
545 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
546 name                                                                    \
547 {                                                                       \
548   CInt s1, s2;                                                          \
549   W_ d1, d2;                                                            \
550   FETCH_MP_TEMP(mp_tmp1);                                               \
551   FETCH_MP_TEMP(mp_tmp2);                                               \
552   FETCH_MP_TEMP(mp_result1)                                             \
553   FETCH_MP_TEMP(mp_result2);                                            \
554                                                                         \
555   /* call doYouWantToGC() */                                            \
556   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
557                                                                         \
558   s1 = W_TO_INT(R1);                                                    \
559   d1 = R2;                                                              \
560   s2 = W_TO_INT(R3);                                                    \
561   d2 = R4;                                                              \
562                                                                         \
563   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
564   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
565   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
566   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
567   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
568   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
569                                                                         \
570   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
571                                                                         \
572   /* Perform the operation */                                           \
573   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
574                                                                         \
575   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
576          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
577 }
578
579 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
580 name                                                                    \
581 {                                                                       \
582   CInt s1;                                                              \
583   W_ d1;                                                                \
584   FETCH_MP_TEMP(mp_tmp1);                                               \
585   FETCH_MP_TEMP(mp_result1)                                             \
586                                                                         \
587   /* call doYouWantToGC() */                                            \
588   MAYBE_GC(R2_PTR, name);                                               \
589                                                                         \
590   d1 = R2;                                                              \
591   s1 = W_TO_INT(R1);                                                    \
592                                                                         \
593   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
594   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
595   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
596                                                                         \
597   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
598                                                                         \
599   /* Perform the operation */                                           \
600   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
601                                                                         \
602   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
603          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
604 }
605
606 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
607 name                                                                                    \
608 {                                                                                       \
609   CInt s1, s2;                                                                          \
610   W_ d1, d2;                                                                            \
611   FETCH_MP_TEMP(mp_tmp1);                                                               \
612   FETCH_MP_TEMP(mp_tmp2);                                                               \
613   FETCH_MP_TEMP(mp_result1)                                                             \
614   FETCH_MP_TEMP(mp_result2)                                                             \
615                                                                                         \
616   /* call doYouWantToGC() */                                                            \
617   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
618                                                                                         \
619   s1 = W_TO_INT(R1);                                                                    \
620   d1 = R2;                                                                              \
621   s2 = W_TO_INT(R3);                                                                    \
622   d2 = R4;                                                                              \
623                                                                                         \
624   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
625   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
626   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
627   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
628   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
629   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
630                                                                                         \
631   foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
632   foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
633                                                                                         \
634   /* Perform the operation */                                                           \
635   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
636                                                                                         \
637   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
638            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
639            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
640            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
641 }
642
643 GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
644 GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
645 GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
646 GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
647 GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
648 GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
649 GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
650 GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
651 GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
652 GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
653 GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)
654
655 GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
656 GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
657
658 #ifndef THREADED_RTS
659 section "bss" {
660   mp_tmp_w:  W_; // NB. mp_tmp_w is really an here mp_limb_t
661 }
662 #endif
663
664 gcdIntzh_fast
665 {
666     /* R1 = the first Int#; R2 = the second Int# */
667     W_ r; 
668     FETCH_MP_TEMP(mp_tmp_w);
669
670     W_[mp_tmp_w] = R1;
671     r = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
672
673     R1 = r;
674     /* Result parked in R1, return via info-pointer at TOS */
675     jump %ENTRY_CODE(Sp(0));
676 }
677
678
679 gcdIntegerIntzh_fast
680 {
681     /* R1 = s1; R2 = d1; R3 = the int */
682     R1 = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
683     
684     /* Result parked in R1, return via info-pointer at TOS */
685     jump %ENTRY_CODE(Sp(0));
686 }
687
688
689 cmpIntegerIntzh_fast
690 {
691     /* R1 = s1; R2 = d1; R3 = the int */
692     W_ usize, vsize, v_digit, u_digit;
693
694     usize = R1;
695     vsize = 0;
696     v_digit = R3;
697
698     // paraphrased from __gmpz_cmp_si() in the GMP sources
699     if (%gt(v_digit,0)) {
700         vsize = 1;
701     } else { 
702         if (%lt(v_digit,0)) {
703             vsize = -1;
704             v_digit = -v_digit;
705         }
706     }
707
708     if (usize != vsize) {
709         R1 = usize - vsize; 
710         jump %ENTRY_CODE(Sp(0));
711     }
712
713     if (usize == 0) {
714         R1 = 0; 
715         jump %ENTRY_CODE(Sp(0));
716     }
717
718     u_digit = W_[BYTE_ARR_CTS(R2)];
719
720     if (u_digit == v_digit) {
721         R1 = 0; 
722         jump %ENTRY_CODE(Sp(0));
723     }
724
725     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
726         R1 = usize; 
727     } else {
728         R1 = -usize; 
729     }
730
731     jump %ENTRY_CODE(Sp(0));
732 }
733
734 cmpIntegerzh_fast
735 {
736     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
737     W_ usize, vsize, size, up, vp;
738     CInt cmp;
739
740     // paraphrased from __gmpz_cmp() in the GMP sources
741     usize = R1;
742     vsize = R3;
743
744     if (usize != vsize) {
745         R1 = usize - vsize; 
746         jump %ENTRY_CODE(Sp(0));
747     }
748
749     if (usize == 0) {
750         R1 = 0; 
751         jump %ENTRY_CODE(Sp(0));
752     }
753
754     if (%lt(usize,0)) { // NB. not <, which is unsigned
755         size = -usize;
756     } else {
757         size = usize;
758     }
759
760     up = BYTE_ARR_CTS(R2);
761     vp = BYTE_ARR_CTS(R4);
762
763     cmp = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];
764
765     if (cmp == 0 :: CInt) {
766         R1 = 0; 
767         jump %ENTRY_CODE(Sp(0));
768     }
769
770     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
771         R1 = 1;
772     } else {
773         R1 = (-1); 
774     }
775     /* Result parked in R1, return via info-pointer at TOS */
776     jump %ENTRY_CODE(Sp(0));
777 }
778
779 integer2Intzh_fast
780 {
781     /* R1 = s; R2 = d */
782     W_ r, s;
783
784     s = R1;
785     if (s == 0) {
786         r = 0;
787     } else {
788         r = W_[R2 + SIZEOF_StgArrWords];
789         if (%lt(s,0)) {
790             r = -r;
791         }
792     }
793     /* Result parked in R1, return via info-pointer at TOS */
794     R1 = r;
795     jump %ENTRY_CODE(Sp(0));
796 }
797
798 integer2Wordzh_fast
799 {
800   /* R1 = s; R2 = d */
801   W_ r, s;
802
803   s = R1;
804   if (s == 0) {
805     r = 0;
806   } else {
807     r = W_[R2 + SIZEOF_StgArrWords];
808     if (%lt(s,0)) {
809         r = -r;
810     }
811   }
812   /* Result parked in R1, return via info-pointer at TOS */
813   R1 = r;
814   jump %ENTRY_CODE(Sp(0));
815 }
816
817 decodeFloatzh_fast
818
819     W_ p;
820     F_ arg;
821     FETCH_MP_TEMP(mp_tmp1);
822     FETCH_MP_TEMP(mp_tmp_w);
823     
824     /* arguments: F1 = Float# */
825     arg = F1;
826     
827     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
828     
829     /* Be prepared to tell Lennart-coded __decodeFloat
830        where mantissa._mp_d can be put (it does not care about the rest) */
831     p = Hp - SIZEOF_StgArrWords;
832     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
833     StgArrWords_words(p) = 1;
834     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
835     
836     /* Perform the operation */
837     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
838     
839     /* returns: (Int# (expn), Int#, ByteArray#) */
840     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
841 }
842
843 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
844 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
845
846 decodeDoublezh_fast
847
848     D_ arg;
849     W_ p;
850     FETCH_MP_TEMP(mp_tmp1);
851     FETCH_MP_TEMP(mp_tmp_w);
852
853     /* arguments: D1 = Double# */
854     arg = D1;
855
856     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
857     
858     /* Be prepared to tell Lennart-coded __decodeDouble
859        where mantissa.d can be put (it does not care about the rest) */
860     p = Hp - ARR_SIZE + WDS(1);
861     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
862     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
863     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
864
865     /* Perform the operation */
866     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
867     
868     /* returns: (Int# (expn), Int#, ByteArray#) */
869     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
870 }
871
872 /* -----------------------------------------------------------------------------
873  * Concurrency primitives
874  * -------------------------------------------------------------------------- */
875
876 forkzh_fast
877 {
878   /* args: R1 = closure to spark */
879
880   MAYBE_GC(R1_PTR, forkzh_fast);
881
882   W_ closure;
883   W_ threadid;
884   closure = R1;
885
886   "ptr" threadid = foreign "C" createIOThread( MyCapability() "ptr", 
887                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
888                                 closure "ptr") [];
889   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
890
891   // switch at the earliest opportunity
892   CInt[context_switch] = 1 :: CInt;
893   
894   RET_P(threadid);
895 }
896
897 forkOnzh_fast
898 {
899   /* args: R1 = cpu, R2 = closure to spark */
900
901   MAYBE_GC(R2_PTR, forkOnzh_fast);
902
903   W_ cpu;
904   W_ closure;
905   W_ threadid;
906   cpu = R1;
907   closure = R2;
908
909   "ptr" threadid = foreign "C" createIOThread( MyCapability() "ptr", 
910                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
911                                 closure "ptr") [];
912   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
913
914   // switch at the earliest opportunity
915   CInt[context_switch] = 1 :: CInt;
916   
917   RET_P(threadid);
918 }
919
920 yieldzh_fast
921 {
922   jump stg_yield_noregs;
923 }
924
925 myThreadIdzh_fast
926 {
927   /* no args. */
928   RET_P(CurrentTSO);
929 }
930
931 labelThreadzh_fast
932 {
933   /* args: 
934         R1 = ThreadId#
935         R2 = Addr# */
936 #ifdef DEBUG
937   foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
938 #endif
939   jump %ENTRY_CODE(Sp(0));
940 }
941
942 isCurrentThreadBoundzh_fast
943 {
944   /* no args */
945   W_ r;
946   r = foreign "C" isThreadBound(CurrentTSO) [];
947   RET_N(r);
948 }
949
950
951 /* -----------------------------------------------------------------------------
952  * TVar primitives
953  * -------------------------------------------------------------------------- */
954
955 #ifdef REG_R1
956 #define SP_OFF 0
957 #define IF_NOT_REG_R1(x) 
958 #else
959 #define SP_OFF 1
960 #define IF_NOT_REG_R1(x) x
961 #endif
962
963 // Catch retry frame ------------------------------------------------------------
964
965 #define CATCH_RETRY_FRAME_ERROR(label) \
966   label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }
967
968 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
969 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
970 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
971 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
972 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
973 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
974 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
975 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)
976
977 #if MAX_VECTORED_RTN > 8
978 #error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
979 #endif
980
981 #if defined(PROFILING)
982 #define CATCH_RETRY_FRAME_BITMAP 7
983 #define CATCH_RETRY_FRAME_WORDS  6
984 #else
985 #define CATCH_RETRY_FRAME_BITMAP 1
986 #define CATCH_RETRY_FRAME_WORDS  4
987 #endif
988
989 INFO_TABLE_RET(stg_catch_retry_frame,
990                CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
991                CATCH_RETRY_FRAME,
992                stg_catch_retry_frame_0_ret,
993                stg_catch_retry_frame_1_ret,
994                stg_catch_retry_frame_2_ret,
995                stg_catch_retry_frame_3_ret,
996                stg_catch_retry_frame_4_ret,
997                stg_catch_retry_frame_5_ret,
998                stg_catch_retry_frame_6_ret,
999                stg_catch_retry_frame_7_ret)
1000 {
1001    W_ r, frame, trec, outer;
1002    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1003
1004    frame = Sp;
1005    trec = StgTSO_trec(CurrentTSO);
1006    "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1007    r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
1008    if (r != 0) {
1009      /* Succeeded (either first branch or second branch) */
1010      StgTSO_trec(CurrentTSO) = outer;
1011      Sp = Sp + SIZEOF_StgCatchRetryFrame;
1012      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1013      jump %ENTRY_CODE(Sp(SP_OFF));
1014    } else {
1015      /* Did not commit: retry */
1016      W_ new_trec;
1017      "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1018      StgTSO_trec(CurrentTSO) = new_trec;
1019      if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1020        R1 = StgCatchRetryFrame_alt_code(frame);
1021      } else {
1022        R1 = StgCatchRetryFrame_first_code(frame);
1023        StgCatchRetryFrame_first_code_trec(frame) = new_trec;
1024      }
1025      jump stg_ap_v_fast;
1026    }
1027 }
1028
1029
1030 // Atomically frame -------------------------------------------------------------
1031
1032
1033 #define ATOMICALLY_FRAME_ERROR(label) \
1034   label { foreign "C" barf("atomically_frame incorrectly entered!"); }
1035
1036 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
1037 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
1038 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
1039 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
1040 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
1041 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
1042 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
1043 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)
1044
1045 #if MAX_VECTORED_RTN > 8
1046 #error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
1047 #endif
1048
1049 #if defined(PROFILING)
1050 #define ATOMICALLY_FRAME_BITMAP 3
1051 #define ATOMICALLY_FRAME_WORDS  3
1052 #else
1053 #define ATOMICALLY_FRAME_BITMAP 0
1054 #define ATOMICALLY_FRAME_WORDS  1
1055 #endif
1056
1057
1058 INFO_TABLE_RET(stg_atomically_frame,
1059                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1060                ATOMICALLY_FRAME,
1061                stg_atomically_frame_0_ret,
1062                stg_atomically_frame_1_ret,
1063                stg_atomically_frame_2_ret,
1064                stg_atomically_frame_3_ret,
1065                stg_atomically_frame_4_ret,
1066                stg_atomically_frame_5_ret,
1067                stg_atomically_frame_6_ret,
1068                stg_atomically_frame_7_ret)
1069 {
1070   W_ frame, trec, valid;
1071   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1072
1073   frame = Sp;
1074   trec = StgTSO_trec(CurrentTSO);
1075
1076   /* The TSO is not currently waiting: try to commit the transaction */
1077   valid = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
1078   if (valid != 0) {
1079     /* Transaction was valid: commit succeeded */
1080     StgTSO_trec(CurrentTSO) = NO_TREC;
1081     Sp = Sp + SIZEOF_StgAtomicallyFrame;
1082     IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1083     jump %ENTRY_CODE(Sp(SP_OFF));
1084   } else {
1085     /* Transaction was not valid: try again */
1086     "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1087     StgTSO_trec(CurrentTSO) = trec;
1088     R1 = StgAtomicallyFrame_code(frame);
1089     jump stg_ap_v_fast;
1090   }
1091 }
1092
1093 INFO_TABLE_RET(stg_atomically_waiting_frame,
1094                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1095                ATOMICALLY_FRAME,
1096                stg_atomically_frame_0_ret,
1097                stg_atomically_frame_1_ret,
1098                stg_atomically_frame_2_ret,
1099                stg_atomically_frame_3_ret,
1100                stg_atomically_frame_4_ret,
1101                stg_atomically_frame_5_ret,
1102                stg_atomically_frame_6_ret,
1103                stg_atomically_frame_7_ret)
1104 {
1105   W_ frame, trec, valid;
1106   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1107
1108   frame = Sp;
1109
1110   /* The TSO is currently waiting: should we stop waiting? */
1111   valid = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
1112   if (valid != 0) {
1113     /* Previous attempt is still valid: no point trying again yet */
1114           IF_NOT_REG_R1(Sp_adj(-2);
1115                         Sp(1) = stg_NO_FINALIZER_closure;
1116                         Sp(0) = stg_ut_1_0_unreg_info;)
1117     jump stg_block_noregs;
1118   } else {
1119     /* Previous attempt is no longer valid: try again */
1120     "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1121     StgTSO_trec(CurrentTSO) = trec;
1122     StgHeader_info(frame) = stg_atomically_frame_info;
1123     R1 = StgAtomicallyFrame_code(frame);
1124     jump stg_ap_v_fast;
1125   }
1126 }
1127
1128 // STM catch frame --------------------------------------------------------------
1129
1130 #define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)          \
1131    label                                                   \
1132    {                                                       \
1133       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )  \
1134       Sp = Sp + SIZEOF_StgCatchSTMFrame;                   \
1135       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)             \
1136       jump ret;                                            \
1137    }
1138
1139 #ifdef REG_R1
1140 #define SP_OFF 0
1141 #else
1142 #define SP_OFF 1
1143 #endif
1144
1145 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
1146 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
1147 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
1148 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
1149 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
1150 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
1151 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
1152 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
1153
1154 #if MAX_VECTORED_RTN > 8
1155 #error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
1156 #endif
1157
1158 #if defined(PROFILING)
1159 #define CATCH_STM_FRAME_BITMAP 3
1160 #define CATCH_STM_FRAME_WORDS  3
1161 #else
1162 #define CATCH_STM_FRAME_BITMAP 0
1163 #define CATCH_STM_FRAME_WORDS  1
1164 #endif
1165
1166 /* Catch frames are very similar to update frames, but when entering
1167  * one we just pop the frame off the stack and perform the correct
1168  * kind of return to the activation record underneath us on the stack.
1169  */
1170
1171 INFO_TABLE_RET(stg_catch_stm_frame,
1172                CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
1173                CATCH_STM_FRAME,
1174                stg_catch_stm_frame_0_ret,
1175                stg_catch_stm_frame_1_ret,
1176                stg_catch_stm_frame_2_ret,
1177                stg_catch_stm_frame_3_ret,
1178                stg_catch_stm_frame_4_ret,
1179                stg_catch_stm_frame_5_ret,
1180                stg_catch_stm_frame_6_ret,
1181                stg_catch_stm_frame_7_ret)
1182 CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1183
1184
1185 // Primop definition ------------------------------------------------------------
1186
1187 atomicallyzh_fast
1188 {
1189   W_ frame;
1190   W_ old_trec;
1191   W_ new_trec;
1192   
1193   // stmStartTransaction may allocate
1194   MAYBE_GC (R1_PTR, atomicallyzh_fast); 
1195
1196   /* Args: R1 = m :: STM a */
1197   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1198
1199   old_trec = StgTSO_trec(CurrentTSO);
1200
1201   /* Nested transactions are not allowed; raise an exception */
1202   if (old_trec != NO_TREC) {
1203      R1 = base_GHCziIOBase_NestedAtomically_closure;
1204      jump raisezh_fast;
1205   }
1206
1207   /* Set up the atomically frame */
1208   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1209   frame = Sp;
1210
1211   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1212   StgAtomicallyFrame_code(frame) = R1;
1213
1214   /* Start the memory transcation */
1215   "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
1216   StgTSO_trec(CurrentTSO) = new_trec;
1217
1218   /* Apply R1 to the realworld token */
1219   jump stg_ap_v_fast;
1220 }
1221
1222
1223 catchSTMzh_fast
1224 {
1225   W_ frame;
1226   
1227   /* Args: R1 :: STM a */
1228   /* Args: R2 :: Exception -> STM a */
1229   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1230
1231   /* Set up the catch frame */
1232   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1233   frame = Sp;
1234
1235   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1236   StgCatchSTMFrame_handler(frame) = R2;
1237
1238   /* Apply R1 to the realworld token */
1239   jump stg_ap_v_fast;
1240 }
1241
1242
1243 catchRetryzh_fast
1244 {
1245   W_ frame;
1246   W_ new_trec;
1247   W_ trec;
1248
1249   // stmStartTransaction may allocate
1250   MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 
1251
1252   /* Args: R1 :: STM a */
1253   /* Args: R2 :: STM a */
1254   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1255
1256   /* Start a nested transaction within which to run the first code */
1257   trec = StgTSO_trec(CurrentTSO);
1258   "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
1259   StgTSO_trec(CurrentTSO) = new_trec;
1260
1261   /* Set up the catch-retry frame */
1262   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1263   frame = Sp;
1264   
1265   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1266   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1267   StgCatchRetryFrame_first_code(frame) = R1;
1268   StgCatchRetryFrame_alt_code(frame) = R2;
1269   StgCatchRetryFrame_first_code_trec(frame) = new_trec;
1270
1271   /* Apply R1 to the realworld token */
1272   jump stg_ap_v_fast;
1273 }
1274
1275
1276 retryzh_fast
1277 {
1278   W_ frame_type;
1279   W_ frame;
1280   W_ trec;
1281   W_ outer;
1282   W_ r;
1283
1284   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1285
1286   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1287 retry_pop_stack:
1288   trec = StgTSO_trec(CurrentTSO);
1289   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1290   StgTSO_sp(CurrentTSO) = Sp;
1291   frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
1292   Sp = StgTSO_sp(CurrentTSO);
1293   frame = Sp;
1294
1295   if (frame_type == CATCH_RETRY_FRAME) {
1296     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1297     ASSERT(outer != NO_TREC);
1298     if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1299       // Retry in the first code: try the alternative
1300       "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1301       StgTSO_trec(CurrentTSO) = trec;
1302       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1303       R1 = StgCatchRetryFrame_alt_code(frame);
1304       jump stg_ap_v_fast;
1305     } else {
1306       // Retry in the alternative code: propagate
1307       W_ other_trec;
1308       other_trec = StgCatchRetryFrame_first_code_trec(frame);
1309       r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", other_trec "ptr") [];
1310       if (r != 0) {
1311         r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
1312       } else {
1313         foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1314       }
1315       if (r != 0) {
1316         // Merge between siblings succeeded: commit it back to enclosing transaction
1317         // and then propagate the retry
1318         StgTSO_trec(CurrentTSO) = outer;
1319         Sp = Sp + SIZEOF_StgCatchRetryFrame;
1320         goto retry_pop_stack;
1321       } else {
1322         // Merge failed: we musn't propagate the retry.  Try both paths again.
1323         "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1324         StgCatchRetryFrame_first_code_trec(frame) = trec;
1325         StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1326         StgTSO_trec(CurrentTSO) = trec;
1327         R1 = StgCatchRetryFrame_first_code(frame);
1328         jump stg_ap_v_fast;
1329       }
1330     }
1331   }
1332
1333   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1334   ASSERT(frame_type == ATOMICALLY_FRAME);
1335   ASSERT(outer == NO_TREC);
1336   r = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
1337   if (r != 0) {
1338     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1339     StgHeader_info(frame) = stg_atomically_waiting_frame_info;
1340     Sp = frame;
1341     // Fix up the stack in the unregisterised case: the return convention is different.
1342     IF_NOT_REG_R1(Sp_adj(-2); 
1343                   Sp(1) = stg_NO_FINALIZER_closure;
1344                   Sp(0) = stg_ut_1_0_unreg_info;)
1345     R3 = trec; // passing to stmWaitUnblock()
1346     jump stg_block_stmwait;
1347   } else {
1348     // Transaction was not valid: retry immediately
1349     "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1350     StgTSO_trec(CurrentTSO) = trec;
1351     R1 = StgAtomicallyFrame_code(frame);
1352     Sp = frame;
1353     jump stg_ap_v_fast;
1354   }
1355 }
1356
1357
1358 newTVarzh_fast
1359 {
1360   W_ tv;
1361   W_ new_value;
1362
1363   /* Args: R1 = initialisation value */
1364
1365   MAYBE_GC (R1_PTR, newTVarzh_fast); 
1366   new_value = R1;
1367   "ptr" tv = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
1368   RET_P(tv);
1369 }
1370
1371
1372 readTVarzh_fast
1373 {
1374   W_ trec;
1375   W_ tvar;
1376   W_ result;
1377
1378   /* Args: R1 = TVar closure */
1379
1380   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1381   trec = StgTSO_trec(CurrentTSO);
1382   tvar = R1;
1383   "ptr" result = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
1384
1385   RET_P(result);
1386 }
1387
1388
1389 writeTVarzh_fast
1390 {
1391   W_ trec;
1392   W_ tvar;
1393   W_ new_value;
1394   
1395   /* Args: R1 = TVar closure */
1396   /*       R2 = New value    */
1397
1398   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1399   trec = StgTSO_trec(CurrentTSO);
1400   tvar = R1;
1401   new_value = R2;
1402   foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1403
1404   jump %ENTRY_CODE(Sp(0));
1405 }
1406
1407
1408 /* -----------------------------------------------------------------------------
1409  * MVar primitives
1410  *
1411  * take & putMVar work as follows.  Firstly, an important invariant:
1412  *
1413  *    If the MVar is full, then the blocking queue contains only
1414  *    threads blocked on putMVar, and if the MVar is empty then the
1415  *    blocking queue contains only threads blocked on takeMVar.
1416  *
1417  * takeMvar:
1418  *    MVar empty : then add ourselves to the blocking queue
1419  *    MVar full  : remove the value from the MVar, and
1420  *                 blocking queue empty     : return
1421  *                 blocking queue non-empty : perform the first blocked putMVar
1422  *                                            from the queue, and wake up the
1423  *                                            thread (MVar is now full again)
1424  *
1425  * putMVar is just the dual of the above algorithm.
1426  *
1427  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1428  * the stack of the thread waiting to do the putMVar.  See
1429  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1430  * the stack layout, and the PerformPut and PerformTake macros below.
1431  *
1432  * It is important that a blocked take or put is woken up with the
1433  * take/put already performed, because otherwise there would be a
1434  * small window of vulnerability where the thread could receive an
1435  * exception and never perform its take or put, and we'd end up with a
1436  * deadlock.
1437  *
1438  * -------------------------------------------------------------------------- */
1439
1440 isEmptyMVarzh_fast
1441 {
1442     /* args: R1 = MVar closure */
1443
1444     if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
1445         RET_N(1);
1446     } else {
1447         RET_N(0);
1448     }
1449 }
1450
1451 newMVarzh_fast
1452 {
1453     /* args: none */
1454     W_ mvar;
1455
1456     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1457   
1458     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1459     SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
1460     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1461     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1462     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1463     RET_P(mvar);
1464 }
1465
1466
1467 /* If R1 isn't available, pass it on the stack */
1468 #ifdef REG_R1
1469 #define PerformTake(tso, value)                         \
1470     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1471     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1472 #else
1473 #define PerformTake(tso, value)                                 \
1474     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1475     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1476 #endif
1477
1478 #define PerformPut(tso,lval)                    \
1479     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1480     lval = W_[StgTSO_sp(tso) - WDS(1)];
1481
1482 takeMVarzh_fast
1483 {
1484     W_ mvar, val, info, tso;
1485
1486     /* args: R1 = MVar closure */
1487     mvar = R1;
1488
1489 #if defined(THREADED_RTS)
1490     "ptr" info = foreign "C" lockClosure(mvar "ptr") [];
1491 #else
1492     info = GET_INFO(mvar);
1493 #endif
1494
1495     /* If the MVar is empty, put ourselves on its blocking queue,
1496      * and wait until we're woken up.
1497      */
1498     if (info == stg_EMPTY_MVAR_info) {
1499         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1500             StgMVar_head(mvar) = CurrentTSO;
1501         } else {
1502             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1503         }
1504         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1505         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1506         StgTSO_block_info(CurrentTSO)  = mvar;
1507         StgMVar_tail(mvar) = CurrentTSO;
1508         
1509         jump stg_block_takemvar;
1510   }
1511
1512   /* we got the value... */
1513   val = StgMVar_value(mvar);
1514
1515   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1516   {
1517       /* There are putMVar(s) waiting... 
1518        * wake up the first thread on the queue
1519        */
1520       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1521
1522       /* actually perform the putMVar for the thread that we just woke up */
1523       tso = StgMVar_head(mvar);
1524       PerformPut(tso,StgMVar_value(mvar));
1525       dirtyTSO(tso);
1526
1527 #if defined(GRAN) || defined(PAR)
1528       /* ToDo: check 2nd arg (mvar) is right */
1529       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
1530       StgMVar_head(mvar) = tso;
1531 #else
1532       "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", 
1533                                          StgMVar_head(mvar) "ptr") [];
1534       StgMVar_head(mvar) = tso;
1535 #endif
1536
1537       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1538           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1539       }
1540
1541 #if defined(THREADED_RTS)
1542       foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
1543 #endif
1544       RET_P(val);
1545   } 
1546   else
1547   {
1548       /* No further putMVars, MVar is now empty */
1549       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1550  
1551 #if defined(THREADED_RTS)
1552       foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
1553 #else
1554       SET_INFO(mvar,stg_EMPTY_MVAR_info);
1555 #endif
1556
1557       RET_P(val);
1558   }
1559 }
1560
1561
1562 tryTakeMVarzh_fast
1563 {
1564     W_ mvar, val, info, tso;
1565
1566     /* args: R1 = MVar closure */
1567
1568     mvar = R1;
1569
1570 #if defined(THREADED_RTS)
1571     "ptr" info = foreign "C" lockClosure(mvar "ptr") [];
1572 #else
1573     info = GET_INFO(mvar);
1574 #endif
1575
1576     if (info == stg_EMPTY_MVAR_info) {
1577 #if defined(THREADED_RTS)
1578         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
1579 #endif
1580         /* HACK: we need a pointer to pass back, 
1581          * so we abuse NO_FINALIZER_closure
1582          */
1583         RET_NP(0, stg_NO_FINALIZER_closure);
1584     }
1585
1586     /* we got the value... */
1587     val = StgMVar_value(mvar);
1588
1589     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1590
1591         /* There are putMVar(s) waiting... 
1592          * wake up the first thread on the queue
1593          */
1594         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1595
1596         /* actually perform the putMVar for the thread that we just woke up */
1597         tso = StgMVar_head(mvar);
1598         PerformPut(tso,StgMVar_value(mvar));
1599         dirtyTSO(tso);
1600
1601 #if defined(GRAN) || defined(PAR)
1602         /* ToDo: check 2nd arg (mvar) is right */
1603         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
1604         StgMVar_head(mvar) = tso;
1605 #else
1606         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr",
1607                                            StgMVar_head(mvar) "ptr") [];
1608         StgMVar_head(mvar) = tso;
1609 #endif
1610
1611         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1612             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1613         }
1614 #if defined(THREADED_RTS)
1615         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
1616 #endif
1617     }
1618     else 
1619     {
1620         /* No further putMVars, MVar is now empty */
1621         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1622 #if defined(THREADED_RTS)
1623         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
1624 #else
1625         SET_INFO(mvar,stg_EMPTY_MVAR_info);
1626 #endif
1627     }
1628     
1629     RET_NP(1, val);
1630 }
1631
1632
1633 putMVarzh_fast
1634 {
1635     W_ mvar, info, tso;
1636
1637     /* args: R1 = MVar, R2 = value */
1638     mvar = R1;
1639
1640 #if defined(THREADED_RTS)
1641     "ptr" info = foreign "C" lockClosure(mvar "ptr") [R2];
1642 #else
1643     info = GET_INFO(mvar);
1644 #endif
1645
1646     if (info == stg_FULL_MVAR_info) {
1647         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1648             StgMVar_head(mvar) = CurrentTSO;
1649         } else {
1650             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1651         }
1652         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1653         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1654         StgTSO_block_info(CurrentTSO)  = mvar;
1655         StgMVar_tail(mvar) = CurrentTSO;
1656         
1657         jump stg_block_putmvar;
1658     }
1659   
1660     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1661
1662         /* There are takeMVar(s) waiting: wake up the first one
1663          */
1664         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1665
1666         /* actually perform the takeMVar */
1667         tso = StgMVar_head(mvar);
1668         PerformTake(tso, R2);
1669         dirtyTSO(tso);
1670       
1671 #if defined(GRAN) || defined(PAR)
1672         /* ToDo: check 2nd arg (mvar) is right */
1673         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
1674         StgMVar_head(mvar) = tso;
1675 #else
1676         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
1677         StgMVar_head(mvar) = tso;
1678 #endif
1679
1680         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1681             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1682         }
1683
1684 #if defined(THREADED_RTS)
1685         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
1686 #endif
1687         jump %ENTRY_CODE(Sp(0));
1688     }
1689     else
1690     {
1691         /* No further takes, the MVar is now full. */
1692         StgMVar_value(mvar) = R2;
1693
1694 #if defined(THREADED_RTS)
1695         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
1696 #else
1697         SET_INFO(mvar,stg_FULL_MVAR_info);
1698 #endif
1699         jump %ENTRY_CODE(Sp(0));
1700     }
1701     
1702     /* ToDo: yield afterward for better communication performance? */
1703 }
1704
1705
1706 tryPutMVarzh_fast
1707 {
1708     W_ mvar, info, tso;
1709
1710     /* args: R1 = MVar, R2 = value */
1711     mvar = R1;
1712
1713 #if defined(THREADED_RTS)
1714     "ptr" info = foreign "C" lockClosure(mvar "ptr") [R2];
1715 #else
1716     info = GET_INFO(mvar);
1717 #endif
1718
1719     if (info == stg_FULL_MVAR_info) {
1720 #if defined(THREADED_RTS)
1721         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
1722 #endif
1723         RET_N(0);
1724     }
1725   
1726     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1727
1728         /* There are takeMVar(s) waiting: wake up the first one
1729          */
1730         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1731         
1732         /* actually perform the takeMVar */
1733         tso = StgMVar_head(mvar);
1734         PerformTake(tso, R2);
1735         dirtyTSO(tso);
1736       
1737 #if defined(GRAN) || defined(PAR)
1738         /* ToDo: check 2nd arg (mvar) is right */
1739         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
1740         StgMVar_head(mvar) = tso;
1741 #else
1742         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
1743         StgMVar_head(mvar) = tso;
1744 #endif
1745
1746         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1747             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1748         }
1749
1750 #if defined(THREADED_RTS)
1751         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
1752 #endif
1753     }
1754     else
1755     {
1756         /* No further takes, the MVar is now full. */
1757         StgMVar_value(mvar) = R2;
1758
1759 #if defined(THREADED_RTS)
1760         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
1761 #else
1762         SET_INFO(mvar,stg_FULL_MVAR_info);
1763 #endif
1764     }
1765     
1766     RET_N(1);
1767     /* ToDo: yield afterward for better communication performance? */
1768 }
1769
1770
1771 /* -----------------------------------------------------------------------------
1772    Stable pointer primitives
1773    -------------------------------------------------------------------------  */
1774
1775 makeStableNamezh_fast
1776 {
1777     W_ index, sn_obj;
1778
1779     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1780   
1781     index = foreign "C" lookupStableName(R1 "ptr") [];
1782
1783     /* Is there already a StableName for this heap object?
1784      *  stable_ptr_table is a pointer to an array of snEntry structs.
1785      */
1786     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1787         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1788         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1789         StgStableName_sn(sn_obj) = index;
1790         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1791     } else {
1792         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1793     }
1794     
1795     RET_P(sn_obj);
1796 }
1797
1798
1799 makeStablePtrzh_fast
1800 {
1801     /* Args: R1 = a */
1802     W_ sp;
1803     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1804     "ptr" sp = foreign "C" getStablePtr(R1 "ptr") [];
1805     RET_N(sp);
1806 }
1807
1808 deRefStablePtrzh_fast
1809 {
1810     /* Args: R1 = the stable ptr */
1811     W_ r, sp;
1812     sp = R1;
1813     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1814     RET_P(r);
1815 }
1816
1817 /* -----------------------------------------------------------------------------
1818    Bytecode object primitives
1819    -------------------------------------------------------------------------  */
1820
1821 newBCOzh_fast
1822 {
1823     /* R1 = instrs
1824        R2 = literals
1825        R3 = ptrs
1826        R4 = itbls
1827        R5 = arity
1828        R6 = bitmap array
1829     */
1830     W_ bco, bitmap_arr, bytes, words;
1831     
1832     bitmap_arr = R6;
1833     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1834     bytes = WDS(words);
1835
1836     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );
1837
1838     bco = Hp - bytes + WDS(1);
1839     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1840     
1841     StgBCO_instrs(bco)     = R1;
1842     StgBCO_literals(bco)   = R2;
1843     StgBCO_ptrs(bco)       = R3;
1844     StgBCO_itbls(bco)      = R4;
1845     StgBCO_arity(bco)      = HALF_W_(R5);
1846     StgBCO_size(bco)       = HALF_W_(words);
1847     
1848     // Copy the arity/bitmap info into the BCO
1849     W_ i;
1850     i = 0;
1851 for:
1852     if (i < StgArrWords_words(bitmap_arr)) {
1853         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1854         i = i + 1;
1855         goto for;
1856     }
1857     
1858     RET_P(bco);
1859 }
1860
1861
1862 mkApUpd0zh_fast
1863 {
1864     // R1 = the BCO# for the AP
1865     //  
1866     W_ ap;
1867
1868     // This function is *only* used to wrap zero-arity BCOs in an
1869     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1870     // saturated and always points directly to a FUN or BCO.
1871     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1872            StgBCO_arity(R1) == HALF_W_(0));
1873
1874     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1875     TICK_ALLOC_UP_THK(0, 0);
1876     CCCS_ALLOC(SIZEOF_StgAP);
1877
1878     ap = Hp - SIZEOF_StgAP + WDS(1);
1879     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1880     
1881     StgAP_n_args(ap) = HALF_W_(0);
1882     StgAP_fun(ap) = R1;
1883     
1884     RET_P(ap);
1885 }
1886
1887 /* -----------------------------------------------------------------------------
1888    Thread I/O blocking primitives
1889    -------------------------------------------------------------------------- */
1890
1891 /* Add a thread to the end of the blocked queue. (C-- version of the C
1892  * macro in Schedule.h).
1893  */
1894 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1895     ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
1896     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1897       W_[blocked_queue_hd] = tso;                       \
1898     } else {                                            \
1899       StgTSO_link(W_[blocked_queue_tl]) = tso;          \
1900     }                                                   \
1901     W_[blocked_queue_tl] = tso;
1902
1903 waitReadzh_fast
1904 {
1905     /* args: R1 */
1906 #ifdef THREADED_RTS
1907     foreign "C" barf("waitRead# on threaded RTS");
1908 #else
1909
1910     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1911     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1912     StgTSO_block_info(CurrentTSO) = R1;
1913     // No locking - we're not going to use this interface in the
1914     // threaded RTS anyway.
1915     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1916     jump stg_block_noregs;
1917 #endif
1918 }
1919
1920 waitWritezh_fast
1921 {
1922     /* args: R1 */
1923 #ifdef THREADED_RTS
1924     foreign "C" barf("waitWrite# on threaded RTS");
1925 #else
1926
1927     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1928     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1929     StgTSO_block_info(CurrentTSO) = R1;
1930     // No locking - we're not going to use this interface in the
1931     // threaded RTS anyway.
1932     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1933     jump stg_block_noregs;
1934 #endif
1935 }
1936
1937
1938 STRING(stg_delayzh_malloc_str, "delayzh_fast")
1939 delayzh_fast
1940 {
1941 #ifdef mingw32_HOST_OS
1942     W_ ares;
1943     CInt reqID;
1944 #else
1945     W_ t, prev, target;
1946 #endif
1947
1948 #ifdef THREADED_RTS
1949     foreign "C" barf("delay# on threaded RTS");
1950 #else
1951
1952     /* args: R1 (microsecond delay amount) */
1953     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1954     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
1955
1956 #ifdef mingw32_HOST_OS
1957
1958     /* could probably allocate this on the heap instead */
1959     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1960                                             stg_delayzh_malloc_str);
1961     reqID = foreign "C" addDelayRequest(R1);
1962     StgAsyncIOResult_reqID(ares)   = reqID;
1963     StgAsyncIOResult_len(ares)     = 0;
1964     StgAsyncIOResult_errCode(ares) = 0;
1965     StgTSO_block_info(CurrentTSO)  = ares;
1966
1967     /* Having all async-blocked threads reside on the blocked_queue
1968      * simplifies matters, so change the status to OnDoProc put the
1969      * delayed thread on the blocked_queue.
1970      */
1971     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1972     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1973     jump stg_block_async_void;
1974
1975 #else
1976
1977     W_ time;
1978     time = foreign "C" getourtimeofday() [R1];
1979     target = (R1 / (TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags))*1000)) + time;
1980     StgTSO_block_info(CurrentTSO) = target;
1981
1982     /* Insert the new thread in the sleeping queue. */
1983     prev = NULL;
1984     t = W_[sleeping_queue];
1985 while:
1986     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
1987         prev = t;
1988         t = StgTSO_link(t);
1989         goto while;
1990     }
1991
1992     StgTSO_link(CurrentTSO) = t;
1993     if (prev == NULL) {
1994         W_[sleeping_queue] = CurrentTSO;
1995     } else {
1996         StgTSO_link(prev) = CurrentTSO;
1997     }
1998     jump stg_block_noregs;
1999 #endif
2000 #endif /* !THREADED_RTS */
2001 }
2002
2003
2004 #ifdef mingw32_HOST_OS
2005 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
2006 asyncReadzh_fast
2007 {
2008     W_ ares;
2009     CInt reqID;
2010
2011 #ifdef THREADED_RTS
2012     foreign "C" barf("asyncRead# on threaded RTS");
2013 #else
2014
2015     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2016     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2017     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
2018
2019     /* could probably allocate this on the heap instead */
2020     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2021                                             stg_asyncReadzh_malloc_str)
2022                         [R1,R2,R3,R4];
2023     reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
2024     StgAsyncIOResult_reqID(ares)   = reqID;
2025     StgAsyncIOResult_len(ares)     = 0;
2026     StgAsyncIOResult_errCode(ares) = 0;
2027     StgTSO_block_info(CurrentTSO)  = ares;
2028     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2029     jump stg_block_async;
2030 #endif
2031 }
2032
2033 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
2034 asyncWritezh_fast
2035 {
2036     W_ ares;
2037     CInt reqID;
2038
2039 #ifdef THREADED_RTS
2040     foreign "C" barf("asyncWrite# on threaded RTS");
2041 #else
2042
2043     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2044     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2045     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2046
2047     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2048                                             stg_asyncWritezh_malloc_str)
2049                         [R1,R2,R3,R4];
2050     reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
2051
2052     StgAsyncIOResult_reqID(ares)   = reqID;
2053     StgAsyncIOResult_len(ares)     = 0;
2054     StgAsyncIOResult_errCode(ares) = 0;
2055     StgTSO_block_info(CurrentTSO)  = ares;
2056     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2057     jump stg_block_async;
2058 #endif
2059 }
2060
2061 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
2062 asyncDoProczh_fast
2063 {
2064     W_ ares;
2065     CInt reqID;
2066
2067 #ifdef THREADED_RTS
2068     foreign "C" barf("asyncDoProc# on threaded RTS");
2069 #else
2070
2071     /* args: R1 = proc, R2 = param */
2072     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2073     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2074
2075     /* could probably allocate this on the heap instead */
2076     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2077                                             stg_asyncDoProczh_malloc_str) 
2078                                 [R1,R2];
2079     reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
2080     StgAsyncIOResult_reqID(ares)   = reqID;
2081     StgAsyncIOResult_len(ares)     = 0;
2082     StgAsyncIOResult_errCode(ares) = 0;
2083     StgTSO_block_info(CurrentTSO) = ares;
2084     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2085     jump stg_block_async;
2086 #endif
2087 }
2088 #endif
2089
2090 /* -----------------------------------------------------------------------------
2091   ** temporary **
2092
2093    classes CCallable and CReturnable don't really exist, but the
2094    compiler insists on generating dictionaries containing references
2095    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
2096    for these.  Some C compilers can't cope with zero-length static arrays,
2097    so we have to make these one element long.
2098   --------------------------------------------------------------------------- */
2099
2100 section "rodata" {
2101   GHC_ZCCCallable_static_info:   W_ 0;
2102 }
2103
2104 section "rodata" {
2105   GHC_ZCCReturnable_static_info: W_ 0;
2106 }