ad761ab2e4e8f8245f7ff74532ea2990f24423b9
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check, the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *less* than the size of a word, make sure we
42  * round up to the nearest word for the size of the array.
43  */
44
45 newByteArrayzh_fast
46 {
47     W_ words, payload_words, n, p;
48     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
49     n = R1;
50     payload_words = ROUNDUP_BYTES_TO_WDS(n);
51     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
52     ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
53     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
54     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
55     StgArrWords_words(p) = payload_words;
56     RET_P(p);
57 }
58
59 newPinnedByteArrayzh_fast
60 {
61     W_ words, payload_words, n, p;
62
63     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
64     n = R1;
65     payload_words = ROUNDUP_BYTES_TO_WDS(n);
66
67     // We want an 8-byte aligned array.  allocatePinned() gives us
68     // 8-byte aligned memory by default, but we want to align the
69     // *goods* inside the ArrWords object, so we have to check the
70     // size of the ArrWords header and adjust our size accordingly.
71     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
72     if ((SIZEOF_StgArrWords & 7) != 0) {
73         words = words + 1;
74     }
75
76     ("ptr" p) = foreign "C" allocatePinned(words) [];
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78
79     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
80     // have to push the object forward one word so that the goods
81     // fall on an 8-byte boundary.
82     if ((SIZEOF_StgArrWords & 7) != 0) {
83         p = p + WDS(1);
84     }
85
86     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
87     StgArrWords_words(p) = payload_words;
88     RET_P(p);
89 }
90
91 newArrayzh_fast
92 {
93     W_ words, n, init, arr, p;
94     /* Args: R1 = words, R2 = initialisation value */
95
96     n = R1;
97     MAYBE_GC(R2_PTR,newArrayzh_fast);
98
99     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
100     ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
101     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
102
103     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
104     StgMutArrPtrs_ptrs(arr) = n;
105
106     // Initialise all elements of the the array with the value in R2
107     init = R2;
108     p = arr + SIZEOF_StgMutArrPtrs;
109   for:
110     if (p < arr + WDS(words)) {
111         W_[p] = init;
112         p = p + WDS(1);
113         goto for;
114     }
115
116     RET_P(arr);
117 }
118
119 unsafeThawArrayzh_fast
120 {
121   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
122   //
123   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
124   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
125   // it on the mutable list for the GC to remove (removing something from
126   // the mutable list is not easy, because the mut_list is only singly-linked).
127   // 
128   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
129   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
130   // to indicate that it is still on the mutable list.
131   //
132   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
133   // either it is on a mut_list, or it isn't.  We adopt the convention that
134   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
135   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
136   // we put it on the mutable list more than once, but it would get scavenged
137   // multiple times during GC, which would be unnecessarily slow.
138   //
139   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
140         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
141         recordMutable(R1, R1);
142         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
143         RET_P(R1);
144   } else {
145         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
146         RET_P(R1);
147   }
148 }
149
150 /* -----------------------------------------------------------------------------
151    MutVar primitives
152    -------------------------------------------------------------------------- */
153
154 newMutVarzh_fast
155 {
156     W_ mv;
157     /* Args: R1 = initialisation value */
158
159     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
160
161     mv = Hp - SIZEOF_StgMutVar + WDS(1);
162     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
163     StgMutVar_var(mv) = R1;
164     
165     RET_P(mv);
166 }
167
168 atomicModifyMutVarzh_fast
169 {
170     W_ mv, z, x, y, r;
171     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
172
173     /* If x is the current contents of the MutVar#, then 
174        We want to make the new contents point to
175
176          (sel_0 (f x))
177  
178        and the return value is
179          
180          (sel_1 (f x))
181
182         obviously we can share (f x).
183
184          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
185          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
186          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
187     */
188
189 #if MIN_UPD_SIZE > 1
190 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
191 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
192 #else
193 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
194 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
195 #endif
196
197 #if MIN_UPD_SIZE > 2
198 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
199 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
200 #else
201 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
202 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
203 #endif
204
205 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
206
207    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
208
209 #if defined(THREADED_RTS)
210     foreign "C" ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
211 #endif
212
213    x = StgMutVar_var(R1);
214
215    TICK_ALLOC_THUNK_2();
216    CCCS_ALLOC(THUNK_2_SIZE);
217    z = Hp - THUNK_2_SIZE + WDS(1);
218    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
219    LDV_RECORD_CREATE(z);
220    StgThunk_payload(z,0) = R2;
221    StgThunk_payload(z,1) = x;
222
223    TICK_ALLOC_THUNK_1();
224    CCCS_ALLOC(THUNK_1_SIZE);
225    y = z - THUNK_1_SIZE;
226    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
227    LDV_RECORD_CREATE(y);
228    StgThunk_payload(y,0) = z;
229
230    StgMutVar_var(R1) = y;
231    foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];
232
233    TICK_ALLOC_THUNK_1();
234    CCCS_ALLOC(THUNK_1_SIZE);
235    r = y - THUNK_1_SIZE;
236    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
237    LDV_RECORD_CREATE(r);
238    StgThunk_payload(r,0) = z;
239
240 #if defined(THREADED_RTS)
241     foreign "C" RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
242 #endif
243
244    RET_P(r);
245 }
246
247 /* -----------------------------------------------------------------------------
248    Weak Pointer Primitives
249    -------------------------------------------------------------------------- */
250
251 STRING(stg_weak_msg,"New weak pointer at %p\n")
252
253 mkWeakzh_fast
254 {
255   /* R1 = key
256      R2 = value
257      R3 = finalizer (or NULL)
258   */
259   W_ w;
260
261   if (R3 == NULL) {
262     R3 = stg_NO_FINALIZER_closure;
263   }
264
265   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
266
267   w = Hp - SIZEOF_StgWeak + WDS(1);
268   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
269
270   StgWeak_key(w)       = R1;
271   StgWeak_value(w)     = R2;
272   StgWeak_finalizer(w) = R3;
273
274   StgWeak_link(w)       = W_[weak_ptr_list];
275   W_[weak_ptr_list]     = w;
276
277   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
278
279   RET_P(w);
280 }
281
282
283 finalizzeWeakzh_fast
284 {
285   /* R1 = weak ptr
286    */
287   W_ w, f;
288
289   w = R1;
290
291   // already dead?
292   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
293       RET_NP(0,stg_NO_FINALIZER_closure);
294   }
295
296   // kill it
297 #ifdef PROFILING
298   // @LDV profiling
299   // A weak pointer is inherently used, so we do not need to call
300   // LDV_recordDead_FILL_SLOP_DYNAMIC():
301   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
302   // or, LDV_recordDead():
303   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
304   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
305   // large as weak pointers, so there is no need to fill the slop, either.
306   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
307 #endif
308
309   //
310   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
311   //
312   SET_INFO(w,stg_DEAD_WEAK_info);
313   LDV_RECORD_CREATE(w);
314
315   f = StgWeak_finalizer(w);
316   StgDeadWeak_link(w) = StgWeak_link(w);
317
318   /* return the finalizer */
319   if (f == stg_NO_FINALIZER_closure) {
320       RET_NP(0,stg_NO_FINALIZER_closure);
321   } else {
322       RET_NP(1,f);
323   }
324 }
325
326 deRefWeakzh_fast
327 {
328   /* R1 = weak ptr */
329   W_ w, code, val;
330
331   w = R1;
332   if (GET_INFO(w) == stg_WEAK_info) {
333     code = 1;
334     val = StgWeak_value(w);
335   } else {
336     code = 0;
337     val = w;
338   }
339   RET_NP(code,val);
340 }
341
342 /* -----------------------------------------------------------------------------
343    Arbitrary-precision Integer operations.
344
345    There are some assumptions in this code that mp_limb_t == W_.  This is
346    the case for all the platforms that GHC supports, currently.
347    -------------------------------------------------------------------------- */
348
349 int2Integerzh_fast
350 {
351    /* arguments: R1 = Int# */
352
353    W_ val, s, p;        /* to avoid aliasing */
354
355    val = R1;
356    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
357
358    p = Hp - SIZEOF_StgArrWords;
359    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
360    StgArrWords_words(p) = 1;
361
362    /* mpz_set_si is inlined here, makes things simpler */
363    if (%lt(val,0)) { 
364         s  = -1;
365         Hp(0) = -val;
366    } else { 
367      if (%gt(val,0)) {
368         s = 1;
369         Hp(0) = val;
370      } else {
371         s = 0;
372      }
373   }
374
375    /* returns (# size  :: Int#, 
376                  data  :: ByteArray# 
377                #)
378    */
379    RET_NP(s,p);
380 }
381
382 word2Integerzh_fast
383 {
384    /* arguments: R1 = Word# */
385
386    W_ val, s, p;        /* to avoid aliasing */
387
388    val = R1;
389
390    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
391
392    p = Hp - SIZEOF_StgArrWords;
393    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
394    StgArrWords_words(p) = 1;
395
396    if (val != 0) {
397         s = 1;
398         W_[Hp] = val;
399    } else {
400         s = 0;
401    }
402
403    /* returns (# size  :: Int#, 
404                  data  :: ByteArray# #)
405    */
406    RET_NP(s,p);
407 }
408
409
410 /*
411  * 'long long' primops for converting to/from Integers.
412  */
413
414 #ifdef SUPPORT_LONG_LONGS
415
416 int64ToIntegerzh_fast
417 {
418    /* arguments: L1 = Int64# */
419
420    L_ val;
421    W_ hi, lo, s, neg, words_needed, p;
422
423    val = L1;
424    neg = 0;
425
426    hi = TO_W_(val >> 32);
427    lo = TO_W_(val);
428
429    if ( hi != 0 && hi != 0xFFFFFFFF )  { 
430        words_needed = 2;
431    } else { 
432        // minimum is one word
433        words_needed = 1;
434    }
435
436    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
437                NO_PTRS, int64ToIntegerzh_fast );
438
439    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
440    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
441    StgArrWords_words(p) = words_needed;
442
443    if ( %lt(hi,0) ) {
444      neg = 1;
445      lo = -lo;
446      if(lo == 0) {
447        hi = -hi;
448      } else {
449        hi = -hi - 1;
450      }
451    }
452
453    if ( words_needed == 2 )  { 
454       s = 2;
455       Hp(-1) = lo;
456       Hp(0) = hi;
457    } else { 
458        if ( lo != 0 ) {
459            s = 1;
460            Hp(0) = lo;
461        } else /* val==0 */  {
462            s = 0;
463        }
464    }
465    if ( neg != 0 ) {
466         s = -s;
467    }
468
469    /* returns (# size  :: Int#, 
470                  data  :: ByteArray# #)
471    */
472    RET_NP(s,p);
473 }
474 word64ToIntegerzh_fast
475 {
476    /* arguments: L1 = Word64# */
477
478    L_ val;
479    W_ hi, lo, s, words_needed, p;
480
481    val = L1;
482    hi = TO_W_(val >> 32);
483    lo = TO_W_(val);
484
485    if ( hi != 0 ) {
486       words_needed = 2;
487    } else {
488       words_needed = 1;
489    }
490
491    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
492                NO_PTRS, word64ToIntegerzh_fast );
493
494    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
495    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
496    StgArrWords_words(p) = words_needed;
497
498    if ( hi != 0 ) { 
499      s = 2;
500      Hp(-1) = lo;
501      Hp(0)  = hi;
502    } else {
503       if ( lo != 0 ) {
504         s = 1;
505         Hp(0) = lo;
506      } else /* val==0 */  {
507       s = 0;
508      }
509   }
510
511    /* returns (# size  :: Int#, 
512                  data  :: ByteArray# #)
513    */
514    RET_NP(s,p);
515 }
516
517
518
519 #endif /* SUPPORT_LONG_LONGS */
520
521 /* ToDo: this is shockingly inefficient */
522
523 #ifndef THREADED_RTS
524 section "bss" {
525   mp_tmp1:
526     bits8 [SIZEOF_MP_INT];
527 }
528
529 section "bss" {
530   mp_tmp2:
531     bits8 [SIZEOF_MP_INT];
532 }
533
534 section "bss" {
535   mp_result1:
536     bits8 [SIZEOF_MP_INT];
537 }
538
539 section "bss" {
540   mp_result2:
541     bits8 [SIZEOF_MP_INT];
542 }
543 #endif
544
545 #ifdef THREADED_RTS
546 #define FETCH_MP_TEMP(X) \
547 W_ X; \
548 X = BaseReg + (OFFSET_StgRegTable_r ## X);
549 #else
550 #define FETCH_MP_TEMP(X) /* Nothing */
551 #endif
552
553 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
554 name                                                                    \
555 {                                                                       \
556   CInt s1, s2;                                                          \
557   W_ d1, d2;                                                            \
558   FETCH_MP_TEMP(mp_tmp1);                                               \
559   FETCH_MP_TEMP(mp_tmp2);                                               \
560   FETCH_MP_TEMP(mp_result1)                                             \
561   FETCH_MP_TEMP(mp_result2);                                            \
562                                                                         \
563   /* call doYouWantToGC() */                                            \
564   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
565                                                                         \
566   s1 = W_TO_INT(R1);                                                    \
567   d1 = R2;                                                              \
568   s2 = W_TO_INT(R3);                                                    \
569   d2 = R4;                                                              \
570                                                                         \
571   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
572   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
573   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
574   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
575   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
576   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
577                                                                         \
578   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
579                                                                         \
580   /* Perform the operation */                                           \
581   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
582                                                                         \
583   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
584          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
585 }
586
587 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
588 name                                                                    \
589 {                                                                       \
590   CInt s1;                                                              \
591   W_ d1;                                                                \
592   FETCH_MP_TEMP(mp_tmp1);                                               \
593   FETCH_MP_TEMP(mp_result1)                                             \
594                                                                         \
595   /* call doYouWantToGC() */                                            \
596   MAYBE_GC(R2_PTR, name);                                               \
597                                                                         \
598   d1 = R2;                                                              \
599   s1 = W_TO_INT(R1);                                                    \
600                                                                         \
601   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
602   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
603   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
604                                                                         \
605   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
606                                                                         \
607   /* Perform the operation */                                           \
608   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
609                                                                         \
610   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
611          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
612 }
613
614 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
615 name                                                                                    \
616 {                                                                                       \
617   CInt s1, s2;                                                                          \
618   W_ d1, d2;                                                                            \
619   FETCH_MP_TEMP(mp_tmp1);                                                               \
620   FETCH_MP_TEMP(mp_tmp2);                                                               \
621   FETCH_MP_TEMP(mp_result1)                                                             \
622   FETCH_MP_TEMP(mp_result2)                                                             \
623                                                                                         \
624   /* call doYouWantToGC() */                                                            \
625   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
626                                                                                         \
627   s1 = W_TO_INT(R1);                                                                    \
628   d1 = R2;                                                                              \
629   s2 = W_TO_INT(R3);                                                                    \
630   d2 = R4;                                                                              \
631                                                                                         \
632   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
633   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
634   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
635   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
636   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
637   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
638                                                                                         \
639   foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
640   foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
641                                                                                         \
642   /* Perform the operation */                                                           \
643   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
644                                                                                         \
645   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
646            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
647            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
648            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
649 }
650
651 GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
652 GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
653 GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
654 GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
655 GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
656 GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
657 GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
658 GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
659 GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
660 GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
661 GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)
662
663 GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
664 GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
665
666 #ifndef THREADED_RTS
667 section "bss" {
668   mp_tmp_w:  W_; // NB. mp_tmp_w is really an here mp_limb_t
669 }
670 #endif
671
672 gcdIntzh_fast
673 {
674     /* R1 = the first Int#; R2 = the second Int# */
675     W_ r; 
676     FETCH_MP_TEMP(mp_tmp_w);
677
678     W_[mp_tmp_w] = R1;
679     (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
680
681     R1 = r;
682     /* Result parked in R1, return via info-pointer at TOS */
683     jump %ENTRY_CODE(Sp(0));
684 }
685
686
687 gcdIntegerIntzh_fast
688 {
689     /* R1 = s1; R2 = d1; R3 = the int */
690     W_ s1;
691     (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
692     R1 = s1;
693     
694     /* Result parked in R1, return via info-pointer at TOS */
695     jump %ENTRY_CODE(Sp(0));
696 }
697
698
699 cmpIntegerIntzh_fast
700 {
701     /* R1 = s1; R2 = d1; R3 = the int */
702     W_ usize, vsize, v_digit, u_digit;
703
704     usize = R1;
705     vsize = 0;
706     v_digit = R3;
707
708     // paraphrased from __gmpz_cmp_si() in the GMP sources
709     if (%gt(v_digit,0)) {
710         vsize = 1;
711     } else { 
712         if (%lt(v_digit,0)) {
713             vsize = -1;
714             v_digit = -v_digit;
715         }
716     }
717
718     if (usize != vsize) {
719         R1 = usize - vsize; 
720         jump %ENTRY_CODE(Sp(0));
721     }
722
723     if (usize == 0) {
724         R1 = 0; 
725         jump %ENTRY_CODE(Sp(0));
726     }
727
728     u_digit = W_[BYTE_ARR_CTS(R2)];
729
730     if (u_digit == v_digit) {
731         R1 = 0; 
732         jump %ENTRY_CODE(Sp(0));
733     }
734
735     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
736         R1 = usize; 
737     } else {
738         R1 = -usize; 
739     }
740
741     jump %ENTRY_CODE(Sp(0));
742 }
743
744 cmpIntegerzh_fast
745 {
746     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
747     W_ usize, vsize, size, up, vp;
748     CInt cmp;
749
750     // paraphrased from __gmpz_cmp() in the GMP sources
751     usize = R1;
752     vsize = R3;
753
754     if (usize != vsize) {
755         R1 = usize - vsize; 
756         jump %ENTRY_CODE(Sp(0));
757     }
758
759     if (usize == 0) {
760         R1 = 0; 
761         jump %ENTRY_CODE(Sp(0));
762     }
763
764     if (%lt(usize,0)) { // NB. not <, which is unsigned
765         size = -usize;
766     } else {
767         size = usize;
768     }
769
770     up = BYTE_ARR_CTS(R2);
771     vp = BYTE_ARR_CTS(R4);
772
773     (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];
774
775     if (cmp == 0 :: CInt) {
776         R1 = 0; 
777         jump %ENTRY_CODE(Sp(0));
778     }
779
780     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
781         R1 = 1;
782     } else {
783         R1 = (-1); 
784     }
785     /* Result parked in R1, return via info-pointer at TOS */
786     jump %ENTRY_CODE(Sp(0));
787 }
788
789 integer2Intzh_fast
790 {
791     /* R1 = s; R2 = d */
792     W_ r, s;
793
794     s = R1;
795     if (s == 0) {
796         r = 0;
797     } else {
798         r = W_[R2 + SIZEOF_StgArrWords];
799         if (%lt(s,0)) {
800             r = -r;
801         }
802     }
803     /* Result parked in R1, return via info-pointer at TOS */
804     R1 = r;
805     jump %ENTRY_CODE(Sp(0));
806 }
807
808 integer2Wordzh_fast
809 {
810   /* R1 = s; R2 = d */
811   W_ r, s;
812
813   s = R1;
814   if (s == 0) {
815     r = 0;
816   } else {
817     r = W_[R2 + SIZEOF_StgArrWords];
818     if (%lt(s,0)) {
819         r = -r;
820     }
821   }
822   /* Result parked in R1, return via info-pointer at TOS */
823   R1 = r;
824   jump %ENTRY_CODE(Sp(0));
825 }
826
827 decodeFloatzh_fast
828
829     W_ p;
830     F_ arg;
831     FETCH_MP_TEMP(mp_tmp1);
832     FETCH_MP_TEMP(mp_tmp_w);
833     
834     /* arguments: F1 = Float# */
835     arg = F1;
836     
837     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
838     
839     /* Be prepared to tell Lennart-coded __decodeFloat
840        where mantissa._mp_d can be put (it does not care about the rest) */
841     p = Hp - SIZEOF_StgArrWords;
842     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
843     StgArrWords_words(p) = 1;
844     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
845     
846     /* Perform the operation */
847     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
848     
849     /* returns: (Int# (expn), Int#, ByteArray#) */
850     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
851 }
852
853 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
854 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
855
856 decodeDoublezh_fast
857
858     D_ arg;
859     W_ p;
860     FETCH_MP_TEMP(mp_tmp1);
861     FETCH_MP_TEMP(mp_tmp_w);
862
863     /* arguments: D1 = Double# */
864     arg = D1;
865
866     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
867     
868     /* Be prepared to tell Lennart-coded __decodeDouble
869        where mantissa.d can be put (it does not care about the rest) */
870     p = Hp - ARR_SIZE + WDS(1);
871     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
872     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
873     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
874
875     /* Perform the operation */
876     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
877     
878     /* returns: (Int# (expn), Int#, ByteArray#) */
879     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
880 }
881
882 /* -----------------------------------------------------------------------------
883  * Concurrency primitives
884  * -------------------------------------------------------------------------- */
885
886 forkzh_fast
887 {
888   /* args: R1 = closure to spark */
889
890   MAYBE_GC(R1_PTR, forkzh_fast);
891
892   W_ closure;
893   W_ threadid;
894   closure = R1;
895
896   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
897                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
898                                 closure "ptr") [];
899   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
900
901   // switch at the earliest opportunity
902   CInt[context_switch] = 1 :: CInt;
903   
904   RET_P(threadid);
905 }
906
907 forkOnzh_fast
908 {
909   /* args: R1 = cpu, R2 = closure to spark */
910
911   MAYBE_GC(R2_PTR, forkOnzh_fast);
912
913   W_ cpu;
914   W_ closure;
915   W_ threadid;
916   cpu = R1;
917   closure = R2;
918
919   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
920                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
921                                 closure "ptr") [];
922   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
923
924   // switch at the earliest opportunity
925   CInt[context_switch] = 1 :: CInt;
926   
927   RET_P(threadid);
928 }
929
930 yieldzh_fast
931 {
932   jump stg_yield_noregs;
933 }
934
935 myThreadIdzh_fast
936 {
937   /* no args. */
938   RET_P(CurrentTSO);
939 }
940
941 labelThreadzh_fast
942 {
943   /* args: 
944         R1 = ThreadId#
945         R2 = Addr# */
946 #ifdef DEBUG
947   foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
948 #endif
949   jump %ENTRY_CODE(Sp(0));
950 }
951
952 isCurrentThreadBoundzh_fast
953 {
954   /* no args */
955   W_ r;
956   (r) = foreign "C" isThreadBound(CurrentTSO) [];
957   RET_N(r);
958 }
959
960
961 /* -----------------------------------------------------------------------------
962  * TVar primitives
963  * -------------------------------------------------------------------------- */
964
965 #ifdef REG_R1
966 #define SP_OFF 0
967 #define IF_NOT_REG_R1(x) 
968 #else
969 #define SP_OFF 1
970 #define IF_NOT_REG_R1(x) x
971 #endif
972
973 // Catch retry frame ------------------------------------------------------------
974
975 INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
976 #if defined(PROFILING)
977   W_ unused1, W_ unused2,
978 #endif
979   W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5)
980 {
981    W_ r, frame, trec, outer;
982    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
983
984    frame = Sp;
985    trec = StgTSO_trec(CurrentTSO);
986    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
987    (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
988    if (r != 0) {
989      /* Succeeded (either first branch or second branch) */
990      StgTSO_trec(CurrentTSO) = outer;
991      Sp = Sp + SIZEOF_StgCatchRetryFrame;
992      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
993      jump %ENTRY_CODE(Sp(SP_OFF));
994    } else {
995      /* Did not commit: re-execute */
996      W_ new_trec;
997      ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
998      StgTSO_trec(CurrentTSO) = new_trec;
999      if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1000        R1 = StgCatchRetryFrame_alt_code(frame);
1001      } else {
1002        R1 = StgCatchRetryFrame_first_code(frame);
1003      }
1004      jump stg_ap_v_fast;
1005    }
1006 }
1007
1008
1009 // Atomically frame ------------------------------------------------------------
1010
1011 INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
1012 #if defined(PROFILING)
1013   W_ unused1, W_ unused2,
1014 #endif
1015   "ptr" W_ unused3, "ptr" W_ unused4)
1016 {
1017   W_ frame, trec, valid, next_invariant, q, outer;
1018   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1019
1020   frame = Sp;
1021   trec = StgTSO_trec(CurrentTSO);
1022   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1023
1024   if (outer == NO_TREC) {
1025     /* First time back at the atomically frame -- pick up invariants */
1026     ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
1027     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
1028
1029   } else {
1030     /* Second/subsequent time back at the atomically frame -- abort the
1031      * tx that's checking the invariant and move on to the next one */
1032     StgTSO_trec(CurrentTSO) = outer;
1033     q = StgAtomicallyFrame_next_invariant_to_check(frame);
1034     StgInvariantCheckQueue_my_execution(q) = trec;
1035     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1036     /* Don't free trec -- it's linked from q and will be stashed in the
1037      * invariant if we eventually commit. */
1038     q = StgInvariantCheckQueue_next_queue_entry(q);
1039     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
1040     trec = outer;
1041   }
1042
1043   q = StgAtomicallyFrame_next_invariant_to_check(frame);
1044
1045   if (q != END_INVARIANT_CHECK_QUEUE) {
1046     /* We can't commit yet: another invariant to check */
1047     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
1048     StgTSO_trec(CurrentTSO) = trec;
1049
1050     next_invariant = StgInvariantCheckQueue_invariant(q);
1051     R1 = StgAtomicInvariant_code(next_invariant);
1052     jump stg_ap_v_fast;
1053
1054   } else {
1055
1056     /* We've got no more invariants to check, try to commit */
1057     (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
1058     if (valid != 0) {
1059       /* Transaction was valid: commit succeeded */
1060       StgTSO_trec(CurrentTSO) = NO_TREC;
1061       Sp = Sp + SIZEOF_StgAtomicallyFrame;
1062       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1063       jump %ENTRY_CODE(Sp(SP_OFF));
1064     } else {
1065       /* Transaction was not valid: try again */
1066       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1067       StgTSO_trec(CurrentTSO) = trec;
1068       StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
1069       R1 = StgAtomicallyFrame_code(frame);
1070       jump stg_ap_v_fast;
1071     }
1072   }
1073 }
1074
1075 INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
1076 #if defined(PROFILING)
1077   W_ unused1, W_ unused2,
1078 #endif
1079   "ptr" W_ unused3, "ptr" W_ unused4)
1080 {
1081   W_ frame, trec, valid;
1082   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1083
1084   frame = Sp;
1085
1086   /* The TSO is currently waiting: should we stop waiting? */
1087   (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
1088   if (valid != 0) {
1089     /* Previous attempt is still valid: no point trying again yet */
1090           IF_NOT_REG_R1(Sp_adj(-2);
1091                         Sp(1) = stg_NO_FINALIZER_closure;
1092                         Sp(0) = stg_ut_1_0_unreg_info;)
1093     jump stg_block_noregs;
1094   } else {
1095     /* Previous attempt is no longer valid: try again */
1096     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1097     StgTSO_trec(CurrentTSO) = trec;
1098     StgHeader_info(frame) = stg_atomically_frame_info;
1099     R1 = StgAtomicallyFrame_code(frame);
1100     jump stg_ap_v_fast;
1101   }
1102 }
1103
1104 // STM catch frame --------------------------------------------------------------
1105
1106 #ifdef REG_R1
1107 #define SP_OFF 0
1108 #else
1109 #define SP_OFF 1
1110 #endif
1111
1112 /* Catch frames are very similar to update frames, but when entering
1113  * one we just pop the frame off the stack and perform the correct
1114  * kind of return to the activation record underneath us on the stack.
1115  */
1116
1117 INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
1118 #if defined(PROFILING)
1119   W_ unused1, W_ unused2,
1120 #endif
1121   "ptr" W_ unused3, "ptr" W_ unused4)
1122    {
1123       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1124       W_ r, frame, trec, outer;
1125       frame = Sp;
1126       trec = StgTSO_trec(CurrentTSO);
1127       ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1128       (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
1129       if (r != 0) {
1130         /* Commit succeeded */
1131         StgTSO_trec(CurrentTSO) = outer;
1132         Sp = Sp + SIZEOF_StgCatchSTMFrame;
1133         IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1134         jump Sp(SP_OFF);
1135       } else {
1136         /* Commit failed */
1137         W_ new_trec;
1138         ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1139         StgTSO_trec(CurrentTSO) = new_trec;
1140         R1 = StgCatchSTMFrame_code(frame);
1141         jump stg_ap_v_fast;
1142       }
1143    }
1144
1145
1146 // Primop definition ------------------------------------------------------------
1147
1148 atomicallyzh_fast
1149 {
1150   W_ frame;
1151   W_ old_trec;
1152   W_ new_trec;
1153   
1154   // stmStartTransaction may allocate
1155   MAYBE_GC (R1_PTR, atomicallyzh_fast); 
1156
1157   /* Args: R1 = m :: STM a */
1158   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1159
1160   old_trec = StgTSO_trec(CurrentTSO);
1161
1162   /* Nested transactions are not allowed; raise an exception */
1163   if (old_trec != NO_TREC) {
1164      R1 = base_GHCziIOBase_NestedAtomically_closure;
1165      jump raisezh_fast;
1166   }
1167
1168   /* Set up the atomically frame */
1169   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1170   frame = Sp;
1171
1172   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1173   StgAtomicallyFrame_code(frame) = R1;
1174   StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
1175
1176   /* Start the memory transcation */
1177   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
1178   StgTSO_trec(CurrentTSO) = new_trec;
1179
1180   /* Apply R1 to the realworld token */
1181   jump stg_ap_v_fast;
1182 }
1183
1184
1185 catchSTMzh_fast
1186 {
1187   W_ frame;
1188   
1189   /* Args: R1 :: STM a */
1190   /* Args: R2 :: Exception -> STM a */
1191   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1192
1193   /* Set up the catch frame */
1194   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1195   frame = Sp;
1196
1197   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1198   StgCatchSTMFrame_handler(frame) = R2;
1199   StgCatchSTMFrame_code(frame) = R1;
1200
1201   /* Start a nested transaction to run the body of the try block in */
1202   W_ cur_trec;  
1203   W_ new_trec;
1204   cur_trec = StgTSO_trec(CurrentTSO);
1205   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
1206   StgTSO_trec(CurrentTSO) = new_trec;
1207
1208   /* Apply R1 to the realworld token */
1209   jump stg_ap_v_fast;
1210 }
1211
1212
1213 catchRetryzh_fast
1214 {
1215   W_ frame;
1216   W_ new_trec;
1217   W_ trec;
1218
1219   // stmStartTransaction may allocate
1220   MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 
1221
1222   /* Args: R1 :: STM a */
1223   /* Args: R2 :: STM a */
1224   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1225
1226   /* Start a nested transaction within which to run the first code */
1227   trec = StgTSO_trec(CurrentTSO);
1228   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
1229   StgTSO_trec(CurrentTSO) = new_trec;
1230
1231   /* Set up the catch-retry frame */
1232   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1233   frame = Sp;
1234   
1235   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1236   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1237   StgCatchRetryFrame_first_code(frame) = R1;
1238   StgCatchRetryFrame_alt_code(frame) = R2;
1239
1240   /* Apply R1 to the realworld token */
1241   jump stg_ap_v_fast;
1242 }
1243
1244
1245 retryzh_fast
1246 {
1247   W_ frame_type;
1248   W_ frame;
1249   W_ trec;
1250   W_ outer;
1251   W_ r;
1252
1253   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1254
1255   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1256 retry_pop_stack:
1257   StgTSO_sp(CurrentTSO) = Sp;
1258   (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
1259   Sp = StgTSO_sp(CurrentTSO);
1260   frame = Sp;
1261   trec = StgTSO_trec(CurrentTSO);
1262   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1263
1264   if (frame_type == CATCH_RETRY_FRAME) {
1265     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1266     ASSERT(outer != NO_TREC);
1267     // Abort the transaction attempting the current branch
1268     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1269     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
1270     if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1271       // Retry in the first branch: try the alternative
1272       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1273       StgTSO_trec(CurrentTSO) = trec;
1274       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1275       R1 = StgCatchRetryFrame_alt_code(frame);
1276       jump stg_ap_v_fast;
1277     } else {
1278       // Retry in the alternative code: propagate the retry
1279       StgTSO_trec(CurrentTSO) = outer;
1280       Sp = Sp + SIZEOF_StgCatchRetryFrame;
1281       goto retry_pop_stack;
1282     }
1283   }
1284
1285   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1286   ASSERT(frame_type == ATOMICALLY_FRAME);
1287   if (outer != NO_TREC) {
1288     // We called retry while checking invariants, so abort the current
1289     // invariant check (merging its TVar accesses into the parents read
1290     // set so we'll wait on them)
1291     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1292     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
1293     trec = outer;
1294     StgTSO_trec(CurrentTSO) = trec;
1295     ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1296   }
1297   ASSERT(outer == NO_TREC);
1298
1299   (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
1300   if (r != 0) {
1301     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1302     StgHeader_info(frame) = stg_atomically_waiting_frame_info;
1303     Sp = frame;
1304     // Fix up the stack in the unregisterised case: the return convention is different.
1305     IF_NOT_REG_R1(Sp_adj(-2); 
1306                   Sp(1) = stg_NO_FINALIZER_closure;
1307                   Sp(0) = stg_ut_1_0_unreg_info;)
1308     R3 = trec; // passing to stmWaitUnblock()
1309     jump stg_block_stmwait;
1310   } else {
1311     // Transaction was not valid: retry immediately
1312     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1313     StgTSO_trec(CurrentTSO) = trec;
1314     R1 = StgAtomicallyFrame_code(frame);
1315     Sp = frame;
1316     jump stg_ap_v_fast;
1317   }
1318 }
1319
1320
1321 checkzh_fast
1322 {
1323   W_ trec, closure;
1324
1325   /* Args: R1 = invariant closure */
1326   MAYBE_GC (R1_PTR, checkzh_fast); 
1327
1328   trec = StgTSO_trec(CurrentTSO);
1329   closure = R1;
1330   foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
1331                                      trec "ptr",
1332                                      closure "ptr") [];
1333
1334   jump %ENTRY_CODE(Sp(0));
1335 }
1336
1337
1338 newTVarzh_fast
1339 {
1340   W_ tv;
1341   W_ new_value;
1342
1343   /* Args: R1 = initialisation value */
1344
1345   MAYBE_GC (R1_PTR, newTVarzh_fast); 
1346   new_value = R1;
1347   ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
1348   RET_P(tv);
1349 }
1350
1351
1352 readTVarzh_fast
1353 {
1354   W_ trec;
1355   W_ tvar;
1356   W_ result;
1357
1358   /* Args: R1 = TVar closure */
1359
1360   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1361   trec = StgTSO_trec(CurrentTSO);
1362   tvar = R1;
1363   ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
1364
1365   RET_P(result);
1366 }
1367
1368
1369 writeTVarzh_fast
1370 {
1371   W_ trec;
1372   W_ tvar;
1373   W_ new_value;
1374   
1375   /* Args: R1 = TVar closure */
1376   /*       R2 = New value    */
1377
1378   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1379   trec = StgTSO_trec(CurrentTSO);
1380   tvar = R1;
1381   new_value = R2;
1382   foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1383
1384   jump %ENTRY_CODE(Sp(0));
1385 }
1386
1387
1388 /* -----------------------------------------------------------------------------
1389  * MVar primitives
1390  *
1391  * take & putMVar work as follows.  Firstly, an important invariant:
1392  *
1393  *    If the MVar is full, then the blocking queue contains only
1394  *    threads blocked on putMVar, and if the MVar is empty then the
1395  *    blocking queue contains only threads blocked on takeMVar.
1396  *
1397  * takeMvar:
1398  *    MVar empty : then add ourselves to the blocking queue
1399  *    MVar full  : remove the value from the MVar, and
1400  *                 blocking queue empty     : return
1401  *                 blocking queue non-empty : perform the first blocked putMVar
1402  *                                            from the queue, and wake up the
1403  *                                            thread (MVar is now full again)
1404  *
1405  * putMVar is just the dual of the above algorithm.
1406  *
1407  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1408  * the stack of the thread waiting to do the putMVar.  See
1409  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1410  * the stack layout, and the PerformPut and PerformTake macros below.
1411  *
1412  * It is important that a blocked take or put is woken up with the
1413  * take/put already performed, because otherwise there would be a
1414  * small window of vulnerability where the thread could receive an
1415  * exception and never perform its take or put, and we'd end up with a
1416  * deadlock.
1417  *
1418  * -------------------------------------------------------------------------- */
1419
1420 isEmptyMVarzh_fast
1421 {
1422     /* args: R1 = MVar closure */
1423
1424     if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
1425         RET_N(1);
1426     } else {
1427         RET_N(0);
1428     }
1429 }
1430
1431 newMVarzh_fast
1432 {
1433     /* args: none */
1434     W_ mvar;
1435
1436     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1437   
1438     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1439     SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
1440     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1441     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1442     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1443     RET_P(mvar);
1444 }
1445
1446
1447 /* If R1 isn't available, pass it on the stack */
1448 #ifdef REG_R1
1449 #define PerformTake(tso, value)                         \
1450     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1451     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1452 #else
1453 #define PerformTake(tso, value)                                 \
1454     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1455     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1456 #endif
1457
1458 #define PerformPut(tso,lval)                    \
1459     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1460     lval = W_[StgTSO_sp(tso) - WDS(1)];
1461
1462 takeMVarzh_fast
1463 {
1464     W_ mvar, val, info, tso;
1465
1466     /* args: R1 = MVar closure */
1467     mvar = R1;
1468
1469 #if defined(THREADED_RTS)
1470     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1471 #else
1472     info = GET_INFO(mvar);
1473 #endif
1474
1475     /* If the MVar is empty, put ourselves on its blocking queue,
1476      * and wait until we're woken up.
1477      */
1478     if (info == stg_EMPTY_MVAR_info) {
1479         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1480             StgMVar_head(mvar) = CurrentTSO;
1481         } else {
1482             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1483         }
1484         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1485         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1486         StgTSO_block_info(CurrentTSO)  = mvar;
1487         StgMVar_tail(mvar) = CurrentTSO;
1488         
1489         jump stg_block_takemvar;
1490   }
1491
1492   /* we got the value... */
1493   val = StgMVar_value(mvar);
1494
1495   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1496   {
1497       /* There are putMVar(s) waiting... 
1498        * wake up the first thread on the queue
1499        */
1500       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1501
1502       /* actually perform the putMVar for the thread that we just woke up */
1503       tso = StgMVar_head(mvar);
1504       PerformPut(tso,StgMVar_value(mvar));
1505       dirtyTSO(tso);
1506
1507 #if defined(GRAN) || defined(PAR)
1508       /* ToDo: check 2nd arg (mvar) is right */
1509       ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
1510       StgMVar_head(mvar) = tso;
1511 #else
1512       ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", 
1513                                          StgMVar_head(mvar) "ptr") [];
1514       StgMVar_head(mvar) = tso;
1515 #endif
1516
1517       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1518           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1519       }
1520
1521 #if defined(THREADED_RTS)
1522       unlockClosure(mvar, stg_FULL_MVAR_info);
1523 #endif
1524       RET_P(val);
1525   } 
1526   else
1527   {
1528       /* No further putMVars, MVar is now empty */
1529       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1530  
1531 #if defined(THREADED_RTS)
1532       unlockClosure(mvar, stg_EMPTY_MVAR_info);
1533 #else
1534       SET_INFO(mvar,stg_EMPTY_MVAR_info);
1535 #endif
1536
1537       RET_P(val);
1538   }
1539 }
1540
1541
1542 tryTakeMVarzh_fast
1543 {
1544     W_ mvar, val, info, tso;
1545
1546     /* args: R1 = MVar closure */
1547
1548     mvar = R1;
1549
1550 #if defined(THREADED_RTS)
1551     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1552 #else
1553     info = GET_INFO(mvar);
1554 #endif
1555
1556     if (info == stg_EMPTY_MVAR_info) {
1557 #if defined(THREADED_RTS)
1558         unlockClosure(mvar, stg_EMPTY_MVAR_info);
1559 #endif
1560         /* HACK: we need a pointer to pass back, 
1561          * so we abuse NO_FINALIZER_closure
1562          */
1563         RET_NP(0, stg_NO_FINALIZER_closure);
1564     }
1565
1566     /* we got the value... */
1567     val = StgMVar_value(mvar);
1568
1569     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1570
1571         /* There are putMVar(s) waiting... 
1572          * wake up the first thread on the queue
1573          */
1574         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1575
1576         /* actually perform the putMVar for the thread that we just woke up */
1577         tso = StgMVar_head(mvar);
1578         PerformPut(tso,StgMVar_value(mvar));
1579         dirtyTSO(tso);
1580
1581 #if defined(GRAN) || defined(PAR)
1582         /* ToDo: check 2nd arg (mvar) is right */
1583         ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
1584         StgMVar_head(mvar) = tso;
1585 #else
1586         ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr",
1587                                            StgMVar_head(mvar) "ptr") [];
1588         StgMVar_head(mvar) = tso;
1589 #endif
1590
1591         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1592             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1593         }
1594 #if defined(THREADED_RTS)
1595         unlockClosure(mvar, stg_FULL_MVAR_info);
1596 #endif
1597     }
1598     else 
1599     {
1600         /* No further putMVars, MVar is now empty */
1601         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1602 #if defined(THREADED_RTS)
1603         unlockClosure(mvar, stg_EMPTY_MVAR_info);
1604 #else
1605         SET_INFO(mvar,stg_EMPTY_MVAR_info);
1606 #endif
1607     }
1608     
1609     RET_NP(1, val);
1610 }
1611
1612
1613 putMVarzh_fast
1614 {
1615     W_ mvar, info, tso;
1616
1617     /* args: R1 = MVar, R2 = value */
1618     mvar = R1;
1619
1620 #if defined(THREADED_RTS)
1621     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
1622 #else
1623     info = GET_INFO(mvar);
1624 #endif
1625
1626     if (info == stg_FULL_MVAR_info) {
1627         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1628             StgMVar_head(mvar) = CurrentTSO;
1629         } else {
1630             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1631         }
1632         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1633         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1634         StgTSO_block_info(CurrentTSO)  = mvar;
1635         StgMVar_tail(mvar) = CurrentTSO;
1636         
1637         jump stg_block_putmvar;
1638     }
1639   
1640     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1641
1642         /* There are takeMVar(s) waiting: wake up the first one
1643          */
1644         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1645
1646         /* actually perform the takeMVar */
1647         tso = StgMVar_head(mvar);
1648         PerformTake(tso, R2);
1649         dirtyTSO(tso);
1650       
1651 #if defined(GRAN) || defined(PAR)
1652         /* ToDo: check 2nd arg (mvar) is right */
1653         ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
1654         StgMVar_head(mvar) = tso;
1655 #else
1656         ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
1657         StgMVar_head(mvar) = tso;
1658 #endif
1659
1660         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1661             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1662         }
1663
1664 #if defined(THREADED_RTS)
1665         unlockClosure(mvar, stg_EMPTY_MVAR_info);
1666 #endif
1667         jump %ENTRY_CODE(Sp(0));
1668     }
1669     else
1670     {
1671         /* No further takes, the MVar is now full. */
1672         StgMVar_value(mvar) = R2;
1673
1674 #if defined(THREADED_RTS)
1675         unlockClosure(mvar, stg_FULL_MVAR_info);
1676 #else
1677         SET_INFO(mvar,stg_FULL_MVAR_info);
1678 #endif
1679         jump %ENTRY_CODE(Sp(0));
1680     }
1681     
1682     /* ToDo: yield afterward for better communication performance? */
1683 }
1684
1685
1686 tryPutMVarzh_fast
1687 {
1688     W_ mvar, info, tso;
1689
1690     /* args: R1 = MVar, R2 = value */
1691     mvar = R1;
1692
1693 #if defined(THREADED_RTS)
1694     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
1695 #else
1696     info = GET_INFO(mvar);
1697 #endif
1698
1699     if (info == stg_FULL_MVAR_info) {
1700 #if defined(THREADED_RTS)
1701         unlockClosure(mvar, stg_FULL_MVAR_info);
1702 #endif
1703         RET_N(0);
1704     }
1705   
1706     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1707
1708         /* There are takeMVar(s) waiting: wake up the first one
1709          */
1710         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1711         
1712         /* actually perform the takeMVar */
1713         tso = StgMVar_head(mvar);
1714         PerformTake(tso, R2);
1715         dirtyTSO(tso);
1716       
1717 #if defined(GRAN) || defined(PAR)
1718         /* ToDo: check 2nd arg (mvar) is right */
1719         ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
1720         StgMVar_head(mvar) = tso;
1721 #else
1722         ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
1723         StgMVar_head(mvar) = tso;
1724 #endif
1725
1726         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1727             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1728         }
1729
1730 #if defined(THREADED_RTS)
1731         unlockClosure(mvar, stg_EMPTY_MVAR_info);
1732 #endif
1733     }
1734     else
1735     {
1736         /* No further takes, the MVar is now full. */
1737         StgMVar_value(mvar) = R2;
1738
1739 #if defined(THREADED_RTS)
1740         unlockClosure(mvar, stg_FULL_MVAR_info);
1741 #else
1742         SET_INFO(mvar,stg_FULL_MVAR_info);
1743 #endif
1744     }
1745     
1746     RET_N(1);
1747     /* ToDo: yield afterward for better communication performance? */
1748 }
1749
1750
1751 /* -----------------------------------------------------------------------------
1752    Stable pointer primitives
1753    -------------------------------------------------------------------------  */
1754
1755 makeStableNamezh_fast
1756 {
1757     W_ index, sn_obj;
1758
1759     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1760   
1761     (index) = foreign "C" lookupStableName(R1 "ptr") [];
1762
1763     /* Is there already a StableName for this heap object?
1764      *  stable_ptr_table is a pointer to an array of snEntry structs.
1765      */
1766     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1767         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1768         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1769         StgStableName_sn(sn_obj) = index;
1770         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1771     } else {
1772         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1773     }
1774     
1775     RET_P(sn_obj);
1776 }
1777
1778
1779 makeStablePtrzh_fast
1780 {
1781     /* Args: R1 = a */
1782     W_ sp;
1783     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1784     ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
1785     RET_N(sp);
1786 }
1787
1788 deRefStablePtrzh_fast
1789 {
1790     /* Args: R1 = the stable ptr */
1791     W_ r, sp;
1792     sp = R1;
1793     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1794     RET_P(r);
1795 }
1796
1797 /* -----------------------------------------------------------------------------
1798    Bytecode object primitives
1799    -------------------------------------------------------------------------  */
1800
1801 newBCOzh_fast
1802 {
1803     /* R1 = instrs
1804        R2 = literals
1805        R3 = ptrs
1806        R4 = arity
1807        R5 = bitmap array
1808     */
1809     W_ bco, bitmap_arr, bytes, words;
1810     
1811     bitmap_arr = R5;
1812
1813     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1814     bytes = WDS(words);
1815
1816     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );
1817
1818     bco = Hp - bytes + WDS(1);
1819     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1820     
1821     StgBCO_instrs(bco)     = R1;
1822     StgBCO_literals(bco)   = R2;
1823     StgBCO_ptrs(bco)       = R3;
1824     StgBCO_arity(bco)      = HALF_W_(R4);
1825     StgBCO_size(bco)       = HALF_W_(words);
1826     
1827     // Copy the arity/bitmap info into the BCO
1828     W_ i;
1829     i = 0;
1830 for:
1831     if (i < StgArrWords_words(bitmap_arr)) {
1832         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1833         i = i + 1;
1834         goto for;
1835     }
1836     
1837     RET_P(bco);
1838 }
1839
1840
1841 mkApUpd0zh_fast
1842 {
1843     // R1 = the BCO# for the AP
1844     //  
1845     W_ ap;
1846
1847     // This function is *only* used to wrap zero-arity BCOs in an
1848     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1849     // saturated and always points directly to a FUN or BCO.
1850     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1851            StgBCO_arity(R1) == HALF_W_(0));
1852
1853     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1854     TICK_ALLOC_UP_THK(0, 0);
1855     CCCS_ALLOC(SIZEOF_StgAP);
1856
1857     ap = Hp - SIZEOF_StgAP + WDS(1);
1858     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1859     
1860     StgAP_n_args(ap) = HALF_W_(0);
1861     StgAP_fun(ap) = R1;
1862     
1863     RET_P(ap);
1864 }
1865
1866 unpackClosurezh_fast
1867 {
1868 /* args: R1 = closure to analyze */
1869 // TODO: Consider the absence of ptrs or nonptrs as a special case ?
1870
1871     W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
1872     info  = %GET_STD_INFO(R1);
1873
1874     // Some closures have non-standard layout, so we omit those here.
1875     W_ type;
1876     type = TO_W_(%INFO_TYPE(info));
1877     switch [0 .. N_CLOSURE_TYPES] type {
1878     case THUNK_SELECTOR : {
1879         ptrs = 1;
1880         nptrs = 0;
1881         goto out;
1882     }
1883     case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
1884          THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
1885         ptrs = 0;
1886         nptrs = 0;
1887         goto out;
1888     }
1889     default: {
1890         ptrs  = TO_W_(%INFO_PTRS(info)); 
1891         nptrs = TO_W_(%INFO_NPTRS(info));
1892         goto out;
1893     }}
1894 out:
1895
1896     W_ ptrs_arr_sz, nptrs_arr_sz;
1897     nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
1898     ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);
1899
1900     ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);
1901
1902     ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
1903     nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
1904
1905     SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
1906     StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
1907     p = 0;
1908 for:
1909     if(p < ptrs) {
1910          W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(R1,p);
1911          p = p + 1;
1912          goto for;
1913     }
1914     
1915     SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
1916     StgArrWords_words(nptrs_arr) = nptrs;
1917     p = 0;
1918 for2:
1919     if(p < nptrs) {
1920          W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(R1, p+ptrs);
1921          p = p + 1;
1922          goto for2;
1923     }
1924     RET_NPP(info, ptrs_arr, nptrs_arr);
1925 }
1926
1927 /* -----------------------------------------------------------------------------
1928    Thread I/O blocking primitives
1929    -------------------------------------------------------------------------- */
1930
1931 /* Add a thread to the end of the blocked queue. (C-- version of the C
1932  * macro in Schedule.h).
1933  */
1934 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1935     ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
1936     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1937       W_[blocked_queue_hd] = tso;                       \
1938     } else {                                            \
1939       StgTSO_link(W_[blocked_queue_tl]) = tso;          \
1940     }                                                   \
1941     W_[blocked_queue_tl] = tso;
1942
1943 waitReadzh_fast
1944 {
1945     /* args: R1 */
1946 #ifdef THREADED_RTS
1947     foreign "C" barf("waitRead# on threaded RTS");
1948 #else
1949
1950     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1951     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1952     StgTSO_block_info(CurrentTSO) = R1;
1953     // No locking - we're not going to use this interface in the
1954     // threaded RTS anyway.
1955     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1956     jump stg_block_noregs;
1957 #endif
1958 }
1959
1960 waitWritezh_fast
1961 {
1962     /* args: R1 */
1963 #ifdef THREADED_RTS
1964     foreign "C" barf("waitWrite# on threaded RTS");
1965 #else
1966
1967     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1968     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1969     StgTSO_block_info(CurrentTSO) = R1;
1970     // No locking - we're not going to use this interface in the
1971     // threaded RTS anyway.
1972     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1973     jump stg_block_noregs;
1974 #endif
1975 }
1976
1977
1978 STRING(stg_delayzh_malloc_str, "delayzh_fast")
1979 delayzh_fast
1980 {
1981 #ifdef mingw32_HOST_OS
1982     W_ ares;
1983     CInt reqID;
1984 #else
1985     W_ t, prev, target;
1986 #endif
1987
1988 #ifdef THREADED_RTS
1989     foreign "C" barf("delay# on threaded RTS");
1990 #else
1991
1992     /* args: R1 (microsecond delay amount) */
1993     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1994     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
1995
1996 #ifdef mingw32_HOST_OS
1997
1998     /* could probably allocate this on the heap instead */
1999     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2000                                             stg_delayzh_malloc_str);
2001     reqID = foreign "C" addDelayRequest(R1);
2002     StgAsyncIOResult_reqID(ares)   = reqID;
2003     StgAsyncIOResult_len(ares)     = 0;
2004     StgAsyncIOResult_errCode(ares) = 0;
2005     StgTSO_block_info(CurrentTSO)  = ares;
2006
2007     /* Having all async-blocked threads reside on the blocked_queue
2008      * simplifies matters, so change the status to OnDoProc put the
2009      * delayed thread on the blocked_queue.
2010      */
2011     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2012     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2013     jump stg_block_async_void;
2014
2015 #else
2016
2017     W_ time;
2018     W_ divisor;
2019     (time) = foreign "C" getourtimeofday() [R1];
2020     divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags))*1000;
2021     target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
2022            + time + 1; /* Add 1 as getourtimeofday rounds down */
2023     StgTSO_block_info(CurrentTSO) = target;
2024
2025     /* Insert the new thread in the sleeping queue. */
2026     prev = NULL;
2027     t = W_[sleeping_queue];
2028 while:
2029     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
2030         prev = t;
2031         t = StgTSO_link(t);
2032         goto while;
2033     }
2034
2035     StgTSO_link(CurrentTSO) = t;
2036     if (prev == NULL) {
2037         W_[sleeping_queue] = CurrentTSO;
2038     } else {
2039         StgTSO_link(prev) = CurrentTSO;
2040     }
2041     jump stg_block_noregs;
2042 #endif
2043 #endif /* !THREADED_RTS */
2044 }
2045
2046
2047 #ifdef mingw32_HOST_OS
2048 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
2049 asyncReadzh_fast
2050 {
2051     W_ ares;
2052     CInt reqID;
2053
2054 #ifdef THREADED_RTS
2055     foreign "C" barf("asyncRead# on threaded RTS");
2056 #else
2057
2058     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2059     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2060     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
2061
2062     /* could probably allocate this on the heap instead */
2063     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2064                                             stg_asyncReadzh_malloc_str)
2065                         [R1,R2,R3,R4];
2066     reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
2067     StgAsyncIOResult_reqID(ares)   = reqID;
2068     StgAsyncIOResult_len(ares)     = 0;
2069     StgAsyncIOResult_errCode(ares) = 0;
2070     StgTSO_block_info(CurrentTSO)  = ares;
2071     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2072     jump stg_block_async;
2073 #endif
2074 }
2075
2076 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
2077 asyncWritezh_fast
2078 {
2079     W_ ares;
2080     CInt reqID;
2081
2082 #ifdef THREADED_RTS
2083     foreign "C" barf("asyncWrite# on threaded RTS");
2084 #else
2085
2086     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2087     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2088     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2089
2090     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2091                                             stg_asyncWritezh_malloc_str)
2092                         [R1,R2,R3,R4];
2093     reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
2094
2095     StgAsyncIOResult_reqID(ares)   = reqID;
2096     StgAsyncIOResult_len(ares)     = 0;
2097     StgAsyncIOResult_errCode(ares) = 0;
2098     StgTSO_block_info(CurrentTSO)  = ares;
2099     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2100     jump stg_block_async;
2101 #endif
2102 }
2103
2104 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
2105 asyncDoProczh_fast
2106 {
2107     W_ ares;
2108     CInt reqID;
2109
2110 #ifdef THREADED_RTS
2111     foreign "C" barf("asyncDoProc# on threaded RTS");
2112 #else
2113
2114     /* args: R1 = proc, R2 = param */
2115     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2116     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2117
2118     /* could probably allocate this on the heap instead */
2119     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2120                                             stg_asyncDoProczh_malloc_str) 
2121                                 [R1,R2];
2122     reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
2123     StgAsyncIOResult_reqID(ares)   = reqID;
2124     StgAsyncIOResult_len(ares)     = 0;
2125     StgAsyncIOResult_errCode(ares) = 0;
2126     StgTSO_block_info(CurrentTSO) = ares;
2127     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2128     jump stg_block_async;
2129 #endif
2130 }
2131 #endif
2132
2133 // noDuplicate# tries to ensure that none of the thunks under
2134 // evaluation by the current thread are also under evaluation by
2135 // another thread.  It relies on *both* threads doing noDuplicate#;
2136 // the second one will get blocked if they are duplicating some work.
2137 noDuplicatezh_fast
2138 {
2139     SAVE_THREAD_STATE();
2140     ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
2141     foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
2142     
2143     if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
2144         jump stg_threadFinished;
2145     } else {
2146         LOAD_THREAD_STATE();
2147         ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
2148         jump %ENTRY_CODE(Sp(0));
2149     }
2150 }
2151
2152 getApStackValzh_fast
2153 {
2154    W_ ap_stack, offset, val, ok;
2155
2156    /* args: R1 = AP_STACK, R2 = offset */
2157    ap_stack = R1;
2158    offset   = R2;
2159
2160    if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
2161         ok = 1;
2162         val = StgAP_STACK_payload(ap_stack,offset); 
2163    } else {
2164         ok = 0;
2165         val = R1;
2166    }
2167    RET_NP(ok,val);
2168 }