newPinnedByteArray#: align the result to 16-bytes (part of #2917)
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 #ifdef __PIC__
31 #ifndef mingw32_HOST_OS
32 import __gmpz_init;
33 import __gmpz_add;
34 import __gmpz_sub;
35 import __gmpz_mul;
36 import __gmpz_gcd;
37 import __gmpn_gcd_1;
38 import __gmpn_cmp;
39 import __gmpz_tdiv_q;
40 import __gmpz_tdiv_r;
41 import __gmpz_tdiv_qr;
42 import __gmpz_fdiv_qr;
43 import __gmpz_divexact;
44 import __gmpz_and;
45 import __gmpz_xor;
46 import __gmpz_ior;
47 import __gmpz_com;
48 #endif
49 import pthread_mutex_lock;
50 import pthread_mutex_unlock;
51 #endif
52 import base_ControlziExceptionziBase_nestedAtomically_closure;
53 import EnterCriticalSection;
54 import LeaveCriticalSection;
55 import ghczmprim_GHCziBool_False_closure;
56
57 /*-----------------------------------------------------------------------------
58   Array Primitives
59
60   Basically just new*Array - the others are all inline macros.
61
62   The size arg is always passed in R1, and the result returned in R1.
63
64   The slow entry point is for returning from a heap check, the saved
65   size argument must be re-loaded from the stack.
66   -------------------------------------------------------------------------- */
67
68 /* for objects that are *less* than the size of a word, make sure we
69  * round up to the nearest word for the size of the array.
70  */
71
72 newByteArrayzh_fast
73 {
74     W_ words, payload_words, n, p;
75     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
76     n = R1;
77     payload_words = ROUNDUP_BYTES_TO_WDS(n);
78     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
79     ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
80     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
81     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
82     StgArrWords_words(p) = payload_words;
83     RET_P(p);
84 }
85
86 newPinnedByteArrayzh_fast
87 {
88     W_ words, payload_words, n, p;
89
90     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
91     n = R1;
92     payload_words = ROUNDUP_BYTES_TO_WDS(n);
93
94     // We want an 8-byte aligned array.  allocatePinned() gives us
95     // 8-byte aligned memory by default, but we want to align the
96     // *goods* inside the ArrWords object, so we have to check the
97     // size of the ArrWords header and adjust our size accordingly.
98     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
99     if ((SIZEOF_StgArrWords & 7) != 0) {
100         words = words + 1;
101     }
102
103     ("ptr" p) = foreign "C" allocatePinned(words) [];
104     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
105
106     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
107     // have to push the object forward one word so that the goods
108     // fall on an 8-byte boundary.
109     if ((SIZEOF_StgArrWords & 7) != 0) {
110         p = p + WDS(1);
111     }
112
113     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
114     StgArrWords_words(p) = payload_words;
115     RET_P(p);
116 }
117
118 newArrayzh_fast
119 {
120     W_ words, n, init, arr, p;
121     /* Args: R1 = words, R2 = initialisation value */
122
123     n = R1;
124     MAYBE_GC(R2_PTR,newArrayzh_fast);
125
126     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
127     ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
128     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
129
130     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
131     StgMutArrPtrs_ptrs(arr) = n;
132
133     // Initialise all elements of the the array with the value in R2
134     init = R2;
135     p = arr + SIZEOF_StgMutArrPtrs;
136   for:
137     if (p < arr + WDS(words)) {
138         W_[p] = init;
139         p = p + WDS(1);
140         goto for;
141     }
142
143     RET_P(arr);
144 }
145
146 unsafeThawArrayzh_fast
147 {
148   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
149   //
150   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
151   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
152   // it on the mutable list for the GC to remove (removing something from
153   // the mutable list is not easy, because the mut_list is only singly-linked).
154   // 
155   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
156   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
157   // to indicate that it is still on the mutable list.
158   //
159   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
160   // either it is on a mut_list, or it isn't.  We adopt the convention that
161   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
162   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
163   // we put it on the mutable list more than once, but it would get scavenged
164   // multiple times during GC, which would be unnecessarily slow.
165   //
166   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
167         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
168         recordMutable(R1, R1);
169         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
170         RET_P(R1);
171   } else {
172         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
173         RET_P(R1);
174   }
175 }
176
177 /* -----------------------------------------------------------------------------
178    MutVar primitives
179    -------------------------------------------------------------------------- */
180
181 newMutVarzh_fast
182 {
183     W_ mv;
184     /* Args: R1 = initialisation value */
185
186     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
187
188     mv = Hp - SIZEOF_StgMutVar + WDS(1);
189     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
190     StgMutVar_var(mv) = R1;
191     
192     RET_P(mv);
193 }
194
195 atomicModifyMutVarzh_fast
196 {
197     W_ mv, f, z, x, y, r, h;
198     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
199
200     /* If x is the current contents of the MutVar#, then 
201        We want to make the new contents point to
202
203          (sel_0 (f x))
204  
205        and the return value is
206          
207          (sel_1 (f x))
208
209         obviously we can share (f x).
210
211          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
212          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
213          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
214     */
215
216 #if MIN_UPD_SIZE > 1
217 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
218 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
219 #else
220 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
221 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
222 #endif
223
224 #if MIN_UPD_SIZE > 2
225 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
226 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
227 #else
228 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
229 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
230 #endif
231
232 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
233
234    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
235
236    mv = R1;
237    f = R2;
238
239    TICK_ALLOC_THUNK_2();
240    CCCS_ALLOC(THUNK_2_SIZE);
241    z = Hp - THUNK_2_SIZE + WDS(1);
242    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
243    LDV_RECORD_CREATE(z);
244    StgThunk_payload(z,0) = f;
245
246    TICK_ALLOC_THUNK_1();
247    CCCS_ALLOC(THUNK_1_SIZE);
248    y = z - THUNK_1_SIZE;
249    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
250    LDV_RECORD_CREATE(y);
251    StgThunk_payload(y,0) = z;
252
253    TICK_ALLOC_THUNK_1();
254    CCCS_ALLOC(THUNK_1_SIZE);
255    r = y - THUNK_1_SIZE;
256    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
257    LDV_RECORD_CREATE(r);
258    StgThunk_payload(r,0) = z;
259
260  retry:
261    x = StgMutVar_var(mv);
262    StgThunk_payload(z,1) = x;
263 #ifdef THREADED_RTS
264    (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) [];
265    if (h != x) { goto retry; }
266 #else
267    StgMutVar_var(mv) = y;
268 #endif
269
270    if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
271      foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
272    }
273
274    RET_P(r);
275 }
276
277 /* -----------------------------------------------------------------------------
278    Weak Pointer Primitives
279    -------------------------------------------------------------------------- */
280
281 STRING(stg_weak_msg,"New weak pointer at %p\n")
282
283 mkWeakzh_fast
284 {
285   /* R1 = key
286      R2 = value
287      R3 = finalizer (or NULL)
288   */
289   W_ w;
290
291   if (R3 == NULL) {
292     R3 = stg_NO_FINALIZER_closure;
293   }
294
295   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
296
297   w = Hp - SIZEOF_StgWeak + WDS(1);
298   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
299
300   // We don't care about cfinalizer here.
301   // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
302   // something else?
303
304   StgWeak_key(w)        = R1;
305   StgWeak_value(w)      = R2;
306   StgWeak_finalizer(w)  = R3;
307   StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure;
308
309   StgWeak_link(w)       = W_[weak_ptr_list];
310   W_[weak_ptr_list]     = w;
311
312   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
313
314   RET_P(w);
315 }
316
317 mkWeakForeignEnvzh_fast
318 {
319   /* R1 = key
320      R2 = value
321      R3 = finalizer
322      R4 = pointer
323      R5 = has environment (0 or 1)
324      R6 = environment
325   */
326   W_ w, payload_words, words, p;
327
328   W_ key, val, fptr, ptr, flag, eptr;
329
330   key  = R1;
331   val  = R2;
332   fptr = R3;
333   ptr  = R4;
334   flag = R5;
335   eptr = R6;
336
337   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakForeignEnvzh_fast );
338
339   w = Hp - SIZEOF_StgWeak + WDS(1);
340   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
341
342   payload_words = 4;
343   words         = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
344   ("ptr" p)     = foreign "C" allocateLocal(MyCapability() "ptr", words) [];
345
346   TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
347   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
348
349   StgArrWords_words(p)     = payload_words;
350   StgArrWords_payload(p,0) = fptr;
351   StgArrWords_payload(p,1) = ptr;
352   StgArrWords_payload(p,2) = eptr;
353   StgArrWords_payload(p,3) = flag;
354
355   // We don't care about the value here.
356   // Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else?
357
358   StgWeak_key(w)        = key;
359   StgWeak_value(w)      = val;
360   StgWeak_finalizer(w)  = stg_NO_FINALIZER_closure;
361   StgWeak_cfinalizer(w) = p;
362
363   StgWeak_link(w)   = W_[weak_ptr_list];
364   W_[weak_ptr_list] = w;
365
366   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
367
368   RET_P(w);
369 }
370
371 finalizzeWeakzh_fast
372 {
373   /* R1 = weak ptr
374    */
375   W_ w, f, arr;
376
377   w = R1;
378
379   // already dead?
380   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
381       RET_NP(0,stg_NO_FINALIZER_closure);
382   }
383
384   // kill it
385 #ifdef PROFILING
386   // @LDV profiling
387   // A weak pointer is inherently used, so we do not need to call
388   // LDV_recordDead_FILL_SLOP_DYNAMIC():
389   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
390   // or, LDV_recordDead():
391   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
392   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
393   // large as weak pointers, so there is no need to fill the slop, either.
394   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
395 #endif
396
397   //
398   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
399   //
400   SET_INFO(w,stg_DEAD_WEAK_info);
401   LDV_RECORD_CREATE(w);
402
403   f   = StgWeak_finalizer(w);
404   arr = StgWeak_cfinalizer(w);
405
406   StgDeadWeak_link(w) = StgWeak_link(w);
407
408   if (arr != stg_NO_FINALIZER_closure) {
409     foreign "C" runCFinalizer(StgArrWords_payload(arr,0),
410                               StgArrWords_payload(arr,1),
411                               StgArrWords_payload(arr,2),
412                               StgArrWords_payload(arr,3)) [];
413   }
414
415   /* return the finalizer */
416   if (f == stg_NO_FINALIZER_closure) {
417       RET_NP(0,stg_NO_FINALIZER_closure);
418   } else {
419       RET_NP(1,f);
420   }
421 }
422
423 deRefWeakzh_fast
424 {
425   /* R1 = weak ptr */
426   W_ w, code, val;
427
428   w = R1;
429   if (GET_INFO(w) == stg_WEAK_info) {
430     code = 1;
431     val = StgWeak_value(w);
432   } else {
433     code = 0;
434     val = w;
435   }
436   RET_NP(code,val);
437 }
438
439 /* -----------------------------------------------------------------------------
440    Arbitrary-precision Integer operations.
441
442    There are some assumptions in this code that mp_limb_t == W_.  This is
443    the case for all the platforms that GHC supports, currently.
444    -------------------------------------------------------------------------- */
445
446 int2Integerzh_fast
447 {
448    /* arguments: R1 = Int# */
449
450    W_ val, s, p;        /* to avoid aliasing */
451
452    val = R1;
453    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
454
455    p = Hp - SIZEOF_StgArrWords;
456    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
457    StgArrWords_words(p) = 1;
458
459    /* mpz_set_si is inlined here, makes things simpler */
460    if (%lt(val,0)) { 
461         s  = -1;
462         Hp(0) = -val;
463    } else { 
464      if (%gt(val,0)) {
465         s = 1;
466         Hp(0) = val;
467      } else {
468         s = 0;
469      }
470   }
471
472    /* returns (# size  :: Int#, 
473                  data  :: ByteArray# 
474                #)
475    */
476    RET_NP(s,p);
477 }
478
479 word2Integerzh_fast
480 {
481    /* arguments: R1 = Word# */
482
483    W_ val, s, p;        /* to avoid aliasing */
484
485    val = R1;
486
487    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
488
489    p = Hp - SIZEOF_StgArrWords;
490    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
491    StgArrWords_words(p) = 1;
492
493    if (val != 0) {
494         s = 1;
495         W_[Hp] = val;
496    } else {
497         s = 0;
498    }
499
500    /* returns (# size  :: Int#, 
501                  data  :: ByteArray# #)
502    */
503    RET_NP(s,p);
504 }
505
506
507 /*
508  * 'long long' primops for converting to/from Integers.
509  */
510
511 #ifdef SUPPORT_LONG_LONGS
512
513 int64ToIntegerzh_fast
514 {
515    /* arguments: L1 = Int64# */
516
517    L_ val;
518    W_ hi, lo, s, neg, words_needed, p;
519
520    val = L1;
521    neg = 0;
522
523    hi = TO_W_(val >> 32);
524    lo = TO_W_(val);
525
526    if ( hi == 0 || (hi == 0xFFFFFFFF && lo != 0) )  {
527        // minimum is one word
528        words_needed = 1;
529    } else { 
530        words_needed = 2;
531    }
532
533    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
534                NO_PTRS, int64ToIntegerzh_fast );
535
536    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
537    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
538    StgArrWords_words(p) = words_needed;
539
540    if ( %lt(hi,0) ) {
541      neg = 1;
542      lo = -lo;
543      if(lo == 0) {
544        hi = -hi;
545      } else {
546        hi = -hi - 1;
547      }
548    }
549
550    if ( words_needed == 2 )  { 
551       s = 2;
552       Hp(-1) = lo;
553       Hp(0) = hi;
554    } else { 
555        if ( lo != 0 ) {
556            s = 1;
557            Hp(0) = lo;
558        } else /* val==0 */  {
559            s = 0;
560        }
561    }
562    if ( neg != 0 ) {
563         s = -s;
564    }
565
566    /* returns (# size  :: Int#, 
567                  data  :: ByteArray# #)
568    */
569    RET_NP(s,p);
570 }
571 word64ToIntegerzh_fast
572 {
573    /* arguments: L1 = Word64# */
574
575    L_ val;
576    W_ hi, lo, s, words_needed, p;
577
578    val = L1;
579    hi = TO_W_(val >> 32);
580    lo = TO_W_(val);
581
582    if ( hi != 0 ) {
583       words_needed = 2;
584    } else {
585       words_needed = 1;
586    }
587
588    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
589                NO_PTRS, word64ToIntegerzh_fast );
590
591    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
592    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
593    StgArrWords_words(p) = words_needed;
594
595    if ( hi != 0 ) { 
596      s = 2;
597      Hp(-1) = lo;
598      Hp(0)  = hi;
599    } else {
600       if ( lo != 0 ) {
601         s = 1;
602         Hp(0) = lo;
603      } else /* val==0 */  {
604       s = 0;
605      }
606   }
607
608    /* returns (# size  :: Int#, 
609                  data  :: ByteArray# #)
610    */
611    RET_NP(s,p);
612 }
613
614
615
616 #endif /* SUPPORT_LONG_LONGS */
617
618 /* ToDo: this is shockingly inefficient */
619
620 #ifndef THREADED_RTS
621 section "bss" {
622   mp_tmp1:
623     bits8 [SIZEOF_MP_INT];
624 }
625
626 section "bss" {
627   mp_tmp2:
628     bits8 [SIZEOF_MP_INT];
629 }
630
631 section "bss" {
632   mp_result1:
633     bits8 [SIZEOF_MP_INT];
634 }
635
636 section "bss" {
637   mp_result2:
638     bits8 [SIZEOF_MP_INT];
639 }
640 #endif
641
642 #ifdef THREADED_RTS
643 #define FETCH_MP_TEMP(X) \
644 W_ X; \
645 X = BaseReg + (OFFSET_StgRegTable_r ## X);
646 #else
647 #define FETCH_MP_TEMP(X) /* Nothing */
648 #endif
649
650 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
651 name                                                                    \
652 {                                                                       \
653   CInt s1, s2;                                                          \
654   W_ d1, d2;                                                            \
655   FETCH_MP_TEMP(mp_tmp1);                                               \
656   FETCH_MP_TEMP(mp_tmp2);                                               \
657   FETCH_MP_TEMP(mp_result1)                                             \
658   FETCH_MP_TEMP(mp_result2);                                            \
659                                                                         \
660   /* call doYouWantToGC() */                                            \
661   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
662                                                                         \
663   s1 = W_TO_INT(R1);                                                    \
664   d1 = R2;                                                              \
665   s2 = W_TO_INT(R3);                                                    \
666   d2 = R4;                                                              \
667                                                                         \
668   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
669   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
670   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
671   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
672   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
673   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
674                                                                         \
675   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
676                                                                         \
677   /* Perform the operation */                                           \
678   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
679                                                                         \
680   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
681          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
682 }
683
684 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
685 name                                                                    \
686 {                                                                       \
687   CInt s1;                                                              \
688   W_ d1;                                                                \
689   FETCH_MP_TEMP(mp_tmp1);                                               \
690   FETCH_MP_TEMP(mp_result1)                                             \
691                                                                         \
692   /* call doYouWantToGC() */                                            \
693   MAYBE_GC(R2_PTR, name);                                               \
694                                                                         \
695   d1 = R2;                                                              \
696   s1 = W_TO_INT(R1);                                                    \
697                                                                         \
698   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
699   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
700   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
701                                                                         \
702   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
703                                                                         \
704   /* Perform the operation */                                           \
705   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
706                                                                         \
707   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
708          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
709 }
710
711 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
712 name                                                                                    \
713 {                                                                                       \
714   CInt s1, s2;                                                                          \
715   W_ d1, d2;                                                                            \
716   FETCH_MP_TEMP(mp_tmp1);                                                               \
717   FETCH_MP_TEMP(mp_tmp2);                                                               \
718   FETCH_MP_TEMP(mp_result1)                                                             \
719   FETCH_MP_TEMP(mp_result2)                                                             \
720                                                                                         \
721   /* call doYouWantToGC() */                                                            \
722   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
723                                                                                         \
724   s1 = W_TO_INT(R1);                                                                    \
725   d1 = R2;                                                                              \
726   s2 = W_TO_INT(R3);                                                                    \
727   d2 = R4;                                                                              \
728                                                                                         \
729   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
730   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
731   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
732   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
733   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
734   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
735                                                                                         \
736   foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
737   foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
738                                                                                         \
739   /* Perform the operation */                                                           \
740   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
741                                                                                         \
742   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
743            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
744            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
745            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
746 }
747
748 GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
749 GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
750 GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
751 GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
752 GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
753 GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
754 GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
755 GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
756 GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
757 GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
758 GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)
759
760 GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
761 GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
762
763 #ifndef THREADED_RTS
764 section "bss" {
765   mp_tmp_w:  W_; // NB. mp_tmp_w is really an here mp_limb_t
766 }
767 #endif
768
769 gcdIntzh_fast
770 {
771     /* R1 = the first Int#; R2 = the second Int# */
772     W_ r; 
773     FETCH_MP_TEMP(mp_tmp_w);
774
775     W_[mp_tmp_w] = R1;
776     (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
777
778     R1 = r;
779     /* Result parked in R1, return via info-pointer at TOS */
780     jump %ENTRY_CODE(Sp(0));
781 }
782
783
784 gcdIntegerIntzh_fast
785 {
786     /* R1 = s1; R2 = d1; R3 = the int */
787     W_ s1;
788     (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
789     R1 = s1;
790     
791     /* Result parked in R1, return via info-pointer at TOS */
792     jump %ENTRY_CODE(Sp(0));
793 }
794
795
796 cmpIntegerIntzh_fast
797 {
798     /* R1 = s1; R2 = d1; R3 = the int */
799     W_ usize, vsize, v_digit, u_digit;
800
801     usize = R1;
802     vsize = 0;
803     v_digit = R3;
804
805     // paraphrased from __gmpz_cmp_si() in the GMP sources
806     if (%gt(v_digit,0)) {
807         vsize = 1;
808     } else { 
809         if (%lt(v_digit,0)) {
810             vsize = -1;
811             v_digit = -v_digit;
812         }
813     }
814
815     if (usize != vsize) {
816         R1 = usize - vsize; 
817         jump %ENTRY_CODE(Sp(0));
818     }
819
820     if (usize == 0) {
821         R1 = 0; 
822         jump %ENTRY_CODE(Sp(0));
823     }
824
825     u_digit = W_[BYTE_ARR_CTS(R2)];
826
827     if (u_digit == v_digit) {
828         R1 = 0; 
829         jump %ENTRY_CODE(Sp(0));
830     }
831
832     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
833         R1 = usize; 
834     } else {
835         R1 = -usize; 
836     }
837
838     jump %ENTRY_CODE(Sp(0));
839 }
840
841 cmpIntegerzh_fast
842 {
843     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
844     W_ usize, vsize, size, up, vp;
845     CInt cmp;
846
847     // paraphrased from __gmpz_cmp() in the GMP sources
848     usize = R1;
849     vsize = R3;
850
851     if (usize != vsize) {
852         R1 = usize - vsize; 
853         jump %ENTRY_CODE(Sp(0));
854     }
855
856     if (usize == 0) {
857         R1 = 0; 
858         jump %ENTRY_CODE(Sp(0));
859     }
860
861     if (%lt(usize,0)) { // NB. not <, which is unsigned
862         size = -usize;
863     } else {
864         size = usize;
865     }
866
867     up = BYTE_ARR_CTS(R2);
868     vp = BYTE_ARR_CTS(R4);
869
870     (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];
871
872     if (cmp == 0 :: CInt) {
873         R1 = 0; 
874         jump %ENTRY_CODE(Sp(0));
875     }
876
877     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
878         R1 = 1;
879     } else {
880         R1 = (-1); 
881     }
882     /* Result parked in R1, return via info-pointer at TOS */
883     jump %ENTRY_CODE(Sp(0));
884 }
885
886 integer2Intzh_fast
887 {
888     /* R1 = s; R2 = d */
889     W_ r, s;
890
891     s = R1;
892     if (s == 0) {
893         r = 0;
894     } else {
895         r = W_[R2 + SIZEOF_StgArrWords];
896         if (%lt(s,0)) {
897             r = -r;
898         }
899     }
900     /* Result parked in R1, return via info-pointer at TOS */
901     R1 = r;
902     jump %ENTRY_CODE(Sp(0));
903 }
904
905 integer2Wordzh_fast
906 {
907   /* R1 = s; R2 = d */
908   W_ r, s;
909
910   s = R1;
911   if (s == 0) {
912     r = 0;
913   } else {
914     r = W_[R2 + SIZEOF_StgArrWords];
915     if (%lt(s,0)) {
916         r = -r;
917     }
918   }
919   /* Result parked in R1, return via info-pointer at TOS */
920   R1 = r;
921   jump %ENTRY_CODE(Sp(0));
922 }
923
924 decodeFloatzh_fast
925
926     W_ p;
927     F_ arg;
928     FETCH_MP_TEMP(mp_tmp1);
929     FETCH_MP_TEMP(mp_tmp_w);
930     
931     /* arguments: F1 = Float# */
932     arg = F1;
933     
934     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
935     
936     /* Be prepared to tell Lennart-coded __decodeFloat
937        where mantissa._mp_d can be put (it does not care about the rest) */
938     p = Hp - SIZEOF_StgArrWords;
939     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
940     StgArrWords_words(p) = 1;
941     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
942     
943     /* Perform the operation */
944     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
945     
946     /* returns: (Int# (expn), Int#, ByteArray#) */
947     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
948 }
949
950 decodeFloatzuIntzh_fast
951
952     W_ p;
953     F_ arg;
954     FETCH_MP_TEMP(mp_tmp1);
955     FETCH_MP_TEMP(mp_tmp_w);
956     
957     /* arguments: F1 = Float# */
958     arg = F1;
959     
960     /* Perform the operation */
961     foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
962     
963     /* returns: (Int# (mantissa), Int# (exponent)) */
964     RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
965 }
966
967 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
968 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
969
970 decodeDoublezh_fast
971
972     D_ arg;
973     W_ p;
974     FETCH_MP_TEMP(mp_tmp1);
975     FETCH_MP_TEMP(mp_tmp_w);
976
977     /* arguments: D1 = Double# */
978     arg = D1;
979
980     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
981     
982     /* Be prepared to tell Lennart-coded __decodeDouble
983        where mantissa.d can be put (it does not care about the rest) */
984     p = Hp - ARR_SIZE + WDS(1);
985     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
986     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
987     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
988
989     /* Perform the operation */
990     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
991     
992     /* returns: (Int# (expn), Int#, ByteArray#) */
993     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
994 }
995
996 decodeDoublezu2Intzh_fast
997
998     D_ arg;
999     W_ p;
1000     FETCH_MP_TEMP(mp_tmp1);
1001     FETCH_MP_TEMP(mp_tmp2);
1002     FETCH_MP_TEMP(mp_result1);
1003     FETCH_MP_TEMP(mp_result2);
1004
1005     /* arguments: D1 = Double# */
1006     arg = D1;
1007
1008     /* Perform the operation */
1009     foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
1010                                     mp_result1 "ptr", mp_result2 "ptr",
1011                                     arg) [];
1012
1013     /* returns:
1014        (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
1015     RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
1016 }
1017
1018 /* -----------------------------------------------------------------------------
1019  * Concurrency primitives
1020  * -------------------------------------------------------------------------- */
1021
1022 forkzh_fast
1023 {
1024   /* args: R1 = closure to spark */
1025
1026   MAYBE_GC(R1_PTR, forkzh_fast);
1027
1028   W_ closure;
1029   W_ threadid;
1030   closure = R1;
1031
1032   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
1033                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
1034                                 closure "ptr") [];
1035
1036   /* start blocked if the current thread is blocked */
1037   StgTSO_flags(threadid) = 
1038      StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
1039                                 (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));
1040
1041   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
1042
1043   // switch at the earliest opportunity
1044   Capability_context_switch(MyCapability()) = 1 :: CInt;
1045   
1046   RET_P(threadid);
1047 }
1048
1049 forkOnzh_fast
1050 {
1051   /* args: R1 = cpu, R2 = closure to spark */
1052
1053   MAYBE_GC(R2_PTR, forkOnzh_fast);
1054
1055   W_ cpu;
1056   W_ closure;
1057   W_ threadid;
1058   cpu = R1;
1059   closure = R2;
1060
1061   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
1062                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
1063                                 closure "ptr") [];
1064
1065   /* start blocked if the current thread is blocked */
1066   StgTSO_flags(threadid) = 
1067      StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
1068                                 (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));
1069
1070   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
1071
1072   // switch at the earliest opportunity
1073   Capability_context_switch(MyCapability()) = 1 :: CInt;
1074   
1075   RET_P(threadid);
1076 }
1077
1078 yieldzh_fast
1079 {
1080   jump stg_yield_noregs;
1081 }
1082
1083 myThreadIdzh_fast
1084 {
1085   /* no args. */
1086   RET_P(CurrentTSO);
1087 }
1088
1089 labelThreadzh_fast
1090 {
1091   /* args: 
1092         R1 = ThreadId#
1093         R2 = Addr# */
1094 #ifdef DEBUG
1095   foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
1096 #endif
1097   jump %ENTRY_CODE(Sp(0));
1098 }
1099
1100 isCurrentThreadBoundzh_fast
1101 {
1102   /* no args */
1103   W_ r;
1104   (r) = foreign "C" isThreadBound(CurrentTSO) [];
1105   RET_N(r);
1106 }
1107
1108 threadStatuszh_fast
1109 {
1110     /* args: R1 :: ThreadId# */
1111     W_ tso;
1112     W_ why_blocked;
1113     W_ what_next;
1114     W_ ret;
1115
1116     tso = R1;
1117     loop:
1118       if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
1119           tso = StgTSO__link(tso);
1120           goto loop;
1121       }
1122
1123     what_next   = TO_W_(StgTSO_what_next(tso));
1124     why_blocked = TO_W_(StgTSO_why_blocked(tso));
1125     // Note: these two reads are not atomic, so they might end up
1126     // being inconsistent.  It doesn't matter, since we
1127     // only return one or the other.  If we wanted to return the
1128     // contents of block_info too, then we'd have to do some synchronisation.
1129
1130     if (what_next == ThreadComplete) {
1131         ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
1132     } else {
1133         if (what_next == ThreadKilled) {
1134             ret = 17;
1135         } else {
1136             ret = why_blocked;
1137         }
1138     }
1139     RET_N(ret);
1140 }
1141
1142 /* -----------------------------------------------------------------------------
1143  * TVar primitives
1144  * -------------------------------------------------------------------------- */
1145
1146 #define SP_OFF 0
1147
1148 // Catch retry frame ------------------------------------------------------------
1149
1150 INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
1151 #if defined(PROFILING)
1152   W_ unused1, W_ unused2,
1153 #endif
1154   W_ unused3, P_ unused4, P_ unused5)
1155 {
1156    W_ r, frame, trec, outer;
1157
1158    frame = Sp;
1159    trec = StgTSO_trec(CurrentTSO);
1160    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1161    (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
1162    if (r != 0) {
1163      /* Succeeded (either first branch or second branch) */
1164      StgTSO_trec(CurrentTSO) = outer;
1165      Sp = Sp + SIZEOF_StgCatchRetryFrame;
1166      jump %ENTRY_CODE(Sp(SP_OFF));
1167    } else {
1168      /* Did not commit: re-execute */
1169      W_ new_trec;
1170      ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1171      StgTSO_trec(CurrentTSO) = new_trec;
1172      if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1173        R1 = StgCatchRetryFrame_alt_code(frame);
1174      } else {
1175        R1 = StgCatchRetryFrame_first_code(frame);
1176      }
1177      jump stg_ap_v_fast;
1178    }
1179 }
1180
1181
1182 // Atomically frame ------------------------------------------------------------
1183
1184 INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
1185 #if defined(PROFILING)
1186   W_ unused1, W_ unused2,
1187 #endif
1188   P_ unused3, P_ unused4)
1189 {
1190   W_ frame, trec, valid, next_invariant, q, outer;
1191
1192   frame = Sp;
1193   trec = StgTSO_trec(CurrentTSO);
1194   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1195
1196   if (outer == NO_TREC) {
1197     /* First time back at the atomically frame -- pick up invariants */
1198     ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
1199     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
1200
1201   } else {
1202     /* Second/subsequent time back at the atomically frame -- abort the
1203      * tx that's checking the invariant and move on to the next one */
1204     StgTSO_trec(CurrentTSO) = outer;
1205     q = StgAtomicallyFrame_next_invariant_to_check(frame);
1206     StgInvariantCheckQueue_my_execution(q) = trec;
1207     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1208     /* Don't free trec -- it's linked from q and will be stashed in the
1209      * invariant if we eventually commit. */
1210     q = StgInvariantCheckQueue_next_queue_entry(q);
1211     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
1212     trec = outer;
1213   }
1214
1215   q = StgAtomicallyFrame_next_invariant_to_check(frame);
1216
1217   if (q != END_INVARIANT_CHECK_QUEUE) {
1218     /* We can't commit yet: another invariant to check */
1219     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
1220     StgTSO_trec(CurrentTSO) = trec;
1221
1222     next_invariant = StgInvariantCheckQueue_invariant(q);
1223     R1 = StgAtomicInvariant_code(next_invariant);
1224     jump stg_ap_v_fast;
1225
1226   } else {
1227
1228     /* We've got no more invariants to check, try to commit */
1229     (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
1230     if (valid != 0) {
1231       /* Transaction was valid: commit succeeded */
1232       StgTSO_trec(CurrentTSO) = NO_TREC;
1233       Sp = Sp + SIZEOF_StgAtomicallyFrame;
1234       jump %ENTRY_CODE(Sp(SP_OFF));
1235     } else {
1236       /* Transaction was not valid: try again */
1237       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1238       StgTSO_trec(CurrentTSO) = trec;
1239       StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
1240       R1 = StgAtomicallyFrame_code(frame);
1241       jump stg_ap_v_fast;
1242     }
1243   }
1244 }
1245
1246 INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
1247 #if defined(PROFILING)
1248   W_ unused1, W_ unused2,
1249 #endif
1250   P_ unused3, P_ unused4)
1251 {
1252   W_ frame, trec, valid;
1253
1254   frame = Sp;
1255
1256   /* The TSO is currently waiting: should we stop waiting? */
1257   (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
1258   if (valid != 0) {
1259     /* Previous attempt is still valid: no point trying again yet */
1260     jump stg_block_noregs;
1261   } else {
1262     /* Previous attempt is no longer valid: try again */
1263     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1264     StgTSO_trec(CurrentTSO) = trec;
1265     StgHeader_info(frame) = stg_atomically_frame_info;
1266     R1 = StgAtomicallyFrame_code(frame);
1267     jump stg_ap_v_fast;
1268   }
1269 }
1270
1271 // STM catch frame --------------------------------------------------------------
1272
1273 #define SP_OFF 0
1274
1275 /* Catch frames are very similar to update frames, but when entering
1276  * one we just pop the frame off the stack and perform the correct
1277  * kind of return to the activation record underneath us on the stack.
1278  */
1279
1280 INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
1281 #if defined(PROFILING)
1282   W_ unused1, W_ unused2,
1283 #endif
1284   P_ unused3, P_ unused4)
1285    {
1286       W_ r, frame, trec, outer;
1287       frame = Sp;
1288       trec = StgTSO_trec(CurrentTSO);
1289       ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1290       (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
1291       if (r != 0) {
1292         /* Commit succeeded */
1293         StgTSO_trec(CurrentTSO) = outer;
1294         Sp = Sp + SIZEOF_StgCatchSTMFrame;
1295         jump Sp(SP_OFF);
1296       } else {
1297         /* Commit failed */
1298         W_ new_trec;
1299         ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1300         StgTSO_trec(CurrentTSO) = new_trec;
1301         R1 = StgCatchSTMFrame_code(frame);
1302         jump stg_ap_v_fast;
1303       }
1304    }
1305
1306
1307 // Primop definition ------------------------------------------------------------
1308
1309 atomicallyzh_fast
1310 {
1311   W_ frame;
1312   W_ old_trec;
1313   W_ new_trec;
1314   
1315   // stmStartTransaction may allocate
1316   MAYBE_GC (R1_PTR, atomicallyzh_fast); 
1317
1318   /* Args: R1 = m :: STM a */
1319   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1320
1321   old_trec = StgTSO_trec(CurrentTSO);
1322
1323   /* Nested transactions are not allowed; raise an exception */
1324   if (old_trec != NO_TREC) {
1325      R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
1326      jump raisezh_fast;
1327   }
1328
1329   /* Set up the atomically frame */
1330   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1331   frame = Sp;
1332
1333   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1334   StgAtomicallyFrame_code(frame) = R1;
1335   StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
1336
1337   /* Start the memory transcation */
1338   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
1339   StgTSO_trec(CurrentTSO) = new_trec;
1340
1341   /* Apply R1 to the realworld token */
1342   jump stg_ap_v_fast;
1343 }
1344
1345
1346 catchSTMzh_fast
1347 {
1348   W_ frame;
1349   
1350   /* Args: R1 :: STM a */
1351   /* Args: R2 :: Exception -> STM a */
1352   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1353
1354   /* Set up the catch frame */
1355   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1356   frame = Sp;
1357
1358   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1359   StgCatchSTMFrame_handler(frame) = R2;
1360   StgCatchSTMFrame_code(frame) = R1;
1361
1362   /* Start a nested transaction to run the body of the try block in */
1363   W_ cur_trec;  
1364   W_ new_trec;
1365   cur_trec = StgTSO_trec(CurrentTSO);
1366   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
1367   StgTSO_trec(CurrentTSO) = new_trec;
1368
1369   /* Apply R1 to the realworld token */
1370   jump stg_ap_v_fast;
1371 }
1372
1373
1374 catchRetryzh_fast
1375 {
1376   W_ frame;
1377   W_ new_trec;
1378   W_ trec;
1379
1380   // stmStartTransaction may allocate
1381   MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 
1382
1383   /* Args: R1 :: STM a */
1384   /* Args: R2 :: STM a */
1385   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1386
1387   /* Start a nested transaction within which to run the first code */
1388   trec = StgTSO_trec(CurrentTSO);
1389   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
1390   StgTSO_trec(CurrentTSO) = new_trec;
1391
1392   /* Set up the catch-retry frame */
1393   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1394   frame = Sp;
1395   
1396   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1397   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1398   StgCatchRetryFrame_first_code(frame) = R1;
1399   StgCatchRetryFrame_alt_code(frame) = R2;
1400
1401   /* Apply R1 to the realworld token */
1402   jump stg_ap_v_fast;
1403 }
1404
1405
1406 retryzh_fast
1407 {
1408   W_ frame_type;
1409   W_ frame;
1410   W_ trec;
1411   W_ outer;
1412   W_ r;
1413
1414   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1415
1416   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1417 retry_pop_stack:
1418   StgTSO_sp(CurrentTSO) = Sp;
1419   (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
1420   Sp = StgTSO_sp(CurrentTSO);
1421   frame = Sp;
1422   trec = StgTSO_trec(CurrentTSO);
1423   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1424
1425   if (frame_type == CATCH_RETRY_FRAME) {
1426     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1427     ASSERT(outer != NO_TREC);
1428     // Abort the transaction attempting the current branch
1429     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1430     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
1431     if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1432       // Retry in the first branch: try the alternative
1433       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1434       StgTSO_trec(CurrentTSO) = trec;
1435       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1436       R1 = StgCatchRetryFrame_alt_code(frame);
1437       jump stg_ap_v_fast;
1438     } else {
1439       // Retry in the alternative code: propagate the retry
1440       StgTSO_trec(CurrentTSO) = outer;
1441       Sp = Sp + SIZEOF_StgCatchRetryFrame;
1442       goto retry_pop_stack;
1443     }
1444   }
1445
1446   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1447   ASSERT(frame_type == ATOMICALLY_FRAME);
1448   if (outer != NO_TREC) {
1449     // We called retry while checking invariants, so abort the current
1450     // invariant check (merging its TVar accesses into the parents read
1451     // set so we'll wait on them)
1452     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1453     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
1454     trec = outer;
1455     StgTSO_trec(CurrentTSO) = trec;
1456     ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1457   }
1458   ASSERT(outer == NO_TREC);
1459
1460   (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
1461   if (r != 0) {
1462     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1463     StgHeader_info(frame) = stg_atomically_waiting_frame_info;
1464     Sp = frame;
1465     // Fix up the stack in the unregisterised case: the return convention is different.
1466     R3 = trec; // passing to stmWaitUnblock()
1467     jump stg_block_stmwait;
1468   } else {
1469     // Transaction was not valid: retry immediately
1470     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1471     StgTSO_trec(CurrentTSO) = trec;
1472     R1 = StgAtomicallyFrame_code(frame);
1473     Sp = frame;
1474     jump stg_ap_v_fast;
1475   }
1476 }
1477
1478
1479 checkzh_fast
1480 {
1481   W_ trec, closure;
1482
1483   /* Args: R1 = invariant closure */
1484   MAYBE_GC (R1_PTR, checkzh_fast); 
1485
1486   trec = StgTSO_trec(CurrentTSO);
1487   closure = R1;
1488   foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
1489                                      trec "ptr",
1490                                      closure "ptr") [];
1491
1492   jump %ENTRY_CODE(Sp(0));
1493 }
1494
1495
1496 newTVarzh_fast
1497 {
1498   W_ tv;
1499   W_ new_value;
1500
1501   /* Args: R1 = initialisation value */
1502
1503   MAYBE_GC (R1_PTR, newTVarzh_fast); 
1504   new_value = R1;
1505   ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
1506   RET_P(tv);
1507 }
1508
1509
1510 readTVarzh_fast
1511 {
1512   W_ trec;
1513   W_ tvar;
1514   W_ result;
1515
1516   /* Args: R1 = TVar closure */
1517
1518   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1519   trec = StgTSO_trec(CurrentTSO);
1520   tvar = R1;
1521   ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
1522
1523   RET_P(result);
1524 }
1525
1526 readTVarIOzh_fast
1527 {
1528     W_ result;
1529
1530 again:
1531     result = StgTVar_current_value(R1);
1532     if (%INFO_PTR(result) == stg_TREC_HEADER_info) {
1533         goto again;
1534     }
1535     RET_P(result);
1536 }
1537
1538 writeTVarzh_fast
1539 {
1540   W_ trec;
1541   W_ tvar;
1542   W_ new_value;
1543   
1544   /* Args: R1 = TVar closure */
1545   /*       R2 = New value    */
1546
1547   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1548   trec = StgTSO_trec(CurrentTSO);
1549   tvar = R1;
1550   new_value = R2;
1551   foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1552
1553   jump %ENTRY_CODE(Sp(0));
1554 }
1555
1556
1557 /* -----------------------------------------------------------------------------
1558  * MVar primitives
1559  *
1560  * take & putMVar work as follows.  Firstly, an important invariant:
1561  *
1562  *    If the MVar is full, then the blocking queue contains only
1563  *    threads blocked on putMVar, and if the MVar is empty then the
1564  *    blocking queue contains only threads blocked on takeMVar.
1565  *
1566  * takeMvar:
1567  *    MVar empty : then add ourselves to the blocking queue
1568  *    MVar full  : remove the value from the MVar, and
1569  *                 blocking queue empty     : return
1570  *                 blocking queue non-empty : perform the first blocked putMVar
1571  *                                            from the queue, and wake up the
1572  *                                            thread (MVar is now full again)
1573  *
1574  * putMVar is just the dual of the above algorithm.
1575  *
1576  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1577  * the stack of the thread waiting to do the putMVar.  See
1578  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1579  * the stack layout, and the PerformPut and PerformTake macros below.
1580  *
1581  * It is important that a blocked take or put is woken up with the
1582  * take/put already performed, because otherwise there would be a
1583  * small window of vulnerability where the thread could receive an
1584  * exception and never perform its take or put, and we'd end up with a
1585  * deadlock.
1586  *
1587  * -------------------------------------------------------------------------- */
1588
1589 isEmptyMVarzh_fast
1590 {
1591     /* args: R1 = MVar closure */
1592
1593     if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
1594         RET_N(1);
1595     } else {
1596         RET_N(0);
1597     }
1598 }
1599
1600 newMVarzh_fast
1601 {
1602     /* args: none */
1603     W_ mvar;
1604
1605     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1606   
1607     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1608     SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
1609         // MVARs start dirty: generation 0 has no mutable list
1610     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1611     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1612     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1613     RET_P(mvar);
1614 }
1615
1616
1617 #define PerformTake(tso, value)                         \
1618     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1619     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1620
1621 #define PerformPut(tso,lval)                    \
1622     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1623     lval = W_[StgTSO_sp(tso) - WDS(1)];
1624
1625 takeMVarzh_fast
1626 {
1627     W_ mvar, val, info, tso;
1628
1629     /* args: R1 = MVar closure */
1630     mvar = R1;
1631
1632 #if defined(THREADED_RTS)
1633     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1634 #else
1635     info = GET_INFO(mvar);
1636 #endif
1637         
1638     if (info == stg_MVAR_CLEAN_info) {
1639         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
1640     }
1641
1642     /* If the MVar is empty, put ourselves on its blocking queue,
1643      * and wait until we're woken up.
1644      */
1645     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1646         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1647             StgMVar_head(mvar) = CurrentTSO;
1648         } else {
1649             foreign "C" setTSOLink(MyCapability() "ptr", 
1650                                    StgMVar_tail(mvar) "ptr",
1651                                    CurrentTSO) [];
1652         }
1653         StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
1654         StgTSO_block_info(CurrentTSO)  = mvar;
1655         // write barrier for throwTo(), which looks at block_info
1656         // if why_blocked==BlockedOnMVar.
1657         prim %write_barrier() [];
1658         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1659         StgMVar_tail(mvar) = CurrentTSO;
1660         
1661         R1 = mvar;
1662         jump stg_block_takemvar;
1663   }
1664
1665   /* we got the value... */
1666   val = StgMVar_value(mvar);
1667
1668   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1669   {
1670       /* There are putMVar(s) waiting... 
1671        * wake up the first thread on the queue
1672        */
1673       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1674
1675       /* actually perform the putMVar for the thread that we just woke up */
1676       tso = StgMVar_head(mvar);
1677       PerformPut(tso,StgMVar_value(mvar));
1678
1679       if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
1680           foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
1681       }
1682
1683       ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
1684                                             StgMVar_head(mvar) "ptr", 1) [];
1685       StgMVar_head(mvar) = tso;
1686
1687       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1688           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1689       }
1690
1691 #if defined(THREADED_RTS)
1692       unlockClosure(mvar, stg_MVAR_DIRTY_info);
1693 #else
1694       SET_INFO(mvar,stg_MVAR_DIRTY_info);
1695 #endif
1696       RET_P(val);
1697   } 
1698   else
1699   {
1700       /* No further putMVars, MVar is now empty */
1701       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1702  
1703 #if defined(THREADED_RTS)
1704       unlockClosure(mvar, stg_MVAR_DIRTY_info);
1705 #else
1706       SET_INFO(mvar,stg_MVAR_DIRTY_info);
1707 #endif
1708
1709       RET_P(val);
1710   }
1711 }
1712
1713
1714 tryTakeMVarzh_fast
1715 {
1716     W_ mvar, val, info, tso;
1717
1718     /* args: R1 = MVar closure */
1719
1720     mvar = R1;
1721
1722 #if defined(THREADED_RTS)
1723     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1724 #else
1725     info = GET_INFO(mvar);
1726 #endif
1727
1728     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1729 #if defined(THREADED_RTS)
1730         unlockClosure(mvar, info);
1731 #endif
1732         /* HACK: we need a pointer to pass back, 
1733          * so we abuse NO_FINALIZER_closure
1734          */
1735         RET_NP(0, stg_NO_FINALIZER_closure);
1736     }
1737
1738     if (info == stg_MVAR_CLEAN_info) {
1739         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1740     }
1741
1742     /* we got the value... */
1743     val = StgMVar_value(mvar);
1744
1745     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1746
1747         /* There are putMVar(s) waiting... 
1748          * wake up the first thread on the queue
1749          */
1750         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1751
1752         /* actually perform the putMVar for the thread that we just woke up */
1753         tso = StgMVar_head(mvar);
1754         PerformPut(tso,StgMVar_value(mvar));
1755         if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
1756             foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
1757         }
1758
1759         ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
1760                                               StgMVar_head(mvar) "ptr", 1) [];
1761         StgMVar_head(mvar) = tso;
1762
1763         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1764             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1765         }
1766 #if defined(THREADED_RTS)
1767         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1768 #else
1769         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1770 #endif
1771     }
1772     else 
1773     {
1774         /* No further putMVars, MVar is now empty */
1775         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1776 #if defined(THREADED_RTS)
1777         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1778 #else
1779         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1780 #endif
1781     }
1782     
1783     RET_NP(1, val);
1784 }
1785
1786
1787 putMVarzh_fast
1788 {
1789     W_ mvar, val, info, tso;
1790
1791     /* args: R1 = MVar, R2 = value */
1792     mvar = R1;
1793     val  = R2;
1794
1795 #if defined(THREADED_RTS)
1796     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1797 #else
1798     info = GET_INFO(mvar);
1799 #endif
1800
1801     if (info == stg_MVAR_CLEAN_info) {
1802         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1803     }
1804
1805     if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
1806         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1807             StgMVar_head(mvar) = CurrentTSO;
1808         } else {
1809             foreign "C" setTSOLink(MyCapability() "ptr", 
1810                                    StgMVar_tail(mvar) "ptr",
1811                                    CurrentTSO) [];
1812         }
1813         StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
1814         StgTSO_block_info(CurrentTSO)  = mvar;
1815         // write barrier for throwTo(), which looks at block_info
1816         // if why_blocked==BlockedOnMVar.
1817         prim %write_barrier() [];
1818         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1819         StgMVar_tail(mvar) = CurrentTSO;
1820         
1821         R1 = mvar;
1822         R2 = val;
1823         jump stg_block_putmvar;
1824     }
1825   
1826     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1827
1828         /* There are takeMVar(s) waiting: wake up the first one
1829          */
1830         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1831
1832         /* actually perform the takeMVar */
1833         tso = StgMVar_head(mvar);
1834         PerformTake(tso, val);
1835         if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
1836             foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
1837         }
1838       
1839         ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
1840                                               StgMVar_head(mvar) "ptr", 1) [];
1841         StgMVar_head(mvar) = tso;
1842
1843         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1844             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1845         }
1846
1847 #if defined(THREADED_RTS)
1848         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1849 #else
1850         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1851 #endif
1852         jump %ENTRY_CODE(Sp(0));
1853     }
1854     else
1855     {
1856         /* No further takes, the MVar is now full. */
1857         StgMVar_value(mvar) = val;
1858
1859 #if defined(THREADED_RTS)
1860         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1861 #else
1862         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1863 #endif
1864         jump %ENTRY_CODE(Sp(0));
1865     }
1866     
1867     /* ToDo: yield afterward for better communication performance? */
1868 }
1869
1870
1871 tryPutMVarzh_fast
1872 {
1873     W_ mvar, info, tso;
1874
1875     /* args: R1 = MVar, R2 = value */
1876     mvar = R1;
1877
1878 #if defined(THREADED_RTS)
1879     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
1880 #else
1881     info = GET_INFO(mvar);
1882 #endif
1883
1884     if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
1885 #if defined(THREADED_RTS)
1886         unlockClosure(mvar, info);
1887 #endif
1888         RET_N(0);
1889     }
1890   
1891     if (info == stg_MVAR_CLEAN_info) {
1892         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1893     }
1894
1895     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1896
1897         /* There are takeMVar(s) waiting: wake up the first one
1898          */
1899         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1900         
1901         /* actually perform the takeMVar */
1902         tso = StgMVar_head(mvar);
1903         PerformTake(tso, R2);
1904         if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
1905             foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
1906         }
1907       
1908         ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
1909                                               StgMVar_head(mvar) "ptr", 1) [];
1910         StgMVar_head(mvar) = tso;
1911
1912         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1913             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1914         }
1915
1916 #if defined(THREADED_RTS)
1917         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1918 #else
1919         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1920 #endif
1921     }
1922     else
1923     {
1924         /* No further takes, the MVar is now full. */
1925         StgMVar_value(mvar) = R2;
1926
1927 #if defined(THREADED_RTS)
1928         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1929 #else
1930         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1931 #endif
1932     }
1933     
1934     RET_N(1);
1935     /* ToDo: yield afterward for better communication performance? */
1936 }
1937
1938
1939 /* -----------------------------------------------------------------------------
1940    Stable pointer primitives
1941    -------------------------------------------------------------------------  */
1942
1943 makeStableNamezh_fast
1944 {
1945     W_ index, sn_obj;
1946
1947     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1948   
1949     (index) = foreign "C" lookupStableName(R1 "ptr") [];
1950
1951     /* Is there already a StableName for this heap object?
1952      *  stable_ptr_table is a pointer to an array of snEntry structs.
1953      */
1954     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1955         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1956         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1957         StgStableName_sn(sn_obj) = index;
1958         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1959     } else {
1960         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1961     }
1962     
1963     RET_P(sn_obj);
1964 }
1965
1966
1967 makeStablePtrzh_fast
1968 {
1969     /* Args: R1 = a */
1970     W_ sp;
1971     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1972     ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
1973     RET_N(sp);
1974 }
1975
1976 deRefStablePtrzh_fast
1977 {
1978     /* Args: R1 = the stable ptr */
1979     W_ r, sp;
1980     sp = R1;
1981     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1982     RET_P(r);
1983 }
1984
1985 /* -----------------------------------------------------------------------------
1986    Bytecode object primitives
1987    -------------------------------------------------------------------------  */
1988
1989 newBCOzh_fast
1990 {
1991     /* R1 = instrs
1992        R2 = literals
1993        R3 = ptrs
1994        R4 = arity
1995        R5 = bitmap array
1996     */
1997     W_ bco, bitmap_arr, bytes, words;
1998     
1999     bitmap_arr = R5;
2000
2001     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
2002     bytes = WDS(words);
2003
2004     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );
2005
2006     bco = Hp - bytes + WDS(1);
2007     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
2008     
2009     StgBCO_instrs(bco)     = R1;
2010     StgBCO_literals(bco)   = R2;
2011     StgBCO_ptrs(bco)       = R3;
2012     StgBCO_arity(bco)      = HALF_W_(R4);
2013     StgBCO_size(bco)       = HALF_W_(words);
2014     
2015     // Copy the arity/bitmap info into the BCO
2016     W_ i;
2017     i = 0;
2018 for:
2019     if (i < StgArrWords_words(bitmap_arr)) {
2020         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
2021         i = i + 1;
2022         goto for;
2023     }
2024     
2025     RET_P(bco);
2026 }
2027
2028
2029 mkApUpd0zh_fast
2030 {
2031     // R1 = the BCO# for the AP
2032     //  
2033     W_ ap;
2034
2035     // This function is *only* used to wrap zero-arity BCOs in an
2036     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
2037     // saturated and always points directly to a FUN or BCO.
2038     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
2039            StgBCO_arity(R1) == HALF_W_(0));
2040
2041     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
2042     TICK_ALLOC_UP_THK(0, 0);
2043     CCCS_ALLOC(SIZEOF_StgAP);
2044
2045     ap = Hp - SIZEOF_StgAP + WDS(1);
2046     SET_HDR(ap, stg_AP_info, W_[CCCS]);
2047     
2048     StgAP_n_args(ap) = HALF_W_(0);
2049     StgAP_fun(ap) = R1;
2050     
2051     RET_P(ap);
2052 }
2053
2054 unpackClosurezh_fast
2055 {
2056 /* args: R1 = closure to analyze */
2057 // TODO: Consider the absence of ptrs or nonptrs as a special case ?
2058
2059     W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
2060     info  = %GET_STD_INFO(UNTAG(R1));
2061
2062     // Some closures have non-standard layout, so we omit those here.
2063     W_ type;
2064     type = TO_W_(%INFO_TYPE(info));
2065     switch [0 .. N_CLOSURE_TYPES] type {
2066     case THUNK_SELECTOR : {
2067         ptrs = 1;
2068         nptrs = 0;
2069         goto out;
2070     }
2071     case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
2072          THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
2073         ptrs = 0;
2074         nptrs = 0;
2075         goto out;
2076     }
2077     default: {
2078         ptrs  = TO_W_(%INFO_PTRS(info)); 
2079         nptrs = TO_W_(%INFO_NPTRS(info));
2080         goto out;
2081     }}
2082 out:
2083
2084     W_ ptrs_arr_sz, nptrs_arr_sz;
2085     nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
2086     ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);
2087
2088     ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);
2089
2090     W_ clos;
2091     clos = UNTAG(R1);
2092
2093     ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
2094     nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
2095
2096     SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
2097     StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
2098     p = 0;
2099 for:
2100     if(p < ptrs) {
2101          W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
2102          p = p + 1;
2103          goto for;
2104     }
2105     
2106     SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
2107     StgArrWords_words(nptrs_arr) = nptrs;
2108     p = 0;
2109 for2:
2110     if(p < nptrs) {
2111          W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
2112          p = p + 1;
2113          goto for2;
2114     }
2115     RET_NPP(info, ptrs_arr, nptrs_arr);
2116 }
2117
2118 /* -----------------------------------------------------------------------------
2119    Thread I/O blocking primitives
2120    -------------------------------------------------------------------------- */
2121
2122 /* Add a thread to the end of the blocked queue. (C-- version of the C
2123  * macro in Schedule.h).
2124  */
2125 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
2126     ASSERT(StgTSO__link(tso) == END_TSO_QUEUE);         \
2127     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
2128       W_[blocked_queue_hd] = tso;                       \
2129     } else {                                            \
2130       foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \
2131     }                                                   \
2132     W_[blocked_queue_tl] = tso;
2133
2134 waitReadzh_fast
2135 {
2136     /* args: R1 */
2137 #ifdef THREADED_RTS
2138     foreign "C" barf("waitRead# on threaded RTS") never returns;
2139 #else
2140
2141     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2142     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
2143     StgTSO_block_info(CurrentTSO) = R1;
2144     // No locking - we're not going to use this interface in the
2145     // threaded RTS anyway.
2146     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2147     jump stg_block_noregs;
2148 #endif
2149 }
2150
2151 waitWritezh_fast
2152 {
2153     /* args: R1 */
2154 #ifdef THREADED_RTS
2155     foreign "C" barf("waitWrite# on threaded RTS") never returns;
2156 #else
2157
2158     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2159     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2160     StgTSO_block_info(CurrentTSO) = R1;
2161     // No locking - we're not going to use this interface in the
2162     // threaded RTS anyway.
2163     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2164     jump stg_block_noregs;
2165 #endif
2166 }
2167
2168
2169 STRING(stg_delayzh_malloc_str, "delayzh_fast")
2170 delayzh_fast
2171 {
2172 #ifdef mingw32_HOST_OS
2173     W_ ares;
2174     CInt reqID;
2175 #else
2176     W_ t, prev, target;
2177 #endif
2178
2179 #ifdef THREADED_RTS
2180     foreign "C" barf("delay# on threaded RTS") never returns;
2181 #else
2182
2183     /* args: R1 (microsecond delay amount) */
2184     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2185     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
2186
2187 #ifdef mingw32_HOST_OS
2188
2189     /* could probably allocate this on the heap instead */
2190     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2191                                             stg_delayzh_malloc_str);
2192     (reqID) = foreign "C" addDelayRequest(R1);
2193     StgAsyncIOResult_reqID(ares)   = reqID;
2194     StgAsyncIOResult_len(ares)     = 0;
2195     StgAsyncIOResult_errCode(ares) = 0;
2196     StgTSO_block_info(CurrentTSO)  = ares;
2197
2198     /* Having all async-blocked threads reside on the blocked_queue
2199      * simplifies matters, so change the status to OnDoProc put the
2200      * delayed thread on the blocked_queue.
2201      */
2202     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2203     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2204     jump stg_block_async_void;
2205
2206 #else
2207
2208     W_ time;
2209     W_ divisor;
2210     (time) = foreign "C" getourtimeofday() [R1];
2211     divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
2212     if (divisor == 0) {
2213         divisor = 50;
2214     }
2215     divisor = divisor * 1000;
2216     target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
2217            + time + 1; /* Add 1 as getourtimeofday rounds down */
2218     StgTSO_block_info(CurrentTSO) = target;
2219
2220     /* Insert the new thread in the sleeping queue. */
2221     prev = NULL;
2222     t = W_[sleeping_queue];
2223 while:
2224     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
2225         prev = t;
2226         t = StgTSO__link(t);
2227         goto while;
2228     }
2229
2230     StgTSO__link(CurrentTSO) = t;
2231     if (prev == NULL) {
2232         W_[sleeping_queue] = CurrentTSO;
2233     } else {
2234         foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) [];
2235     }
2236     jump stg_block_noregs;
2237 #endif
2238 #endif /* !THREADED_RTS */
2239 }
2240
2241
2242 #ifdef mingw32_HOST_OS
2243 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
2244 asyncReadzh_fast
2245 {
2246     W_ ares;
2247     CInt reqID;
2248
2249 #ifdef THREADED_RTS
2250     foreign "C" barf("asyncRead# on threaded RTS") never returns;
2251 #else
2252
2253     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2254     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2255     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
2256
2257     /* could probably allocate this on the heap instead */
2258     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2259                                             stg_asyncReadzh_malloc_str)
2260                         [R1,R2,R3,R4];
2261     (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
2262     StgAsyncIOResult_reqID(ares)   = reqID;
2263     StgAsyncIOResult_len(ares)     = 0;
2264     StgAsyncIOResult_errCode(ares) = 0;
2265     StgTSO_block_info(CurrentTSO)  = ares;
2266     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2267     jump stg_block_async;
2268 #endif
2269 }
2270
2271 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
2272 asyncWritezh_fast
2273 {
2274     W_ ares;
2275     CInt reqID;
2276
2277 #ifdef THREADED_RTS
2278     foreign "C" barf("asyncWrite# on threaded RTS") never returns;
2279 #else
2280
2281     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2282     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2283     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2284
2285     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2286                                             stg_asyncWritezh_malloc_str)
2287                         [R1,R2,R3,R4];
2288     (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
2289
2290     StgAsyncIOResult_reqID(ares)   = reqID;
2291     StgAsyncIOResult_len(ares)     = 0;
2292     StgAsyncIOResult_errCode(ares) = 0;
2293     StgTSO_block_info(CurrentTSO)  = ares;
2294     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2295     jump stg_block_async;
2296 #endif
2297 }
2298
2299 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
2300 asyncDoProczh_fast
2301 {
2302     W_ ares;
2303     CInt reqID;
2304
2305 #ifdef THREADED_RTS
2306     foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
2307 #else
2308
2309     /* args: R1 = proc, R2 = param */
2310     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2311     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2312
2313     /* could probably allocate this on the heap instead */
2314     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2315                                             stg_asyncDoProczh_malloc_str) 
2316                                 [R1,R2];
2317     (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
2318     StgAsyncIOResult_reqID(ares)   = reqID;
2319     StgAsyncIOResult_len(ares)     = 0;
2320     StgAsyncIOResult_errCode(ares) = 0;
2321     StgTSO_block_info(CurrentTSO) = ares;
2322     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2323     jump stg_block_async;
2324 #endif
2325 }
2326 #endif
2327
2328 // noDuplicate# tries to ensure that none of the thunks under
2329 // evaluation by the current thread are also under evaluation by
2330 // another thread.  It relies on *both* threads doing noDuplicate#;
2331 // the second one will get blocked if they are duplicating some work.
2332 noDuplicatezh_fast
2333 {
2334     SAVE_THREAD_STATE();
2335     ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
2336     foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
2337     
2338     if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
2339         jump stg_threadFinished;
2340     } else {
2341         LOAD_THREAD_STATE();
2342         ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
2343         jump %ENTRY_CODE(Sp(0));
2344     }
2345 }
2346
2347 getApStackValzh_fast
2348 {
2349    W_ ap_stack, offset, val, ok;
2350
2351    /* args: R1 = AP_STACK, R2 = offset */
2352    ap_stack = R1;
2353    offset   = R2;
2354
2355    if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
2356         ok = 1;
2357         val = StgAP_STACK_payload(ap_stack,offset); 
2358    } else {
2359         ok = 0;
2360         val = R1;
2361    }
2362    RET_NP(ok,val);
2363 }
2364
2365 /* -----------------------------------------------------------------------------
2366    Misc. primitives
2367    -------------------------------------------------------------------------- */
2368
2369 // Write the cost center stack of the first argument on stderr; return
2370 // the second.  Possibly only makes sense for already evaluated
2371 // things?
2372 traceCcszh_fast
2373 {
2374     W_ ccs;
2375
2376 #ifdef PROFILING
2377     ccs = StgHeader_ccs(UNTAG(R1));
2378     foreign "C" fprintCCS_stderr(ccs "ptr") [R2];
2379 #endif
2380
2381     R1 = R2;
2382     ENTER();
2383 }
2384
2385 getSparkzh_fast
2386 {
2387    W_ spark;
2388
2389 #ifndef THREADED_RTS
2390    RET_NP(0,ghczmprim_GHCziBool_False_closure);
2391 #else
2392    (spark) = foreign "C" findSpark(MyCapability());
2393    if (spark != 0) {
2394       RET_NP(1,spark);
2395    } else {
2396       RET_NP(0,ghczmprim_GHCziBool_False_closure);
2397    }
2398 #endif
2399 }