fix cut-and-pasto in mkWeakForeignEnv#, causing random segfaults
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 #ifdef __PIC__
31 #ifndef mingw32_HOST_OS
32 import __gmpz_init;
33 import __gmpz_add;
34 import __gmpz_sub;
35 import __gmpz_mul;
36 import __gmpz_gcd;
37 import __gmpn_gcd_1;
38 import __gmpn_cmp;
39 import __gmpz_tdiv_q;
40 import __gmpz_tdiv_r;
41 import __gmpz_tdiv_qr;
42 import __gmpz_fdiv_qr;
43 import __gmpz_divexact;
44 import __gmpz_and;
45 import __gmpz_xor;
46 import __gmpz_ior;
47 import __gmpz_com;
48 #endif
49 import pthread_mutex_lock;
50 import pthread_mutex_unlock;
51 #endif
52 import base_ControlziExceptionziBase_nestedAtomically_closure;
53 import EnterCriticalSection;
54 import LeaveCriticalSection;
55 import ghczmprim_GHCziBool_False_closure;
56
57 /*-----------------------------------------------------------------------------
58   Array Primitives
59
60   Basically just new*Array - the others are all inline macros.
61
62   The size arg is always passed in R1, and the result returned in R1.
63
64   The slow entry point is for returning from a heap check, the saved
65   size argument must be re-loaded from the stack.
66   -------------------------------------------------------------------------- */
67
68 /* for objects that are *less* than the size of a word, make sure we
69  * round up to the nearest word for the size of the array.
70  */
71
/* newByteArray#: allocate a mutable byte array.
 * Entry: R1 = requested size in bytes.
 * Returns the new ARR_WORDS closure (payload rounded up to whole words).
 */
72 newByteArrayzh_fast
73 {
74     W_ words, payload_words, n, p;
       /* No pointer args to protect across a potential GC. */
75     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
76     n = R1;
       /* Round the byte count up to whole words (see comment above). */
77     payload_words = ROUNDUP_BYTES_TO_WDS(n);
78     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
79     ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
80     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
81     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
82     StgArrWords_words(p) = payload_words;
83     RET_P(p);
84 }
85
86 #define BA_ALIGN 16
87 #define BA_MASK  (BA_ALIGN-1)
88
/* newPinnedByteArray#: allocate a byte array that the GC will not move,
 * aligned to BA_ALIGN bytes.
 * Entry: R1 = requested size in bytes.
 * Returns the new ARR_WORDS closure, placed so that its payload
 * (not its header) is BA_ALIGN-aligned.
 */
89 newPinnedByteArrayzh_fast
90 {
91     W_ words, bytes, payload_words, p;
92
93     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
94     bytes = R1;
95     /* payload_words is what we will tell the profiler we had to allocate */
96     payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
97     /* When we actually allocate memory, we need to allow space for the
98        header: */
99     bytes = bytes + SIZEOF_StgArrWords;
100     /* And we want to align to BA_ALIGN bytes, so we need to allow space
101        to shift up to BA_ALIGN - 1 bytes: */
102     bytes = bytes + BA_ALIGN - 1;
103     /* Now we convert to a number of words: */
104     words = ROUNDUP_BYTES_TO_WDS(bytes);
105
106     ("ptr" p) = foreign "C" allocatePinned(words) [];
107     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
108
109     /* Now we need to move p forward so that the payload is aligned
110        to BA_ALIGN bytes: */
111     p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);
112
        /* The header is written at the (shifted) final address. */
113     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
114     StgArrWords_words(p) = payload_words;
115     RET_P(p);
116 }
117
/* newAlignedPinnedByteArray#: as newPinnedByteArray#, but with a
 * caller-supplied alignment instead of the fixed BA_ALIGN.
 * Entry: R1 = requested size in bytes, R2 = alignment in bytes
 * (assumed to be a power of 2 -- see the note below).
 */
118 newAlignedPinnedByteArrayzh_fast
119 {
120     W_ words, bytes, payload_words, p, alignment;
121
122     MAYBE_GC(NO_PTRS,newAlignedPinnedByteArrayzh_fast);
123     bytes = R1;
124     alignment = R2;
125
126     /* payload_words is what we will tell the profiler we had to allocate */
127     payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
128
129     /* When we actually allocate memory, we need to allow space for the
130        header: */
131     bytes = bytes + SIZEOF_StgArrWords;
132     /* And we want to align to <alignment> bytes, so we need to allow space
133        to shift up to <alignment - 1> bytes: */
134     bytes = bytes + alignment - 1;
135     /* Now we convert to a number of words: */
136     words = ROUNDUP_BYTES_TO_WDS(bytes);
137
138     ("ptr" p) = foreign "C" allocatePinned(words) [];
139     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
140
141     /* Now we need to move p forward so that the payload is aligned
142        to <alignment> bytes. Note that we are assuming that
143        <alignment> is a power of 2, which is technically not guaranteed */
144     p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));
145
146     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
147     StgArrWords_words(p) = payload_words;
148     RET_P(p);
149 }
150
/* newArray#: allocate a mutable boxed array, every slot set to the
 * initialisation value.
 * Entry: R1 = number of elements, R2 = initialisation value (a pointer,
 * so it is kept live across the GC check and the foreign call).
 */
151 newArrayzh_fast
152 {
153     W_ words, n, init, arr, p;
154     /* Args: R1 = words, R2 = initialisation value */
155
156     n = R1;
157     MAYBE_GC(R2_PTR,newArrayzh_fast);
158
159     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
160     ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
161     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
162
        /* New mutable arrays are born DIRTY (already on the mut list). */
163     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
164     StgMutArrPtrs_ptrs(arr) = n;
165
166     // Initialise all elements of the array with the value in R2
167     init = R2;
168     p = arr + SIZEOF_StgMutArrPtrs;
169   for:
170     if (p < arr + WDS(words)) {
171         W_[p] = init;
172         p = p + WDS(1);
173         goto for;
174     }
175
176     RET_P(arr);
177 }
178
/* unsafeThawArray#: turn a frozen array back into a mutable one in
 * place.  Entry: R1 = the MUT_ARR_PTRS_FROZEN(0) closure.  The long
 * comment below explains the mut-list bookkeeping.
 */
179 unsafeThawArrayzh_fast
180 {
181   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
182   //
183   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
184   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
185   // it on the mutable list for the GC to remove (removing something from
186   // the mutable list is not easy, because the mut_list is only singly-linked).
187   // 
188   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
189   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
190   // to indicate that it is still on the mutable list.
191   //
192   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
193   // either it is on a mut_list, or it isn't.  We adopt the convention that
194   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
195   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
196   // we put it on the mutable list more than once, but it would get scavenged
197   // multiple times during GC, which would be unnecessarily slow.
198   //
199   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        // Not on the mut list: record it now that it is mutable again.
200         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
201         recordMutable(R1, R1);
202         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
203         RET_P(R1);
204   } else {
        // Already on the mut list: just flip the info pointer back.
205         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
206         RET_P(R1);
207   }
208 }
209
210 /* -----------------------------------------------------------------------------
211    MutVar primitives
212    -------------------------------------------------------------------------- */
213
/* newMutVar#: allocate a fresh MutVar# holding the given value.
 * Entry: R1 = initial contents (a pointer, protected across the
 * heap check).  Returns the new MUT_VAR_DIRTY closure.
 */
214 newMutVarzh_fast
215 {
216     W_ mv;
217     /* Args: R1 = initialisation value */
218
219     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
220
        /* Carve the MutVar out of the space just reserved below Hp. */
221     mv = Hp - SIZEOF_StgMutVar + WDS(1);
222     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
223     StgMutVar_var(mv) = R1;
224     
225     RET_P(mv);
226 }
227
/* atomicModifyMutVar#: atomically apply a function to the contents of
 * a MutVar#, lazily.  Allocates three thunks (see the scheme below)
 * and then swings the MutVar to the new one with a CAS loop in the
 * threaded RTS.  Entry: R1 = MutVar#, R2 = the function.
 */
228 atomicModifyMutVarzh_fast
229 {
230     W_ mv, f, z, x, y, r, h;
231     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
232
233     /* If x is the current contents of the MutVar#, then 
234        We want to make the new contents point to
235
236          (sel_0 (f x))
237  
238        and the return value is
239          
240          (sel_1 (f x))
241
242         obviously we can share (f x).
243
244          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
245          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
246          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
247     */
248
249 #if MIN_UPD_SIZE > 1
250 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
251 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
252 #else
253 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
254 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
255 #endif
256
257 #if MIN_UPD_SIZE > 2
258 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
259 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
260 #else
261 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
262 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
263 #endif
264
265 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
266
    /* One heap check covers all three thunks allocated below. */
267    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
268
269    mv = R1;
270    f = R2;
271
    /* z = [stg_ap_2 f x]; payload slot 1 (x) is filled in at retry:. */
272    TICK_ALLOC_THUNK_2();
273    CCCS_ALLOC(THUNK_2_SIZE);
274    z = Hp - THUNK_2_SIZE + WDS(1);
275    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
276    LDV_RECORD_CREATE(z);
277    StgThunk_payload(z,0) = f;
278
    /* y = [stg_sel_0 z] -- the new contents of the MutVar. */
279    TICK_ALLOC_THUNK_1();
280    CCCS_ALLOC(THUNK_1_SIZE);
281    y = z - THUNK_1_SIZE;
282    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
283    LDV_RECORD_CREATE(y);
284    StgThunk_payload(y,0) = z;
285
    /* r = [stg_sel_1 z] -- the value returned to the caller. */
286    TICK_ALLOC_THUNK_1();
287    CCCS_ALLOC(THUNK_1_SIZE);
288    r = y - THUNK_1_SIZE;
289    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
290    LDV_RECORD_CREATE(r);
291    StgThunk_payload(r,0) = z;
292
    /* CAS loop: re-read the current contents and retry if another
       thread got in between the read and the cas. */
293  retry:
294    x = StgMutVar_var(mv);
295    StgThunk_payload(z,1) = x;
296 #ifdef THREADED_RTS
297    (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) [];
298    if (h != x) { goto retry; }
299 #else
300    StgMutVar_var(mv) = y;
301 #endif
302
    /* Mark the MutVar dirty (put it on the mut list) if it was clean. */
303    if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
304      foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
305    }
306
307    RET_P(r);
308 }
309
310 /* -----------------------------------------------------------------------------
311    Weak Pointer Primitives
312    -------------------------------------------------------------------------- */
313
314 STRING(stg_weak_msg,"New weak pointer at %p\n")
315
/* mkWeak#: make a weak pointer with an (optional) Haskell finalizer.
 * Entry: R1 = key, R2 = value, R3 = finalizer closure or NULL.
 * The new StgWeak is linked onto the global weak_ptr_list.
 */
316 mkWeakzh_fast
317 {
318   /* R1 = key
319      R2 = value
320      R3 = finalizer (or NULL)
321   */
322   W_ w;
323
324   if (R3 == NULL) {
325     R3 = stg_NO_FINALIZER_closure;
326   }
327
328   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
329
330   w = Hp - SIZEOF_StgWeak + WDS(1);
331   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
332
333   // We don't care about cfinalizer here.
334   // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
335   // something else?
336
337   StgWeak_key(w)        = R1;
338   StgWeak_value(w)      = R2;
339   StgWeak_finalizer(w)  = R3;
340   StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure;
341
    // Push the new weak onto the front of the global list.
342   StgWeak_link(w)       = W_[weak_ptr_list];
343   W_[weak_ptr_list]     = w;
344
345   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
346
347   RET_P(w);
348 }
349
/* mkWeakForeignEnv#: make a weak pointer whose finalizer is a C
 * function.  The C function pointer, its argument, its environment
 * pointer, and the has-env flag are stashed in a 4-word ARR_WORDS
 * hung off StgWeak_cfinalizer; finalizzeWeakzh_fast below unpacks
 * them again for runCFinalizer.
 */
350 mkWeakForeignEnvzh_fast
351 {
352   /* R1 = key
353      R2 = value
354      R3 = finalizer
355      R4 = pointer
356      R5 = has environment (0 or 1)
357      R6 = environment
358   */
359   W_ w, payload_words, words, p;
360
361   W_ key, val, fptr, ptr, flag, eptr;
362
    // All six args are copied into locals up front: the heap check
    // below only names R1/R2 as pointers, and the foreign call to
    // allocateLocal comes before the StgWeak fields are written.
363   key  = R1;
364   val  = R2;
365   fptr = R3;
366   ptr  = R4;
367   flag = R5;
368   eptr = R6;
369
370   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, mkWeakForeignEnvzh_fast );
371
372   w = Hp - SIZEOF_StgWeak + WDS(1);
373   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
374
    // Allocate the 4-word ARR_WORDS that carries the C finalizer data.
375   payload_words = 4;
376   words         = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
377   ("ptr" p)     = foreign "C" allocateLocal(MyCapability() "ptr", words) [];
378
379   TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
380   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
381
382   StgArrWords_words(p)     = payload_words;
383   StgArrWords_payload(p,0) = fptr;
384   StgArrWords_payload(p,1) = ptr;
385   StgArrWords_payload(p,2) = eptr;
386   StgArrWords_payload(p,3) = flag;
387
388   // We don't care about the value here.
389   // Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else?
390
391   StgWeak_key(w)        = key;
392   StgWeak_value(w)      = val;
393   StgWeak_finalizer(w)  = stg_NO_FINALIZER_closure;
394   StgWeak_cfinalizer(w) = p;
395
    // Push the new weak onto the front of the global list.
396   StgWeak_link(w)   = W_[weak_ptr_list];
397   W_[weak_ptr_list] = w;
398
399   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
400
401   RET_P(w);
402 }
403
/* finalizeWeak#: kill a weak pointer and return its Haskell finalizer.
 * Entry: R1 = the weak ptr.  Runs any C finalizer immediately via
 * runCFinalizer; returns (# 0, noFinalizer #) if already dead or no
 * Haskell finalizer, else (# 1, finalizer #) for the caller to run.
 */
404 finalizzeWeakzh_fast
405 {
406   /* R1 = weak ptr
407    */
408   W_ w, f, arr;
409
410   w = R1;
411
412   // already dead?
413   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
414       RET_NP(0,stg_NO_FINALIZER_closure);
415   }
416
417   // kill it
418 #ifdef PROFILING
419   // @LDV profiling
420   // A weak pointer is inherently used, so we do not need to call
421   // LDV_recordDead_FILL_SLOP_DYNAMIC():
422   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
423   // or, LDV_recordDead():
424   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
425   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
426   // large as weak pointers, so there is no need to fill the slop, either.
427   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
428 #endif
429
430   //
431   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
432   //
433   SET_INFO(w,stg_DEAD_WEAK_info);
434   LDV_RECORD_CREATE(w);
435
    // Grab the finalizer fields before overwriting the payload below.
436   f   = StgWeak_finalizer(w);
437   arr = StgWeak_cfinalizer(w);
438
439   StgDeadWeak_link(w) = StgWeak_link(w);
440
    // Run the C finalizer (if any) right now; its four words were
    // packed by mkWeakForeignEnvzh_fast above.
441   if (arr != stg_NO_FINALIZER_closure) {
442     foreign "C" runCFinalizer(StgArrWords_payload(arr,0),
443                               StgArrWords_payload(arr,1),
444                               StgArrWords_payload(arr,2),
445                               StgArrWords_payload(arr,3)) [];
446   }
447
448   /* return the finalizer */
449   if (f == stg_NO_FINALIZER_closure) {
450       RET_NP(0,stg_NO_FINALIZER_closure);
451   } else {
452       RET_NP(1,f);
453   }
454 }
455
/* deRefWeak#: look through a weak pointer.
 * Entry: R1 = weak ptr.  Returns (# 1, value #) while the weak is
 * still alive (info ptr is stg_WEAK_info), otherwise (# 0, w #).
 */
456 deRefWeakzh_fast
457 {
458   /* R1 = weak ptr */
459   W_ w, code, val;
460
461   w = R1;
462   if (GET_INFO(w) == stg_WEAK_info) {
463     code = 1;
464     val = StgWeak_value(w);
465   } else {
466     code = 0;
467     val = w;
468   }
469   RET_NP(code,val);
470 }
471
472 /* -----------------------------------------------------------------------------
473    Arbitrary-precision Integer operations.
474
475    There are some assumptions in this code that mp_limb_t == W_.  This is
476    the case for all the platforms that GHC supports, currently.
477    -------------------------------------------------------------------------- */
478
/* int2Integer#: convert an Int# to a (sign, limbs) Integer pair.
 * Entry: R1 = the Int#.  Returns size (-1/0/1) and a 1-word
 * ByteArray# holding the magnitude.
 */
479 int2Integerzh_fast
480 {
481    /* arguments: R1 = Int# */
482
483    W_ val, s, p;        /* to avoid aliasing */
484
485    val = R1;
486    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
487
488    p = Hp - SIZEOF_StgArrWords;
489    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
490    StgArrWords_words(p) = 1;
491
492    /* mpz_set_si is inlined here, makes things simpler */
493    if (%lt(val,0)) { 
494         s  = -1;
495         Hp(0) = -val;
496    } else { 
497      if (%gt(val,0)) {
498         s = 1;
499         Hp(0) = val;
500      } else {
            /* val == 0: size 0; the limb word is left uninitialised. */
501         s = 0;
502      }
503   }
504
505    /* returns (# size  :: Int#, 
506                  data  :: ByteArray# 
507                #)
508    */
509    RET_NP(s,p);
510 }
511
/* word2Integer#: convert a Word# to a (sign, limbs) Integer pair.
 * Entry: R1 = the Word#.  Like int2Integerzh_fast but the size is
 * never negative (0 or 1).
 */
512 word2Integerzh_fast
513 {
514    /* arguments: R1 = Word# */
515
516    W_ val, s, p;        /* to avoid aliasing */
517
518    val = R1;
519
520    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
521
522    p = Hp - SIZEOF_StgArrWords;
523    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
524    StgArrWords_words(p) = 1;
525
526    if (val != 0) {
527         s = 1;
528         W_[Hp] = val;
529    } else {
530         s = 0;
531    }
532
533    /* returns (# size  :: Int#, 
534                  data  :: ByteArray# #)
535    */
536    RET_NP(s,p);
537 }
538
539
540 /*
541  * 'long long' primops for converting to/from Integers.
542  */
543
544 #ifdef SUPPORT_LONG_LONGS
545
/* int64ToInteger#: convert an Int64# to an Integer.
 * Entry: L1 = the Int64#.  Splits the value into two words via a
 * 32-bit shift, so this code presumably only runs where W_ is 32 bits
 * (it is guarded by SUPPORT_LONG_LONGS).  Returns a signed size
 * (negated for negative inputs) and a 1- or 2-word magnitude array.
 */
546 int64ToIntegerzh_fast
547 {
548    /* arguments: L1 = Int64# */
549
550    L_ val;
551    W_ hi, lo, s, neg, words_needed, p;
552
553    val = L1;
554    neg = 0;
555
556    hi = TO_W_(val >> 32);
557    lo = TO_W_(val);
558
    /* One word is enough when the top half is just sign extension. */
559    if ( hi == 0 || (hi == 0xFFFFFFFF && lo != 0) )  {
560        // minimum is one word
561        words_needed = 1;
562    } else { 
563        words_needed = 2;
564    }
565
566    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
567                NO_PTRS, int64ToIntegerzh_fast );
568
569    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
570    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
571    StgArrWords_words(p) = words_needed;
572
    /* Take the two's-complement magnitude of a negative value. */
573    if ( %lt(hi,0) ) {
574      neg = 1;
575      lo = -lo;
576      if(lo == 0) {
577        hi = -hi;
578      } else {
579        hi = -hi - 1;
580      }
581    }
582
583    if ( words_needed == 2 )  { 
584       s = 2;
585       Hp(-1) = lo;
586       Hp(0) = hi;
587    } else { 
588        if ( lo != 0 ) {
589            s = 1;
590            Hp(0) = lo;
591        } else /* val==0 */  {
592            s = 0;
593        }
594    }
595    if ( neg != 0 ) {
596         s = -s;
597    }
598
599    /* returns (# size  :: Int#, 
600                  data  :: ByteArray# #)
601    */
602    RET_NP(s,p);
603 }
/* word64ToInteger#: convert a Word64# to an Integer.
 * Entry: L1 = the Word64#.  Unsigned variant of int64ToIntegerzh_fast:
 * the returned size is never negative (0, 1 or 2 limbs).
 */
604 word64ToIntegerzh_fast
605 {
606    /* arguments: L1 = Word64# */
607
608    L_ val;
609    W_ hi, lo, s, words_needed, p;
610
611    val = L1;
612    hi = TO_W_(val >> 32);
613    lo = TO_W_(val);
614
615    if ( hi != 0 ) {
616       words_needed = 2;
617    } else {
618       words_needed = 1;
619    }
620
621    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
622                NO_PTRS, word64ToIntegerzh_fast );
623
624    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
625    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
626    StgArrWords_words(p) = words_needed;
627
628    if ( hi != 0 ) { 
629      s = 2;
630      Hp(-1) = lo;
631      Hp(0)  = hi;
632    } else {
633       if ( lo != 0 ) {
634         s = 1;
635         Hp(0) = lo;
636      } else /* val==0 */  {
637       s = 0;
638      }
639   }
640
641    /* returns (# size  :: Int#, 
642                  data  :: ByteArray# #)
643    */
644    RET_NP(s,p);
645 }
646
647
648
649 #endif /* SUPPORT_LONG_LONGS */
650
651 /* ToDo: this is shockingly inefficient */
652
/* Statically-allocated MP_INT temporaries used by the GMP wrapper
 * macros below.  Only the non-threaded RTS uses these globals; the
 * threaded RTS keeps per-capability copies instead (see
 * FETCH_MP_TEMP below). */
653 #ifndef THREADED_RTS
654 section "bss" {
655   mp_tmp1:
656     bits8 [SIZEOF_MP_INT];
657 }
658
659 section "bss" {
660   mp_tmp2:
661     bits8 [SIZEOF_MP_INT];
662 }
663
664 section "bss" {
665   mp_result1:
666     bits8 [SIZEOF_MP_INT];
667 }
668
669 section "bss" {
670   mp_result2:
671     bits8 [SIZEOF_MP_INT];
672 }
673 #endif
674
/* FETCH_MP_TEMP(X): in the threaded RTS, declare X and point it at
 * the per-capability slot in the StgRegTable (BaseReg + OFFSET_...).
 * In the non-threaded RTS it expands to nothing, so X refers to the
 * static bss temporary of the same name declared above. */
675 #ifdef THREADED_RTS
676 #define FETCH_MP_TEMP(X) \
677 W_ X; \
678 X = BaseReg + (OFFSET_StgRegTable_r ## X);
679 #else
680 #define FETCH_MP_TEMP(X) /* Nothing */
681 #endif
682
/* GMP_TAKE2_RET1(name, mp_fun): define a primop `name` that wraps a
 * two-argument GMP function returning one result.
 * Entry: R1/R2 = size/limbs of the first Integer, R3/R4 = size/limbs
 * of the second.  Marshals both into MP_INT temporaries, calls
 * mp_fun(result, a, b), and returns the result's size and limb array
 * (rebased to the enclosing ARR_WORDS closure). */
683 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
684 name                                                                    \
685 {                                                                       \
686   CInt s1, s2;                                                          \
687   W_ d1, d2;                                                            \
688   FETCH_MP_TEMP(mp_tmp1);                                               \
689   FETCH_MP_TEMP(mp_tmp2);                                               \
690   FETCH_MP_TEMP(mp_result1)                                             \
691   FETCH_MP_TEMP(mp_result2);                                            \
692                                                                         \
693   /* call doYouWantToGC() */                                            \
694   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
695                                                                         \
696   s1 = W_TO_INT(R1);                                                    \
697   d1 = R2;                                                              \
698   s2 = W_TO_INT(R3);                                                    \
699   d2 = R4;                                                              \
700                                                                         \
701   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
702   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
703   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
704   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
705   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
706   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
707                                                                         \
708   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
709                                                                         \
710   /* Perform the operation */                                           \
711   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
712                                                                         \
713   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
714          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
715 }
716
/* GMP_TAKE1_RET1(name, mp_fun): as GMP_TAKE2_RET1 but for a
 * one-argument GMP function (e.g. complement).
 * Entry: R1/R2 = size/limbs of the Integer argument. */
717 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
718 name                                                                    \
719 {                                                                       \
720   CInt s1;                                                              \
721   W_ d1;                                                                \
722   FETCH_MP_TEMP(mp_tmp1);                                               \
723   FETCH_MP_TEMP(mp_result1)                                             \
724                                                                         \
725   /* call doYouWantToGC() */                                            \
726   MAYBE_GC(R2_PTR, name);                                               \
727                                                                         \
728   d1 = R2;                                                              \
729   s1 = W_TO_INT(R1);                                                    \
730                                                                         \
731   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
732   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
733   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
734                                                                         \
735   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
736                                                                         \
737   /* Perform the operation */                                           \
738   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
739                                                                         \
740   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
741          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
742 }
743
/* GMP_TAKE2_RET2(name, mp_fun): as GMP_TAKE2_RET1 but for a GMP
 * function producing two results (e.g. quotient and remainder via
 * mpz_tdiv_qr).  Returns both results as (size, limbs) pairs. */
744 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
745 name                                                                                    \
746 {                                                                                       \
747   CInt s1, s2;                                                                          \
748   W_ d1, d2;                                                                            \
749   FETCH_MP_TEMP(mp_tmp1);                                                               \
750   FETCH_MP_TEMP(mp_tmp2);                                                               \
751   FETCH_MP_TEMP(mp_result1)                                                             \
752   FETCH_MP_TEMP(mp_result2)                                                             \
753                                                                                         \
754   /* call doYouWantToGC() */                                                            \
755   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
756                                                                                         \
757   s1 = W_TO_INT(R1);                                                                    \
758   d1 = R2;                                                                              \
759   s2 = W_TO_INT(R3);                                                                    \
760   d2 = R4;                                                                              \
761                                                                                         \
762   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
763   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
764   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
765   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
766   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
767   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
768                                                                                         \
769   foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
770   foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
771                                                                                         \
772   /* Perform the operation */                                                           \
773   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
774                                                                                         \
775   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
776            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
777            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
778            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
779 }
780
/* Instantiate the wrapper macros above for each Integer primop and
 * its corresponding GMP function. */
781 GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
782 GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
783 GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
784 GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
785 GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
786 GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
787 GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
788 GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
789 GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
790 GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
791 GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)
792
793 GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
794 GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
795
796 #ifndef THREADED_RTS
797 section "bss" {
798   mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
799 }
800 #endif
801
/* gcdInt#: gcd of two Int#s via GMP's low-level __gmpn_gcd_1,
 * treating the first as a 1-limb number stored in mp_tmp_w.
 * Entry: R1, R2 = the two Int#s; result returned in R1. */
802 gcdIntzh_fast
803 {
804     /* R1 = the first Int#; R2 = the second Int# */
805     W_ r; 
806     FETCH_MP_TEMP(mp_tmp_w);
807
808     W_[mp_tmp_w] = R1;
809     (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
810
811     R1 = r;
812     /* Result parked in R1, return via info-pointer at TOS */
813     jump %ENTRY_CODE(Sp(0));
814 }
815
816
/* gcdIntegerInt#: gcd of an Integer (limbs in R2, size R1) with an
 * Int# (R3), via __gmpn_gcd_1 over the Integer's limb array. */
817 gcdIntegerIntzh_fast
818 {
819     /* R1 = s1; R2 = d1; R3 = the int */
820     W_ s1;
821     (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
822     R1 = s1;
823     
824     /* Result parked in R1, return via info-pointer at TOS */
825     jump %ENTRY_CODE(Sp(0));
826 }
827
828
/* cmpIntegerInt#: compare an Integer (size R1, limbs R2) against an
 * Int# (R3).  Returns <0, 0 or >0 in R1, following __gmpz_cmp_si. */
829 cmpIntegerIntzh_fast
830 {
831     /* R1 = s1; R2 = d1; R3 = the int */
832     W_ usize, vsize, v_digit, u_digit;
833
834     usize = R1;
835     vsize = 0;
836     v_digit = R3;
837
838     // paraphrased from __gmpz_cmp_si() in the GMP sources
        // First give the int a GMP-style (sign, magnitude) form...
839     if (%gt(v_digit,0)) {
840         vsize = 1;
841     } else { 
842         if (%lt(v_digit,0)) {
843             vsize = -1;
844             v_digit = -v_digit;
845         }
846     }
847
        // ...then differing signs/sizes decide the comparison outright.
848     if (usize != vsize) {
849         R1 = usize - vsize; 
850         jump %ENTRY_CODE(Sp(0));
851     }
852
853     if (usize == 0) {
854         R1 = 0; 
855         jump %ENTRY_CODE(Sp(0));
856     }
857
        // Same nonzero size (+/-1): compare the single limbs.
858     u_digit = W_[BYTE_ARR_CTS(R2)];
859
860     if (u_digit == v_digit) {
861         R1 = 0; 
862         jump %ENTRY_CODE(Sp(0));
863     }
864
865     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
866         R1 = usize; 
867     } else {
868         R1 = -usize; 
869     }
870
871     jump %ENTRY_CODE(Sp(0));
872 }
873
/* cmpInteger#: compare two Integers (sizes R1/R3, limbs R2/R4).
 * Returns <0, 0 or >0 in R1, following __gmpz_cmp: sign/size decide
 * first, then __gmpn_cmp on the limb arrays breaks the tie. */
874 cmpIntegerzh_fast
875 {
876     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
877     W_ usize, vsize, size, up, vp;
878     CInt cmp;
879
880     // paraphrased from __gmpz_cmp() in the GMP sources
881     usize = R1;
882     vsize = R3;
883
884     if (usize != vsize) {
885         R1 = usize - vsize; 
886         jump %ENTRY_CODE(Sp(0));
887     }
888
889     if (usize == 0) {
890         R1 = 0; 
891         jump %ENTRY_CODE(Sp(0));
892     }
893
        // Equal nonzero sizes: compare magnitudes over |size| limbs.
894     if (%lt(usize,0)) { // NB. not <, which is unsigned
895         size = -usize;
896     } else {
897         size = usize;
898     }
899
900     up = BYTE_ARR_CTS(R2);
901     vp = BYTE_ARR_CTS(R4);
902
903     (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];
904
905     if (cmp == 0 :: CInt) {
906         R1 = 0; 
907         jump %ENTRY_CODE(Sp(0));
908     }
909
        // Flip the magnitude comparison when both numbers are negative.
910     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
911         R1 = 1;
912     } else {
913         R1 = (-1); 
914     }
915     /* Result parked in R1, return via info-pointer at TOS */
916     jump %ENTRY_CODE(Sp(0));
917 }
918
/* integer2Int#: truncate an Integer (size R1, limbs R2) to an Int#:
 * the least significant limb, negated if the size is negative. */
919 integer2Intzh_fast
920 {
921     /* R1 = s; R2 = d */
922     W_ r, s;
923
924     s = R1;
925     if (s == 0) {
926         r = 0;
927     } else {
928         r = W_[R2 + SIZEOF_StgArrWords];
929         if (%lt(s,0)) {
930             r = -r;
931         }
932     }
933     /* Result parked in R1, return via info-pointer at TOS */
934     R1 = r;
935     jump %ENTRY_CODE(Sp(0));
936 }
937
/* integer2Word#: truncate an Integer to a Word#.  Identical logic to
 * integer2Intzh_fast above -- the difference is only in how the
 * caller interprets the resulting bits. */
938 integer2Wordzh_fast
939 {
940   /* R1 = s; R2 = d */
941   W_ r, s;
942
943   s = R1;
944   if (s == 0) {
945     r = 0;
946   } else {
947     r = W_[R2 + SIZEOF_StgArrWords];
948     if (%lt(s,0)) {
949         r = -r;
950     }
951   }
952   /* Result parked in R1, return via info-pointer at TOS */
953   R1 = r;
954   jump %ENTRY_CODE(Sp(0));
955 }
956
decodeFloatzh_fast
{   // NB. opening brace restored: the body was not brace-delimited,
    // which is a syntax error in C--.
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);
    
    /* arguments: F1 = Float# */
    arg = F1;
    
    // One-limb ByteArray# to hold the mantissa.
    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
    
    /* Be prepared to tell Lennart-coded __decodeFloat
       where mantissa._mp_d can be put (it does not care about the rest) */
    p = Hp - SIZEOF_StgArrWords;
    SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
    StgArrWords_words(p) = 1;
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
    
    /* Perform the operation */
    foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
    
    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
982
decodeFloatzuIntzh_fast
{   // NB. opening brace restored: the body was not brace-delimited,
    // which is a syntax error in C--.
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);
    
    /* arguments: F1 = Float# */
    arg = F1;
    
    /* Perform the operation */
    foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
    
    /* returns: (Int# (mantissa), Int# (exponent)) */
    RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
}
999
/* Size in bytes of a Double mantissa, and of the ByteArray# (header +
   mantissa payload) allocated by decodeDoublezh_fast below. */
#define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
#define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
1002
decodeDoublezh_fast
{   // NB. opening brace restored: the body was not brace-delimited,
    // which is a syntax error in C--.
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: D1 = Double# */
    arg = D1;

    ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
    
    /* Be prepared to tell Lennart-coded __decodeDouble
       where mantissa.d can be put (it does not care about the rest) */
    p = Hp - ARR_SIZE + WDS(1);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

    /* Perform the operation */
    foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
    
    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
1028
decodeDoublezu2Intzh_fast
{   // NB. opening brace restored: the body was not brace-delimited,
    // which is a syntax error in C--.
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp2);
    FETCH_MP_TEMP(mp_result1);
    FETCH_MP_TEMP(mp_result2);

    /* arguments: D1 = Double# */
    arg = D1;

    /* Perform the operation */
    foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
                                    mp_result1 "ptr", mp_result2 "ptr",
                                    arg) [];

    /* returns:
       (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
    RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
}
1050
1051 /* -----------------------------------------------------------------------------
1052  * Concurrency primitives
1053  * -------------------------------------------------------------------------- */
1054
forkzh_fast
{
  /* args: R1 = closure to spark */
  // Implements forkIO: create a new IO thread running R1, inherit the
  // current thread's masking state, and hand it to the scheduler.

  MAYBE_GC(R1_PTR, forkzh_fast);

  W_ closure;
  W_ threadid;
  closure = R1;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
                                RtsFlags_GcFlags_initialStkSize(RtsFlags), 
                                closure "ptr") [];

  /* start blocked if the current thread is blocked */
  StgTSO_flags(threadid) = 
     StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
                                (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));

  foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];

  // context switch soon, but not immediately: we don't want every
  // forkIO to force a context-switch.
  Capability_context_switch(MyCapability()) = 1 :: CInt;
  
  RET_P(threadid);
}
1082
forkOnzh_fast
{
  /* args: R1 = cpu, R2 = closure to spark */
  // Like forkzh_fast, but pins the new thread to a given capability
  // (scheduleThreadOn rather than scheduleThread).

  MAYBE_GC(R2_PTR, forkOnzh_fast);

  W_ cpu;
  W_ closure;
  W_ threadid;
  cpu = R1;
  closure = R2;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
                                RtsFlags_GcFlags_initialStkSize(RtsFlags), 
                                closure "ptr") [];

  /* start blocked if the current thread is blocked */
  StgTSO_flags(threadid) = 
     StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
                                (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));

  foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];

  // context switch soon, but not immediately: we don't want every
  // forkIO to force a context-switch.
  Capability_context_switch(MyCapability()) = 1 :: CInt;
  
  RET_P(threadid);
}
1112
yieldzh_fast
{
  // yield#: defer entirely to the generic "yield with no live
  // registers" entry in the scheduler glue.
  jump stg_yield_noregs;
}
1117
myThreadIdzh_fast
{
  /* no args. */
  // The ThreadId# is simply the current TSO pointer.
  RET_P(CurrentTSO);
}
1123
labelThreadzh_fast
{
  /* args: 
        R1 = ThreadId#
        R2 = Addr# */
  // Attach a debug label to a thread; a no-op in non-DEBUG builds.
#ifdef DEBUG
  foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
#endif
  jump %ENTRY_CODE(Sp(0));
}
1134
isCurrentThreadBoundzh_fast
{
  /* no args */
  // Ask the RTS whether the current TSO is bound to an OS thread.
  W_ r;
  (r) = foreign "C" isThreadBound(CurrentTSO) [];
  RET_N(r);
}
1142
threadStatuszh_fast
{
    /* args: R1 :: ThreadId# */
    // Return a status code for the thread: 16 = finished, 17 = killed,
    // otherwise the raw why_blocked value of the TSO.
    W_ tso;
    W_ why_blocked;
    W_ what_next;
    W_ ret;

    tso = R1;
    loop:
      // Chase ThreadRelocated indirections to reach the live TSO.
      if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
          tso = StgTSO__link(tso);
          goto loop;
      }

    what_next   = TO_W_(StgTSO_what_next(tso));
    why_blocked = TO_W_(StgTSO_why_blocked(tso));
    // Note: these two reads are not atomic, so they might end up
    // being inconsistent.  It doesn't matter, since we
    // only return one or the other.  If we wanted to return the
    // contents of block_info too, then we'd have to do some synchronisation.

    if (what_next == ThreadComplete) {
        ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
    } else {
        if (what_next == ThreadKilled) {
            ret = 17;
        } else {
            ret = why_blocked;
        }
    }
    RET_N(ret);
}
1176
1177 /* -----------------------------------------------------------------------------
1178  * TVar primitives
1179  * -------------------------------------------------------------------------- */
1180
// Word offset of the return address within the stack frames below.
#define SP_OFF 0
1182
1183 // Catch retry frame ------------------------------------------------------------
1184
INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  W_ unused3, P_ unused4, P_ unused5)
{
   // Return frame for catchRetry#: one of the two branches completed
   // without retrying, so try to commit its nested transaction.
   W_ r, frame, trec, outer;

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
   (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
   if (r != 0) {
     /* Succeeded (either first branch or second branch) */
     StgTSO_trec(CurrentTSO) = outer;
     Sp = Sp + SIZEOF_StgCatchRetryFrame;
     jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
     /* Did not commit: re-execute */
     W_ new_trec;
     ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
     StgTSO_trec(CurrentTSO) = new_trec;
     // Re-run whichever branch was executing when the commit failed.
     if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
       R1 = StgCatchRetryFrame_alt_code(frame);
     } else {
       R1 = StgCatchRetryFrame_first_code(frame);
     }
     jump stg_ap_v_fast;
   }
}
1215
1216
1217 // Atomically frame ------------------------------------------------------------
1218
INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ unused3, P_ unused4)
{
  // Return frame for atomically#.  First return (outer == NO_TREC):
  // collect the invariants to check.  Subsequent returns: abort the
  // just-finished invariant-check transaction and step to the next
  // invariant.  When none remain, attempt to commit; on validation
  // failure, restart the whole transaction from scratch.
  W_ frame, trec, valid, next_invariant, q, outer;

  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (outer == NO_TREC) {
    /* First time back at the atomically frame -- pick up invariants */
    ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;

  } else {
    /* Second/subsequent time back at the atomically frame -- abort the
     * tx that's checking the invariant and move on to the next one */
    StgTSO_trec(CurrentTSO) = outer;
    q = StgAtomicallyFrame_next_invariant_to_check(frame);
    StgInvariantCheckQueue_my_execution(q) = trec;
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    /* Don't free trec -- it's linked from q and will be stashed in the
     * invariant if we eventually commit. */
    q = StgInvariantCheckQueue_next_queue_entry(q);
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    trec = outer;
  }

  q = StgAtomicallyFrame_next_invariant_to_check(frame);

  if (q != END_INVARIANT_CHECK_QUEUE) {
    /* We can't commit yet: another invariant to check */
    // Each invariant runs in its own nested transaction.
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;

    next_invariant = StgInvariantCheckQueue_invariant(q);
    R1 = StgAtomicInvariant_code(next_invariant);
    jump stg_ap_v_fast;

  } else {

    /* We've got no more invariants to check, try to commit */
    (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
    if (valid != 0) {
      /* Transaction was valid: commit succeeded */
      StgTSO_trec(CurrentTSO) = NO_TREC;
      Sp = Sp + SIZEOF_StgAtomicallyFrame;
      jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
      /* Transaction was not valid: try again */
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
      R1 = StgAtomicallyFrame_code(frame);
      jump stg_ap_v_fast;
    }
  }
}
1280
INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ unused3, P_ unused4)
{
  // Frame left on the stack while a retry#-ing transaction is blocked
  // on its TVars; entered when the thread is woken.
  W_ frame, trec, valid;

  frame = Sp;

  /* The TSO is currently waiting: should we stop waiting? */
  (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
  if (valid != 0) {
    /* Previous attempt is still valid: no point trying again yet */
    jump stg_block_noregs;
  } else {
    /* Previous attempt is no longer valid: try again */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    // Turn this frame back into an ordinary atomically frame and re-run.
    StgHeader_info(frame) = stg_atomically_frame_info;
    R1 = StgAtomicallyFrame_code(frame);
    jump stg_ap_v_fast;
  }
}
1305
1306 // STM catch frame --------------------------------------------------------------
1307
1308 #define SP_OFF 0
1309
1310 /* Catch frames are very similar to update frames, but when entering
1311  * one we just pop the frame off the stack and perform the correct
1312  * kind of return to the activation record underneath us on the stack.
1313  */
1314
INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ unused3, P_ unused4)
   {
      // Return frame for catchSTM#: the protected computation finished
      // without raising, so try to commit its nested transaction.
      W_ r, frame, trec, outer;
      frame = Sp;
      trec = StgTSO_trec(CurrentTSO);
      ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
      (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
      if (r != 0) {
        /* Commit succeeded: pop the frame and return to the frame below.
         * NB. return via %ENTRY_CODE, as the other STM frames do: a
         * plain "jump Sp(SP_OFF)" uses the wrong return convention on
         * unregisterised builds. */
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchSTMFrame;
        jump %ENTRY_CODE(Sp(SP_OFF));
      } else {
        /* Commit failed: re-run the protected computation in a fresh
         * nested transaction. */
        W_ new_trec;
        ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
        StgTSO_trec(CurrentTSO) = new_trec;
        R1 = StgCatchSTMFrame_code(frame);
        jump stg_ap_v_fast;
      }
   }
1340
1341
1342 // Primop definition ------------------------------------------------------------
1343
atomicallyzh_fast
{
  // atomically# : push an atomically frame, open a top-level
  // transaction, and run the STM action in R1 under it.
  W_ frame;
  W_ old_trec;
  W_ new_trec;
  
  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR, atomicallyzh_fast); 

  /* Args: R1 = m :: STM a */
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);

  old_trec = StgTSO_trec(CurrentTSO);

  /* Nested transactions are not allowed; raise an exception */
  if (old_trec != NO_TREC) {
     R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
     jump raisezh_fast;
  }

  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
  StgAtomicallyFrame_code(frame) = R1;
  StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;

  /* Start the memory transaction */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1379
1380
catchSTMzh_fast
{
  // catchSTM# : push a catch-stm frame and run R1 inside a nested
  // transaction; R2 is the handler, run only if R1 raises.
  W_ frame;
  
  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */
  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
  StgCatchSTMFrame_handler(frame) = R2;
  StgCatchSTMFrame_code(frame) = R1;

  /* Start a nested transaction to run the body of the try block in */
  W_ cur_trec;  
  W_ new_trec;
  cur_trec = StgTSO_trec(CurrentTSO);
  // NOTE(review): this foreign call carries no live-register annotation
  // ('[...]'), unlike the otherwise-identical call in catchRetryzh_fast
  // -- confirm this is intentional.
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1407
1408
catchRetryzh_fast
{
  // catchRetry# (orElse): run R1 in a nested transaction; if it
  // retries, the catch-retry frame switches to the alternative R2.
  W_ frame;
  W_ new_trec;
  W_ trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 

  /* Args: R1 :: STM a */
  /* Args: R2 :: STM a */
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;
  
  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1439
1440
retryzh_fast
{
  // retry# : unwind to the nearest CATCH_RETRY_FRAME (switch to the
  // orElse alternative, or propagate) or ATOMICALLY_FRAME (validate,
  // then block on the transaction's TVars).
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

  MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
  StgTSO_sp(CurrentTSO) = Sp;
  (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
  Sp = StgTSO_sp(CurrentTSO);
  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
    // Abort the transaction attempting the current branch
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    // Taken when running_alt_code is false (the '!' applies to the field).
    if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
      // Retry in the first branch: try the alternative
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
      R1 = StgCatchRetryFrame_alt_code(frame);
      jump stg_ap_v_fast;
    } else {
      // Retry in the alternative code: propagate the retry
      StgTSO_trec(CurrentTSO) = outer;
      Sp = Sp + SIZEOF_StgCatchRetryFrame;
      goto retry_pop_stack;
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait 
  ASSERT(frame_type == ATOMICALLY_FRAME);
  if (outer != NO_TREC) {
    // We called retry while checking invariants, so abort the current
    // invariant check (merging its TVar accesses into the parents read
    // set so we'll wait on them)
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    trec = outer;
    StgTSO_trec(CurrentTSO) = trec;
    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
  }
  ASSERT(outer == NO_TREC);

  (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
  if (r != 0) {
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
    StgHeader_info(frame) = stg_atomically_waiting_frame_info;
    Sp = frame;
    // Fix up the stack in the unregisterised case: the return convention is different.
    R3 = trec; // passing to stmWaitUnblock()
    jump stg_block_stmwait;
  } else {
    // Transaction was not valid: retry immediately
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
    jump stg_ap_v_fast;
  }
}
1512
1513
checkzh_fast
{
  // check# : register an invariant closure with the current
  // transaction, to be verified when it commits.
  W_ trec, closure;

  /* Args: R1 = invariant closure */
  MAYBE_GC (R1_PTR, checkzh_fast); 

  trec = StgTSO_trec(CurrentTSO);
  closure = R1;
  foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
                                     trec "ptr",
                                     closure "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1529
1530
newTVarzh_fast
{
  // newTVar# : allocate a TVar holding the initial value in R1.
  W_ tv;
  W_ new_value;

  /* Args: R1 = initialisation value */

  MAYBE_GC (R1_PTR, newTVarzh_fast); 
  new_value = R1;
  ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
  RET_P(tv);
}
1543
1544
readTVarzh_fast
{
  // readTVar# : read a TVar through the current transaction record,
  // recording the access in its read set.
  W_ trec;
  W_ tvar;
  W_ result;

  /* Args: R1 = TVar closure */

  MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];

  RET_P(result);
}
1560
readTVarIOzh_fast
{
    // Non-transactional read of a TVar (R1): spin while the current
    // value is a TREC_HEADER, i.e. a transaction is mid-commit.
    W_ result;

again:
    result = StgTVar_current_value(R1);
    if (%INFO_PTR(result) == stg_TREC_HEADER_info) {
        goto again;
    }
    RET_P(result);
}
1572
writeTVarzh_fast
{
  // writeTVar# : record a tentative write in the current transaction;
  // the TVar itself is only updated at commit time.
  W_ trec;
  W_ tvar;
  W_ new_value;
  
  /* Args: R1 = TVar closure */
  /*       R2 = New value    */

  MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  new_value = R2;
  foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1590
1591
1592 /* -----------------------------------------------------------------------------
1593  * MVar primitives
1594  *
1595  * take & putMVar work as follows.  Firstly, an important invariant:
1596  *
1597  *    If the MVar is full, then the blocking queue contains only
1598  *    threads blocked on putMVar, and if the MVar is empty then the
1599  *    blocking queue contains only threads blocked on takeMVar.
1600  *
 * takeMVar:
1602  *    MVar empty : then add ourselves to the blocking queue
1603  *    MVar full  : remove the value from the MVar, and
1604  *                 blocking queue empty     : return
1605  *                 blocking queue non-empty : perform the first blocked putMVar
1606  *                                            from the queue, and wake up the
1607  *                                            thread (MVar is now full again)
1608  *
1609  * putMVar is just the dual of the above algorithm.
1610  *
1611  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1612  * the stack of the thread waiting to do the putMVar.  See
1613  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1614  * the stack layout, and the PerformPut and PerformTake macros below.
1615  *
1616  * It is important that a blocked take or put is woken up with the
1617  * take/put already performed, because otherwise there would be a
1618  * small window of vulnerability where the thread could receive an
1619  * exception and never perform its take or put, and we'd end up with a
1620  * deadlock.
1621  *
1622  * -------------------------------------------------------------------------- */
1623
isEmptyMVarzh_fast
{
    /* args: R1 = MVar closure */
    // An MVar is empty when its value slot holds the END_TSO_QUEUE
    // sentinel; returns 1 for empty, 0 for full.

    if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
        RET_N(1);
    } else {
        RET_N(0);
    }
}
1634
newMVarzh_fast
{
    /* args: none */
    // Allocate an empty MVar: head, tail and value all hold the
    // END_TSO_QUEUE sentinel.
    W_ mvar;

    ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
  
    mvar = Hp - SIZEOF_StgMVar + WDS(1);
    SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
        // MVARs start dirty: generation 0 has no mutable list
    StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
    RET_P(mvar);
}
1650
1651
/* Push `value` (and the info pointer it will return through) onto the
 * stack of a thread blocked in takeMVar; see stg_block_takemvar in
 * HeapStackCheck.c for the matching stack layout. */
#define PerformTake(tso, value)                         \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;

/* Pop the pending value from the stack of a thread blocked in putMVar,
 * storing it into `lval` and discarding the frame (3 words). */
#define PerformPut(tso,lval)                    \
    StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
    lval = W_[StgTSO_sp(tso) - WDS(1)];
1659
takeMVarzh_fast
{
    // takeMVar# : take the value from a full MVar, waking the first
    // blocked putMVar (if any); block on an empty MVar.
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */
    mvar = R1;

#if defined(THREADED_RTS)
    // Lock the MVar closure for the duration of the operation.
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif
        
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
    }

    /* If the MVar is empty, put ourselves on its blocking queue,
     * and wait until we're woken up.
     */
    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            foreign "C" setTSOLink(MyCapability() "ptr", 
                                   StgMVar_tail(mvar) "ptr",
                                   CurrentTSO) [];
        }
        StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
        StgTSO_block_info(CurrentTSO)  = mvar;
        // write barrier for throwTo(), which looks at block_info
        // if why_blocked==BlockedOnMVar.
        prim %write_barrier() [];
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgMVar_tail(mvar) = CurrentTSO;
        
        R1 = mvar;
        jump stg_block_takemvar;
  }

  /* we got the value... */
  val = StgMVar_value(mvar);

  if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
  {
      /* There are putMVar(s) waiting... 
       * wake up the first thread on the queue
       */
      ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

      /* actually perform the putMVar for the thread that we just woke up */
      tso = StgMVar_head(mvar);
      PerformPut(tso,StgMVar_value(mvar));

      // Mark the woken TSO dirty: its stack was just mutated above.
      if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
          foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
      }

      ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                            StgMVar_head(mvar) "ptr", 1) [];
      StgMVar_head(mvar) = tso;

      if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
          StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
      }

#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
      SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
      RET_P(val);
  } 
  else
  {
      /* No further putMVars, MVar is now empty */
      StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
 
#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
      SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif

      RET_P(val);
  }
}
1747
1748
tryTakeMVarzh_fast
{
    // tryTakeMVar# : like takeMVar#, but returns (0, dummy) instead of
    // blocking when the MVar is empty, and (1, value) on success.
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */

    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        // Restore the original info pointer: the MVar is untouched.
        unlockClosure(mvar, info);
#endif
        /* HACK: we need a pointer to pass back, 
         * so we abuse NO_FINALIZER_closure
         */
        RET_NP(0, stg_NO_FINALIZER_closure);
    }

    if (info == stg_MVAR_CLEAN_info) {
        // NOTE(review): this call lacks the '[]' annotation carried by
        // the same call in takeMVarzh_fast -- confirm it is intentional.
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are putMVar(s) waiting... 
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));
        // Mark the woken TSO dirty: its stack was just mutated above.
        if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }

        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else 
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    
    RET_NP(1, val);
}
1820
1821
putMVarzh_fast
{
    // putMVar# : store a value into an empty MVar, handing it directly
    // to the first blocked takeMVar (if any); block on a full MVar.
    W_ mvar, val, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;
    val  = R2;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    // Full MVar: enqueue ourselves on the blocking queue and block.
    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            foreign "C" setTSOLink(MyCapability() "ptr", 
                                   StgMVar_tail(mvar) "ptr",
                                   CurrentTSO) [];
        }
        StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
        StgTSO_block_info(CurrentTSO)  = mvar;
        // write barrier for throwTo(), which looks at block_info
        // if why_blocked==BlockedOnMVar.
        prim %write_barrier() [];
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgMVar_tail(mvar) = CurrentTSO;
        
        R1 = mvar;
        R2 = val;
        jump stg_block_putmvar;
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, val);
        // Mark the woken TSO dirty: its stack was just mutated above.
        if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }
      
        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = val;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    
    /* ToDo: yield afterward for better communication performance? */
}
1904
1905
/* tryPutMVar# : non-blocking variant of putMVar#.
 * args: R1 = MVar, R2 = value.
 * Returns 0 if the MVar is already full; otherwise performs the put
 * (handing the value directly to the first blocked takeMVar#, if any)
 * and returns 1.
 */
tryPutMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    // Unlike putMVar#, we never block: if the MVar is already full,
    // unlock (restoring the original info pointer) and report failure.
    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        unlockClosure(mvar, info);
#endif
        RET_N(0);
    }
  
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
        
        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        // BUG FIX: '==' binds tighter than '&' in C--/C, so the previous
        // unparenthesized form tested 'flags & (TSO_DIRTY == 0)', i.e.
        // 'flags & 0', which is always false -- dirty_TSO was never
        // called after mutating the TSO's stack via PerformTake.
        if ((TO_W_(StgTSO_flags(tso)) & TSO_DIRTY) == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }
      
        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        // If that was the last blocked thread, clear the tail too.
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    
    RET_N(1);
    /* ToDo: yield afterward for better communication performance? */
}
1972
1973
1974 /* -----------------------------------------------------------------------------
1975    Stable pointer primitives
1976    -------------------------------------------------------------------------  */
1977
/* makeStableName# : return the (unique-per-object) StableName for the
 * closure in R1, creating and registering one if none exists yet.
 */
makeStableNamezh_fast
{
    W_ index, sn_obj;

    // Allocate space for a StableName closure up front; R1 is a live
    // pointer and must be declared so it survives a GC here.
    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
  
    (index) = foreign "C" lookupStableName(R1 "ptr") [];

    /* Is there already a StableName for this heap object?
     *  stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        // No: build a fresh STABLE_NAME closure in the space allocated
        // above, and record it in the table entry for this index.
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        // Yes: return the existing StableName object (the space we
        // allocated above simply goes unused).
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }
    
    RET_P(sn_obj);
}
2000
2001
/* makeStablePtr# : pin the closure in R1 and return a StablePtr for it. */
makeStablePtrzh_fast
{
    /* Args: R1 = a */
    W_ stable;
    // getStablePtr may trigger GC, so declare R1 live first.
    MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
    ("ptr" stable) = foreign "C" getStablePtr(R1 "ptr") [];
    RET_N(stable);
}
2010
/* deRefStablePtr# : fetch the closure a StablePtr refers to. */
deRefStablePtrzh_fast
{
    /* Args: R1 = the stable ptr (used as an index into stable_ptr_table) */
    W_ entry, addr;
    // Each table slot is an snEntry; read the addr field of ours.
    entry = W_[stable_ptr_table] + R1*SIZEOF_snEntry;
    addr  = snEntry_addr(entry);
    RET_P(addr);
}
2019
2020 /* -----------------------------------------------------------------------------
2021    Bytecode object primitives
2022    -------------------------------------------------------------------------  */
2023
/* newBCO# : construct a byte-code object on the heap. */
newBCOzh_fast
{
    /* R1 = instrs
       R2 = literals
       R3 = ptrs
       R4 = arity
       R5 = bitmap array
    */
    W_ bco, bitmap_arr, bytes, words;
    
    bitmap_arr = R5;

    // Total size: the fixed StgBCO fields plus an inline copy of the
    // bitmap words taken from the R5 ArrWords.
    words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
    bytes = WDS(words);

    // All four pointer args must be declared live across a possible GC.
    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );

    bco = Hp - bytes + WDS(1);
    SET_HDR(bco, stg_BCO_info, W_[CCCS]);
    
    StgBCO_instrs(bco)     = R1;
    StgBCO_literals(bco)   = R2;
    StgBCO_ptrs(bco)       = R3;
    StgBCO_arity(bco)      = HALF_W_(R4);
    StgBCO_size(bco)       = HALF_W_(words);
    
    // Copy the arity/bitmap info into the BCO
    // (C-- has no loop syntax: this is a label + goto loop.)
    W_ i;
    i = 0;
for:
    if (i < StgArrWords_words(bitmap_arr)) {
        StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
        i = i + 1;
        goto for;
    }
    
    RET_P(bco);
}
2062
2063
mkApUpd0zh_fast
{
    // R1 = the BCO# for the AP
    //  
    W_ ap;

    // This function is *only* used to wrap zero-arity BCOs in an
    // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
    // saturated and always points directly to a FUN or BCO.
    ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
           StgBCO_arity(R1) == HALF_W_(0));

    // Allocate the AP; R1 is a live pointer across the heap check.
    HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
    TICK_ALLOC_UP_THK(0, 0);
    CCCS_ALLOC(SIZEOF_StgAP);

    ap = Hp - SIZEOF_StgAP + WDS(1);
    SET_HDR(ap, stg_AP_info, W_[CCCS]);
    
    // Zero arguments: the payload is just the function (the BCO itself).
    StgAP_n_args(ap) = HALF_W_(0);
    StgAP_fun(ap) = R1;
    
    RET_P(ap);
}
2088
/* unpackClosure# : return (info-table ptr, array of pointer fields,
 * array of non-pointer words) for the closure in R1.
 */
unpackClosurezh_fast
{
/* args: R1 = closure to analyze */
// TODO: Consider the absence of ptrs or nonptrs as a special case ?

    W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
    info  = %GET_STD_INFO(UNTAG(R1));

    // Some closures have non-standard layout, so we omit those here.
    W_ type;
    type = TO_W_(%INFO_TYPE(info));
    switch [0 .. N_CLOSURE_TYPES] type {
    case THUNK_SELECTOR : {
        // A selector exposes a single pointer (its selectee).
        ptrs = 1;
        nptrs = 0;
        goto out;
    }
    case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
         THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
        // Non-standard payload layouts: expose no fields at all.
        ptrs = 0;
        nptrs = 0;
        goto out;
    }
    default: {
        // Standard layout: field counts come straight from the info table.
        ptrs  = TO_W_(%INFO_PTRS(info)); 
        nptrs = TO_W_(%INFO_NPTRS(info));
        goto out;
    }}
out:

    W_ ptrs_arr_sz, nptrs_arr_sz;
    nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
    ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);

    // Allocate both result arrays with one heap check; R1 stays live.
    ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);

    W_ clos;
    clos = UNTAG(R1);

    // The two arrays sit back-to-back at the top of the allocation:
    // first the pointer array, then the words array.
    ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
    nptrs_arr = Hp - nptrs_arr_sz + WDS(1);

    SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
    p = 0;
for:
    // Copy the pointer fields of the closure's payload.
    if(p < ptrs) {
         W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
         p = p + 1;
         goto for;
    }
    
    SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(nptrs_arr) = nptrs;
    p = 0;
for2:
    // Non-pointer words follow the pointers in a standard payload,
    // hence the p+ptrs offset.
    if(p < nptrs) {
         W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
         p = p + 1;
         goto for2;
    }
    RET_NPP(info, ptrs_arr, nptrs_arr);
}
2152
2153 /* -----------------------------------------------------------------------------
2154    Thread I/O blocking primitives
2155    -------------------------------------------------------------------------- */
2156
/* Add a thread to the end of the blocked queue. (C-- version of the C
 * macro in Schedule.h).  The TSO's link field must already be clear
 * (see the ASSERT); the queue ends live in blocked_queue_hd/tl.
 * setTSOLink is used (rather than a raw store) when linking onto a
 * non-empty queue, matching the other queue-insertion sites below.
 */
#define APPEND_TO_BLOCKED_QUEUE(tso)                    \
    ASSERT(StgTSO__link(tso) == END_TSO_QUEUE);         \
    if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
      W_[blocked_queue_hd] = tso;                       \
    } else {                                            \
      foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \
    }                                                   \
    W_[blocked_queue_tl] = tso;
2168
/* waitRead# : block the current thread until the file descriptor in R1
 * is readable (non-threaded RTS only).
 */
waitReadzh_fast
{
    /* args: R1 = file descriptor */
#ifdef THREADED_RTS
    foreign "C" barf("waitRead# on threaded RTS") never returns;
#else

    // Record the fd in block_info, mark the TSO BlockedOnRead, queue it
    // on the blocked queue, and return to the scheduler.
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2185
/* waitWrite# : block the current thread until the file descriptor in R1
 * is writable (non-threaded RTS only).  Mirror image of waitRead#.
 */
waitWritezh_fast
{
    /* args: R1 = file descriptor */
#ifdef THREADED_RTS
    foreign "C" barf("waitWrite# on threaded RTS") never returns;
#else

    // Record the fd in block_info, mark the TSO BlockedOnWrite, queue
    // it on the blocked queue, and return to the scheduler.
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2202
2203
STRING(stg_delayzh_malloc_str, "delayzh_fast")
/* delay# : block the current thread for (at least) R1 microseconds
 * (non-threaded RTS only; on mingw32 the delay is handled by the async
 * I/O machinery, elsewhere by the sorted sleeping_queue).
 */
delayzh_fast
{
#ifdef mingw32_HOST_OS
    W_ ares;
    CInt reqID;
#else
    W_ t, prev, target;
#endif

#ifdef THREADED_RTS
    foreign "C" barf("delay# on threaded RTS") never returns;
#else

    /* args: R1 (microsecond delay amount) */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;

#ifdef mingw32_HOST_OS

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_delayzh_malloc_str);
    // Hand the delay off as an async request; the result struct starts
    // zeroed and block_info points at it while we are blocked.
    (reqID) = foreign "C" addDelayRequest(R1);
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;

    /* Having all async-blocked threads reside on the blocked_queue
     * simplifies matters, so change the status to OnDoProc put the
     * delayed thread on the blocked_queue.
     */
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async_void;

#else

    // Convert the microsecond delay in R1 into timer ticks, rounding
    // up.  tickInterval is presumably in milliseconds (hence *1000 to
    // get microseconds per tick) -- TODO confirm against RtsFlags.
    W_ time;
    W_ divisor;
    (time) = foreign "C" getourtimeofday() [R1];
    divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
    if (divisor == 0) {
        // Fall back to a 50ms tick if no interval is configured.
        divisor = 50;
    }
    divisor = divisor * 1000;
    target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
           + time + 1; /* Add 1 as getourtimeofday rounds down */
    StgTSO_block_info(CurrentTSO) = target;

    /* Insert the new thread in the sleeping queue, which is kept
     * sorted by wake-up time (block_info). */
    prev = NULL;
    t = W_[sleeping_queue];
while:
    if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
        prev = t;
        t = StgTSO__link(t);
        goto while;
    }

    StgTSO__link(CurrentTSO) = t;
    if (prev == NULL) {
        W_[sleeping_queue] = CurrentTSO;
    } else {
        // NOTE(review): setTSOLink rather than a plain store into
        // prev's link field -- presumably required for GC bookkeeping;
        // matches APPEND_TO_BLOCKED_QUEUE above.
        foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) [];
    }
    jump stg_block_noregs;
#endif
#endif /* !THREADED_RTS */
}
2275
2276
2277 #ifdef mingw32_HOST_OS
STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
/* asyncRead# : issue an asynchronous read request (mingw32, non-threaded
 * RTS only) and block the current thread until it completes.
 */
asyncReadzh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncRead# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;

    /* could probably allocate this on the heap instead */
    // The StgAsyncIOResult starts zeroed; len/errCode are presumably
    // filled in when the request completes.  block_info points at it
    // while we are blocked.
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncReadzh_malloc_str)
                        [R1,R2,R3,R4];
    // The 0/*FALSE*/ flag selects a read (cf. 1/*TRUE*/ in asyncWrite#).
    (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2305
STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
/* asyncWrite# : issue an asynchronous write request (mingw32,
 * non-threaded RTS only) and block the current thread until it
 * completes.  Mirror image of asyncRead#.
 */
asyncWritezh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncWrite# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

    // Result struct starts zeroed; block_info points at it while blocked.
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncWritezh_malloc_str)
                        [R1,R2,R3,R4];
    // The 1/*TRUE*/ flag selects a write (cf. 0/*FALSE*/ in asyncRead#).
    (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];

    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2333
STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
/* asyncDoProc# : run a procedure asynchronously (mingw32, non-threaded
 * RTS only) and block the current thread until it finishes.
 */
asyncDoProczh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
#else

    /* args: R1 = proc, R2 = param */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;

    /* could probably allocate this on the heap instead */
    // Result struct starts zeroed; block_info points at it while blocked.
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncDoProczh_malloc_str) 
                                [R1,R2];
    (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO) = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2361 #endif
2362
2363 // noDuplicate# tries to ensure that none of the thunks under
2364 // evaluation by the current thread are also under evaluation by
2365 // another thread.  It relies on *both* threads doing noDuplicate#;
2366 // the second one will get blocked if they are duplicating some work.
noDuplicatezh_fast
{
    // Save our state, then let threadPaused examine the stack; per the
    // comment above, it may end up killing this thread if the work is
    // already being done by another thread.
    SAVE_THREAD_STATE();
    ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
    foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
    
    if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
        // threadPaused decided this thread should die: finish it.
        jump stg_threadFinished;
    } else {
        // Otherwise restore our state and resume where we left off.
        LOAD_THREAD_STATE();
        ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
        jump %ENTRY_CODE(Sp(0));
    }
}
2381
/* getApStackVal# : fetch payload word R2 out of the AP_STACK in R1.
 * Returns (1, value) on success, or (0, the original closure) when R1
 * is not actually an AP_STACK.
 */
getApStackValzh_fast
{
   W_ stack, idx, valid, result;

   /* args: R1 = AP_STACK, R2 = offset */
   stack = R1;
   idx   = R2;

   // Only a genuine AP_STACK can be inspected; anything else is
   // reported as a failure, handing the closure itself back.
   if (%INFO_PTR(stack) != stg_AP_STACK_info) {
        valid  = 0;
        result = R1;
   } else {
        valid  = 1;
        result = StgAP_STACK_payload(stack,idx);
   }
   RET_NP(valid,result);
}
2399
2400 /* -----------------------------------------------------------------------------
2401    Misc. primitives
2402    -------------------------------------------------------------------------- */
2403
2404 // Write the cost center stack of the first argument on stderr; return
2405 // the second.  Possibly only makes sense for already evaluated
2406 // things?
traceCcszh_fast
{
    W_ stack;

#ifdef PROFILING
    // Only meaningful in a profiling build: dump the cost-centre stack
    // of the (untagged) closure in R1 to stderr.
    stack = StgHeader_ccs(UNTAG(R1));
    foreign "C" fprintCCS_stderr(stack "ptr") [R2];
#endif

    // Hand back the second argument by entering it.
    R1 = R2;
    ENTER();
}
2419
/* getSpark# : try to take a spark from this capability's pool.
 * Returns (1, spark) on success, or (0, False) when none is available
 * (always the latter on the non-threaded RTS).
 */
getSparkzh_fast
{
   W_ found;

#ifndef THREADED_RTS
   // No spark pools without the threaded RTS.
   RET_NP(0,ghczmprim_GHCziBool_False_closure);
#else
   (found) = foreign "C" findSpark(MyCapability());
   if (found == 0) {
      // Pool empty: signal failure.
      RET_NP(0,ghczmprim_GHCziBool_False_closure);
   } else {
      RET_NP(1,found);
   }
#endif
}