Remove the unused remains of __decodeFloat
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 #ifdef __PIC__
31 #ifndef mingw32_HOST_OS
32 import __gmpz_init;
33 import __gmpz_add;
34 import __gmpz_sub;
35 import __gmpz_mul;
36 import __gmpz_gcd;
37 import __gmpn_gcd_1;
38 import __gmpn_cmp;
39 import __gmpz_tdiv_q;
40 import __gmpz_tdiv_r;
41 import __gmpz_tdiv_qr;
42 import __gmpz_fdiv_qr;
43 import __gmpz_divexact;
44 import __gmpz_and;
45 import __gmpz_xor;
46 import __gmpz_ior;
47 import __gmpz_com;
48 #endif
49 import pthread_mutex_lock;
50 import pthread_mutex_unlock;
51 #endif
52 import base_ControlziExceptionziBase_nestedAtomically_closure;
53 import EnterCriticalSection;
54 import LeaveCriticalSection;
55 import ghczmprim_GHCziBool_False_closure;
56
57 /*-----------------------------------------------------------------------------
58   Array Primitives
59
60   Basically just new*Array - the others are all inline macros.
61
62   The size arg is always passed in R1, and the result returned in R1.
63
64   The slow entry point is for returning from a heap check, the saved
65   size argument must be re-loaded from the stack.
66   -------------------------------------------------------------------------- */
67
68 /* for objects that are *less* than the size of a word, make sure we
69  * round up to the nearest word for the size of the array.
70  */
71
72 newByteArrayzh_fast
73 {
74     W_ words, payload_words, n, p;
75     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
76     n = R1;
77     payload_words = ROUNDUP_BYTES_TO_WDS(n);
78     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
79     ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
80     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
81     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
82     StgArrWords_words(p) = payload_words;
83     RET_P(p);
84 }
85
86 #define BA_ALIGN 16
87 #define BA_MASK  (BA_ALIGN-1)
88
89 newPinnedByteArrayzh_fast
90 {
91     W_ words, bytes, payload_words, p;
92
93     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
94     bytes = R1;
95     /* payload_words is what we will tell the profiler we had to allocate */
96     payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
97     /* When we actually allocate memory, we need to allow space for the
98        header: */
99     bytes = bytes + SIZEOF_StgArrWords;
100     /* And we want to align to BA_ALIGN bytes, so we need to allow space
101        to shift up to BA_ALIGN - 1 bytes: */
102     bytes = bytes + BA_ALIGN - 1;
103     /* Now we convert to a number of words: */
104     words = ROUNDUP_BYTES_TO_WDS(bytes);
105
106     ("ptr" p) = foreign "C" allocatePinned(words) [];
107     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
108
109     /* Now we need to move p forward so that the payload is aligned
110        to BA_ALIGN bytes: */
111     p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);
112
113     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
114     StgArrWords_words(p) = payload_words;
115     RET_P(p);
116 }
117
118 newAlignedPinnedByteArrayzh_fast
119 {
120     W_ words, bytes, payload_words, p, alignment;
121
122     MAYBE_GC(NO_PTRS,newAlignedPinnedByteArrayzh_fast);
123     bytes = R1;
124     alignment = R2;
125
126     /* payload_words is what we will tell the profiler we had to allocate */
127     payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
128
129     /* When we actually allocate memory, we need to allow space for the
130        header: */
131     bytes = bytes + SIZEOF_StgArrWords;
132     /* And we want to align to <alignment> bytes, so we need to allow space
133        to shift up to <alignment - 1> bytes: */
134     bytes = bytes + alignment - 1;
135     /* Now we convert to a number of words: */
136     words = ROUNDUP_BYTES_TO_WDS(bytes);
137
138     ("ptr" p) = foreign "C" allocatePinned(words) [];
139     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
140
141     /* Now we need to move p forward so that the payload is aligned
142        to <alignment> bytes. Note that we are assuming that
143        <alignment> is a power of 2, which is technically not guaranteed */
144     p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));
145
146     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
147     StgArrWords_words(p) = payload_words;
148     RET_P(p);
149 }
150
151 newArrayzh_fast
152 {
153     W_ words, n, init, arr, p;
154     /* Args: R1 = words, R2 = initialisation value */
155
156     n = R1;
157     MAYBE_GC(R2_PTR,newArrayzh_fast);
158
159     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
160     ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
161     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
162
163     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
164     StgMutArrPtrs_ptrs(arr) = n;
165
166     // Initialise all elements of the the array with the value in R2
167     init = R2;
168     p = arr + SIZEOF_StgMutArrPtrs;
169   for:
170     if (p < arr + WDS(words)) {
171         W_[p] = init;
172         p = p + WDS(1);
173         goto for;
174     }
175
176     RET_P(arr);
177 }
178
179 unsafeThawArrayzh_fast
180 {
181   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
182   //
183   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
184   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
185   // it on the mutable list for the GC to remove (removing something from
186   // the mutable list is not easy, because the mut_list is only singly-linked).
187   // 
188   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
189   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
190   // to indicate that it is still on the mutable list.
191   //
192   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
193   // either it is on a mut_list, or it isn't.  We adopt the convention that
194   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
195   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
196   // we put it on the mutable list more than once, but it would get scavenged
197   // multiple times during GC, which would be unnecessarily slow.
198   //
199   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
200         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
201         recordMutable(R1, R1);
202         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
203         RET_P(R1);
204   } else {
205         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
206         RET_P(R1);
207   }
208 }
209
210 /* -----------------------------------------------------------------------------
211    MutVar primitives
212    -------------------------------------------------------------------------- */
213
214 newMutVarzh_fast
215 {
216     W_ mv;
217     /* Args: R1 = initialisation value */
218
219     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
220
221     mv = Hp - SIZEOF_StgMutVar + WDS(1);
222     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
223     StgMutVar_var(mv) = R1;
224     
225     RET_P(mv);
226 }
227
228 atomicModifyMutVarzh_fast
229 {
230     W_ mv, f, z, x, y, r, h;
231     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
232
233     /* If x is the current contents of the MutVar#, then 
234        We want to make the new contents point to
235
236          (sel_0 (f x))
237  
238        and the return value is
239          
240          (sel_1 (f x))
241
242         obviously we can share (f x).
243
244          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
245          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
246          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
247     */
248
249 #if MIN_UPD_SIZE > 1
250 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
251 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
252 #else
253 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
254 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
255 #endif
256
257 #if MIN_UPD_SIZE > 2
258 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
259 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
260 #else
261 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
262 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
263 #endif
264
265 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
266
267    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
268
269    mv = R1;
270    f = R2;
271
272    TICK_ALLOC_THUNK_2();
273    CCCS_ALLOC(THUNK_2_SIZE);
274    z = Hp - THUNK_2_SIZE + WDS(1);
275    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
276    LDV_RECORD_CREATE(z);
277    StgThunk_payload(z,0) = f;
278
279    TICK_ALLOC_THUNK_1();
280    CCCS_ALLOC(THUNK_1_SIZE);
281    y = z - THUNK_1_SIZE;
282    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
283    LDV_RECORD_CREATE(y);
284    StgThunk_payload(y,0) = z;
285
286    TICK_ALLOC_THUNK_1();
287    CCCS_ALLOC(THUNK_1_SIZE);
288    r = y - THUNK_1_SIZE;
289    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
290    LDV_RECORD_CREATE(r);
291    StgThunk_payload(r,0) = z;
292
293  retry:
294    x = StgMutVar_var(mv);
295    StgThunk_payload(z,1) = x;
296 #ifdef THREADED_RTS
297    (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) [];
298    if (h != x) { goto retry; }
299 #else
300    StgMutVar_var(mv) = y;
301 #endif
302
303    if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
304      foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
305    }
306
307    RET_P(r);
308 }
309
310 /* -----------------------------------------------------------------------------
311    Weak Pointer Primitives
312    -------------------------------------------------------------------------- */
313
314 STRING(stg_weak_msg,"New weak pointer at %p\n")
315
316 mkWeakzh_fast
317 {
318   /* R1 = key
319      R2 = value
320      R3 = finalizer (or NULL)
321   */
322   W_ w;
323
324   if (R3 == NULL) {
325     R3 = stg_NO_FINALIZER_closure;
326   }
327
328   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
329
330   w = Hp - SIZEOF_StgWeak + WDS(1);
331   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
332
333   // We don't care about cfinalizer here.
334   // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
335   // something else?
336
337   StgWeak_key(w)        = R1;
338   StgWeak_value(w)      = R2;
339   StgWeak_finalizer(w)  = R3;
340   StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure;
341
342   StgWeak_link(w)       = W_[weak_ptr_list];
343   W_[weak_ptr_list]     = w;
344
345   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
346
347   RET_P(w);
348 }
349
350 mkWeakForeignEnvzh_fast
351 {
352   /* R1 = key
353      R2 = value
354      R3 = finalizer
355      R4 = pointer
356      R5 = has environment (0 or 1)
357      R6 = environment
358   */
359   W_ w, payload_words, words, p;
360
361   W_ key, val, fptr, ptr, flag, eptr;
362
363   key  = R1;
364   val  = R2;
365   fptr = R3;
366   ptr  = R4;
367   flag = R5;
368   eptr = R6;
369
370   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, mkWeakForeignEnvzh_fast );
371
372   w = Hp - SIZEOF_StgWeak + WDS(1);
373   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
374
375   payload_words = 4;
376   words         = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
377   ("ptr" p)     = foreign "C" allocateLocal(MyCapability() "ptr", words) [];
378
379   TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
380   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
381
382   StgArrWords_words(p)     = payload_words;
383   StgArrWords_payload(p,0) = fptr;
384   StgArrWords_payload(p,1) = ptr;
385   StgArrWords_payload(p,2) = eptr;
386   StgArrWords_payload(p,3) = flag;
387
388   // We don't care about the value here.
389   // Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else?
390
391   StgWeak_key(w)        = key;
392   StgWeak_value(w)      = val;
393   StgWeak_finalizer(w)  = stg_NO_FINALIZER_closure;
394   StgWeak_cfinalizer(w) = p;
395
396   StgWeak_link(w)   = W_[weak_ptr_list];
397   W_[weak_ptr_list] = w;
398
399   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
400
401   RET_P(w);
402 }
403
404 finalizzeWeakzh_fast
405 {
406   /* R1 = weak ptr
407    */
408   W_ w, f, arr;
409
410   w = R1;
411
412   // already dead?
413   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
414       RET_NP(0,stg_NO_FINALIZER_closure);
415   }
416
417   // kill it
418 #ifdef PROFILING
419   // @LDV profiling
420   // A weak pointer is inherently used, so we do not need to call
421   // LDV_recordDead_FILL_SLOP_DYNAMIC():
422   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
423   // or, LDV_recordDead():
424   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
425   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
426   // large as weak pointers, so there is no need to fill the slop, either.
427   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
428 #endif
429
430   //
431   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
432   //
433   SET_INFO(w,stg_DEAD_WEAK_info);
434   LDV_RECORD_CREATE(w);
435
436   f   = StgWeak_finalizer(w);
437   arr = StgWeak_cfinalizer(w);
438
439   StgDeadWeak_link(w) = StgWeak_link(w);
440
441   if (arr != stg_NO_FINALIZER_closure) {
442     foreign "C" runCFinalizer(StgArrWords_payload(arr,0),
443                               StgArrWords_payload(arr,1),
444                               StgArrWords_payload(arr,2),
445                               StgArrWords_payload(arr,3)) [];
446   }
447
448   /* return the finalizer */
449   if (f == stg_NO_FINALIZER_closure) {
450       RET_NP(0,stg_NO_FINALIZER_closure);
451   } else {
452       RET_NP(1,f);
453   }
454 }
455
456 deRefWeakzh_fast
457 {
458   /* R1 = weak ptr */
459   W_ w, code, val;
460
461   w = R1;
462   if (GET_INFO(w) == stg_WEAK_info) {
463     code = 1;
464     val = StgWeak_value(w);
465   } else {
466     code = 0;
467     val = w;
468   }
469   RET_NP(code,val);
470 }
471
472 /* -----------------------------------------------------------------------------
473    Arbitrary-precision Integer operations.
474
475    There are some assumptions in this code that mp_limb_t == W_.  This is
476    the case for all the platforms that GHC supports, currently.
477    -------------------------------------------------------------------------- */
478
479 int2Integerzh_fast
480 {
481    /* arguments: R1 = Int# */
482
483    W_ val, s, p;        /* to avoid aliasing */
484
485    val = R1;
486    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
487
488    p = Hp - SIZEOF_StgArrWords;
489    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
490    StgArrWords_words(p) = 1;
491
492    /* mpz_set_si is inlined here, makes things simpler */
493    if (%lt(val,0)) { 
494         s  = -1;
495         Hp(0) = -val;
496    } else { 
497      if (%gt(val,0)) {
498         s = 1;
499         Hp(0) = val;
500      } else {
501         s = 0;
502      }
503   }
504
505    /* returns (# size  :: Int#, 
506                  data  :: ByteArray# 
507                #)
508    */
509    RET_NP(s,p);
510 }
511
512 word2Integerzh_fast
513 {
514    /* arguments: R1 = Word# */
515
516    W_ val, s, p;        /* to avoid aliasing */
517
518    val = R1;
519
520    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
521
522    p = Hp - SIZEOF_StgArrWords;
523    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
524    StgArrWords_words(p) = 1;
525
526    if (val != 0) {
527         s = 1;
528         W_[Hp] = val;
529    } else {
530         s = 0;
531    }
532
533    /* returns (# size  :: Int#, 
534                  data  :: ByteArray# #)
535    */
536    RET_NP(s,p);
537 }
538
539
540 /*
541  * 'long long' primops for converting to/from Integers.
542  */
543
544 #ifdef SUPPORT_LONG_LONGS
545
546 int64ToIntegerzh_fast
547 {
548    /* arguments: L1 = Int64# */
549
550    L_ val;
551    W_ hi, lo, s, neg, words_needed, p;
552
553    val = L1;
554    neg = 0;
555
556    hi = TO_W_(val >> 32);
557    lo = TO_W_(val);
558
559    if ( hi == 0 || (hi == 0xFFFFFFFF && lo != 0) )  {
560        // minimum is one word
561        words_needed = 1;
562    } else { 
563        words_needed = 2;
564    }
565
566    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
567                NO_PTRS, int64ToIntegerzh_fast );
568
569    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
570    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
571    StgArrWords_words(p) = words_needed;
572
573    if ( %lt(hi,0) ) {
574      neg = 1;
575      lo = -lo;
576      if(lo == 0) {
577        hi = -hi;
578      } else {
579        hi = -hi - 1;
580      }
581    }
582
583    if ( words_needed == 2 )  { 
584       s = 2;
585       Hp(-1) = lo;
586       Hp(0) = hi;
587    } else { 
588        if ( lo != 0 ) {
589            s = 1;
590            Hp(0) = lo;
591        } else /* val==0 */  {
592            s = 0;
593        }
594    }
595    if ( neg != 0 ) {
596         s = -s;
597    }
598
599    /* returns (# size  :: Int#, 
600                  data  :: ByteArray# #)
601    */
602    RET_NP(s,p);
603 }
604 word64ToIntegerzh_fast
605 {
606    /* arguments: L1 = Word64# */
607
608    L_ val;
609    W_ hi, lo, s, words_needed, p;
610
611    val = L1;
612    hi = TO_W_(val >> 32);
613    lo = TO_W_(val);
614
615    if ( hi != 0 ) {
616       words_needed = 2;
617    } else {
618       words_needed = 1;
619    }
620
621    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
622                NO_PTRS, word64ToIntegerzh_fast );
623
624    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
625    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
626    StgArrWords_words(p) = words_needed;
627
628    if ( hi != 0 ) { 
629      s = 2;
630      Hp(-1) = lo;
631      Hp(0)  = hi;
632    } else {
633       if ( lo != 0 ) {
634         s = 1;
635         Hp(0) = lo;
636      } else /* val==0 */  {
637       s = 0;
638      }
639   }
640
641    /* returns (# size  :: Int#, 
642                  data  :: ByteArray# #)
643    */
644    RET_NP(s,p);
645 }
646
647
648
649 #endif /* SUPPORT_LONG_LONGS */
650
651 /* ToDo: this is shockingly inefficient */
652
653 #ifndef THREADED_RTS
654 section "bss" {
655   mp_tmp1:
656     bits8 [SIZEOF_MP_INT];
657 }
658
659 section "bss" {
660   mp_tmp2:
661     bits8 [SIZEOF_MP_INT];
662 }
663
664 section "bss" {
665   mp_result1:
666     bits8 [SIZEOF_MP_INT];
667 }
668
669 section "bss" {
670   mp_result2:
671     bits8 [SIZEOF_MP_INT];
672 }
673 #endif
674
675 #ifdef THREADED_RTS
676 #define FETCH_MP_TEMP(X) \
677 W_ X; \
678 X = BaseReg + (OFFSET_StgRegTable_r ## X);
679 #else
680 #define FETCH_MP_TEMP(X) /* Nothing */
681 #endif
682
683 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
684 name                                                                    \
685 {                                                                       \
686   CInt s1, s2;                                                          \
687   W_ d1, d2;                                                            \
688   FETCH_MP_TEMP(mp_tmp1);                                               \
689   FETCH_MP_TEMP(mp_tmp2);                                               \
690   FETCH_MP_TEMP(mp_result1)                                             \
691   FETCH_MP_TEMP(mp_result2);                                            \
692                                                                         \
693   /* call doYouWantToGC() */                                            \
694   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
695                                                                         \
696   s1 = W_TO_INT(R1);                                                    \
697   d1 = R2;                                                              \
698   s2 = W_TO_INT(R3);                                                    \
699   d2 = R4;                                                              \
700                                                                         \
701   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
702   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
703   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
704   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
705   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
706   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
707                                                                         \
708   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
709                                                                         \
710   /* Perform the operation */                                           \
711   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
712                                                                         \
713   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
714          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
715 }
716
717 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
718 name                                                                    \
719 {                                                                       \
720   CInt s1;                                                              \
721   W_ d1;                                                                \
722   FETCH_MP_TEMP(mp_tmp1);                                               \
723   FETCH_MP_TEMP(mp_result1)                                             \
724                                                                         \
725   /* call doYouWantToGC() */                                            \
726   MAYBE_GC(R2_PTR, name);                                               \
727                                                                         \
728   d1 = R2;                                                              \
729   s1 = W_TO_INT(R1);                                                    \
730                                                                         \
731   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
732   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
733   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
734                                                                         \
735   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
736                                                                         \
737   /* Perform the operation */                                           \
738   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
739                                                                         \
740   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
741          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
742 }
743
744 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
745 name                                                                                    \
746 {                                                                                       \
747   CInt s1, s2;                                                                          \
748   W_ d1, d2;                                                                            \
749   FETCH_MP_TEMP(mp_tmp1);                                                               \
750   FETCH_MP_TEMP(mp_tmp2);                                                               \
751   FETCH_MP_TEMP(mp_result1)                                                             \
752   FETCH_MP_TEMP(mp_result2)                                                             \
753                                                                                         \
754   /* call doYouWantToGC() */                                                            \
755   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
756                                                                                         \
757   s1 = W_TO_INT(R1);                                                                    \
758   d1 = R2;                                                                              \
759   s2 = W_TO_INT(R3);                                                                    \
760   d2 = R4;                                                                              \
761                                                                                         \
762   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
763   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
764   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
765   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
766   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
767   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
768                                                                                         \
769   foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
770   foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
771                                                                                         \
772   /* Perform the operation */                                                           \
773   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
774                                                                                         \
775   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
776            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
777            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
778            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
779 }
780
781 GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
782 GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
783 GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
784 GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
785 GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
786 GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
787 GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
788 GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
789 GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
790 GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
791 GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)
792
793 GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
794 GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
795
796 #ifndef THREADED_RTS
797 section "bss" {
798   mp_tmp_w:  W_; // NB. mp_tmp_w is really an here mp_limb_t
799 }
800 #endif
801
802 gcdIntzh_fast
803 {
804     /* R1 = the first Int#; R2 = the second Int# */
805     W_ r; 
806     FETCH_MP_TEMP(mp_tmp_w);
807
808     W_[mp_tmp_w] = R1;
809     (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
810
811     R1 = r;
812     /* Result parked in R1, return via info-pointer at TOS */
813     jump %ENTRY_CODE(Sp(0));
814 }
815
816
817 gcdIntegerIntzh_fast
818 {
819     /* R1 = s1; R2 = d1; R3 = the int */
820     W_ s1;
821     (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
822     R1 = s1;
823     
824     /* Result parked in R1, return via info-pointer at TOS */
825     jump %ENTRY_CODE(Sp(0));
826 }
827
828
829 cmpIntegerIntzh_fast
830 {
831     /* R1 = s1; R2 = d1; R3 = the int */
832     W_ usize, vsize, v_digit, u_digit;
833
834     usize = R1;
835     vsize = 0;
836     v_digit = R3;
837
838     // paraphrased from __gmpz_cmp_si() in the GMP sources
839     if (%gt(v_digit,0)) {
840         vsize = 1;
841     } else { 
842         if (%lt(v_digit,0)) {
843             vsize = -1;
844             v_digit = -v_digit;
845         }
846     }
847
848     if (usize != vsize) {
849         R1 = usize - vsize; 
850         jump %ENTRY_CODE(Sp(0));
851     }
852
853     if (usize == 0) {
854         R1 = 0; 
855         jump %ENTRY_CODE(Sp(0));
856     }
857
858     u_digit = W_[BYTE_ARR_CTS(R2)];
859
860     if (u_digit == v_digit) {
861         R1 = 0; 
862         jump %ENTRY_CODE(Sp(0));
863     }
864
865     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
866         R1 = usize; 
867     } else {
868         R1 = -usize; 
869     }
870
871     jump %ENTRY_CODE(Sp(0));
872 }
873
874 cmpIntegerzh_fast
875 {
876     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
877     W_ usize, vsize, size, up, vp;
878     CInt cmp;
879
880     // paraphrased from __gmpz_cmp() in the GMP sources
881     usize = R1;
882     vsize = R3;
883
884     if (usize != vsize) {
885         R1 = usize - vsize; 
886         jump %ENTRY_CODE(Sp(0));
887     }
888
889     if (usize == 0) {
890         R1 = 0; 
891         jump %ENTRY_CODE(Sp(0));
892     }
893
894     if (%lt(usize,0)) { // NB. not <, which is unsigned
895         size = -usize;
896     } else {
897         size = usize;
898     }
899
900     up = BYTE_ARR_CTS(R2);
901     vp = BYTE_ARR_CTS(R4);
902
903     (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];
904
905     if (cmp == 0 :: CInt) {
906         R1 = 0; 
907         jump %ENTRY_CODE(Sp(0));
908     }
909
910     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
911         R1 = 1;
912     } else {
913         R1 = (-1); 
914     }
915     /* Result parked in R1, return via info-pointer at TOS */
916     jump %ENTRY_CODE(Sp(0));
917 }
918
919 integer2Intzh_fast
920 {
921     /* R1 = s; R2 = d */
922     W_ r, s;
923
924     s = R1;
925     if (s == 0) {
926         r = 0;
927     } else {
928         r = W_[R2 + SIZEOF_StgArrWords];
929         if (%lt(s,0)) {
930             r = -r;
931         }
932     }
933     /* Result parked in R1, return via info-pointer at TOS */
934     R1 = r;
935     jump %ENTRY_CODE(Sp(0));
936 }
937
938 integer2Wordzh_fast
939 {
940   /* R1 = s; R2 = d */
941   W_ r, s;
942
943   s = R1;
944   if (s == 0) {
945     r = 0;
946   } else {
947     r = W_[R2 + SIZEOF_StgArrWords];
948     if (%lt(s,0)) {
949         r = -r;
950     }
951   }
952   /* Result parked in R1, return via info-pointer at TOS */
953   R1 = r;
954   jump %ENTRY_CODE(Sp(0));
955 }
956
957 decodeFloatzuIntzh_fast
958
959     W_ p;
960     F_ arg;
961     FETCH_MP_TEMP(mp_tmp1);
962     FETCH_MP_TEMP(mp_tmp_w);
963     
964     /* arguments: F1 = Float# */
965     arg = F1;
966     
967     /* Perform the operation */
968     foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
969     
970     /* returns: (Int# (mantissa), Int# (exponent)) */
971     RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
972 }
973
974 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
975 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
976
977 decodeDoublezh_fast
978
979     D_ arg;
980     W_ p;
981     FETCH_MP_TEMP(mp_tmp1);
982     FETCH_MP_TEMP(mp_tmp_w);
983
984     /* arguments: D1 = Double# */
985     arg = D1;
986
987     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
988     
989     /* Be prepared to tell Lennart-coded __decodeDouble
990        where mantissa.d can be put (it does not care about the rest) */
991     p = Hp - ARR_SIZE + WDS(1);
992     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
993     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
994     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
995
996     /* Perform the operation */
997     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
998     
999     /* returns: (Int# (expn), Int#, ByteArray#) */
1000     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
1001 }
1002
1003 decodeDoublezu2Intzh_fast
1004
1005     D_ arg;
1006     W_ p;
1007     FETCH_MP_TEMP(mp_tmp1);
1008     FETCH_MP_TEMP(mp_tmp2);
1009     FETCH_MP_TEMP(mp_result1);
1010     FETCH_MP_TEMP(mp_result2);
1011
1012     /* arguments: D1 = Double# */
1013     arg = D1;
1014
1015     /* Perform the operation */
1016     foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
1017                                     mp_result1 "ptr", mp_result2 "ptr",
1018                                     arg) [];
1019
1020     /* returns:
1021        (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
1022     RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
1023 }
1024
1025 /* -----------------------------------------------------------------------------
1026  * Concurrency primitives
1027  * -------------------------------------------------------------------------- */
1028
1029 forkzh_fast
1030 {
1031   /* args: R1 = closure to spark */
1032
1033   MAYBE_GC(R1_PTR, forkzh_fast);
1034
1035   W_ closure;
1036   W_ threadid;
1037   closure = R1;
1038
1039   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
1040                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
1041                                 closure "ptr") [];
1042
1043   /* start blocked if the current thread is blocked */
1044   StgTSO_flags(threadid) = 
1045      StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
1046                                 (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));
1047
1048   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
1049
1050   // context switch soon, but not immediately: we don't want every
1051   // forkIO to force a context-switch.
1052   Capability_context_switch(MyCapability()) = 1 :: CInt;
1053   
1054   RET_P(threadid);
1055 }
1056
1057 forkOnzh_fast
1058 {
1059   /* args: R1 = cpu, R2 = closure to spark */
1060
1061   MAYBE_GC(R2_PTR, forkOnzh_fast);
1062
1063   W_ cpu;
1064   W_ closure;
1065   W_ threadid;
1066   cpu = R1;
1067   closure = R2;
1068
1069   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
1070                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
1071                                 closure "ptr") [];
1072
1073   /* start blocked if the current thread is blocked */
1074   StgTSO_flags(threadid) = 
1075      StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
1076                                 (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));
1077
1078   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
1079
1080   // context switch soon, but not immediately: we don't want every
1081   // forkIO to force a context-switch.
1082   Capability_context_switch(MyCapability()) = 1 :: CInt;
1083   
1084   RET_P(threadid);
1085 }
1086
1087 yieldzh_fast
1088 {
1089   jump stg_yield_noregs;
1090 }
1091
1092 myThreadIdzh_fast
1093 {
1094   /* no args. */
1095   RET_P(CurrentTSO);
1096 }
1097
1098 labelThreadzh_fast
1099 {
1100   /* args: 
1101         R1 = ThreadId#
1102         R2 = Addr# */
1103 #ifdef DEBUG
1104   foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
1105 #endif
1106   jump %ENTRY_CODE(Sp(0));
1107 }
1108
1109 isCurrentThreadBoundzh_fast
1110 {
1111   /* no args */
1112   W_ r;
1113   (r) = foreign "C" isThreadBound(CurrentTSO) [];
1114   RET_N(r);
1115 }
1116
1117 threadStatuszh_fast
1118 {
1119     /* args: R1 :: ThreadId# */
1120     W_ tso;
1121     W_ why_blocked;
1122     W_ what_next;
1123     W_ ret;
1124
1125     tso = R1;
1126     loop:
1127       if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
1128           tso = StgTSO__link(tso);
1129           goto loop;
1130       }
1131
1132     what_next   = TO_W_(StgTSO_what_next(tso));
1133     why_blocked = TO_W_(StgTSO_why_blocked(tso));
1134     // Note: these two reads are not atomic, so they might end up
1135     // being inconsistent.  It doesn't matter, since we
1136     // only return one or the other.  If we wanted to return the
1137     // contents of block_info too, then we'd have to do some synchronisation.
1138
1139     if (what_next == ThreadComplete) {
1140         ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
1141     } else {
1142         if (what_next == ThreadKilled) {
1143             ret = 17;
1144         } else {
1145             ret = why_blocked;
1146         }
1147     }
1148     RET_N(ret);
1149 }
1150
1151 /* -----------------------------------------------------------------------------
1152  * TVar primitives
1153  * -------------------------------------------------------------------------- */
1154
1155 #define SP_OFF 0
1156
1157 // Catch retry frame ------------------------------------------------------------
1158
1159 INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
1160 #if defined(PROFILING)
1161   W_ unused1, W_ unused2,
1162 #endif
1163   W_ unused3, P_ unused4, P_ unused5)
1164 {
1165    W_ r, frame, trec, outer;
1166
1167    frame = Sp;
1168    trec = StgTSO_trec(CurrentTSO);
1169    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1170    (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
1171    if (r != 0) {
1172      /* Succeeded (either first branch or second branch) */
1173      StgTSO_trec(CurrentTSO) = outer;
1174      Sp = Sp + SIZEOF_StgCatchRetryFrame;
1175      jump %ENTRY_CODE(Sp(SP_OFF));
1176    } else {
1177      /* Did not commit: re-execute */
1178      W_ new_trec;
1179      ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1180      StgTSO_trec(CurrentTSO) = new_trec;
1181      if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1182        R1 = StgCatchRetryFrame_alt_code(frame);
1183      } else {
1184        R1 = StgCatchRetryFrame_first_code(frame);
1185      }
1186      jump stg_ap_v_fast;
1187    }
1188 }
1189
1190
1191 // Atomically frame ------------------------------------------------------------
1192
1193 INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
1194 #if defined(PROFILING)
1195   W_ unused1, W_ unused2,
1196 #endif
1197   P_ unused3, P_ unused4)
1198 {
1199   W_ frame, trec, valid, next_invariant, q, outer;
1200
1201   frame = Sp;
1202   trec = StgTSO_trec(CurrentTSO);
1203   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1204
1205   if (outer == NO_TREC) {
1206     /* First time back at the atomically frame -- pick up invariants */
1207     ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
1208     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
1209
1210   } else {
1211     /* Second/subsequent time back at the atomically frame -- abort the
1212      * tx that's checking the invariant and move on to the next one */
1213     StgTSO_trec(CurrentTSO) = outer;
1214     q = StgAtomicallyFrame_next_invariant_to_check(frame);
1215     StgInvariantCheckQueue_my_execution(q) = trec;
1216     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1217     /* Don't free trec -- it's linked from q and will be stashed in the
1218      * invariant if we eventually commit. */
1219     q = StgInvariantCheckQueue_next_queue_entry(q);
1220     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
1221     trec = outer;
1222   }
1223
1224   q = StgAtomicallyFrame_next_invariant_to_check(frame);
1225
1226   if (q != END_INVARIANT_CHECK_QUEUE) {
1227     /* We can't commit yet: another invariant to check */
1228     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
1229     StgTSO_trec(CurrentTSO) = trec;
1230
1231     next_invariant = StgInvariantCheckQueue_invariant(q);
1232     R1 = StgAtomicInvariant_code(next_invariant);
1233     jump stg_ap_v_fast;
1234
1235   } else {
1236
1237     /* We've got no more invariants to check, try to commit */
1238     (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
1239     if (valid != 0) {
1240       /* Transaction was valid: commit succeeded */
1241       StgTSO_trec(CurrentTSO) = NO_TREC;
1242       Sp = Sp + SIZEOF_StgAtomicallyFrame;
1243       jump %ENTRY_CODE(Sp(SP_OFF));
1244     } else {
1245       /* Transaction was not valid: try again */
1246       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1247       StgTSO_trec(CurrentTSO) = trec;
1248       StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
1249       R1 = StgAtomicallyFrame_code(frame);
1250       jump stg_ap_v_fast;
1251     }
1252   }
1253 }
1254
1255 INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
1256 #if defined(PROFILING)
1257   W_ unused1, W_ unused2,
1258 #endif
1259   P_ unused3, P_ unused4)
1260 {
1261   W_ frame, trec, valid;
1262
1263   frame = Sp;
1264
1265   /* The TSO is currently waiting: should we stop waiting? */
1266   (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
1267   if (valid != 0) {
1268     /* Previous attempt is still valid: no point trying again yet */
1269     jump stg_block_noregs;
1270   } else {
1271     /* Previous attempt is no longer valid: try again */
1272     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1273     StgTSO_trec(CurrentTSO) = trec;
1274     StgHeader_info(frame) = stg_atomically_frame_info;
1275     R1 = StgAtomicallyFrame_code(frame);
1276     jump stg_ap_v_fast;
1277   }
1278 }
1279
1280 // STM catch frame --------------------------------------------------------------
1281
1282 #define SP_OFF 0
1283
1284 /* Catch frames are very similar to update frames, but when entering
1285  * one we just pop the frame off the stack and perform the correct
1286  * kind of return to the activation record underneath us on the stack.
1287  */
1288
1289 INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
1290 #if defined(PROFILING)
1291   W_ unused1, W_ unused2,
1292 #endif
1293   P_ unused3, P_ unused4)
1294    {
1295       W_ r, frame, trec, outer;
1296       frame = Sp;
1297       trec = StgTSO_trec(CurrentTSO);
1298       ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1299       (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
1300       if (r != 0) {
1301         /* Commit succeeded */
1302         StgTSO_trec(CurrentTSO) = outer;
1303         Sp = Sp + SIZEOF_StgCatchSTMFrame;
1304         jump Sp(SP_OFF);
1305       } else {
1306         /* Commit failed */
1307         W_ new_trec;
1308         ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1309         StgTSO_trec(CurrentTSO) = new_trec;
1310         R1 = StgCatchSTMFrame_code(frame);
1311         jump stg_ap_v_fast;
1312       }
1313    }
1314
1315
1316 // Primop definition ------------------------------------------------------------
1317
1318 atomicallyzh_fast
1319 {
1320   W_ frame;
1321   W_ old_trec;
1322   W_ new_trec;
1323   
1324   // stmStartTransaction may allocate
1325   MAYBE_GC (R1_PTR, atomicallyzh_fast); 
1326
1327   /* Args: R1 = m :: STM a */
1328   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1329
1330   old_trec = StgTSO_trec(CurrentTSO);
1331
1332   /* Nested transactions are not allowed; raise an exception */
1333   if (old_trec != NO_TREC) {
1334      R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
1335      jump raisezh_fast;
1336   }
1337
1338   /* Set up the atomically frame */
1339   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1340   frame = Sp;
1341
1342   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1343   StgAtomicallyFrame_code(frame) = R1;
1344   StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
1345
1346   /* Start the memory transcation */
1347   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
1348   StgTSO_trec(CurrentTSO) = new_trec;
1349
1350   /* Apply R1 to the realworld token */
1351   jump stg_ap_v_fast;
1352 }
1353
1354
1355 catchSTMzh_fast
1356 {
1357   W_ frame;
1358   
1359   /* Args: R1 :: STM a */
1360   /* Args: R2 :: Exception -> STM a */
1361   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1362
1363   /* Set up the catch frame */
1364   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1365   frame = Sp;
1366
1367   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1368   StgCatchSTMFrame_handler(frame) = R2;
1369   StgCatchSTMFrame_code(frame) = R1;
1370
1371   /* Start a nested transaction to run the body of the try block in */
1372   W_ cur_trec;  
1373   W_ new_trec;
1374   cur_trec = StgTSO_trec(CurrentTSO);
1375   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
1376   StgTSO_trec(CurrentTSO) = new_trec;
1377
1378   /* Apply R1 to the realworld token */
1379   jump stg_ap_v_fast;
1380 }
1381
1382
1383 catchRetryzh_fast
1384 {
1385   W_ frame;
1386   W_ new_trec;
1387   W_ trec;
1388
1389   // stmStartTransaction may allocate
1390   MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 
1391
1392   /* Args: R1 :: STM a */
1393   /* Args: R2 :: STM a */
1394   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1395
1396   /* Start a nested transaction within which to run the first code */
1397   trec = StgTSO_trec(CurrentTSO);
1398   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
1399   StgTSO_trec(CurrentTSO) = new_trec;
1400
1401   /* Set up the catch-retry frame */
1402   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1403   frame = Sp;
1404   
1405   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1406   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1407   StgCatchRetryFrame_first_code(frame) = R1;
1408   StgCatchRetryFrame_alt_code(frame) = R2;
1409
1410   /* Apply R1 to the realworld token */
1411   jump stg_ap_v_fast;
1412 }
1413
1414
1415 retryzh_fast
1416 {
1417   W_ frame_type;
1418   W_ frame;
1419   W_ trec;
1420   W_ outer;
1421   W_ r;
1422
1423   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1424
1425   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1426 retry_pop_stack:
1427   StgTSO_sp(CurrentTSO) = Sp;
1428   (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
1429   Sp = StgTSO_sp(CurrentTSO);
1430   frame = Sp;
1431   trec = StgTSO_trec(CurrentTSO);
1432   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1433
1434   if (frame_type == CATCH_RETRY_FRAME) {
1435     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1436     ASSERT(outer != NO_TREC);
1437     // Abort the transaction attempting the current branch
1438     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1439     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
1440     if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1441       // Retry in the first branch: try the alternative
1442       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1443       StgTSO_trec(CurrentTSO) = trec;
1444       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1445       R1 = StgCatchRetryFrame_alt_code(frame);
1446       jump stg_ap_v_fast;
1447     } else {
1448       // Retry in the alternative code: propagate the retry
1449       StgTSO_trec(CurrentTSO) = outer;
1450       Sp = Sp + SIZEOF_StgCatchRetryFrame;
1451       goto retry_pop_stack;
1452     }
1453   }
1454
1455   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1456   ASSERT(frame_type == ATOMICALLY_FRAME);
1457   if (outer != NO_TREC) {
1458     // We called retry while checking invariants, so abort the current
1459     // invariant check (merging its TVar accesses into the parents read
1460     // set so we'll wait on them)
1461     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1462     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
1463     trec = outer;
1464     StgTSO_trec(CurrentTSO) = trec;
1465     ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1466   }
1467   ASSERT(outer == NO_TREC);
1468
1469   (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
1470   if (r != 0) {
1471     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1472     StgHeader_info(frame) = stg_atomically_waiting_frame_info;
1473     Sp = frame;
1474     // Fix up the stack in the unregisterised case: the return convention is different.
1475     R3 = trec; // passing to stmWaitUnblock()
1476     jump stg_block_stmwait;
1477   } else {
1478     // Transaction was not valid: retry immediately
1479     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1480     StgTSO_trec(CurrentTSO) = trec;
1481     R1 = StgAtomicallyFrame_code(frame);
1482     Sp = frame;
1483     jump stg_ap_v_fast;
1484   }
1485 }
1486
1487
1488 checkzh_fast
1489 {
1490   W_ trec, closure;
1491
1492   /* Args: R1 = invariant closure */
1493   MAYBE_GC (R1_PTR, checkzh_fast); 
1494
1495   trec = StgTSO_trec(CurrentTSO);
1496   closure = R1;
1497   foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
1498                                      trec "ptr",
1499                                      closure "ptr") [];
1500
1501   jump %ENTRY_CODE(Sp(0));
1502 }
1503
1504
1505 newTVarzh_fast
1506 {
1507   W_ tv;
1508   W_ new_value;
1509
1510   /* Args: R1 = initialisation value */
1511
1512   MAYBE_GC (R1_PTR, newTVarzh_fast); 
1513   new_value = R1;
1514   ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
1515   RET_P(tv);
1516 }
1517
1518
1519 readTVarzh_fast
1520 {
1521   W_ trec;
1522   W_ tvar;
1523   W_ result;
1524
1525   /* Args: R1 = TVar closure */
1526
1527   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1528   trec = StgTSO_trec(CurrentTSO);
1529   tvar = R1;
1530   ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
1531
1532   RET_P(result);
1533 }
1534
1535 readTVarIOzh_fast
1536 {
1537     W_ result;
1538
1539 again:
1540     result = StgTVar_current_value(R1);
1541     if (%INFO_PTR(result) == stg_TREC_HEADER_info) {
1542         goto again;
1543     }
1544     RET_P(result);
1545 }
1546
1547 writeTVarzh_fast
1548 {
1549   W_ trec;
1550   W_ tvar;
1551   W_ new_value;
1552   
1553   /* Args: R1 = TVar closure */
1554   /*       R2 = New value    */
1555
1556   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1557   trec = StgTSO_trec(CurrentTSO);
1558   tvar = R1;
1559   new_value = R2;
1560   foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1561
1562   jump %ENTRY_CODE(Sp(0));
1563 }
1564
1565
1566 /* -----------------------------------------------------------------------------
1567  * MVar primitives
1568  *
1569  * take & putMVar work as follows.  Firstly, an important invariant:
1570  *
1571  *    If the MVar is full, then the blocking queue contains only
1572  *    threads blocked on putMVar, and if the MVar is empty then the
1573  *    blocking queue contains only threads blocked on takeMVar.
1574  *
1575  * takeMvar:
1576  *    MVar empty : then add ourselves to the blocking queue
1577  *    MVar full  : remove the value from the MVar, and
1578  *                 blocking queue empty     : return
1579  *                 blocking queue non-empty : perform the first blocked putMVar
1580  *                                            from the queue, and wake up the
1581  *                                            thread (MVar is now full again)
1582  *
1583  * putMVar is just the dual of the above algorithm.
1584  *
1585  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1586  * the stack of the thread waiting to do the putMVar.  See
1587  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1588  * the stack layout, and the PerformPut and PerformTake macros below.
1589  *
1590  * It is important that a blocked take or put is woken up with the
1591  * take/put already performed, because otherwise there would be a
1592  * small window of vulnerability where the thread could receive an
1593  * exception and never perform its take or put, and we'd end up with a
1594  * deadlock.
1595  *
1596  * -------------------------------------------------------------------------- */
1597
1598 isEmptyMVarzh_fast
1599 {
1600     /* args: R1 = MVar closure */
1601
1602     if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
1603         RET_N(1);
1604     } else {
1605         RET_N(0);
1606     }
1607 }
1608
1609 newMVarzh_fast
1610 {
1611     /* args: none */
1612     W_ mvar;
1613
1614     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1615   
1616     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1617     SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
1618         // MVARs start dirty: generation 0 has no mutable list
1619     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1620     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1621     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1622     RET_P(mvar);
1623 }
1624
1625
1626 #define PerformTake(tso, value)                         \
1627     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1628     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1629
1630 #define PerformPut(tso,lval)                    \
1631     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1632     lval = W_[StgTSO_sp(tso) - WDS(1)];
1633
1634 takeMVarzh_fast
1635 {
1636     W_ mvar, val, info, tso;
1637
1638     /* args: R1 = MVar closure */
1639     mvar = R1;
1640
1641 #if defined(THREADED_RTS)
1642     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1643 #else
1644     info = GET_INFO(mvar);
1645 #endif
1646         
1647     if (info == stg_MVAR_CLEAN_info) {
1648         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
1649     }
1650
1651     /* If the MVar is empty, put ourselves on its blocking queue,
1652      * and wait until we're woken up.
1653      */
1654     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1655         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1656             StgMVar_head(mvar) = CurrentTSO;
1657         } else {
1658             foreign "C" setTSOLink(MyCapability() "ptr", 
1659                                    StgMVar_tail(mvar) "ptr",
1660                                    CurrentTSO) [];
1661         }
1662         StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
1663         StgTSO_block_info(CurrentTSO)  = mvar;
1664         // write barrier for throwTo(), which looks at block_info
1665         // if why_blocked==BlockedOnMVar.
1666         prim %write_barrier() [];
1667         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1668         StgMVar_tail(mvar) = CurrentTSO;
1669         
1670         R1 = mvar;
1671         jump stg_block_takemvar;
1672   }
1673
1674   /* we got the value... */
1675   val = StgMVar_value(mvar);
1676
1677   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1678   {
1679       /* There are putMVar(s) waiting... 
1680        * wake up the first thread on the queue
1681        */
1682       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1683
1684       /* actually perform the putMVar for the thread that we just woke up */
1685       tso = StgMVar_head(mvar);
1686       PerformPut(tso,StgMVar_value(mvar));
1687
1688       if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
1689           foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
1690       }
1691
1692       ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
1693                                             StgMVar_head(mvar) "ptr", 1) [];
1694       StgMVar_head(mvar) = tso;
1695
1696       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1697           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1698       }
1699
1700 #if defined(THREADED_RTS)
1701       unlockClosure(mvar, stg_MVAR_DIRTY_info);
1702 #else
1703       SET_INFO(mvar,stg_MVAR_DIRTY_info);
1704 #endif
1705       RET_P(val);
1706   } 
1707   else
1708   {
1709       /* No further putMVars, MVar is now empty */
1710       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1711  
1712 #if defined(THREADED_RTS)
1713       unlockClosure(mvar, stg_MVAR_DIRTY_info);
1714 #else
1715       SET_INFO(mvar,stg_MVAR_DIRTY_info);
1716 #endif
1717
1718       RET_P(val);
1719   }
1720 }
1721
1722
1723 tryTakeMVarzh_fast
1724 {
1725     W_ mvar, val, info, tso;
1726
1727     /* args: R1 = MVar closure */
1728
1729     mvar = R1;
1730
1731 #if defined(THREADED_RTS)
1732     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1733 #else
1734     info = GET_INFO(mvar);
1735 #endif
1736
1737     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1738 #if defined(THREADED_RTS)
1739         unlockClosure(mvar, info);
1740 #endif
1741         /* HACK: we need a pointer to pass back, 
1742          * so we abuse NO_FINALIZER_closure
1743          */
1744         RET_NP(0, stg_NO_FINALIZER_closure);
1745     }
1746
1747     if (info == stg_MVAR_CLEAN_info) {
1748         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1749     }
1750
1751     /* we got the value... */
1752     val = StgMVar_value(mvar);
1753
1754     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1755
1756         /* There are putMVar(s) waiting... 
1757          * wake up the first thread on the queue
1758          */
1759         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1760
1761         /* actually perform the putMVar for the thread that we just woke up */
1762         tso = StgMVar_head(mvar);
1763         PerformPut(tso,StgMVar_value(mvar));
1764         if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
1765             foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
1766         }
1767
1768         ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
1769                                               StgMVar_head(mvar) "ptr", 1) [];
1770         StgMVar_head(mvar) = tso;
1771
1772         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1773             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1774         }
1775 #if defined(THREADED_RTS)
1776         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1777 #else
1778         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1779 #endif
1780     }
1781     else 
1782     {
1783         /* No further putMVars, MVar is now empty */
1784         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1785 #if defined(THREADED_RTS)
1786         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1787 #else
1788         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1789 #endif
1790     }
1791     
1792     RET_NP(1, val);
1793 }
1794
1795
1796 putMVarzh_fast
1797 {
1798     W_ mvar, val, info, tso;
1799
1800     /* args: R1 = MVar, R2 = value */
1801     mvar = R1;
1802     val  = R2;
1803
1804 #if defined(THREADED_RTS)
1805     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1806 #else
1807     info = GET_INFO(mvar);
1808 #endif
1809
1810     if (info == stg_MVAR_CLEAN_info) {
1811         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1812     }
1813
1814     if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
1815         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1816             StgMVar_head(mvar) = CurrentTSO;
1817         } else {
1818             foreign "C" setTSOLink(MyCapability() "ptr", 
1819                                    StgMVar_tail(mvar) "ptr",
1820                                    CurrentTSO) [];
1821         }
1822         StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
1823         StgTSO_block_info(CurrentTSO)  = mvar;
1824         // write barrier for throwTo(), which looks at block_info
1825         // if why_blocked==BlockedOnMVar.
1826         prim %write_barrier() [];
1827         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1828         StgMVar_tail(mvar) = CurrentTSO;
1829         
1830         R1 = mvar;
1831         R2 = val;
1832         jump stg_block_putmvar;
1833     }
1834   
1835     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1836
1837         /* There are takeMVar(s) waiting: wake up the first one
1838          */
1839         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1840
1841         /* actually perform the takeMVar */
1842         tso = StgMVar_head(mvar);
1843         PerformTake(tso, val);
1844         if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
1845             foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
1846         }
1847       
1848         ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
1849                                               StgMVar_head(mvar) "ptr", 1) [];
1850         StgMVar_head(mvar) = tso;
1851
1852         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1853             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1854         }
1855
1856 #if defined(THREADED_RTS)
1857         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1858 #else
1859         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1860 #endif
1861         jump %ENTRY_CODE(Sp(0));
1862     }
1863     else
1864     {
1865         /* No further takes, the MVar is now full. */
1866         StgMVar_value(mvar) = val;
1867
1868 #if defined(THREADED_RTS)
1869         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1870 #else
1871         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1872 #endif
1873         jump %ENTRY_CODE(Sp(0));
1874     }
1875     
1876     /* ToDo: yield afterward for better communication performance? */
1877 }
1878
1879
1880 tryPutMVarzh_fast
1881 {
1882     W_ mvar, info, tso;
1883
1884     /* args: R1 = MVar, R2 = value */
1885     mvar = R1;
1886
1887 #if defined(THREADED_RTS)
1888     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
1889 #else
1890     info = GET_INFO(mvar);
1891 #endif
1892
1893     if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
1894 #if defined(THREADED_RTS)
1895         unlockClosure(mvar, info);
1896 #endif
1897         RET_N(0);
1898     }
1899   
1900     if (info == stg_MVAR_CLEAN_info) {
1901         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1902     }
1903
1904     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1905
1906         /* There are takeMVar(s) waiting: wake up the first one
1907          */
1908         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1909         
1910         /* actually perform the takeMVar */
1911         tso = StgMVar_head(mvar);
1912         PerformTake(tso, R2);
1913         if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
1914             foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
1915         }
1916       
1917         ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
1918                                               StgMVar_head(mvar) "ptr", 1) [];
1919         StgMVar_head(mvar) = tso;
1920
1921         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1922             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1923         }
1924
1925 #if defined(THREADED_RTS)
1926         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1927 #else
1928         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1929 #endif
1930     }
1931     else
1932     {
1933         /* No further takes, the MVar is now full. */
1934         StgMVar_value(mvar) = R2;
1935
1936 #if defined(THREADED_RTS)
1937         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1938 #else
1939         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1940 #endif
1941     }
1942     
1943     RET_N(1);
1944     /* ToDo: yield afterward for better communication performance? */
1945 }
1946
1947
1948 /* -----------------------------------------------------------------------------
1949    Stable pointer primitives
1950    -------------------------------------------------------------------------  */
1951
1952 makeStableNamezh_fast
1953 {
1954     W_ index, sn_obj;
1955
1956     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1957   
1958     (index) = foreign "C" lookupStableName(R1 "ptr") [];
1959
1960     /* Is there already a StableName for this heap object?
1961      *  stable_ptr_table is a pointer to an array of snEntry structs.
1962      */
1963     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1964         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1965         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1966         StgStableName_sn(sn_obj) = index;
1967         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1968     } else {
1969         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1970     }
1971     
1972     RET_P(sn_obj);
1973 }
1974
1975
1976 makeStablePtrzh_fast
1977 {
1978     /* Args: R1 = a */
1979     W_ sp;
1980     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1981     ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
1982     RET_N(sp);
1983 }
1984
1985 deRefStablePtrzh_fast
1986 {
1987     /* Args: R1 = the stable ptr */
1988     W_ r, sp;
1989     sp = R1;
1990     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1991     RET_P(r);
1992 }
1993
1994 /* -----------------------------------------------------------------------------
1995    Bytecode object primitives
1996    -------------------------------------------------------------------------  */
1997
1998 newBCOzh_fast
1999 {
2000     /* R1 = instrs
2001        R2 = literals
2002        R3 = ptrs
2003        R4 = arity
2004        R5 = bitmap array
2005     */
2006     W_ bco, bitmap_arr, bytes, words;
2007     
2008     bitmap_arr = R5;
2009
2010     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
2011     bytes = WDS(words);
2012
2013     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );
2014
2015     bco = Hp - bytes + WDS(1);
2016     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
2017     
2018     StgBCO_instrs(bco)     = R1;
2019     StgBCO_literals(bco)   = R2;
2020     StgBCO_ptrs(bco)       = R3;
2021     StgBCO_arity(bco)      = HALF_W_(R4);
2022     StgBCO_size(bco)       = HALF_W_(words);
2023     
2024     // Copy the arity/bitmap info into the BCO
2025     W_ i;
2026     i = 0;
2027 for:
2028     if (i < StgArrWords_words(bitmap_arr)) {
2029         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
2030         i = i + 1;
2031         goto for;
2032     }
2033     
2034     RET_P(bco);
2035 }
2036
2037
2038 mkApUpd0zh_fast
2039 {
2040     // R1 = the BCO# for the AP
2041     //  
2042     W_ ap;
2043
2044     // This function is *only* used to wrap zero-arity BCOs in an
2045     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
2046     // saturated and always points directly to a FUN or BCO.
2047     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
2048            StgBCO_arity(R1) == HALF_W_(0));
2049
2050     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
2051     TICK_ALLOC_UP_THK(0, 0);
2052     CCCS_ALLOC(SIZEOF_StgAP);
2053
2054     ap = Hp - SIZEOF_StgAP + WDS(1);
2055     SET_HDR(ap, stg_AP_info, W_[CCCS]);
2056     
2057     StgAP_n_args(ap) = HALF_W_(0);
2058     StgAP_fun(ap) = R1;
2059     
2060     RET_P(ap);
2061 }
2062
2063 unpackClosurezh_fast
2064 {
2065 /* args: R1 = closure to analyze */
2066 // TODO: Consider the absence of ptrs or nonptrs as a special case ?
2067
2068     W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
2069     info  = %GET_STD_INFO(UNTAG(R1));
2070
2071     // Some closures have non-standard layout, so we omit those here.
2072     W_ type;
2073     type = TO_W_(%INFO_TYPE(info));
2074     switch [0 .. N_CLOSURE_TYPES] type {
2075     case THUNK_SELECTOR : {
2076         ptrs = 1;
2077         nptrs = 0;
2078         goto out;
2079     }
2080     case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
2081          THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
2082         ptrs = 0;
2083         nptrs = 0;
2084         goto out;
2085     }
2086     default: {
2087         ptrs  = TO_W_(%INFO_PTRS(info)); 
2088         nptrs = TO_W_(%INFO_NPTRS(info));
2089         goto out;
2090     }}
2091 out:
2092
2093     W_ ptrs_arr_sz, nptrs_arr_sz;
2094     nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
2095     ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);
2096
2097     ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);
2098
2099     W_ clos;
2100     clos = UNTAG(R1);
2101
2102     ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
2103     nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
2104
2105     SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
2106     StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
2107     p = 0;
2108 for:
2109     if(p < ptrs) {
2110          W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
2111          p = p + 1;
2112          goto for;
2113     }
2114     
2115     SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
2116     StgArrWords_words(nptrs_arr) = nptrs;
2117     p = 0;
2118 for2:
2119     if(p < nptrs) {
2120          W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
2121          p = p + 1;
2122          goto for2;
2123     }
2124     RET_NPP(info, ptrs_arr, nptrs_arr);
2125 }
2126
2127 /* -----------------------------------------------------------------------------
2128    Thread I/O blocking primitives
2129    -------------------------------------------------------------------------- */
2130
2131 /* Add a thread to the end of the blocked queue. (C-- version of the C
2132  * macro in Schedule.h).
2133  */
2134 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
2135     ASSERT(StgTSO__link(tso) == END_TSO_QUEUE);         \
2136     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
2137       W_[blocked_queue_hd] = tso;                       \
2138     } else {                                            \
2139       foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \
2140     }                                                   \
2141     W_[blocked_queue_tl] = tso;
2142
2143 waitReadzh_fast
2144 {
2145     /* args: R1 */
2146 #ifdef THREADED_RTS
2147     foreign "C" barf("waitRead# on threaded RTS") never returns;
2148 #else
2149
2150     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2151     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
2152     StgTSO_block_info(CurrentTSO) = R1;
2153     // No locking - we're not going to use this interface in the
2154     // threaded RTS anyway.
2155     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2156     jump stg_block_noregs;
2157 #endif
2158 }
2159
2160 waitWritezh_fast
2161 {
2162     /* args: R1 */
2163 #ifdef THREADED_RTS
2164     foreign "C" barf("waitWrite# on threaded RTS") never returns;
2165 #else
2166
2167     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2168     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2169     StgTSO_block_info(CurrentTSO) = R1;
2170     // No locking - we're not going to use this interface in the
2171     // threaded RTS anyway.
2172     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2173     jump stg_block_noregs;
2174 #endif
2175 }
2176
2177
2178 STRING(stg_delayzh_malloc_str, "delayzh_fast")
2179 delayzh_fast
2180 {
2181 #ifdef mingw32_HOST_OS
2182     W_ ares;
2183     CInt reqID;
2184 #else
2185     W_ t, prev, target;
2186 #endif
2187
2188 #ifdef THREADED_RTS
2189     foreign "C" barf("delay# on threaded RTS") never returns;
2190 #else
2191
2192     /* args: R1 (microsecond delay amount) */
2193     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2194     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
2195
2196 #ifdef mingw32_HOST_OS
2197
2198     /* could probably allocate this on the heap instead */
2199     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2200                                             stg_delayzh_malloc_str);
2201     (reqID) = foreign "C" addDelayRequest(R1);
2202     StgAsyncIOResult_reqID(ares)   = reqID;
2203     StgAsyncIOResult_len(ares)     = 0;
2204     StgAsyncIOResult_errCode(ares) = 0;
2205     StgTSO_block_info(CurrentTSO)  = ares;
2206
2207     /* Having all async-blocked threads reside on the blocked_queue
2208      * simplifies matters, so change the status to OnDoProc put the
2209      * delayed thread on the blocked_queue.
2210      */
2211     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2212     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2213     jump stg_block_async_void;
2214
2215 #else
2216
2217     W_ time;
2218     W_ divisor;
2219     (time) = foreign "C" getourtimeofday() [R1];
2220     divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
2221     if (divisor == 0) {
2222         divisor = 50;
2223     }
2224     divisor = divisor * 1000;
2225     target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
2226            + time + 1; /* Add 1 as getourtimeofday rounds down */
2227     StgTSO_block_info(CurrentTSO) = target;
2228
2229     /* Insert the new thread in the sleeping queue. */
2230     prev = NULL;
2231     t = W_[sleeping_queue];
2232 while:
2233     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
2234         prev = t;
2235         t = StgTSO__link(t);
2236         goto while;
2237     }
2238
2239     StgTSO__link(CurrentTSO) = t;
2240     if (prev == NULL) {
2241         W_[sleeping_queue] = CurrentTSO;
2242     } else {
2243         foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) [];
2244     }
2245     jump stg_block_noregs;
2246 #endif
2247 #endif /* !THREADED_RTS */
2248 }
2249
2250
2251 #ifdef mingw32_HOST_OS
2252 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
2253 asyncReadzh_fast
2254 {
2255     W_ ares;
2256     CInt reqID;
2257
2258 #ifdef THREADED_RTS
2259     foreign "C" barf("asyncRead# on threaded RTS") never returns;
2260 #else
2261
2262     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2263     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2264     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
2265
2266     /* could probably allocate this on the heap instead */
2267     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2268                                             stg_asyncReadzh_malloc_str)
2269                         [R1,R2,R3,R4];
2270     (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
2271     StgAsyncIOResult_reqID(ares)   = reqID;
2272     StgAsyncIOResult_len(ares)     = 0;
2273     StgAsyncIOResult_errCode(ares) = 0;
2274     StgTSO_block_info(CurrentTSO)  = ares;
2275     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2276     jump stg_block_async;
2277 #endif
2278 }
2279
2280 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
2281 asyncWritezh_fast
2282 {
2283     W_ ares;
2284     CInt reqID;
2285
2286 #ifdef THREADED_RTS
2287     foreign "C" barf("asyncWrite# on threaded RTS") never returns;
2288 #else
2289
2290     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2291     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2292     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2293
2294     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2295                                             stg_asyncWritezh_malloc_str)
2296                         [R1,R2,R3,R4];
2297     (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
2298
2299     StgAsyncIOResult_reqID(ares)   = reqID;
2300     StgAsyncIOResult_len(ares)     = 0;
2301     StgAsyncIOResult_errCode(ares) = 0;
2302     StgTSO_block_info(CurrentTSO)  = ares;
2303     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2304     jump stg_block_async;
2305 #endif
2306 }
2307
2308 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
2309 asyncDoProczh_fast
2310 {
2311     W_ ares;
2312     CInt reqID;
2313
2314 #ifdef THREADED_RTS
2315     foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
2316 #else
2317
2318     /* args: R1 = proc, R2 = param */
2319     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2320     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2321
2322     /* could probably allocate this on the heap instead */
2323     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2324                                             stg_asyncDoProczh_malloc_str) 
2325                                 [R1,R2];
2326     (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
2327     StgAsyncIOResult_reqID(ares)   = reqID;
2328     StgAsyncIOResult_len(ares)     = 0;
2329     StgAsyncIOResult_errCode(ares) = 0;
2330     StgTSO_block_info(CurrentTSO) = ares;
2331     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2332     jump stg_block_async;
2333 #endif
2334 }
2335 #endif
2336
2337 // noDuplicate# tries to ensure that none of the thunks under
2338 // evaluation by the current thread are also under evaluation by
2339 // another thread.  It relies on *both* threads doing noDuplicate#;
2340 // the second one will get blocked if they are duplicating some work.
2341 noDuplicatezh_fast
2342 {
2343     SAVE_THREAD_STATE();
2344     ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
2345     foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
2346     
2347     if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
2348         jump stg_threadFinished;
2349     } else {
2350         LOAD_THREAD_STATE();
2351         ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
2352         jump %ENTRY_CODE(Sp(0));
2353     }
2354 }
2355
2356 getApStackValzh_fast
2357 {
2358    W_ ap_stack, offset, val, ok;
2359
2360    /* args: R1 = AP_STACK, R2 = offset */
2361    ap_stack = R1;
2362    offset   = R2;
2363
2364    if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
2365         ok = 1;
2366         val = StgAP_STACK_payload(ap_stack,offset); 
2367    } else {
2368         ok = 0;
2369         val = R1;
2370    }
2371    RET_NP(ok,val);
2372 }
2373
2374 /* -----------------------------------------------------------------------------
2375    Misc. primitives
2376    -------------------------------------------------------------------------- */
2377
2378 // Write the cost center stack of the first argument on stderr; return
2379 // the second.  Possibly only makes sense for already evaluated
2380 // things?
2381 traceCcszh_fast
2382 {
2383     W_ ccs;
2384
2385 #ifdef PROFILING
2386     ccs = StgHeader_ccs(UNTAG(R1));
2387     foreign "C" fprintCCS_stderr(ccs "ptr") [R2];
2388 #endif
2389
2390     R1 = R2;
2391     ENTER();
2392 }
2393
2394 getSparkzh_fast
2395 {
2396    W_ spark;
2397
2398 #ifndef THREADED_RTS
2399    RET_NP(0,ghczmprim_GHCziBool_False_closure);
2400 #else
2401    (spark) = foreign "C" findSpark(MyCapability());
2402    if (spark != 0) {
2403       RET_NP(1,spark);
2404    } else {
2405       RET_NP(0,ghczmprim_GHCziBool_False_closure);
2406    }
2407 #endif
2408 }