Allocate the right number of words in new*PinnedByteArrayzh_fast
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 #ifdef __PIC__
31 #ifndef mingw32_HOST_OS
32 import __gmpz_init;
33 import __gmpz_add;
34 import __gmpz_sub;
35 import __gmpz_mul;
36 import __gmpz_gcd;
37 import __gmpn_gcd_1;
38 import __gmpn_cmp;
39 import __gmpz_tdiv_q;
40 import __gmpz_tdiv_r;
41 import __gmpz_tdiv_qr;
42 import __gmpz_fdiv_qr;
43 import __gmpz_divexact;
44 import __gmpz_and;
45 import __gmpz_xor;
46 import __gmpz_ior;
47 import __gmpz_com;
48 #endif
49 import pthread_mutex_lock;
50 import pthread_mutex_unlock;
51 #endif
52 import base_ControlziExceptionziBase_nestedAtomically_closure;
53 import EnterCriticalSection;
54 import LeaveCriticalSection;
55 import ghczmprim_GHCziBool_False_closure;
56
57 /*-----------------------------------------------------------------------------
58   Array Primitives
59
60   Basically just new*Array - the others are all inline macros.
61
62   The size arg is always passed in R1, and the result returned in R1.
63
64   The slow entry point is for returning from a heap check, the saved
65   size argument must be re-loaded from the stack.
66   -------------------------------------------------------------------------- */
67
68 /* for objects that are *less* than the size of a word, make sure we
69  * round up to the nearest word for the size of the array.
70  */
71
/* newByteArray# :: Int# -> State# s -> (# State# s, MutableByteArray# s #)
 *
 * R1 = requested size in bytes.  The payload is rounded up to a whole
 * number of words; the new MutableByteArray# is returned in R1.
 */
newByteArrayzh_fast
{
    W_ words, payload_words, n, p;
    /* May GC before we allocate; there are no pointer args to protect. */
    MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
    n = R1;
    /* round the byte count up to whole words (see note at top of file) */
    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    /* total size = ArrWords header + payload */
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
85
/* Pinned byte arrays get their *payload* (not the header) aligned to
   BA_ALIGN bytes, so they can be passed to foreign code with alignment
   requirements.  BA_ALIGN must be a power of 2 for the mask trick below. */
#define BA_ALIGN 16
#define BA_MASK  (BA_ALIGN-1)

/* newPinnedByteArray# :: Int# -> State# s -> (# State# s, MutableByteArray# s #)
 *
 * R1 = size in bytes.  The array is allocated pinned (the GC will not
 * move it) and the payload is aligned to BA_ALIGN bytes.
 */
newPinnedByteArrayzh_fast
{
    W_ words, bytes, payload_words, p;

    MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
    bytes = R1;
    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrWords;
    /* And we want to align to BA_ALIGN bytes, so we need to allow space
       to shift up to BA_ALIGN - 1 bytes: */
    bytes = bytes + BA_ALIGN - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = foreign "C" allocatePinned(words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to BA_ALIGN bytes: */
    p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);

    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
117
/* newAlignedPinnedByteArray# :: Int# -> Int# -> State# s
 *                            -> (# State# s, MutableByteArray# s #)
 *
 * R1 = size in bytes, R2 = requested payload alignment in bytes.
 * Like newPinnedByteArray# but with a caller-chosen alignment.
 */
newAlignedPinnedByteArrayzh_fast
{
    W_ words, bytes, payload_words, p, alignment;

    MAYBE_GC(NO_PTRS,newAlignedPinnedByteArrayzh_fast);
    bytes = R1;
    alignment = R2;

    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);

    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrWords;
    /* And we want to align to <alignment> bytes, so we need to allow space
       to shift up to <alignment - 1> bytes: */
    bytes = bytes + alignment - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = foreign "C" allocatePinned(words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to <alignment> bytes. Note that we are assuming that
       <alignment> is a power of 2, which is technically not guaranteed */
    p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));

    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
150
/* newArray# :: Int# -> a -> State# s -> (# State# s, MutableArray# s a #)
 *
 * Allocates a mutable pointer array of n elements, each slot initialised
 * to the closure in R2.
 */
newArrayzh_fast
{
    W_ words, n, init, arr, p;
    /* Args: R1 = words, R2 = initialisation value */

    n = R1;
    /* R2 is a closure pointer: it must be protected across a GC */
    MAYBE_GC(R2_PTR,newArrayzh_fast);

    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
    ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(arr) = n;

    // Initialise all elements of the array with the value in R2
    init = R2;
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + WDS(words)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    RET_P(arr);
}
178
/* unsafeThawArray# :: Array# a -> State# s -> (# State# s, MutableArray# s a #)
 *
 * Flips a frozen array's info pointer back to MUT_ARR_PTRS_DIRTY and, if
 * it is not already on the mutable list, puts it back on.
 */
unsafeThawArrayzh_fast
{
  // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
  //
  // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
  // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
  // it on the mutable list for the GC to remove (removing something from
  // the mutable list is not easy, because the mut_list is only singly-linked).
  // 
  // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
  // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
  // to indicate that it is still on the mutable list.
  //
  // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
  // either it is on a mut_list, or it isn't.  We adopt the convention that
  // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
  // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
  // we put it on the mutable list more than once, but it would get scavenged
  // multiple times during GC, which would be unnecessarily slow.
  //
  if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(R1, R1);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        RET_P(R1);
  } else {
        // FROZEN0: already on the mutable list, just flip the info ptr.
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        RET_P(R1);
  }
}
209
210 /* -----------------------------------------------------------------------------
211    MutVar primitives
212    -------------------------------------------------------------------------- */
213
/* newMutVar# :: a -> State# s -> (# State# s, MutVar# s a #)
 *
 * Allocates a MutVar holding the closure in R1.
 */
newMutVarzh_fast
{
    W_ mv;
    /* Args: R1 = initialisation value */

    /* Heap-check; R1 is a pointer and must survive a GC. */
    ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);

    /* carve the MutVar out of the space ALLOC_PRIM reserved below Hp */
    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
    StgMutVar_var(mv) = R1;
    
    RET_P(mv);
}
227
/* atomicModifyMutVar# :: MutVar# s a -> (a -> (a,b)) -> State# s
 *                     -> (# State# s, b #)
 *
 * Atomically applies f to the contents of the MutVar.  All three thunks
 * (f x, its first and second components) are allocated up front; only
 * the swap of the MutVar's contents is done atomically (CAS loop in the
 * threaded RTS).
 */
atomicModifyMutVarzh_fast
{
    W_ mv, f, z, x, y, r, h;
    /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */

    /* If x is the current contents of the MutVar#, then 
       We want to make the new contents point to

         (sel_0 (f x))
 
       and the return value is
         
         (sel_1 (f x))

        obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */

#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

   /* One heap check for all three thunks; R1 and R2 are pointers. */
   HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);

   mv = R1;
   f = R2;

   /* z = [stg_ap_2 f x]; x is filled in later, inside the retry loop */
   TICK_ALLOC_THUNK_2();
   CCCS_ALLOC(THUNK_2_SIZE);
   z = Hp - THUNK_2_SIZE + WDS(1);
   SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(z);
   StgThunk_payload(z,0) = f;

   /* y = [stg_sel_0 z] -- the new contents of the MutVar */
   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   y = z - THUNK_1_SIZE;
   SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(y);
   StgThunk_payload(y,0) = z;

   /* r = [stg_sel_1 z] -- the value returned to the caller */
   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   r = y - THUNK_1_SIZE;
   SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(r);
   StgThunk_payload(r,0) = z;

 retry:
   x = StgMutVar_var(mv);
   StgThunk_payload(z,1) = x;
#ifdef THREADED_RTS
   /* keep retrying until our CAS of y over x wins */
   (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) [];
   if (h != x) { goto retry; }
#else
   StgMutVar_var(mv) = y;
#endif

   if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
     foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
   }

   RET_P(r);
}
309
310 /* -----------------------------------------------------------------------------
311    Weak Pointer Primitives
312    -------------------------------------------------------------------------- */
313
STRING(stg_weak_msg,"New weak pointer at %p\n")

/* mkWeak# :: k -> v -> f -> State# RealWorld
 *         -> (# State# RealWorld, Weak# v #)
 *
 * Allocates a weak pointer with key R1, value R2 and Haskell finalizer
 * R3 (NULL means no finalizer), and links it onto weak_ptr_list.
 */
mkWeakzh_fast
{
  /* R1 = key
     R2 = value
     R3 = finalizer (or NULL)
  */
  W_ w;

  if (R3 == NULL) {
    R3 = stg_NO_FINALIZER_closure;
  }

  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, W_[CCCS]);

  // We don't care about cfinalizer here.
  // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
  // something else?

  StgWeak_key(w)        = R1;
  StgWeak_value(w)      = R2;
  StgWeak_finalizer(w)  = R3;
  StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure;

  /* push onto the global list of weak pointers */
  StgWeak_link(w)       = W_[weak_ptr_list];
  W_[weak_ptr_list]     = w;

  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

  RET_P(w);
}
349
/* mkWeakForeignEnv# : make a weak pointer whose finalizer is a C function.
 *
 * The C finalizer (function pointer, argument pointer, environment
 * pointer and has-env flag) is packed into a 4-word ArrWords stored in
 * the cfinalizer field; finalizzeWeakzh_fast unpacks it and calls
 * runCFinalizer.
 */
mkWeakForeignEnvzh_fast
{
  /* R1 = key
     R2 = value
     R3 = finalizer
     R4 = pointer
     R5 = has environment (0 or 1)
     R6 = environment
  */
  W_ w, payload_words, words, p;

  W_ key, val, fptr, ptr, flag, eptr;

  key  = R1;
  val  = R2;
  fptr = R3;
  ptr  = R4;
  flag = R5;
  eptr = R6;

  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakForeignEnvzh_fast );

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, W_[CCCS]);

  /* a 4-word ArrWords holding the C-finalizer description */
  payload_words = 4;
  words         = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
  ("ptr" p)     = foreign "C" allocateLocal(MyCapability() "ptr", words) [];

  TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
  SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);

  StgArrWords_words(p)     = payload_words;
  StgArrWords_payload(p,0) = fptr;
  StgArrWords_payload(p,1) = ptr;
  StgArrWords_payload(p,2) = eptr;
  StgArrWords_payload(p,3) = flag;

  // We don't care about the value here.
  // Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else?

  StgWeak_key(w)        = key;
  StgWeak_value(w)      = val;
  StgWeak_finalizer(w)  = stg_NO_FINALIZER_closure;
  StgWeak_cfinalizer(w) = p;

  /* push onto the global list of weak pointers */
  StgWeak_link(w)   = W_[weak_ptr_list];
  W_[weak_ptr_list] = w;

  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

  RET_P(w);
}
403
/* finalizeWeak# :: Weak# a -> State# RealWorld
 *               -> (# State# RealWorld, Int#, finalizer #)
 *
 * Kills the weak pointer.  Runs any C finalizer immediately; the Haskell
 * finalizer is returned with flag 1 (or (0, NO_FINALIZER) if there is
 * none, or if the weak pointer was already dead).
 */
finalizzeWeakzh_fast
{
  /* R1 = weak ptr
   */
  W_ w, f, arr;

  w = R1;

  // already dead?
  if (GET_INFO(w) == stg_DEAD_WEAK_info) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  }

  // kill it
#ifdef PROFILING
  // @LDV profiling
  // A weak pointer is inherently used, so we do not need to call
  // LDV_recordDead_FILL_SLOP_DYNAMIC():
  //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
  // or, LDV_recordDead():
  //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
  // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
  // large as weak pointers, so there is no need to fill the slop, either.
  // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
#endif

  //
  // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
  //
  SET_INFO(w,stg_DEAD_WEAK_info);
  LDV_RECORD_CREATE(w);

  f   = StgWeak_finalizer(w);
  arr = StgWeak_cfinalizer(w);

  StgDeadWeak_link(w) = StgWeak_link(w);

  /* run the C finalizer, if any (fields packed by mkWeakForeignEnvzh_fast) */
  if (arr != stg_NO_FINALIZER_closure) {
    foreign "C" runCFinalizer(StgArrWords_payload(arr,0),
                              StgArrWords_payload(arr,1),
                              StgArrWords_payload(arr,2),
                              StgArrWords_payload(arr,3)) [];
  }

  /* return the finalizer */
  if (f == stg_NO_FINALIZER_closure) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  } else {
      RET_NP(1,f);
  }
}
455
/* deRefWeak# :: Weak# v -> State# RealWorld -> (# State# RealWorld, Int#, v #)
 *
 * R1 = weak ptr.  Returns (1, value) while the weak pointer is alive,
 * and (0, the weak ptr itself) once it is dead.
 */
deRefWeakzh_fast
{
  /* R1 = weak ptr */
  W_ w, flag, payload;

  w = R1;
  if (GET_INFO(w) != stg_WEAK_info) {
    /* dead (or dying): there is no value to hand back */
    flag    = 0;
    payload = w;
  } else {
    flag    = 1;
    payload = StgWeak_value(w);
  }
  RET_NP(flag,payload);
}
471
472 /* -----------------------------------------------------------------------------
473    Arbitrary-precision Integer operations.
474
475    There are some assumptions in this code that mp_limb_t == W_.  This is
476    the case for all the platforms that GHC supports, currently.
477    -------------------------------------------------------------------------- */
478
/* int2Integer# :: Int# -> (# Int#, ByteArray# #)
 *
 * Converts an Int# to GMP form: a sign/size word s (-1, 0 or 1 here) and
 * a one-limb ByteArray# holding the magnitude.
 */
int2Integerzh_fast
{
   /* arguments: R1 = Int# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;
   /* one limb is always enough for a single Int# */
   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   /* mpz_set_si is inlined here, makes things simpler */
   if (%lt(val,0)) { 
        s  = -1;
        Hp(0) = -val;
   } else { 
     if (%gt(val,0)) {
        s = 1;
        Hp(0) = val;
     } else {
        /* zero: size 0, limb array left uninitialised */
        s = 0;
     }
  }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# 
               #)
   */
   RET_NP(s,p);
}
511
/* word2Integer# :: Word# -> (# Int#, ByteArray# #)
 *
 * Like int2Integer# but for an unsigned word: the size is 0 or 1, never
 * negative.
 */
word2Integerzh_fast
{
   /* arguments: R1 = Word# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   if (val != 0) {
        s = 1;
        W_[Hp] = val;
   } else {
        s = 0;
   }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
538
539
540 /*
541  * 'long long' primops for converting to/from Integers.
542  */
543
544 #ifdef SUPPORT_LONG_LONGS
545
/* int64ToInteger# :: Int64# -> (# Int#, ByteArray# #)
 *
 * (32-bit platforms only.)  Splits the Int64# into two word-sized limbs
 * and builds a GMP-style (sign/size, limb array) pair; the size is
 * negated for negative input.
 */
int64ToIntegerzh_fast
{
   /* arguments: L1 = Int64# */

   L_ val;
   W_ hi, lo, s, neg, words_needed, p;

   val = L1;
   neg = 0;

   hi = TO_W_(val >> 32);
   lo = TO_W_(val);

   /* hi == 0: small positive; hi == -1 with lo != 0: small negative.
      Either way the value fits in a single limb. */
   if ( hi == 0 || (hi == 0xFFFFFFFF && lo != 0) )  {
       // minimum is one word
       words_needed = 1;
   } else { 
       words_needed = 2;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, int64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   /* GMP stores the magnitude; remember the sign and negate (two's
      complement across the two limbs) */
   if ( %lt(hi,0) ) {
     neg = 1;
     lo = -lo;
     if(lo == 0) {
       hi = -hi;
     } else {
       hi = -hi - 1;
     }
   }

   if ( words_needed == 2 )  { 
      s = 2;
      Hp(-1) = lo;
      Hp(0) = hi;
   } else { 
       if ( lo != 0 ) {
           s = 1;
           Hp(0) = lo;
       } else /* val==0 */  {
           s = 0;
       }
   }
   if ( neg != 0 ) {
        s = -s;
   }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
/* word64ToInteger# :: Word64# -> (# Int#, ByteArray# #)
 *
 * (32-bit platforms only.)  Unsigned version of int64ToInteger#: the
 * size is 0, 1 or 2, never negative.
 */
word64ToIntegerzh_fast
{
   /* arguments: L1 = Word64# */

   L_ val;
   W_ hi, lo, s, words_needed, p;

   val = L1;
   hi = TO_W_(val >> 32);
   lo = TO_W_(val);

   if ( hi != 0 ) {
      words_needed = 2;
   } else {
      words_needed = 1;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, word64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   if ( hi != 0 ) { 
     s = 2;
     Hp(-1) = lo;
     Hp(0)  = hi;
   } else {
      if ( lo != 0 ) {
        s = 1;
        Hp(0) = lo;
     } else /* val==0 */  {
      s = 0;
     }
  }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
646
647
648
649 #endif /* SUPPORT_LONG_LONGS */
650
/* ToDo: this is shockingly inefficient */

/* Scratch MP_INT workspaces for calling out to GMP.  In the
   single-threaded RTS they are plain static buffers; in the threaded RTS
   each capability keeps its own copies in its register table, fetched
   via FETCH_MP_TEMP below, so capabilities never race on them. */
#ifndef THREADED_RTS
section "bss" {
  mp_tmp1:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_tmp2:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_result1:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_result2:
    bits8 [SIZEOF_MP_INT];
}
#endif

#ifdef THREADED_RTS
/* Declare a local X and point it at this capability's copy of the
   scratch variable of the same name (field rX of the register table). */
#define FETCH_MP_TEMP(X) \
W_ X; \
X = BaseReg + (OFFSET_StgRegTable_r ## X);
#else
#define FETCH_MP_TEMP(X) /* Nothing */
#endif
682
/* Generate a primop taking two Integers (as (size, limbs) pairs in
   R1/R2 and R3/R4), applying the given binary __gmpz function, and
   returning one Integer.  The MP_INT structs are faked up around the
   existing ByteArray limbs, so no copying of the inputs is needed;
   the result's limb array is a heap-allocated ByteArray (GMP has been
   told to allocate from the GHC heap), and we return a pointer just
   before its payload as the ByteArray#. */
#define GMP_TAKE2_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1, s2;                                                          \
  W_ d1, d2;                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_tmp2);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
  FETCH_MP_TEMP(mp_result2);                                            \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
                                                                        \
  s1 = W_TO_INT(R1);                                                    \
  d1 = R2;                                                              \
  s2 = W_TO_INT(R3);                                                    \
  d2 = R4;                                                              \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
  MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
  MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
  MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
  MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
  MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}
716
/* As GMP_TAKE2_RET1, but for a unary __gmpz function: one Integer in
   (R1, R2), one Integer result. */
#define GMP_TAKE1_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1;                                                              \
  W_ d1;                                                                \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR, name);                                               \
                                                                        \
  d1 = R2;                                                              \
  s1 = W_TO_INT(R1);                                                    \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}
743
/* As GMP_TAKE2_RET1, but for a __gmpz function producing TWO results
   (e.g. tdiv_qr: quotient and remainder); returns two Integers. */
#define GMP_TAKE2_RET2(name,mp_fun)                                                     \
name                                                                                    \
{                                                                                       \
  CInt s1, s2;                                                                          \
  W_ d1, d2;                                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                                               \
  FETCH_MP_TEMP(mp_tmp2);                                                               \
  FETCH_MP_TEMP(mp_result1)                                                             \
  FETCH_MP_TEMP(mp_result2)                                                             \
                                                                                        \
  /* call doYouWantToGC() */                                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
                                                                                        \
  s1 = W_TO_INT(R1);                                                                    \
  d1 = R2;                                                                              \
  s2 = W_TO_INT(R3);                                                                    \
  d2 = R4;                                                                              \
                                                                                        \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
  MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
  MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
  MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
                                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
  foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
                                                                                        \
  /* Perform the operation */                                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
                                                                                        \
  RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
           MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
           TO_W_(MP_INT__mp_size(mp_result2)),                                          \
           MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
}
780
/* Instantiate the wrappers above: each line expands to a complete
   primop entry point calling the named GMP function. */
GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)

GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
795
#ifndef THREADED_RTS
section "bss" {
  mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
}
#endif
801
/* gcdInt# :: Int# -> Int# -> Int#
 *
 * Computes gcd of two machine ints via GMP's mpn-level __gmpn_gcd_1,
 * treating R1 as a one-limb number.
 */
gcdIntzh_fast
{
    /* R1 = the first Int#; R2 = the second Int# */
    W_ r; 
    FETCH_MP_TEMP(mp_tmp_w);

    /* stash R1 in a limb buffer so we can pass its address */
    W_[mp_tmp_w] = R1;
    (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];

    R1 = r;
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
815
816
/* gcdIntegerInt# :: Int# -> ByteArray# -> Int# -> Int#
 *
 * gcd of an Integer (size R1, limbs R2) and a machine int R3, done
 * directly on the limb array with __gmpn_gcd_1.
 */
gcdIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ s1;
    (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
    R1 = s1;
    
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
827
828
/* cmpIntegerInt# :: Int# -> ByteArray# -> Int# -> Int#
 *
 * Compares an Integer (size R1, limbs R2) with a machine int R3;
 * result <0 / 0 / >0 in R1.
 */
cmpIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ usize, vsize, v_digit, u_digit;

    usize = R1;
    vsize = 0;
    v_digit = R3;

    // paraphrased from __gmpz_cmp_si() in the GMP sources
    if (%gt(v_digit,0)) {
        vsize = 1;
    } else { 
        if (%lt(v_digit,0)) {
            vsize = -1;
            v_digit = -v_digit;
        }
    }

    /* different signs (or different limb counts): sizes decide */
    if (usize != vsize) {
        R1 = usize - vsize; 
        jump %ENTRY_CODE(Sp(0));
    }

    /* both zero */
    if (usize == 0) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    /* same sign, one limb each: compare the magnitudes */
    u_digit = W_[BYTE_ARR_CTS(R2)];

    if (u_digit == v_digit) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
        R1 = usize; 
    } else {
        R1 = -usize; 
    }

    jump %ENTRY_CODE(Sp(0));
}
873
/* cmpInteger# :: Int# -> ByteArray# -> Int# -> ByteArray# -> Int#
 *
 * Compares two Integers; result <0 / 0 / >0 in R1.
 */
cmpIntegerzh_fast
{
    /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
    W_ usize, vsize, size, up, vp;
    CInt cmp;

    // paraphrased from __gmpz_cmp() in the GMP sources
    usize = R1;
    vsize = R3;

    /* different sign/size words: they decide immediately */
    if (usize != vsize) {
        R1 = usize - vsize; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (usize == 0) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    /* limb count = |usize| */
    if (%lt(usize,0)) { // NB. not <, which is unsigned
        size = -usize;
    } else {
        size = usize;
    }

    up = BYTE_ARR_CTS(R2);
    vp = BYTE_ARR_CTS(R4);

    (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];

    if (cmp == 0 :: CInt) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    /* flip the magnitude comparison for negative numbers */
    if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
        R1 = 1;
    } else {
        R1 = (-1); 
    }
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
918
/* integer2Int# :: Int# -> ByteArray# -> Int#
 *
 * R1 = sign/size word s, R2 = limb array.  The result is the first
 * (least significant) limb, negated when s < 0, or 0 when s == 0.
 */
integer2Intzh_fast
{
    /* R1 = s; R2 = d */
    W_ result, size;

    size = R1;
    result = 0;
    if (size != 0) {
        result = W_[R2 + SIZEOF_StgArrWords];   /* first limb */
        if (%lt(size,0)) {
            result = -result;
        }
    }
    /* Result parked in R1, return via info-pointer at TOS */
    R1 = result;
    jump %ENTRY_CODE(Sp(0));
}
937
/* integer2Word# :: Int# -> ByteArray# -> Word#
 *
 * Identical computation to integer2Int#: first limb of R2, negated when
 * the sign/size word R1 is negative (giving the low word of the two's
 * complement), or 0 when R1 == 0.
 */
integer2Wordzh_fast
{
  /* R1 = s; R2 = d */
  W_ result, size;

  size = R1;
  result = 0;
  if (size != 0) {
    result = W_[R2 + SIZEOF_StgArrWords];   /* first limb */
    if (%lt(size,0)) {
      result = -result;
    }
  }
  /* Result parked in R1, return via info-pointer at TOS */
  R1 = result;
  jump %ENTRY_CODE(Sp(0));
}
956
decodeFloatzh_fast
{
    /* arguments: F1 = Float# */
    /* Decode a Float# into (Int# exponent, Int# mantissa sign/size,
     * ByteArray# mantissa limb).  NB: the opening brace was missing
     * after the label, which does not parse. */
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);
    
    arg = F1;
    
    // Allocate a one-word ByteArray# to hold the mantissa limb.
    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
    
    /* Be prepared to tell Lennart-coded __decodeFloat
       where mantissa._mp_d can be put (it does not care about the rest) */
    p = Hp - SIZEOF_StgArrWords;
    SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
    StgArrWords_words(p) = 1;
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
    
    /* Perform the operation */
    foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
    
    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
982
decodeFloatzuIntzh_fast
{
    /* arguments: F1 = Float# */
    /* Decode a Float# directly into (Int# mantissa, Int# exponent)
     * without allocating.  NB: the opening brace was missing after
     * the label, which does not parse. */
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);
    
    arg = F1;
    
    /* Perform the operation */
    foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
    
    /* returns: (Int# (mantissa), Int# (exponent)) */
    RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
}
999
1000 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
1001 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
1002
decodeDoublezh_fast
{
    /* arguments: D1 = Double# */
    /* Decode a Double# into (Int# exponent, Int# mantissa sign/size,
     * ByteArray# mantissa limbs).  NB: the opening brace was missing
     * after the label, which does not parse. */
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    arg = D1;

    // ARR_SIZE = StgArrWords header + room for the double's mantissa.
    ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
    
    /* Be prepared to tell Lennart-coded __decodeDouble
       where mantissa.d can be put (it does not care about the rest) */
    p = Hp - ARR_SIZE + WDS(1);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

    /* Perform the operation */
    foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
    
    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
1028
decodeDoublezu2Intzh_fast
{
    /* arguments: D1 = Double# */
    /* Decode a Double# into four machine words without allocating.
     * NB: the opening brace was missing after the label, which does
     * not parse. */
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp2);
    FETCH_MP_TEMP(mp_result1);
    FETCH_MP_TEMP(mp_result2);

    arg = D1;

    /* Perform the operation */
    foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
                                    mp_result1 "ptr", mp_result2 "ptr",
                                    arg) [];

    /* returns:
       (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
    RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
}
1050
1051 /* -----------------------------------------------------------------------------
1052  * Concurrency primitives
1053  * -------------------------------------------------------------------------- */
1054
forkzh_fast
{
  /* args: R1 = closure to spark */
  /* fork#: create a new IO thread running closure R1, schedule it on
   * this capability, and return its ThreadId#. */

  // createIOThread allocates, so check for GC first (R1 is a pointer).
  MAYBE_GC(R1_PTR, forkzh_fast);

  W_ closure;
  W_ threadid;
  closure = R1;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
                                RtsFlags_GcFlags_initialStkSize(RtsFlags), 
                                closure "ptr") [];

  /* start blocked if the current thread is blocked: the child inherits
   * the parent's TSO_BLOCKEX / TSO_INTERRUPTIBLE mask-exceptions state */
  StgTSO_flags(threadid) = 
     StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
                                (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));

  foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];

  // switch at the earliest opportunity
  Capability_context_switch(MyCapability()) = 1 :: CInt;
  
  RET_P(threadid);
}
1081
forkOnzh_fast
{
  /* args: R1 = cpu, R2 = closure to spark */
  /* forkOn#: like fork#, but schedule the new thread on the given
   * capability number (cpu) instead of the current one. */

  // createIOThread allocates, so check for GC first (R2 is a pointer).
  MAYBE_GC(R2_PTR, forkOnzh_fast);

  W_ cpu;
  W_ closure;
  W_ threadid;
  cpu = R1;
  closure = R2;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
                                RtsFlags_GcFlags_initialStkSize(RtsFlags), 
                                closure "ptr") [];

  /* start blocked if the current thread is blocked: the child inherits
   * the parent's TSO_BLOCKEX / TSO_INTERRUPTIBLE mask-exceptions state */
  StgTSO_flags(threadid) = 
     StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
                                (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));

  foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];

  // switch at the earliest opportunity
  Capability_context_switch(MyCapability()) = 1 :: CInt;
  
  RET_P(threadid);
}
1110
yieldzh_fast
{
  // yield#: give up the capability; stg_yield_noregs does the blocking.
  jump stg_yield_noregs;
}
1115
myThreadIdzh_fast
{
  /* no args. */
  // myThreadId#: the current TSO closure *is* the ThreadId#.
  RET_P(CurrentTSO);
}
1121
labelThreadzh_fast
{
  /* args: 
        R1 = ThreadId#
        R2 = Addr# */
  /* labelThread#: attach a debug label to a thread.  A no-op unless
   * the RTS was built with DEBUG. */
#ifdef DEBUG
  foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
#endif
  jump %ENTRY_CODE(Sp(0));
}
1132
isCurrentThreadBoundzh_fast
{
  /* no args */
  /* Ask the RTS whether the current TSO is a bound (OS-thread-tied)
     thread; return the C result unchanged as an unboxed value. */
  W_ bound;
  (bound) = foreign "C" isThreadBound(CurrentTSO) [];
  RET_N(bound);
}
1140
threadStatuszh_fast
{
    /* args: R1 :: ThreadId# */
    /* threadStatus#: return a status code for the given thread,
     * decoded on the Haskell side by GHC.Conc.threadStatus. */
    W_ tso;
    W_ why_blocked;
    W_ what_next;
    W_ ret;

    tso = R1;
    // Chase forwarding pointers left behind when a TSO was moved.
    loop:
      if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
          tso = StgTSO__link(tso);
          goto loop;
      }

    what_next   = TO_W_(StgTSO_what_next(tso));
    why_blocked = TO_W_(StgTSO_why_blocked(tso));
    // Note: these two reads are not atomic, so they might end up
    // being inconsistent.  It doesn't matter, since we
    // only return one or the other.  If we wanted to return the
    // contents of block_info too, then we'd have to do some synchronisation.

    if (what_next == ThreadComplete) {
        ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
    } else {
        if (what_next == ThreadKilled) {
            ret = 17;  // NB. magic, matches up with GHC.Conc.threadStatus
        } else {
            ret = why_blocked;
        }
    }
    RET_N(ret);
}
1174
1175 /* -----------------------------------------------------------------------------
1176  * TVar primitives
1177  * -------------------------------------------------------------------------- */
1178
1179 #define SP_OFF 0
1180
1181 // Catch retry frame ------------------------------------------------------------
1182
INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  W_ unused3, P_ unused4, P_ unused5)
{
   /* Return frame for catchRetry#: one of the two branches finished
    * without retrying, so try to commit its nested transaction into
    * the enclosing one. */
   W_ r, frame, trec, outer;

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
   (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
   if (r != 0) {
     /* Succeeded (either first branch or second branch) */
     StgTSO_trec(CurrentTSO) = outer;
     Sp = Sp + SIZEOF_StgCatchRetryFrame;
     jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
     /* Did not commit: re-execute whichever branch was running,
      * inside a fresh nested transaction */
     W_ new_trec;
     ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
     StgTSO_trec(CurrentTSO) = new_trec;
     if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
       R1 = StgCatchRetryFrame_alt_code(frame);
     } else {
       R1 = StgCatchRetryFrame_first_code(frame);
     }
     jump stg_ap_v_fast;
   }
}
1213
1214
1215 // Atomically frame ------------------------------------------------------------
1216
INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ unused3, P_ unused4)
{
  /* Return frame for atomically#: the transaction body (or an
   * invariant-check transaction) has finished.  Run any pending
   * invariant checks, then attempt to commit. */
  W_ frame, trec, valid, next_invariant, q, outer;

  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (outer == NO_TREC) {
    /* First time back at the atomically frame -- pick up invariants */
    ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;

  } else {
    /* Second/subsequent time back at the atomically frame -- abort the
     * tx that's checking the invariant and move on to the next one */
    StgTSO_trec(CurrentTSO) = outer;
    q = StgAtomicallyFrame_next_invariant_to_check(frame);
    StgInvariantCheckQueue_my_execution(q) = trec;
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    /* Don't free trec -- it's linked from q and will be stashed in the
     * invariant if we eventually commit. */
    q = StgInvariantCheckQueue_next_queue_entry(q);
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    trec = outer;
  }

  q = StgAtomicallyFrame_next_invariant_to_check(frame);

  if (q != END_INVARIANT_CHECK_QUEUE) {
    /* We can't commit yet: another invariant to check.  Run its code
     * in a nested transaction; we come back to this frame afterwards. */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;

    next_invariant = StgInvariantCheckQueue_invariant(q);
    R1 = StgAtomicInvariant_code(next_invariant);
    jump stg_ap_v_fast;

  } else {

    /* We've got no more invariants to check, try to commit */
    (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
    if (valid != 0) {
      /* Transaction was valid: commit succeeded -- pop the frame */
      StgTSO_trec(CurrentTSO) = NO_TREC;
      Sp = Sp + SIZEOF_StgAtomicallyFrame;
      jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
      /* Transaction was not valid: try again from scratch with a
       * fresh top-level transaction */
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
      R1 = StgAtomicallyFrame_code(frame);
      jump stg_ap_v_fast;
    }
  }
}
1278
INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ unused3, P_ unused4)
{
  /* Variant of the atomically frame used while the thread is blocked
   * in retry#: entered when the thread is woken up. */
  W_ frame, trec, valid;

  frame = Sp;

  /* The TSO is currently waiting: should we stop waiting? */
  (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
  if (valid != 0) {
    /* Previous attempt is still valid: no point trying again yet */
    jump stg_block_noregs;
  } else {
    /* Previous attempt is no longer valid: try again with a fresh
     * transaction, switching the frame back to the normal
     * stg_atomically_frame */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    StgHeader_info(frame) = stg_atomically_frame_info;
    R1 = StgAtomicallyFrame_code(frame);
    jump stg_ap_v_fast;
  }
}
1303
1304 // STM catch frame --------------------------------------------------------------
1305
1306 #define SP_OFF 0
1307
1308 /* Catch frames are very similar to update frames, but when entering
1309  * one we just pop the frame off the stack and perform the correct
1310  * kind of return to the activation record underneath us on the stack.
1311  */
1312
INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ unused3, P_ unused4)
   {
      /* Return frame for catchSTM#: the protected computation finished
       * normally, so try to commit its nested transaction into the
       * enclosing one. */
      W_ r, frame, trec, outer;
      frame = Sp;
      trec = StgTSO_trec(CurrentTSO);
      ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
      (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
      if (r != 0) {
        /* Commit succeeded: pop the frame and return.  NB. use
         * %ENTRY_CODE like every other frame return in this file --
         * a plain "jump Sp(SP_OFF)" is only correct when the info
         * pointer is itself the entry code (tables-next-to-code). */
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchSTMFrame;
        jump %ENTRY_CODE(Sp(SP_OFF));
      } else {
        /* Commit failed: re-run the protected computation in a fresh
         * nested transaction */
        W_ new_trec;
        ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
        StgTSO_trec(CurrentTSO) = new_trec;
        R1 = StgCatchSTMFrame_code(frame);
        jump stg_ap_v_fast;
      }
   }
1338
1339
1340 // Primop definition ------------------------------------------------------------
1341
atomicallyzh_fast
{
  /* atomically#: run an STM action R1 inside a new top-level
   * transaction, pushing an stg_atomically_frame to commit it. */
  W_ frame;
  W_ old_trec;
  W_ new_trec;
  
  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR, atomicallyzh_fast); 

  /* Args: R1 = m :: STM a */
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);

  old_trec = StgTSO_trec(CurrentTSO);

  /* Nested transactions are not allowed; raise an exception */
  if (old_trec != NO_TREC) {
     R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
     jump raisezh_fast;
  }

  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
  StgAtomicallyFrame_code(frame) = R1;
  StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;

  /* Start the memory transaction */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1377
1378
catchSTMzh_fast
{
  /* catchSTM#: run R1 inside a nested transaction with exception
   * handler R2, pushing an stg_catch_stm_frame. */
  W_ frame;
  
  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */
  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
  StgCatchSTMFrame_handler(frame) = R2;
  StgCatchSTMFrame_code(frame) = R1;

  /* Start a nested transaction to run the body of the try block in */
  W_ cur_trec;  
  W_ new_trec;
  cur_trec = StgTSO_trec(CurrentTSO);
  // NOTE(review): this foreign call carries no volatile-regs list
  // ("[]"), unlike the other stmStartTransaction calls in this file
  // -- confirm that is intended.
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1405
1406
catchRetryzh_fast
{
  /* catchRetry#: run R1 in a nested transaction; if it retries,
   * stg_catch_retry_frame / retryzh_fast switch to the alternative R2. */
  W_ frame;
  W_ new_trec;
  W_ trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 

  /* Args: R1 :: STM a */
  /* Args: R2 :: STM a */
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;
  
  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1437
1438
retryzh_fast
{
  /* retry#: unwind the stack to the nearest CATCH_RETRY_FRAME (switch
   * to the alternative branch) or ATOMICALLY_FRAME (block the thread
   * on the TVars it has read). */
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

  MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
  StgTSO_sp(CurrentTSO) = Sp;
  (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
  Sp = StgTSO_sp(CurrentTSO);
  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
    // Abort the transaction attempting the current branch
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
      // Retry in the first branch: try the alternative
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
      R1 = StgCatchRetryFrame_alt_code(frame);
      jump stg_ap_v_fast;
    } else {
      // Retry in the alternative code: propagate the retry
      StgTSO_trec(CurrentTSO) = outer;
      Sp = Sp + SIZEOF_StgCatchRetryFrame;
      goto retry_pop_stack;
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait 
  ASSERT(frame_type == ATOMICALLY_FRAME);
  if (outer != NO_TREC) {
    // We called retry while checking invariants, so abort the current
    // invariant check (merging its TVar accesses into the parent's read
    // set so we'll wait on them)
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    trec = outer;
    StgTSO_trec(CurrentTSO) = trec;
    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
  }
  ASSERT(outer == NO_TREC);

  (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
  if (r != 0) {
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
    StgHeader_info(frame) = stg_atomically_waiting_frame_info;
    Sp = frame;
    // Fix up the stack in the unregisterised case: the return convention is different.
    R3 = trec; // passing to stmWaitUnblock()
    jump stg_block_stmwait;
  } else {
    // Transaction was not valid: retry immediately
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
    jump stg_ap_v_fast;
  }
}
1510
1511
checkzh_fast
{
  /* check#: register invariant closure R1 to be checked when the
   * current transaction commits. */
  W_ trec, closure;

  /* Args: R1 = invariant closure */
  MAYBE_GC (R1_PTR, checkzh_fast); 

  trec = StgTSO_trec(CurrentTSO);
  closure = R1;
  foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
                                     trec "ptr",
                                     closure "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1527
1528
newTVarzh_fast
{
  /* Args: R1 = initialisation value */
  /* newTVar#: allocate a fresh TVar holding R1 and return it. */
  W_ tvar;
  W_ init;

  MAYBE_GC (R1_PTR, newTVarzh_fast);   // stmNewTVar allocates
  init = R1;
  ("ptr" tvar) = foreign "C" stmNewTVar(MyCapability() "ptr", init "ptr") [];
  RET_P(tvar);
}
1541
1542
readTVarzh_fast
{
  /* readTVar#: read TVar R1 within the current transaction, recording
   * the access in the transaction record. */
  W_ trec;
  W_ tvar;
  W_ result;

  /* Args: R1 = TVar closure */

  MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];

  RET_P(result);
}
1558
readTVarIOzh_fast
{
    /* args: R1 = TVar closure */
    /* readTVarIO#: read a TVar outside any transaction. */
    W_ result;

again:
    result = StgTVar_current_value(R1);
    // A TRec header in current_value indicates the slot is mid-update;
    // spin until a real value is visible.
    if (%INFO_PTR(result) == stg_TREC_HEADER_info) {
        goto again;
    }
    RET_P(result);
}
1570
writeTVarzh_fast
{
  /* writeTVar#: record a write of R2 to TVar R1 in the current
   * transaction (takes effect at commit). */
  W_ trec;
  W_ tvar;
  W_ new_value;
  
  /* Args: R1 = TVar closure */
  /*       R2 = New value    */

  MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  new_value = R2;
  foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1588
1589
1590 /* -----------------------------------------------------------------------------
1591  * MVar primitives
1592  *
1593  * take & putMVar work as follows.  Firstly, an important invariant:
1594  *
1595  *    If the MVar is full, then the blocking queue contains only
1596  *    threads blocked on putMVar, and if the MVar is empty then the
1597  *    blocking queue contains only threads blocked on takeMVar.
1598  *
 * takeMVar:
1600  *    MVar empty : then add ourselves to the blocking queue
1601  *    MVar full  : remove the value from the MVar, and
1602  *                 blocking queue empty     : return
1603  *                 blocking queue non-empty : perform the first blocked putMVar
1604  *                                            from the queue, and wake up the
1605  *                                            thread (MVar is now full again)
1606  *
1607  * putMVar is just the dual of the above algorithm.
1608  *
1609  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1610  * the stack of the thread waiting to do the putMVar.  See
1611  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1612  * the stack layout, and the PerformPut and PerformTake macros below.
1613  *
1614  * It is important that a blocked take or put is woken up with the
1615  * take/put already performed, because otherwise there would be a
1616  * small window of vulnerability where the thread could receive an
1617  * exception and never perform its take or put, and we'd end up with a
1618  * deadlock.
1619  *
1620  * -------------------------------------------------------------------------- */
1621
isEmptyMVarzh_fast
{
    /* args: R1 = MVar closure */
    /* An MVar is empty iff its value field holds the END_TSO_QUEUE
       sentinel; return 1 for empty, 0 for full. */

    if (StgMVar_value(R1) != stg_END_TSO_QUEUE_closure) {
        RET_N(0);
    } else {
        RET_N(1);
    }
}
1632
newMVarzh_fast
{
    /* args: none */
    /* newMVar#: allocate an empty MVar (head, tail and value all set
     * to the END_TSO_QUEUE sentinel). */
    W_ mvar;

    ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
  
    mvar = Hp - SIZEOF_StgMVar + WDS(1);
    SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
        // MVARs start dirty: generation 0 has no mutable list
    StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
    RET_P(mvar);
}
1648
1649
/* Complete a blocked takeMVar on behalf of thread 'tso': overwrite the
 * top of its stack so it returns 'value' when resumed.  Layout matches
 * stg_block_takemvar in HeapStackCheck.cmm. */
#define PerformTake(tso, value)                         \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;

/* Complete a blocked putMVar on behalf of thread 'tso': pop its 3-word
 * frame and read out the value it was trying to put into 'lval'. */
#define PerformPut(tso,lval)                    \
    StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
    lval = W_[StgTSO_sp(tso) - WDS(1)];
1657
takeMVarzh_fast
{
    /* takeMVar#: if the MVar is full, take its value (and complete the
     * first blocked putMVar, if any); if empty, enqueue ourselves on
     * its blocking queue and block.  See the invariant in the comment
     * block above. */
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif
        
    // We are about to mutate the MVar: move it to the mutable list.
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
    }

    /* If the MVar is empty, put ourselves on its blocking queue,
     * and wait until we're woken up.
     */
    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            foreign "C" setTSOLink(MyCapability() "ptr", 
                                   StgMVar_tail(mvar) "ptr",
                                   CurrentTSO) [];
        }
        StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
        StgTSO_block_info(CurrentTSO)  = mvar;
        // write barrier for throwTo(), which looks at block_info
        // if why_blocked==BlockedOnMVar.
        prim %write_barrier() [];
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgMVar_tail(mvar) = CurrentTSO;
        
        R1 = mvar;
        jump stg_block_takemvar;   // also unlocks the MVar and blocks
  }

  /* we got the value... */
  val = StgMVar_value(mvar);

  if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
  {
      /* There are putMVar(s) waiting... 
       * wake up the first thread on the queue
       */
      ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

      /* actually perform the putMVar for the thread that we just woke up */
      tso = StgMVar_head(mvar);
      PerformPut(tso,StgMVar_value(mvar));

      // we wrote to tso's stack above, so mark it dirty if it wasn't
      if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
          foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
      }

      ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                            StgMVar_head(mvar) "ptr", 1) [];
      StgMVar_head(mvar) = tso;

      if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
          StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
      }

#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
      SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
      RET_P(val);
  } 
  else
  {
      /* No further putMVars, MVar is now empty */
      StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
 
#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
      SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif

      RET_P(val);
  }
}
1745
1746
tryTakeMVarzh_fast
{
    /* tryTakeMVar#: like takeMVar# but never blocks.  Returns
     * (0, dummy) when the MVar is empty, (1, value) when full. */
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */

    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        unlockClosure(mvar, info);
#endif
        /* HACK: we need a pointer to pass back, 
         * so we abuse NO_FINALIZER_closure
         */
        RET_NP(0, stg_NO_FINALIZER_closure);
    }

    // We are about to mutate the MVar: move it to the mutable list.
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are putMVar(s) waiting... 
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));
        // we wrote to tso's stack above, so mark it dirty if it wasn't
        if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }

        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else 
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    
    RET_NP(1, val);
}
1818
1819
putMVarzh_fast
{
    /* putMVar#: if the MVar is empty, store R2 (and complete the first
     * blocked takeMVar, if any); if full, enqueue ourselves on its
     * blocking queue and block.  Dual of takeMVarzh_fast above. */
    W_ mvar, val, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;
    val  = R2;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    // We are about to mutate the MVar: move it to the mutable list.
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* MVar full: queue ourselves on its blocking queue and block */
    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            foreign "C" setTSOLink(MyCapability() "ptr", 
                                   StgMVar_tail(mvar) "ptr",
                                   CurrentTSO) [];
        }
        StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
        StgTSO_block_info(CurrentTSO)  = mvar;
        // write barrier for throwTo(), which looks at block_info
        // if why_blocked==BlockedOnMVar.
        prim %write_barrier() [];
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgMVar_tail(mvar) = CurrentTSO;
        
        R1 = mvar;
        R2 = val;
        jump stg_block_putmvar;   // also unlocks the MVar and blocks
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, val);
        // we wrote to tso's stack above, so mark it dirty if it wasn't
        if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }
      
        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = val;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    
    /* ToDo: yield afterward for better communication performance? */
}
1902
1903
tryPutMVarzh_fast
{
    // Non-blocking putMVar: if the MVar already holds a value, return 0
    // without blocking; otherwise either hand the value directly to the
    // first waiting takeMVar, or store it in the MVar, and return 1.
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    // Lock the MVar closure; R2 (the value) is still live across the call.
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
        // Already full: fail without blocking.
#if defined(THREADED_RTS)
        unlockClosure(mvar, info);
#endif
        RET_N(0);
    }
  
    if (info == stg_MVAR_CLEAN_info) {
        // We are about to mutate a clean MVar: record it for the GC.
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
        
        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        // BUG FIX: the parentheses around the '&' are essential.  In C--
        // (as in C) '==' binds more tightly than '&', so the previous
        // form "flags & TSO_DIRTY == 0" parsed as
        // "flags & (TSO_DIRTY == 0)", which is always 0 — the condition
        // never fired and the TSO we just mutated with PerformTake was
        // never marked dirty.
        if ((TO_W_(StgTSO_flags(tso)) & TSO_DIRTY) == 0) {
            foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
        }
      
        // Unblock the first waiter and advance the head of the queue.
        ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
                                              StgMVar_head(mvar) "ptr", 1) [];
        StgMVar_head(mvar) = tso;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            // Queue drained: the tail must be reset too.
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    
    RET_N(1);
    /* ToDo: yield afterward for better communication performance? */
}
1970
1971
1972 /* -----------------------------------------------------------------------------
1973    Stable pointer primitives
1974    -------------------------------------------------------------------------  */
1975
makeStableNamezh_fast
{
    // makeStableName#: return the StableName object for the closure in
    // R1, creating and caching one if this is the first request.
    W_ index, sn_obj;

    // Reserve heap for a possible new StgStableName up front.
    // ALLOC_PRIM may GC: R1_PTR keeps the argument alive, and the
    // primop re-enters at makeStableNamezh_fast afterwards.
    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
  
    (index) = foreign "C" lookupStableName(R1 "ptr") [];

    /* Is there already a StableName for this heap object?
     *  stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        // No cached StableName: build one in the space just allocated.
        // Hp points at the last word of the allocation, so the object
        // starts SIZEOF_StgStableName bytes back, plus one word.
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        // Cache it so later makeStableName# calls return the same object.
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        // Reuse the cached StableName (the fresh allocation is wasted).
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }
    
    RET_P(sn_obj);
}
1998
1999
makeStablePtrzh_fast
{
    /* Args: R1 = the closure to pin with a StablePtr. */
    W_ ptr;
    // getStablePtr may allocate an entry in the stable-pointer table;
    // MAYBE_GC keeps R1 alive if a collection is needed first.
    MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
    ("ptr" ptr) = foreign "C" getStablePtr(R1 "ptr") [];
    RET_N(ptr);
}
2008
deRefStablePtrzh_fast
{
    /* Args: R1 = the StablePtr# to dereference. */
    W_ entry, clos;
    // A stable pointer is an index into stable_ptr_table, an array of
    // snEntry structs; the referenced closure sits in the addr field.
    entry = W_[stable_ptr_table] + R1 * SIZEOF_snEntry;
    clos  = snEntry_addr(entry);
    RET_P(clos);
}
2017
2018 /* -----------------------------------------------------------------------------
2019    Bytecode object primitives
2020    -------------------------------------------------------------------------  */
2021
newBCOzh_fast
{
    /* R1 = instrs
       R2 = literals
       R3 = ptrs
       R4 = arity
       R5 = bitmap array
    */
    W_ bco, bitmap_arr, bytes, words;
    
    bitmap_arr = R5;

    // The BCO carries its bitmap inline after the fixed fields, so the
    // total size is SIZEOF_StgBCO plus the bitmap array's word count.
    words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
    bytes = WDS(words);

    // R1,R2,R3,R5 are heap pointers that must survive a GC triggered
    // by this allocation (R4 is a plain word, so it is not listed).
    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );

    // Hp points at the last allocated word; the new BCO starts
    // 'bytes' back, plus one word.
    bco = Hp - bytes + WDS(1);
    SET_HDR(bco, stg_BCO_info, W_[CCCS]);
    
    StgBCO_instrs(bco)     = R1;
    StgBCO_literals(bco)   = R2;
    StgBCO_ptrs(bco)       = R3;
    StgBCO_arity(bco)      = HALF_W_(R4);
    StgBCO_size(bco)       = HALF_W_(words);
    
    // Copy the arity/bitmap info into the BCO
    // (C-- has no loop syntax, hence the label and goto).
    W_ i;
    i = 0;
for:
    if (i < StgArrWords_words(bitmap_arr)) {
        StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
        i = i + 1;
        goto for;
    }
    
    RET_P(bco);
}
2060
2061
mkApUpd0zh_fast
{
    // R1 = the BCO# for the AP
    //
    // Returns a new updatable AP thunk whose function is R1 and which
    // applies no arguments.
    W_ ap;

    // This function is *only* used to wrap zero-arity BCOs in an
    // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
    // saturated and always points directly to a FUN or BCO.
    ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
           StgBCO_arity(R1) == HALF_W_(0));

    // Heap check for the AP; R1 is a pointer the GC must retain.
    HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
    TICK_ALLOC_UP_THK(0, 0);
    CCCS_ALLOC(SIZEOF_StgAP);

    // Hp points at the last allocated word; the AP starts
    // SIZEOF_StgAP bytes back, plus one word.
    ap = Hp - SIZEOF_StgAP + WDS(1);
    SET_HDR(ap, stg_AP_info, W_[CCCS]);
    
    // Zero arguments: the payload is just the wrapped BCO.
    StgAP_n_args(ap) = HALF_W_(0);
    StgAP_fun(ap) = R1;
    
    RET_P(ap);
}
2086
unpackClosurezh_fast
{
/* args: R1 = closure to analyze */
// TODO: Consider the absence of ptrs or nonptrs as a special case ?
// Returns (info ptr, frozen array of the pointer fields, byte array of
// the non-pointer words) for the closure in R1.

    W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
    info  = %GET_STD_INFO(UNTAG(R1));

    // Some closures have non-standard layout, so we omit those here.
    W_ type;
    type = TO_W_(%INFO_TYPE(info));
    switch [0 .. N_CLOSURE_TYPES] type {
    case THUNK_SELECTOR : {
        // A selector has exactly one pointer (the selectee).
        ptrs = 1;
        nptrs = 0;
        goto out;
    }
    case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
         THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
        // Non-standard layouts: report no fields at all.
        ptrs = 0;
        nptrs = 0;
        goto out;
    }
    default: {
        // Standard layout: counts come straight from the info table.
        ptrs  = TO_W_(%INFO_PTRS(info)); 
        nptrs = TO_W_(%INFO_NPTRS(info));
        goto out;
    }}
out:

    // Allocate both result arrays in one ALLOC_PRIM, so Hp arithmetic
    // below carves them out of a single contiguous allocation.
    W_ ptrs_arr_sz, nptrs_arr_sz;
    nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
    ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);

    ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);

    W_ clos;
    clos = UNTAG(R1);

    // Layout in the allocation: [ptrs_arr][nptrs_arr], with Hp at the
    // last word; each object starts at its base plus one word.
    ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
    nptrs_arr = Hp - nptrs_arr_sz + WDS(1);

    // Copy the pointer fields into a frozen mutable array.
    SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
    p = 0;
for:
    if(p < ptrs) {
         W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
         p = p + 1;
         goto for;
    }
    
    // Copy the non-pointer words (which follow the pointers in the
    // closure's payload) into a byte array.
    SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(nptrs_arr) = nptrs;
    p = 0;
for2:
    if(p < nptrs) {
         W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
         p = p + 1;
         goto for2;
    }
    RET_NPP(info, ptrs_arr, nptrs_arr);
}
2150
2151 /* -----------------------------------------------------------------------------
2152    Thread I/O blocking primitives
2153    -------------------------------------------------------------------------- */
2154
2155 /* Add a thread to the end of the blocked queue. (C-- version of the C
2156  * macro in Schedule.h).
2157  */
// Enqueue tso at the tail of the global blocked_queue (hd/tl pair).
// Precondition: tso is not already on a queue (its _link is END_TSO_QUEUE).
// NOTE(review): linking via setTSOLink rather than a plain store
// presumably dirties the previous tail TSO for the GC — confirm in
// Threads.c.
#define APPEND_TO_BLOCKED_QUEUE(tso)                    \
    ASSERT(StgTSO__link(tso) == END_TSO_QUEUE);         \
    if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
      W_[blocked_queue_hd] = tso;                       \
    } else {                                            \
      foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \
    }                                                   \
    W_[blocked_queue_tl] = tso;
2166
waitReadzh_fast
{
    /* args: R1 = file descriptor to wait on for reading */
#ifdef THREADED_RTS
    // This primop is only for the non-threaded RTS.
    foreign "C" barf("waitRead# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
    // block_info records the fd this thread is waiting on.
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2183
waitWritezh_fast
{
    /* args: R1 = file descriptor to wait on for writing */
#ifdef THREADED_RTS
    // This primop is only for the non-threaded RTS.
    foreign "C" barf("waitWrite# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
    // block_info records the fd this thread is waiting on.
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2200
2201
STRING(stg_delayzh_malloc_str, "delayzh_fast")
delayzh_fast
{
    // delay#: block the current thread for (at least) R1 microseconds.
#ifdef mingw32_HOST_OS
    W_ ares;
    CInt reqID;
#else
    W_ t, prev, target;
#endif

#ifdef THREADED_RTS
    // The threaded RTS does not use this primop.
    foreign "C" barf("delay# on threaded RTS") never returns;
#else

    /* args: R1 (microsecond delay amount) */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;

#ifdef mingw32_HOST_OS

    // Windows: hand the delay to the async IO machinery as a request.
    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_delayzh_malloc_str);
    (reqID) = foreign "C" addDelayRequest(R1);
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;

    /* Having all async-blocked threads reside on the blocked_queue
     * simplifies matters, so change the status to BlockedOnDoProc and
     * put the delayed thread on the blocked_queue.
     */
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async_void;

#else

    // Non-Windows: compute the wake-up time in timer ticks.
    W_ time;
    W_ divisor;
    (time) = foreign "C" getourtimeofday() [R1];
    // tickInterval is in milliseconds; 0 means the timer is off, in
    // which case fall back to the 50ms default.
    divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
    if (divisor == 0) {
        divisor = 50;
    }
    divisor = divisor * 1000;
    target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
           + time + 1; /* Add 1 as getourtimeofday rounds down */
    StgTSO_block_info(CurrentTSO) = target;

    /* Insert the new thread in the sleeping queue, which is kept
     * sorted by ascending wake-up time (block_info). */
    prev = NULL;
    t = W_[sleeping_queue];
while:
    if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
        prev = t;
        t = StgTSO__link(t);
        goto while;
    }

    StgTSO__link(CurrentTSO) = t;
    if (prev == NULL) {
        W_[sleeping_queue] = CurrentTSO;
    } else {
        // setTSOLink (not a plain store) keeps the GC informed about
        // the mutated predecessor TSO.
        foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) [];
    }
    jump stg_block_noregs;
#endif
#endif /* !THREADED_RTS */
}
2273
2274
2275 #ifdef mingw32_HOST_OS
STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
asyncReadzh_fast
{
    // asyncRead# (Windows-only, see enclosing mingw32_HOST_OS guard):
    // issue an asynchronous read request and block the current thread.
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncRead# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncReadzh_malloc_str)
                        [R1,R2,R3,R4];
    // 0/*FALSE*/ distinguishes a read from a write request
    // (cf. asyncWritezh_fast, which passes 1).
    (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    // block_info points at the malloc'd result record; presumably the
    // IO manager fills in len/errCode on completion — see AsyncIO.c.
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2303
STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
asyncWritezh_fast
{
    // asyncWrite# (Windows-only, see enclosing mingw32_HOST_OS guard):
    // issue an asynchronous write request and block the current thread.
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncWrite# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncWritezh_malloc_str)
                        [R1,R2,R3,R4];
    // 1/*TRUE*/ distinguishes a write from a read request
    // (cf. asyncReadzh_fast, which passes 0).
    (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];

    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    // block_info points at the malloc'd result record; presumably the
    // IO manager fills in len/errCode on completion — see AsyncIO.c.
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2331
STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
asyncDoProczh_fast
{
    // asyncDoProc# (Windows-only, see enclosing mingw32_HOST_OS guard):
    // run a C procedure asynchronously and block the current thread
    // until it completes.
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
#else

    /* args: R1 = proc, R2 = param */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncDoProczh_malloc_str) 
                                [R1,R2];
    (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    // block_info points at the malloc'd result record; presumably the
    // IO manager fills in len/errCode on completion — see AsyncIO.c.
    StgTSO_block_info(CurrentTSO) = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2359 #endif
2360
// noDuplicate# tries to ensure that none of the thunks under
// evaluation by the current thread are also under evaluation by
// another thread.  It relies on *both* threads doing noDuplicate#;
// the second one will get blocked if they are duplicating some work.
noDuplicatezh_fast
{
    // Flush the STG registers into the TSO so threadPaused can walk
    // this thread's stack.
    SAVE_THREAD_STATE();
    ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
    foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
    
    // threadPaused may have set what_next to ThreadKilled; in that
    // case finish the thread, otherwise reload state and resume.
    if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
        jump stg_threadFinished;
    } else {
        LOAD_THREAD_STATE();
        ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
        jump %ENTRY_CODE(Sp(0));
    }
}
2379
getApStackValzh_fast
{
   // Fetch the payload word at a given offset from an AP_STACK closure.
   // Returns (1, value) on success, or (0, the original closure) when
   // the argument is not an AP_STACK.
   W_ stack, idx, v;

   /* args: R1 = AP_STACK, R2 = offset */
   stack = R1;
   idx   = R2;

   // Guard clause: anything other than an AP_STACK is reported as a
   // failure, handing the closure itself back to the caller.
   if (%INFO_PTR(stack) != stg_AP_STACK_info) {
        RET_NP(0, R1);
   }
   v = StgAP_STACK_payload(stack, idx);
   RET_NP(1, v);
}
2397
2398 /* -----------------------------------------------------------------------------
2399    Misc. primitives
2400    -------------------------------------------------------------------------- */
2401
// Write the cost center stack of the first argument on stderr; return
// the second.  Possibly only makes sense for already evaluated
// things?
traceCcszh_fast
{
    W_ ccs;

#ifdef PROFILING
    // Only profiled builds carry a ccs field in the closure header;
    // otherwise this primop just returns its second argument.
    ccs = StgHeader_ccs(UNTAG(R1));
    // R2 must stay live across the call — it is the return value.
    foreign "C" fprintCCS_stderr(ccs "ptr") [R2];
#endif

    // Return the second argument by entering it.
    R1 = R2;
    ENTER();
}
2417
getSparkzh_fast
{
   // Poll this capability's spark pool.  Returns (1, spark) when a
   // spark was found, or (0, False) when the pool came up empty.
   W_ s;

#ifndef THREADED_RTS
   // No spark pool exists in the non-threaded RTS.
   RET_NP(0,ghczmprim_GHCziBool_False_closure);
#else
   (s) = foreign "C" findSpark(MyCapability());
   if (s == 0) {
      // Empty pool: tag 0, with False as a dummy closure payload.
      RET_NP(0,ghczmprim_GHCziBool_False_closure);
   } else {
      RET_NP(1,s);
   }
#endif
}