rts/PrimOps.cmm

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2004
   4  *
   5  * Out-of-line primitive operations
   6  *
   7  * This file contains the implementations of all the primitive
   8  * operations ("primops") which are not expanded inline.  See
   9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
  10  * this file contains code for most of those with the attribute
  11  * out_of_line=True.
  12  *
  13  * Entry convention: the entry convention for a primop is that all the
  14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
  15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
  16  *
  17  * Return convention: results from a primop are generally returned
  18  * using the ordinary unboxed tuple return convention.  The C-- parser
  19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
  20  * based on the prevailing return convention.
  21  *
  22  * This file is written in a subset of C--, extended with various
  23  * features specific to GHC.  It is compiled by GHC directly.  For the
  24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
  25  *
  26  * ---------------------------------------------------------------------------*/
  27
  28 #include "Cmm.h"
  29
  30 #ifdef __PIC__
  31 import pthread_mutex_lock;
  32 import pthread_mutex_unlock;
  33 #endif
  34 import base_ControlziExceptionziBase_nestedAtomically_closure;
  35 import EnterCriticalSection;
  36 import LeaveCriticalSection;
  37 import ghczmprim_GHCziTypes_False_closure;
  38 #if !defined(mingw32_HOST_OS)
  39 import sm_mutex;
  40 #endif
  41
  42 /*-----------------------------------------------------------------------------
  43   Array Primitives
  44
  45   Basically just new*Array - the others are all inline macros.
  46
  47   The size arg is always passed in R1, and the result returned in R1.
  48
  49   The slow entry point is for returning from a heap check, the saved
  50   size argument must be re-loaded from the stack.
  51   -------------------------------------------------------------------------- */
  52
  53 /* for objects that are *less* than the size of a word, make sure we
  54  * round up to the nearest word for the size of the array.
  55  */
  56
  57 stg_newByteArrayzh
  58 {
  59     W_ words, payload_words, n, p;
  60     MAYBE_GC(NO_PTRS,stg_newByteArrayzh);
  61     n = R1;
  62     payload_words = ROUNDUP_BYTES_TO_WDS(n);
  63     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
  64     ("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) [];
  65     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
  66     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
  67     StgArrWords_bytes(p) = n;
  68     RET_P(p);
  69 }
  70
  71 #define BA_ALIGN 16
  72 #define BA_MASK  (BA_ALIGN-1)
  73
  74 stg_newPinnedByteArrayzh
  75 {
  76     W_ words, n, bytes, payload_words, p;
  77
  78     MAYBE_GC(NO_PTRS,stg_newPinnedByteArrayzh);
  79     n = R1;
  80     bytes = n;
  81     /* payload_words is what we will tell the profiler we had to allocate */
  82     payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
  83     /* When we actually allocate memory, we need to allow space for the
  84        header: */
  85     bytes = bytes + SIZEOF_StgArrWords;
  86     /* And we want to align to BA_ALIGN bytes, so we need to allow space
  87        to shift up to BA_ALIGN - 1 bytes: */
  88     bytes = bytes + BA_ALIGN - 1;
  89     /* Now we convert to a number of words: */
  90     words = ROUNDUP_BYTES_TO_WDS(bytes);
  91
  92     ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
  93     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
  94
  95     /* Now we need to move p forward so that the payload is aligned
  96        to BA_ALIGN bytes: */
  97     p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);
  98
  99     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
 100     StgArrWords_bytes(p) = n;
 101     RET_P(p);
 102 }
 103
 104 stg_newAlignedPinnedByteArrayzh
 105 {
 106     W_ words, n, bytes, payload_words, p, alignment;
 107
 108     MAYBE_GC(NO_PTRS,stg_newAlignedPinnedByteArrayzh);
 109     n = R1;
 110     alignment = R2;
 111
 112     /* we always supply at least word-aligned memory, so there's no
 113        need to allow extra space for alignment if the requirement is less
 114        than a word.  This also prevents mischief with alignment == 0. */
 115     if (alignment <= SIZEOF_W) { alignment = 1; }
 116
 117     bytes = n;
 118
 119     /* payload_words is what we will tell the profiler we had to allocate */
 120     payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
 121
 122     /* When we actually allocate memory, we need to allow space for the
 123        header: */
 124     bytes = bytes + SIZEOF_StgArrWords;
 125     /* And we want to align to <alignment> bytes, so we need to allow space
 126        to shift up to <alignment - 1> bytes: */
 127     bytes = bytes + alignment - 1;
 128     /* Now we convert to a number of words: */
 129     words = ROUNDUP_BYTES_TO_WDS(bytes);
 130
 131     ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
 132     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
 133
 134     /* Now we need to move p forward so that the payload is aligned
 135        to <alignment> bytes. Note that we are assuming that
 136        <alignment> is a power of 2, which is technically not guaranteed */
 137     p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));
 138
 139     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
 140     StgArrWords_bytes(p) = n;
 141     RET_P(p);
 142 }
 143
 144 stg_newArrayzh
 145 {
 146     W_ words, n, init, arr, p, size;
 147     /* Args: R1 = words, R2 = initialisation value */
 148
 149     n = R1;
 150     MAYBE_GC(R2_PTR,stg_newArrayzh);
 151
 152     // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
 153     // in the array, making sure we round up, and then rounding up to a whole
 154     // number of words.
 155     size = n + mutArrPtrsCardWords(n);
 156     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
 157     ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2];
 158     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
 159
 160     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
 161     StgMutArrPtrs_ptrs(arr) = n;
 162     StgMutArrPtrs_size(arr) = size;
 163
 164     // Initialise all elements of the the array with the value in R2
 165     init = R2;
 166     p = arr + SIZEOF_StgMutArrPtrs;
 167   for:
 168     if (p < arr + WDS(words)) {
 169         W_[p] = init;
 170         p = p + WDS(1);
 171         goto for;
 172     }
 173     // Initialise the mark bits with 0
 174   for2:
 175     if (p < arr + WDS(size)) {
 176         W_[p] = 0;
 177         p = p + WDS(1);
 178         goto for2;
 179     }
 180
 181     RET_P(arr);
 182 }
 183
 184 stg_unsafeThawArrayzh
 185 {
 186   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
 187   //
 188   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
 189   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
 190   // it on the mutable list for the GC to remove (removing something from
 191   // the mutable list is not easy).
 192   //
 193   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
 194   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
 195   // to indicate that it is still on the mutable list.
 196   //
 197   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
 198   // either it is on a mut_list, or it isn't.  We adopt the convention that
 199   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
 200   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
 201   // we put it on the mutable list more than once, but it would get scavenged
 202   // multiple times during GC, which would be unnecessarily slow.
 203   //
 204   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
 205         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
 206         recordMutable(R1, R1);
 207         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
 208         RET_P(R1);
 209   } else {
 210         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
 211         RET_P(R1);
 212   }
 213 }
 214
 215 /* -----------------------------------------------------------------------------
 216    MutVar primitives
 217    -------------------------------------------------------------------------- */
 218
 219 stg_newMutVarzh
 220 {
 221     W_ mv;
 222     /* Args: R1 = initialisation value */
 223
 224     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, stg_newMutVarzh);
 225
 226     mv = Hp - SIZEOF_StgMutVar + WDS(1);
 227     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
 228     StgMutVar_var(mv) = R1;
 229
 230     RET_P(mv);
 231 }
 232
 233 stg_atomicModifyMutVarzh
 234 {
 235     W_ mv, f, z, x, y, r, h;
 236     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
 237
 238     /* If x is the current contents of the MutVar#, then
 239        We want to make the new contents point to
 240
 241          (sel_0 (f x))
 242
 243        and the return value is
 244
 245          (sel_1 (f x))
 246
 247         obviously we can share (f x).
 248
 249          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
 250          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
 251          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
 252     */
 253
 254 #if MIN_UPD_SIZE > 1
 255 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
 256 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
 257 #else
 258 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
 259 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
 260 #endif
 261
 262 #if MIN_UPD_SIZE > 2
 263 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
 264 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
 265 #else
 266 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
 267 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
 268 #endif
 269
 270 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
 271
 272    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, stg_atomicModifyMutVarzh);
 273
 274    mv = R1;
 275    f = R2;
 276
 277    TICK_ALLOC_THUNK_2();
 278    CCCS_ALLOC(THUNK_2_SIZE);
 279    z = Hp - THUNK_2_SIZE + WDS(1);
 280    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
 281    LDV_RECORD_CREATE(z);
 282    StgThunk_payload(z,0) = f;
 283
 284    TICK_ALLOC_THUNK_1();
 285    CCCS_ALLOC(THUNK_1_SIZE);
 286    y = z - THUNK_1_SIZE;
 287    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
 288    LDV_RECORD_CREATE(y);
 289    StgThunk_payload(y,0) = z;
 290
 291    TICK_ALLOC_THUNK_1();
 292    CCCS_ALLOC(THUNK_1_SIZE);
 293    r = y - THUNK_1_SIZE;
 294    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
 295    LDV_RECORD_CREATE(r);
 296    StgThunk_payload(r,0) = z;
 297
 298  retry:
 299    x = StgMutVar_var(mv);
 300    StgThunk_payload(z,1) = x;
 301 #ifdef THREADED_RTS
 302    (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) [];
 303    if (h != x) { goto retry; }
 304 #else
 305    StgMutVar_var(mv) = y;
 306 #endif
 307
 308    if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
 309      foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
 310    }
 311
 312    RET_P(r);
 313 }
 314
 315 /* -----------------------------------------------------------------------------
 316    Weak Pointer Primitives
 317    -------------------------------------------------------------------------- */
 318
 319 STRING(stg_weak_msg,"New weak pointer at %p\n")
 320
 321 stg_mkWeakzh
 322 {
 323   /* R1 = key
 324      R2 = value
 325      R3 = finalizer (or NULL)
 326   */
 327   W_ w;
 328
 329   if (R3 == NULL) {
 330     R3 = stg_NO_FINALIZER_closure;
 331   }
 332
 333   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, stg_mkWeakzh );
 334
 335   w = Hp - SIZEOF_StgWeak + WDS(1);
 336   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
 337
 338   // We don't care about cfinalizer here.
 339   // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
 340   // something else?
 341
 342   StgWeak_key(w)        = R1;
 343   StgWeak_value(w)      = R2;
 344   StgWeak_finalizer(w)  = R3;
 345   StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure;
 346
 347   ACQUIRE_LOCK(sm_mutex);
 348   StgWeak_link(w)       = W_[weak_ptr_list];
 349   W_[weak_ptr_list]     = w;
 350   RELEASE_LOCK(sm_mutex);
 351
 352   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
 353
 354   RET_P(w);
 355 }
 356
 357 stg_mkWeakForeignEnvzh
 358 {
 359   /* R1 = key
 360      R2 = value
 361      R3 = finalizer
 362      R4 = pointer
 363      R5 = has environment (0 or 1)
 364      R6 = environment
 365   */
 366   W_ w, payload_words, words, p;
 367
 368   W_ key, val, fptr, ptr, flag, eptr;
 369
 370   key  = R1;
 371   val  = R2;
 372   fptr = R3;
 373   ptr  = R4;
 374   flag = R5;
 375   eptr = R6;
 376
 377   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, stg_mkWeakForeignEnvzh );
 378
 379   w = Hp - SIZEOF_StgWeak + WDS(1);
 380   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
 381
 382   payload_words = 4;
 383   words         = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
 384   ("ptr" p)     = foreign "C" allocate(MyCapability() "ptr", words) [];
 385
 386   TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
 387   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
 388
 389   StgArrWords_bytes(p)     = WDS(payload_words);
 390   StgArrWords_payload(p,0) = fptr;
 391   StgArrWords_payload(p,1) = ptr;
 392   StgArrWords_payload(p,2) = eptr;
 393   StgArrWords_payload(p,3) = flag;
 394
 395   // We don't care about the value here.
 396   // Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else?
 397
 398   StgWeak_key(w)        = key;
 399   StgWeak_value(w)      = val;
 400   StgWeak_finalizer(w)  = stg_NO_FINALIZER_closure;
 401   StgWeak_cfinalizer(w) = p;
 402
 403   ACQUIRE_LOCK(sm_mutex);
 404   StgWeak_link(w)   = W_[weak_ptr_list];
 405   W_[weak_ptr_list] = w;
 406   RELEASE_LOCK(sm_mutex);
 407
 408   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
 409
 410   RET_P(w);
 411 }
 412
 413 stg_finalizzeWeakzh
 414 {
 415   /* R1 = weak ptr
 416    */
 417   W_ w, f, arr;
 418
 419   w = R1;
 420
 421   // already dead?
 422   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
 423       RET_NP(0,stg_NO_FINALIZER_closure);
 424   }
 425
 426   // kill it
 427 #ifdef PROFILING
 428   // @LDV profiling
 429   // A weak pointer is inherently used, so we do not need to call
 430   // LDV_recordDead_FILL_SLOP_DYNAMIC():
 431   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
 432   // or, LDV_recordDead():
 433   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
 434   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
 435   // large as weak pointers, so there is no need to fill the slop, either.
 436   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
 437 #endif
 438
 439   //
 440   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
 441   //
 442   SET_INFO(w,stg_DEAD_WEAK_info);
 443   LDV_RECORD_CREATE(w);
 444
 445   f   = StgWeak_finalizer(w);
 446   arr = StgWeak_cfinalizer(w);
 447
 448   StgDeadWeak_link(w) = StgWeak_link(w);
 449
 450   if (arr != stg_NO_FINALIZER_closure) {
 451     foreign "C" runCFinalizer(StgArrWords_payload(arr,0),
 452                               StgArrWords_payload(arr,1),
 453                               StgArrWords_payload(arr,2),
 454                               StgArrWords_payload(arr,3)) [];
 455   }
 456
 457   /* return the finalizer */
 458   if (f == stg_NO_FINALIZER_closure) {
 459       RET_NP(0,stg_NO_FINALIZER_closure);
 460   } else {
 461       RET_NP(1,f);
 462   }
 463 }
 464
 465 stg_deRefWeakzh
 466 {
 467   /* R1 = weak ptr */
 468   W_ w, code, val;
 469
 470   w = R1;
 471   if (GET_INFO(w) == stg_WEAK_info) {
 472     code = 1;
 473     val = StgWeak_value(w);
 474   } else {
 475     code = 0;
 476     val = w;
 477   }
 478   RET_NP(code,val);
 479 }
 480
 481 /* -----------------------------------------------------------------------------
 482    Floating point operations.
 483    -------------------------------------------------------------------------- */
 484
 485 stg_decodeFloatzuIntzh
 486 {
 487     W_ p;
 488     F_ arg;
 489     W_ mp_tmp1;
 490     W_ mp_tmp_w;
 491
 492     STK_CHK_GEN( WDS(2), NO_PTRS, stg_decodeFloatzuIntzh );
 493
 494     mp_tmp1  = Sp - WDS(1);
 495     mp_tmp_w = Sp - WDS(2);
 496
 497     /* arguments: F1 = Float# */
 498     arg = F1;
 499
 500     /* Perform the operation */
 501     foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
 502
 503     /* returns: (Int# (mantissa), Int# (exponent)) */
 504     RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
 505 }
 506
 507 stg_decodeDoublezu2Intzh
 508 {
 509     D_ arg;
 510     W_ p;
 511     W_ mp_tmp1;
 512     W_ mp_tmp2;
 513     W_ mp_result1;
 514     W_ mp_result2;
 515
 516     STK_CHK_GEN( WDS(4), NO_PTRS, stg_decodeDoublezu2Intzh );
 517
 518     mp_tmp1    = Sp - WDS(1);
 519     mp_tmp2    = Sp - WDS(2);
 520     mp_result1 = Sp - WDS(3);
 521     mp_result2 = Sp - WDS(4);
 522
 523     /* arguments: D1 = Double# */
 524     arg = D1;
 525
 526     /* Perform the operation */
 527     foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
 528                                     mp_result1 "ptr", mp_result2 "ptr",
 529                                     arg) [];
 530
 531     /* returns:
 532        (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
 533     RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
 534 }
 535
 536 /* -----------------------------------------------------------------------------
 537  * Concurrency primitives
 538  * -------------------------------------------------------------------------- */
 539
 540 stg_forkzh
 541 {
 542   /* args: R1 = closure to spark */
 543
 544   MAYBE_GC(R1_PTR, stg_forkzh);
 545
 546   W_ closure;
 547   W_ threadid;
 548   closure = R1;
 549
 550   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
 551                                 RtsFlags_GcFlags_initialStkSize(RtsFlags),
 552                                 closure "ptr") [];
 553
 554   /* start blocked if the current thread is blocked */
 555   StgTSO_flags(threadid) = %lobits16(
 556      TO_W_(StgTSO_flags(threadid)) |
 557      TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));
 558
 559   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
 560
 561   // context switch soon, but not immediately: we don't want every
 562   // forkIO to force a context-switch.
 563   Capability_context_switch(MyCapability()) = 1 :: CInt;
 564
 565   RET_P(threadid);
 566 }
 567
 568 stg_forkOnzh
 569 {
 570   /* args: R1 = cpu, R2 = closure to spark */
 571
 572   MAYBE_GC(R2_PTR, stg_forkOnzh);
 573
 574   W_ cpu;
 575   W_ closure;
 576   W_ threadid;
 577   cpu = R1;
 578   closure = R2;
 579
 580   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
 581                                 RtsFlags_GcFlags_initialStkSize(RtsFlags),
 582                                 closure "ptr") [];
 583
 584   /* start blocked if the current thread is blocked */
 585   StgTSO_flags(threadid) = %lobits16(
 586      TO_W_(StgTSO_flags(threadid)) |
 587      TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));
 588
 589   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
 590
 591   // context switch soon, but not immediately: we don't want every
 592   // forkIO to force a context-switch.
 593   Capability_context_switch(MyCapability()) = 1 :: CInt;
 594
 595   RET_P(threadid);
 596 }
 597
 598 stg_yieldzh
 599 {
 600   jump stg_yield_noregs;
 601 }
 602
 603 stg_myThreadIdzh
 604 {
 605   /* no args. */
 606   RET_P(CurrentTSO);
 607 }
 608
 609 stg_labelThreadzh
 610 {
 611   /* args:
 612         R1 = ThreadId#
 613         R2 = Addr# */
 614 #ifdef DEBUG
 615   foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
 616 #endif
 617   jump %ENTRY_CODE(Sp(0));
 618 }
 619
 620 stg_isCurrentThreadBoundzh
 621 {
 622   /* no args */
 623   W_ r;
 624   (r) = foreign "C" isThreadBound(CurrentTSO) [];
 625   RET_N(r);
 626 }
 627
 628 stg_threadStatuszh
 629 {
 630     /* args: R1 :: ThreadId# */
 631     W_ tso;
 632     W_ why_blocked;
 633     W_ what_next;
 634     W_ ret;
 635
 636     tso = R1;
 637
 638     what_next   = TO_W_(StgTSO_what_next(tso));
 639     why_blocked = TO_W_(StgTSO_why_blocked(tso));
 640     // Note: these two reads are not atomic, so they might end up
 641     // being inconsistent.  It doesn't matter, since we
 642     // only return one or the other.  If we wanted to return the
 643     // contents of block_info too, then we'd have to do some synchronisation.
 644
 645     if (what_next == ThreadComplete) {
 646         ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
 647     } else {
 648         if (what_next == ThreadKilled) {
 649             ret = 17;
 650         } else {
 651             ret = why_blocked;
 652         }
 653     }
 654     RET_N(ret);
 655 }
 656
 657 /* -----------------------------------------------------------------------------
 658  * TVar primitives
 659  * -------------------------------------------------------------------------- */
 660
 661 #define SP_OFF 0
 662
 663 // Catch retry frame ------------------------------------------------------------
 664
 665 INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
 666 #if defined(PROFILING)
 667   W_ unused1, W_ unused2,
 668 #endif
 669   W_ unused3, P_ unused4, P_ unused5)
 670 {
 671    W_ r, frame, trec, outer;
 672
 673    frame = Sp;
 674    trec = StgTSO_trec(CurrentTSO);
 675    outer  = StgTRecHeader_enclosing_trec(trec);
 676    (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
 677    if (r != 0) {
 678      /* Succeeded (either first branch or second branch) */
 679      StgTSO_trec(CurrentTSO) = outer;
 680      Sp = Sp + SIZEOF_StgCatchRetryFrame;
 681      jump %ENTRY_CODE(Sp(SP_OFF));
 682    } else {
 683      /* Did not commit: re-execute */
 684      W_ new_trec;
 685      ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 686      StgTSO_trec(CurrentTSO) = new_trec;
 687      if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
 688        R1 = StgCatchRetryFrame_alt_code(frame);
 689      } else {
 690        R1 = StgCatchRetryFrame_first_code(frame);
 691      }
 692      jump stg_ap_v_fast;
 693    }
 694 }
 695
 696
 697 // Atomically frame ------------------------------------------------------------
 698
 699 INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
 700 #if defined(PROFILING)
 701   W_ unused1, W_ unused2,
 702 #endif
 703   P_ code, P_ next_invariant_to_check, P_ result)
 704 {
 705   W_ frame, trec, valid, next_invariant, q, outer;
 706
 707   frame  = Sp;
 708   trec   = StgTSO_trec(CurrentTSO);
 709   result = R1;
 710   outer  = StgTRecHeader_enclosing_trec(trec);
 711
 712   if (outer == NO_TREC) {
 713     /* First time back at the atomically frame -- pick up invariants */
 714     ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
 715     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
 716     StgAtomicallyFrame_result(frame) = result;
 717
 718   } else {
 719     /* Second/subsequent time back at the atomically frame -- abort the
 720      * tx that's checking the invariant and move on to the next one */
 721     StgTSO_trec(CurrentTSO) = outer;
 722     q = StgAtomicallyFrame_next_invariant_to_check(frame);
 723     StgInvariantCheckQueue_my_execution(q) = trec;
 724     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
 725     /* Don't free trec -- it's linked from q and will be stashed in the
 726      * invariant if we eventually commit. */
 727     q = StgInvariantCheckQueue_next_queue_entry(q);
 728     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
 729     trec = outer;
 730   }
 731
 732   q = StgAtomicallyFrame_next_invariant_to_check(frame);
 733
 734   if (q != END_INVARIANT_CHECK_QUEUE) {
 735     /* We can't commit yet: another invariant to check */
 736     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
 737     StgTSO_trec(CurrentTSO) = trec;
 738
 739     next_invariant = StgInvariantCheckQueue_invariant(q);
 740     R1 = StgAtomicInvariant_code(next_invariant);
 741     jump stg_ap_v_fast;
 742
 743   } else {
 744
 745     /* We've got no more invariants to check, try to commit */
 746     (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
 747     if (valid != 0) {
 748       /* Transaction was valid: commit succeeded */
 749       StgTSO_trec(CurrentTSO) = NO_TREC;
 750       R1 = StgAtomicallyFrame_result(frame);
 751       Sp = Sp + SIZEOF_StgAtomicallyFrame;
 752       jump %ENTRY_CODE(Sp(SP_OFF));
 753     } else {
 754       /* Transaction was not valid: try again */
 755       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
 756       StgTSO_trec(CurrentTSO) = trec;
 757       StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
 758       R1 = StgAtomicallyFrame_code(frame);
 759       jump stg_ap_v_fast;
 760     }
 761   }
 762 }
 763
 764 INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
 765 #if defined(PROFILING)
 766   W_ unused1, W_ unused2,
 767 #endif
 768   P_ code, P_ next_invariant_to_check, P_ result)
 769 {
 770   W_ frame, trec, valid;
 771
 772   frame = Sp;
 773
 774   /* The TSO is currently waiting: should we stop waiting? */
 775   (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
 776   if (valid != 0) {
 777     /* Previous attempt is still valid: no point trying again yet */
 778     jump stg_block_noregs;
 779   } else {
 780     /* Previous attempt is no longer valid: try again */
 781     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
 782     StgTSO_trec(CurrentTSO) = trec;
 783     StgHeader_info(frame) = stg_atomically_frame_info;
 784     R1 = StgAtomicallyFrame_code(frame);
 785     jump stg_ap_v_fast;
 786   }
 787 }
 788
 789 // STM catch frame --------------------------------------------------------------
 790
 791 #define SP_OFF 0
 792
 793 /* Catch frames are very similar to update frames, but when entering
 794  * one we just pop the frame off the stack and perform the correct
 795  * kind of return to the activation record underneath us on the stack.
 796  */
 797
 798 INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
 799 #if defined(PROFILING)
 800   W_ unused1, W_ unused2,
 801 #endif
 802   P_ unused3, P_ unused4)
 803    {
 804       W_ r, frame, trec, outer;
 805       frame = Sp;
 806       trec = StgTSO_trec(CurrentTSO);
 807       outer  = StgTRecHeader_enclosing_trec(trec);
 808       (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
 809       if (r != 0) {
 810         /* Commit succeeded */
 811         StgTSO_trec(CurrentTSO) = outer;
 812         Sp = Sp + SIZEOF_StgCatchSTMFrame;
 813         jump Sp(SP_OFF);
 814       } else {
 815         /* Commit failed */
 816         W_ new_trec;
 817         ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 818         StgTSO_trec(CurrentTSO) = new_trec;
 819         R1 = StgCatchSTMFrame_code(frame);
 820         jump stg_ap_v_fast;
 821       }
 822    }
 823
 824
 825 // Primop definition ------------------------------------------------------------
 826
 827 stg_atomicallyzh
 828 {
 829   W_ frame;
 830   W_ old_trec;
 831   W_ new_trec;
 832
 833   // stmStartTransaction may allocate
 834   MAYBE_GC (R1_PTR, stg_atomicallyzh);
 835
 836   /* Args: R1 = m :: STM a */
 837   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, stg_atomicallyzh);
 838
 839   old_trec = StgTSO_trec(CurrentTSO);
 840
 841   /* Nested transactions are not allowed; raise an exception */
 842   if (old_trec != NO_TREC) {
 843      R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
 844      jump stg_raisezh;
 845   }
 846
 847   /* Set up the atomically frame */
 848   Sp = Sp - SIZEOF_StgAtomicallyFrame;
 849   frame = Sp;
 850
 851   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
 852   StgAtomicallyFrame_code(frame) = R1;
 853   StgAtomicallyFrame_result(frame) = NO_TREC;
 854   StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
 855
 856   /* Start the memory transcation */
 857   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
 858   StgTSO_trec(CurrentTSO) = new_trec;
 859
 860   /* Apply R1 to the realworld token */
 861   jump stg_ap_v_fast;
 862 }
 863
 864
 865 stg_catchSTMzh
 866 {
 867   W_ frame;
 868
 869   /* Args: R1 :: STM a */
 870   /* Args: R2 :: Exception -> STM a */
 871   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, stg_catchSTMzh);
 872
 873   /* Set up the catch frame */
 874   Sp = Sp - SIZEOF_StgCatchSTMFrame;
 875   frame = Sp;
 876
 877   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
 878   StgCatchSTMFrame_handler(frame) = R2;
 879   StgCatchSTMFrame_code(frame) = R1;
 880
 881   /* Start a nested transaction to run the body of the try block in */
 882   W_ cur_trec;
 883   W_ new_trec;
 884   cur_trec = StgTSO_trec(CurrentTSO);
 885   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
 886   StgTSO_trec(CurrentTSO) = new_trec;
 887
 888   /* Apply R1 to the realworld token */
 889   jump stg_ap_v_fast;
 890 }
 891
 892
 893 stg_catchRetryzh
 894 {
 895   W_ frame;
 896   W_ new_trec;
 897   W_ trec;
 898
 899   // stmStartTransaction may allocate
 900   MAYBE_GC (R1_PTR & R2_PTR, stg_catchRetryzh);
 901
 902   /* Args: R1 :: STM a */
 903   /* Args: R2 :: STM a */
 904   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, stg_catchRetryzh);
 905
 906   /* Start a nested transaction within which to run the first code */
 907   trec = StgTSO_trec(CurrentTSO);
 908   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
 909   StgTSO_trec(CurrentTSO) = new_trec;
 910
 911   /* Set up the catch-retry frame */
 912   Sp = Sp - SIZEOF_StgCatchRetryFrame;
 913   frame = Sp;
 914
 915   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
 916   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
 917   StgCatchRetryFrame_first_code(frame) = R1;
 918   StgCatchRetryFrame_alt_code(frame) = R2;
 919
 920   /* Apply R1 to the realworld token */
 921   jump stg_ap_v_fast;
 922 }
 923
 924
 925 stg_retryzh
 926 {
 927   W_ frame_type;
 928   W_ frame;
 929   W_ trec;
 930   W_ outer;
 931   W_ r;
 932
 933   MAYBE_GC (NO_PTRS, stg_retryzh); // STM operations may allocate
 934
 935   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
 936 retry_pop_stack:
 937   SAVE_THREAD_STATE();
 938   (frame_type) = foreign "C" findRetryFrameHelper(MyCapability(), CurrentTSO "ptr") [];
 939   LOAD_THREAD_STATE();
 940   frame = Sp;
 941   trec = StgTSO_trec(CurrentTSO);
 942   outer  = StgTRecHeader_enclosing_trec(trec);
 943
 944   if (frame_type == CATCH_RETRY_FRAME) {
 945     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
 946     ASSERT(outer != NO_TREC);
 947     // Abort the transaction attempting the current branch
 948     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
 949     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
 950     if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
 951       // Retry in the first branch: try the alternative
 952       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 953       StgTSO_trec(CurrentTSO) = trec;
 954       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
 955       R1 = StgCatchRetryFrame_alt_code(frame);
 956       jump stg_ap_v_fast;
 957     } else {
 958       // Retry in the alternative code: propagate the retry
 959       StgTSO_trec(CurrentTSO) = outer;
 960       Sp = Sp + SIZEOF_StgCatchRetryFrame;
 961       goto retry_pop_stack;
 962     }
 963   }
 964
 965   // We've reached the ATOMICALLY_FRAME: attempt to wait
 966   ASSERT(frame_type == ATOMICALLY_FRAME);
 967   if (outer != NO_TREC) {
 968     // We called retry while checking invariants, so abort the current
 969     // invariant check (merging its TVar accesses into the parents read
 970     // set so we'll wait on them)
 971     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
 972     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
 973     trec = outer;
 974     StgTSO_trec(CurrentTSO) = trec;
 975     outer  = StgTRecHeader_enclosing_trec(trec);
 976   }
 977   ASSERT(outer == NO_TREC);
 978
 979   (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
 980   if (r != 0) {
 981     // Transaction was valid: stmWait put us on the TVars' queues, we now block
 982     StgHeader_info(frame) = stg_atomically_waiting_frame_info;
 983     Sp = frame;
 984     // Fix up the stack in the unregisterised case: the return convention is different.
 985     R3 = trec; // passing to stmWaitUnblock()
 986     jump stg_block_stmwait;
 987   } else {
 988     // Transaction was not valid: retry immediately
 989     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 990     StgTSO_trec(CurrentTSO) = trec;
 991     R1 = StgAtomicallyFrame_code(frame);
 992     Sp = frame;
 993     jump stg_ap_v_fast;
 994   }
 995 }
 996
 997
 998 stg_checkzh
 999 {
1000   W_ trec, closure;
1001
1002   /* Args: R1 = invariant closure */
1003   MAYBE_GC (R1_PTR, stg_checkzh);
1004
1005   trec = StgTSO_trec(CurrentTSO);
1006   closure = R1;
1007   foreign "C" stmAddInvariantToCheck(MyCapability() "ptr",
1008                                      trec "ptr",
1009                                      closure "ptr") [];
1010
1011   jump %ENTRY_CODE(Sp(0));
1012 }
1013
1014
1015 stg_newTVarzh
1016 {
1017   W_ tv;
1018   W_ new_value;
1019
1020   /* Args: R1 = initialisation value */
1021
1022   MAYBE_GC (R1_PTR, stg_newTVarzh);
1023   new_value = R1;
1024   ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
1025   RET_P(tv);
1026 }
1027
1028
1029 stg_readTVarzh
1030 {
1031   W_ trec;
1032   W_ tvar;
1033   W_ result;
1034
1035   /* Args: R1 = TVar closure */
1036
1037   MAYBE_GC (R1_PTR, stg_readTVarzh); // Call to stmReadTVar may allocate
1038   trec = StgTSO_trec(CurrentTSO);
1039   tvar = R1;
1040   ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
1041
1042   RET_P(result);
1043 }
1044
1045 stg_readTVarIOzh
1046 {
1047     W_ result;
1048
1049 again:
1050     result = StgTVar_current_value(R1);
1051     if (%INFO_PTR(result) == stg_TREC_HEADER_info) {
1052         goto again;
1053     }
1054     RET_P(result);
1055 }
1056
1057 stg_writeTVarzh
1058 {
1059   W_ trec;
1060   W_ tvar;
1061   W_ new_value;
1062
1063   /* Args: R1 = TVar closure */
1064   /*       R2 = New value    */
1065
1066   MAYBE_GC (R1_PTR & R2_PTR, stg_writeTVarzh); // Call to stmWriteTVar may allocate
1067   trec = StgTSO_trec(CurrentTSO);
1068   tvar = R1;
1069   new_value = R2;
1070   foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1071
1072   jump %ENTRY_CODE(Sp(0));
1073 }
1074
1075
1076 /* -----------------------------------------------------------------------------
1077  * MVar primitives
1078  *
1079  * take & putMVar work as follows.  Firstly, an important invariant:
1080  *
1081  *    If the MVar is full, then the blocking queue contains only
1082  *    threads blocked on putMVar, and if the MVar is empty then the
1083  *    blocking queue contains only threads blocked on takeMVar.
1084  *
1085  * takeMvar:
1086  *    MVar empty : then add ourselves to the blocking queue
1087  *    MVar full  : remove the value from the MVar, and
1088  *                 blocking queue empty     : return
1089  *                 blocking queue non-empty : perform the first blocked putMVar
1090  *                                            from the queue, and wake up the
1091  *                                            thread (MVar is now full again)
1092  *
1093  * putMVar is just the dual of the above algorithm.
1094  *
1095  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1096  * the stack of the thread waiting to do the putMVar.  See
1097  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1098  * the stack layout, and the PerformPut and PerformTake macros below.
1099  *
1100  * It is important that a blocked take or put is woken up with the
1101  * take/put already performed, because otherwise there would be a
1102  * small window of vulnerability where the thread could receive an
1103  * exception and never perform its take or put, and we'd end up with a
1104  * deadlock.
1105  *
1106  * -------------------------------------------------------------------------- */
1107
1108 stg_isEmptyMVarzh
1109 {
1110     /* args: R1 = MVar closure */
1111
1112     if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
1113         RET_N(1);
1114     } else {
1115         RET_N(0);
1116     }
1117 }
1118
1119 stg_newMVarzh
1120 {
1121     /* args: none */
1122     W_ mvar;
1123
1124     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, stg_newMVarzh );
1125
1126     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1127     SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
1128         // MVARs start dirty: generation 0 has no mutable list
1129     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1130     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1131     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1132     RET_P(mvar);
1133 }
1134
1135
1136 #define PerformTake(stack, value)               \
1137     W_ sp;                                      \
1138     sp = StgStack_sp(stack);                    \
1139     W_[sp + WDS(1)] = value;                    \
1140     W_[sp + WDS(0)] = stg_gc_unpt_r1_info;
1141
1142 #define PerformPut(stack,lval)                  \
1143     W_ sp;                                      \
1144     sp = StgStack_sp(stack) + WDS(3);           \
1145     StgStack_sp(stack) = sp;                    \
1146     lval = W_[sp - WDS(1)];
1147
1148 stg_takeMVarzh
1149 {
1150     W_ mvar, val, info, tso, q;
1151
1152     /* args: R1 = MVar closure */
1153     mvar = R1;
1154
1155 #if defined(THREADED_RTS)
1156     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1157 #else
1158     info = GET_INFO(mvar);
1159 #endif
1160
1161     if (info == stg_MVAR_CLEAN_info) {
1162         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
1163     }
1164
1165     /* If the MVar is empty, put ourselves on its blocking queue,
1166      * and wait until we're woken up.
1167      */
1168     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1169
1170         // Note [mvar-heap-check] We want to do the heap check in the
1171         // branch here, to avoid the conditional in the common case.
1172         // However, we've already locked the MVar above, so we better
1173         // be careful to unlock it again if the the heap check fails.
1174         // Unfortunately we don't have an easy way to inject any code
1175         // into the heap check generated by the code generator, so we
1176         // have to do it in stg_gc_gen (see HeapStackCheck.cmm).
1177         HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR, stg_takeMVarzh);
1178
1179         q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
1180
1181         SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
1182         StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
1183         StgMVarTSOQueue_tso(q)  = CurrentTSO;
1184
1185         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1186             StgMVar_head(mvar) = q;
1187         } else {
1188             StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q;
1189             foreign "C" recordClosureMutated(MyCapability() "ptr",
1190                                              StgMVar_tail(mvar)) [];
1191         }
1192         StgTSO__link(CurrentTSO)       = q;
1193         StgTSO_block_info(CurrentTSO)  = mvar;
1194         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1195         StgMVar_tail(mvar)             = q;
1196
1197         R1 = mvar;
1198         jump stg_block_takemvar;
1199     }
1200
1201     /* we got the value... */
1202     val = StgMVar_value(mvar);
1203
1204     q = StgMVar_head(mvar);
1205 loop:
1206     if (q == stg_END_TSO_QUEUE_closure) {
1207         /* No further putMVars, MVar is now empty */
1208         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1209         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1210         RET_P(val);
1211     }
1212     if (StgHeader_info(q) == stg_IND_info ||
1213         StgHeader_info(q) == stg_MSG_NULL_info) {
1214         q = StgInd_indirectee(q);
1215         goto loop;
1216     }
1217
1218     // There are putMVar(s) waiting... wake up the first thread on the queue
1219
1220     tso = StgMVarTSOQueue_tso(q);
1221     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1222     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1223         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1224     }
1225
1226     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1227     ASSERT(StgTSO_block_info(tso) == mvar);
1228
1229     // actually perform the putMVar for the thread that we just woke up
1230     W_ stack;
1231     stack = StgTSO_stackobj(tso);
1232     PerformPut(stack, StgMVar_value(mvar));
1233
1234     // indicate that the MVar operation has now completed.
1235     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1236
1237     // no need to mark the TSO dirty, we have only written END_TSO_QUEUE.
1238
1239     foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
1240
1241     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1242     RET_P(val);
1243 }
1244
1245
1246 stg_tryTakeMVarzh
1247 {
1248     W_ mvar, val, info, tso, q;
1249
1250     /* args: R1 = MVar closure */
1251     mvar = R1;
1252
1253 #if defined(THREADED_RTS)
1254     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1255 #else
1256     info = GET_INFO(mvar);
1257 #endif
1258
1259     /* If the MVar is empty, put ourselves on its blocking queue,
1260      * and wait until we're woken up.
1261      */
1262     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1263 #if defined(THREADED_RTS)
1264         unlockClosure(mvar, info);
1265 #endif
1266         /* HACK: we need a pointer to pass back,
1267          * so we abuse NO_FINALIZER_closure
1268          */
1269         RET_NP(0, stg_NO_FINALIZER_closure);
1270     }
1271
1272     if (info == stg_MVAR_CLEAN_info) {
1273         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
1274     }
1275
1276     /* we got the value... */
1277     val = StgMVar_value(mvar);
1278
1279     q = StgMVar_head(mvar);
1280 loop:
1281     if (q == stg_END_TSO_QUEUE_closure) {
1282         /* No further putMVars, MVar is now empty */
1283         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1284         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1285         RET_NP(1, val);
1286     }
1287     if (StgHeader_info(q) == stg_IND_info ||
1288         StgHeader_info(q) == stg_MSG_NULL_info) {
1289         q = StgInd_indirectee(q);
1290         goto loop;
1291     }
1292
1293     // There are putMVar(s) waiting... wake up the first thread on the queue
1294
1295     tso = StgMVarTSOQueue_tso(q);
1296     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1297     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1298         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1299     }
1300
1301     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1302     ASSERT(StgTSO_block_info(tso) == mvar);
1303
1304     // actually perform the putMVar for the thread that we just woke up
1305     W_ stack;
1306     stack = StgTSO_stackobj(tso);
1307     PerformPut(stack, StgMVar_value(mvar));
1308
1309     // indicate that the MVar operation has now completed.
1310     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1311
1312     // no need to mark the TSO dirty, we have only written END_TSO_QUEUE.
1313
1314     foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
1315
1316     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1317     RET_NP(1,val);
1318 }
1319
1320
1321 stg_putMVarzh
1322 {
1323     W_ mvar, val, info, tso, q;
1324
1325     /* args: R1 = MVar, R2 = value */
1326     mvar = R1;
1327     val  = R2;
1328
1329 #if defined(THREADED_RTS)
1330     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1331 #else
1332     info = GET_INFO(mvar);
1333 #endif
1334
1335     if (info == stg_MVAR_CLEAN_info) {
1336         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1337     }
1338
1339     if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
1340
1341         // see Note [mvar-heap-check] above
1342         HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR & R2_PTR, stg_putMVarzh);
1343
1344         q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
1345
1346         SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
1347         StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
1348         StgMVarTSOQueue_tso(q)  = CurrentTSO;
1349
1350         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1351             StgMVar_head(mvar) = q;
1352         } else {
1353             StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q;
1354             foreign "C" recordClosureMutated(MyCapability() "ptr",
1355                                              StgMVar_tail(mvar)) [];
1356         }
1357         StgTSO__link(CurrentTSO)       = q;
1358         StgTSO_block_info(CurrentTSO)  = mvar;
1359         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1360         StgMVar_tail(mvar)             = q;
1361
1362         R1 = mvar;
1363         R2 = val;
1364         jump stg_block_putmvar;
1365     }
1366
1367     q = StgMVar_head(mvar);
1368 loop:
1369     if (q == stg_END_TSO_QUEUE_closure) {
1370         /* No further takes, the MVar is now full. */
1371         StgMVar_value(mvar) = val;
1372         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1373         jump %ENTRY_CODE(Sp(0));
1374     }
1375     if (StgHeader_info(q) == stg_IND_info ||
1376         StgHeader_info(q) == stg_MSG_NULL_info) {
1377         q = StgInd_indirectee(q);
1378         goto loop;
1379     }
1380
1381     // There are takeMVar(s) waiting: wake up the first one
1382
1383     tso = StgMVarTSOQueue_tso(q);
1384     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1385     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1386         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1387     }
1388
1389     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1390     ASSERT(StgTSO_block_info(tso) == mvar);
1391
1392     // actually perform the takeMVar
1393     W_ stack;
1394     stack = StgTSO_stackobj(tso);
1395     PerformTake(stack, val);
1396
1397     // indicate that the MVar operation has now completed.
1398     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1399
1400     if (TO_W_(StgStack_dirty(stack)) == 0) {
1401         foreign "C" dirty_STACK(MyCapability() "ptr", stack "ptr") [];
1402     }
1403
1404     foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
1405
1406     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1407     jump %ENTRY_CODE(Sp(0));
1408 }
1409
1410
1411 stg_tryPutMVarzh
1412 {
1413     W_ mvar, val, info, tso, q;
1414
1415     /* args: R1 = MVar, R2 = value */
1416     mvar = R1;
1417     val  = R2;
1418
1419 #if defined(THREADED_RTS)
1420     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1421 #else
1422     info = GET_INFO(mvar);
1423 #endif
1424
1425     if (info == stg_MVAR_CLEAN_info) {
1426         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1427     }
1428
1429     if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
1430 #if defined(THREADED_RTS)
1431         unlockClosure(mvar, info);
1432 #endif
1433         RET_N(0);
1434     }
1435
1436     q = StgMVar_head(mvar);
1437 loop:
1438     if (q == stg_END_TSO_QUEUE_closure) {
1439         /* No further takes, the MVar is now full. */
1440         StgMVar_value(mvar) = val;
1441         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1442         RET_N(1);
1443     }
1444     if (StgHeader_info(q) == stg_IND_info ||
1445         StgHeader_info(q) == stg_MSG_NULL_info) {
1446         q = StgInd_indirectee(q);
1447         goto loop;
1448     }
1449
1450     // There are takeMVar(s) waiting: wake up the first one
1451
1452     tso = StgMVarTSOQueue_tso(q);
1453     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1454     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1455         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1456     }
1457
1458     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1459     ASSERT(StgTSO_block_info(tso) == mvar);
1460
1461     // actually perform the takeMVar
1462     W_ stack;
1463     stack = StgTSO_stackobj(tso);
1464     PerformTake(stack, val);
1465
1466     // indicate that the MVar operation has now completed.
1467     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1468
1469     if (TO_W_(StgStack_dirty(stack)) == 0) {
1470         foreign "C" dirty_STACK(MyCapability() "ptr", stack "ptr") [];
1471     }
1472
1473     foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
1474
1475     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1476     RET_N(1);
1477 }
1478
1479
1480 /* -----------------------------------------------------------------------------
1481    Stable pointer primitives
1482    -------------------------------------------------------------------------  */
1483
1484 stg_makeStableNamezh
1485 {
1486     W_ index, sn_obj;
1487
1488     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, stg_makeStableNamezh );
1489
1490     (index) = foreign "C" lookupStableName(R1 "ptr") [];
1491
1492     /* Is there already a StableName for this heap object?
1493      *  stable_ptr_table is a pointer to an array of snEntry structs.
1494      */
1495     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1496         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1497         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1498         StgStableName_sn(sn_obj) = index;
1499         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1500     } else {
1501         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1502     }
1503
1504     RET_P(sn_obj);
1505 }
1506
1507
1508 stg_makeStablePtrzh
1509 {
1510     /* Args: R1 = a */
1511     W_ sp;
1512     MAYBE_GC(R1_PTR, stg_makeStablePtrzh);
1513     ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
1514     RET_N(sp);
1515 }
1516
1517 stg_deRefStablePtrzh
1518 {
1519     /* Args: R1 = the stable ptr */
1520     W_ r, sp;
1521     sp = R1;
1522     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1523     RET_P(r);
1524 }
1525
1526 /* -----------------------------------------------------------------------------
1527    Bytecode object primitives
1528    -------------------------------------------------------------------------  */
1529
1530 stg_newBCOzh
1531 {
1532     /* R1 = instrs
1533        R2 = literals
1534        R3 = ptrs
1535        R4 = arity
1536        R5 = bitmap array
1537     */
1538     W_ bco, bitmap_arr, bytes, words;
1539
1540     bitmap_arr = R5;
1541
1542     words = BYTES_TO_WDS(SIZEOF_StgBCO) + BYTE_ARR_WDS(bitmap_arr);
1543     bytes = WDS(words);
1544
1545     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, stg_newBCOzh );
1546
1547     bco = Hp - bytes + WDS(1);
1548     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1549
1550     StgBCO_instrs(bco)     = R1;
1551     StgBCO_literals(bco)   = R2;
1552     StgBCO_ptrs(bco)       = R3;
1553     StgBCO_arity(bco)      = HALF_W_(R4);
1554     StgBCO_size(bco)       = HALF_W_(words);
1555
1556     // Copy the arity/bitmap info into the BCO
1557     W_ i;
1558     i = 0;
1559 for:
1560     if (i < BYTE_ARR_WDS(bitmap_arr)) {
1561         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1562         i = i + 1;
1563         goto for;
1564     }
1565
1566     RET_P(bco);
1567 }
1568
1569
1570 stg_mkApUpd0zh
1571 {
1572     // R1 = the BCO# for the AP
1573     //
1574     W_ ap;
1575
1576     // This function is *only* used to wrap zero-arity BCOs in an
1577     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1578     // saturated and always points directly to a FUN or BCO.
1579     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1580            StgBCO_arity(R1) == HALF_W_(0));
1581
1582     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, stg_mkApUpd0zh);
1583     TICK_ALLOC_UP_THK(0, 0);
1584     CCCS_ALLOC(SIZEOF_StgAP);
1585
1586     ap = Hp - SIZEOF_StgAP + WDS(1);
1587     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1588
1589     StgAP_n_args(ap) = HALF_W_(0);
1590     StgAP_fun(ap) = R1;
1591
1592     RET_P(ap);
1593 }
1594
1595 stg_unpackClosurezh
1596 {
1597 /* args: R1 = closure to analyze */
1598 // TODO: Consider the absence of ptrs or nonptrs as a special case ?
1599
1600     W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
1601     info  = %GET_STD_INFO(UNTAG(R1));
1602
1603     // Some closures have non-standard layout, so we omit those here.
1604     W_ type;
1605     type = TO_W_(%INFO_TYPE(info));
1606     switch [0 .. N_CLOSURE_TYPES] type {
1607     case THUNK_SELECTOR : {
1608         ptrs = 1;
1609         nptrs = 0;
1610         goto out;
1611     }
1612     case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1,
1613          THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
1614         ptrs = 0;
1615         nptrs = 0;
1616         goto out;
1617     }
1618     default: {
1619         ptrs  = TO_W_(%INFO_PTRS(info));
1620         nptrs = TO_W_(%INFO_NPTRS(info));
1621         goto out;
1622     }}
1623 out:
1624
1625     W_ ptrs_arr_sz, ptrs_arr_cards, nptrs_arr_sz;
1626     nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
1627     ptrs_arr_cards = mutArrPtrsCardWords(ptrs);
1628     ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs) + WDS(ptrs_arr_cards);
1629
1630     ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, stg_unpackClosurezh);
1631
1632     W_ clos;
1633     clos = UNTAG(R1);
1634
1635     ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
1636     nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
1637
1638     SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
1639     StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
1640     StgMutArrPtrs_size(ptrs_arr) = ptrs + ptrs_arr_cards;
1641
1642     p = 0;
1643 for:
1644     if(p < ptrs) {
1645          W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
1646          p = p + 1;
1647          goto for;
1648     }
1649     /* We can leave the card table uninitialised, since the array is
1650        allocated in the nursery.  The GC will fill it in if/when the array
1651        is promoted. */
1652
1653     SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
1654     StgArrWords_bytes(nptrs_arr) = WDS(nptrs);
1655     p = 0;
1656 for2:
1657     if(p < nptrs) {
1658          W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
1659          p = p + 1;
1660          goto for2;
1661     }
1662     RET_NPP(info, ptrs_arr, nptrs_arr);
1663 }
1664
1665 /* -----------------------------------------------------------------------------
1666    Thread I/O blocking primitives
1667    -------------------------------------------------------------------------- */
1668
1669 /* Add a thread to the end of the blocked queue. (C-- version of the C
1670  * macro in Schedule.h).
1671  */
1672 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1673     ASSERT(StgTSO__link(tso) == END_TSO_QUEUE);         \
1674     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1675       W_[blocked_queue_hd] = tso;                       \
1676     } else {                                            \
1677       foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \
1678     }                                                   \
1679     W_[blocked_queue_tl] = tso;
1680
1681 stg_waitReadzh
1682 {
1683     /* args: R1 */
1684 #ifdef THREADED_RTS
1685     foreign "C" barf("waitRead# on threaded RTS") never returns;
1686 #else
1687
1688     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1689     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1690     StgTSO_block_info(CurrentTSO) = R1;
1691     // No locking - we're not going to use this interface in the
1692     // threaded RTS anyway.
1693     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1694     jump stg_block_noregs;
1695 #endif
1696 }
1697
1698 stg_waitWritezh
1699 {
1700     /* args: R1 */
1701 #ifdef THREADED_RTS
1702     foreign "C" barf("waitWrite# on threaded RTS") never returns;
1703 #else
1704
1705     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1706     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1707     StgTSO_block_info(CurrentTSO) = R1;
1708     // No locking - we're not going to use this interface in the
1709     // threaded RTS anyway.
1710     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1711     jump stg_block_noregs;
1712 #endif
1713 }
1714
1715
1716 STRING(stg_delayzh_malloc_str, "stg_delayzh")
1717 stg_delayzh
1718 {
1719 #ifdef mingw32_HOST_OS
1720     W_ ares;
1721     CInt reqID;
1722 #else
1723     W_ t, prev, target;
1724 #endif
1725
1726 #ifdef THREADED_RTS
1727     foreign "C" barf("delay# on threaded RTS") never returns;
1728 #else
1729
1730     /* args: R1 (microsecond delay amount) */
1731     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1732     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
1733
1734 #ifdef mingw32_HOST_OS
1735
1736     /* could probably allocate this on the heap instead */
1737     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1738                                             stg_delayzh_malloc_str);
1739     (reqID) = foreign "C" addDelayRequest(R1);
1740     StgAsyncIOResult_reqID(ares)   = reqID;
1741     StgAsyncIOResult_len(ares)     = 0;
1742     StgAsyncIOResult_errCode(ares) = 0;
1743     StgTSO_block_info(CurrentTSO)  = ares;
1744
1745     /* Having all async-blocked threads reside on the blocked_queue
1746      * simplifies matters, so change the status to OnDoProc put the
1747      * delayed thread on the blocked_queue.
1748      */
1749     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1750     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1751     jump stg_block_async_void;
1752
1753 #else
1754
1755     W_ time;
1756     W_ divisor;
1757     (time) = foreign "C" getourtimeofday() [R1];
1758     divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
1759     if (divisor == 0) {
1760         divisor = 50;
1761     }
1762     divisor = divisor * 1000;
1763     target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
1764            + time + 1; /* Add 1 as getourtimeofday rounds down */
1765     StgTSO_block_info(CurrentTSO) = target;
1766
1767     /* Insert the new thread in the sleeping queue. */
1768     prev = NULL;
1769     t = W_[sleeping_queue];
1770 while:
1771     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
1772         prev = t;
1773         t = StgTSO__link(t);
1774         goto while;
1775     }
1776
1777     StgTSO__link(CurrentTSO) = t;
1778     if (prev == NULL) {
1779         W_[sleeping_queue] = CurrentTSO;
1780     } else {
1781         foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) [];
1782     }
1783     jump stg_block_noregs;
1784 #endif
1785 #endif /* !THREADED_RTS */
1786 }
1787
1788
1789 #ifdef mingw32_HOST_OS
1790 STRING(stg_asyncReadzh_malloc_str, "stg_asyncReadzh")
1791 stg_asyncReadzh
1792 {
1793     W_ ares;
1794     CInt reqID;
1795
1796 #ifdef THREADED_RTS
1797     foreign "C" barf("asyncRead# on threaded RTS") never returns;
1798 #else
1799
1800     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1801     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1802     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1803
1804     /* could probably allocate this on the heap instead */
1805     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1806                                             stg_asyncReadzh_malloc_str)
1807                         [R1,R2,R3,R4];
1808     (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
1809     StgAsyncIOResult_reqID(ares)   = reqID;
1810     StgAsyncIOResult_len(ares)     = 0;
1811     StgAsyncIOResult_errCode(ares) = 0;
1812     StgTSO_block_info(CurrentTSO)  = ares;
1813     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1814     jump stg_block_async;
1815 #endif
1816 }
1817
1818 STRING(stg_asyncWritezh_malloc_str, "stg_asyncWritezh")
1819 stg_asyncWritezh
1820 {
1821     W_ ares;
1822     CInt reqID;
1823
1824 #ifdef THREADED_RTS
1825     foreign "C" barf("asyncWrite# on threaded RTS") never returns;
1826 #else
1827
1828     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1829     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1830     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1831
1832     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1833                                             stg_asyncWritezh_malloc_str)
1834                         [R1,R2,R3,R4];
1835     (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
1836
1837     StgAsyncIOResult_reqID(ares)   = reqID;
1838     StgAsyncIOResult_len(ares)     = 0;
1839     StgAsyncIOResult_errCode(ares) = 0;
1840     StgTSO_block_info(CurrentTSO)  = ares;
1841     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1842     jump stg_block_async;
1843 #endif
1844 }
1845
1846 STRING(stg_asyncDoProczh_malloc_str, "stg_asyncDoProczh")
1847 stg_asyncDoProczh
1848 {
1849     W_ ares;
1850     CInt reqID;
1851
1852 #ifdef THREADED_RTS
1853     foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
1854 #else
1855
1856     /* args: R1 = proc, R2 = param */
1857     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1858     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1859
1860     /* could probably allocate this on the heap instead */
1861     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1862                                             stg_asyncDoProczh_malloc_str)
1863                                 [R1,R2];
1864     (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
1865     StgAsyncIOResult_reqID(ares)   = reqID;
1866     StgAsyncIOResult_len(ares)     = 0;
1867     StgAsyncIOResult_errCode(ares) = 0;
1868     StgTSO_block_info(CurrentTSO) = ares;
1869     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1870     jump stg_block_async;
1871 #endif
1872 }
1873 #endif
1874
1875 /* -----------------------------------------------------------------------------
1876  * noDuplicate#
1877  *
1878  * noDuplicate# tries to ensure that none of the thunks under
1879  * evaluation by the current thread are also under evaluation by
1880  * another thread.  It relies on *both* threads doing noDuplicate#;
1881  * the second one will get blocked if they are duplicating some work.
1882  *
1883  * The idea is that noDuplicate# is used within unsafePerformIO to
1884  * ensure that the IO operation is performed at most once.
1885  * noDuplicate# calls threadPaused which acquires an exclusive lock on
1886  * all the thunks currently under evaluation by the current thread.
1887  *
1888  * Consider the following scenario.  There is a thunk A, whose
1889  * evaluation requires evaluating thunk B, where thunk B is an
1890  * unsafePerformIO.  Two threads, 1 and 2, bother enter A.  Thread 2
1891  * is pre-empted before it enters B, and claims A by blackholing it
1892  * (in threadPaused).  Thread 1 now enters B, and calls noDuplicate#.
1893  *
1894  *      thread 1                      thread 2
1895  *   +-----------+                 +---------------+
1896  *   |    -------+-----> A <-------+-------        |
1897  *   |  update   |   BLACKHOLE     | marked_update |
1898  *   +-----------+                 +---------------+
1899  *   |           |                 |               |
1900  *        ...                             ...
1901  *   |           |                 +---------------+
1902  *   +-----------+
1903  *   |     ------+-----> B
1904  *   |  update   |   BLACKHOLE
1905  *   +-----------+
1906  *
1907  * At this point: A is a blackhole, owned by thread 2.  noDuplicate#
1908  * calls threadPaused, which walks up the stack and
1909  *  - claims B on behalf of thread 1
1910  *  - then it reaches the update frame for A, which it sees is already
1911  *    a BLACKHOLE and is therefore owned by another thread.  Since
1912  *    thread 1 is duplicating work, the computation up to the update
1913  *    frame for A is suspended, including thunk B.
1914  *  - thunk B, which is an unsafePerformIO, has now been reverted to
1915  *    an AP_STACK which could be duplicated - BAD!
1916  *  - The solution is as follows: before calling threadPaused, we
1917  *    leave a frame on the stack (stg_noDuplicate_info) that will call
1918  *    noDuplicate# again if the current computation is suspended and
1919  *    restarted.
1920  *
1921  * See the test program in concurrent/prog003 for a way to demonstrate
1922  * this.  It needs to be run with +RTS -N3 or greater, and the bug
1923  * only manifests occasionally (once very 10 runs or so).
1924  * -------------------------------------------------------------------------- */
1925
1926 INFO_TABLE_RET(stg_noDuplicate, RET_SMALL)
1927 {
1928     Sp_adj(1);
1929     jump stg_noDuplicatezh;
1930 }
1931
1932 stg_noDuplicatezh
1933 {
1934     STK_CHK_GEN( WDS(1), NO_PTRS, stg_noDuplicatezh );
1935     // leave noDuplicate frame in case the current
1936     // computation is suspended and restarted (see above).
1937     Sp_adj(-1);
1938     Sp(0) = stg_noDuplicate_info;
1939
1940     SAVE_THREAD_STATE();
1941     ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
1942     foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
1943
1944     if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
1945         jump stg_threadFinished;
1946     } else {
1947         LOAD_THREAD_STATE();
1948         ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
1949         // remove the stg_noDuplicate frame if it is still there.
1950         if (Sp(0) == stg_noDuplicate_info) {
1951             Sp_adj(1);
1952         }
1953         jump %ENTRY_CODE(Sp(0));
1954     }
1955 }
1956
1957 /* -----------------------------------------------------------------------------
1958    Misc. primitives
1959    -------------------------------------------------------------------------- */
1960
1961 stg_getApStackValzh
1962 {
1963    W_ ap_stack, offset, val, ok;
1964
1965    /* args: R1 = AP_STACK, R2 = offset */
1966    ap_stack = R1;
1967    offset   = R2;
1968
1969    if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
1970         ok = 1;
1971         val = StgAP_STACK_payload(ap_stack,offset);
1972    } else {
1973         ok = 0;
1974         val = R1;
1975    }
1976    RET_NP(ok,val);
1977 }
1978
1979 // Write the cost center stack of the first argument on stderr; return
1980 // the second.  Possibly only makes sense for already evaluated
1981 // things?
1982 stg_traceCcszh
1983 {
1984     W_ ccs;
1985
1986 #ifdef PROFILING
1987     ccs = StgHeader_ccs(UNTAG(R1));
1988     foreign "C" fprintCCS_stderr(ccs "ptr") [R2];
1989 #endif
1990
1991     R1 = R2;
1992     ENTER();
1993 }
1994
1995 stg_getSparkzh
1996 {
1997    W_ spark;
1998
1999 #ifndef THREADED_RTS
2000    RET_NP(0,ghczmprim_GHCziTypes_False_closure);
2001 #else
2002    (spark) = foreign "C" findSpark(MyCapability());
2003    if (spark != 0) {
2004       RET_NP(1,spark);
2005    } else {
2006       RET_NP(0,ghczmprim_GHCziTypes_False_closure);
2007    }
2008 #endif
2009 }
2010
2011 stg_numSparkszh
2012 {
2013   W_ n;
2014 #ifdef THREADED_RTS
2015   (n) = foreign "C" dequeElements(Capability_sparks(MyCapability()));
2016 #else
2017   n = 0;
2018 #endif
2019   RET_N(n);
2020 }
2021
2022 stg_traceEventzh
2023 {
2024    W_ msg;
2025    msg = R1;
2026
2027 #if defined(TRACING) || defined(DEBUG)
2028
2029    foreign "C" traceUserMsg(MyCapability() "ptr", msg "ptr") [];
2030
2031 #elif defined(DTRACE)
2032
2033    W_ enabled;
2034
2035    // We should go through the macro HASKELLEVENT_USER_MSG_ENABLED from
2036    // RtsProbes.h, but that header file includes unistd.h, which doesn't
2037    // work in Cmm
2038    (enabled) = foreign "C" __dtrace_isenabled$HaskellEvent$user__msg$v1() [];
2039    if (enabled != 0) {
2040      foreign "C" dtraceUserMsgWrapper(MyCapability() "ptr", msg "ptr") [];
2041    }
2042
2043 #endif
2044    jump %ENTRY_CODE(Sp(0));
2045 }