rts/PrimOps.cmm

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2004
   4  *
   5  * Out-of-line primitive operations
   6  *
   7  * This file contains the implementations of all the primitive
   8  * operations ("primops") which are not expanded inline.  See
   9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
  10  * this file contains code for most of those with the attribute
  11  * out_of_line=True.
  12  *
  13  * Entry convention: the entry convention for a primop is that all the
  14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
  15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
  16  *
  17  * Return convention: results from a primop are generally returned
  18  * using the ordinary unboxed tuple return convention.  The C-- parser
  19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
  20  * based on the prevailing return convention.
  21  *
  22  * This file is written in a subset of C--, extended with various
  23  * features specific to GHC.  It is compiled by GHC directly.  For the
  24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
  25  *
  26  * ---------------------------------------------------------------------------*/
  27
  28 #include "Cmm.h"
  29
  30 #ifdef __PIC__
  31 import pthread_mutex_lock;
  32 import pthread_mutex_unlock;
  33 #endif
  34 import base_ControlziExceptionziBase_nestedAtomically_closure;
  35 import EnterCriticalSection;
  36 import LeaveCriticalSection;
  37 import ghczmprim_GHCziBool_False_closure;
  38 #if !defined(mingw32_HOST_OS)
  39 import sm_mutex;
  40 #endif
  41
  42 /*-----------------------------------------------------------------------------
  43   Array Primitives
  44
  45   Basically just new*Array - the others are all inline macros.
  46
  47   The size arg is always passed in R1, and the result returned in R1.
  48
  49   The slow entry point is for returning from a heap check, the saved
  50   size argument must be re-loaded from the stack.
  51   -------------------------------------------------------------------------- */
  52
  53 /* for objects that are *less* than the size of a word, make sure we
  54  * round up to the nearest word for the size of the array.
  55  */
  56
  57 stg_newByteArrayzh
  58 {
  59     W_ words, payload_words, n, p;
  60     MAYBE_GC(NO_PTRS,stg_newByteArrayzh);
  61     n = R1;
  62     payload_words = ROUNDUP_BYTES_TO_WDS(n);
  63     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
  64     ("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) [];
  65     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
  66     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
  67     StgArrWords_bytes(p) = n;
  68     RET_P(p);
  69 }
  70
  71 #define BA_ALIGN 16
  72 #define BA_MASK  (BA_ALIGN-1)
  73
  74 stg_newPinnedByteArrayzh
  75 {
  76     W_ words, n, bytes, payload_words, p;
  77
  78     MAYBE_GC(NO_PTRS,stg_newPinnedByteArrayzh);
  79     n = R1;
  80     bytes = n;
  81     /* payload_words is what we will tell the profiler we had to allocate */
  82     payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
  83     /* When we actually allocate memory, we need to allow space for the
  84        header: */
  85     bytes = bytes + SIZEOF_StgArrWords;
  86     /* And we want to align to BA_ALIGN bytes, so we need to allow space
  87        to shift up to BA_ALIGN - 1 bytes: */
  88     bytes = bytes + BA_ALIGN - 1;
  89     /* Now we convert to a number of words: */
  90     words = ROUNDUP_BYTES_TO_WDS(bytes);
  91
  92     ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
  93     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
  94
  95     /* Now we need to move p forward so that the payload is aligned
  96        to BA_ALIGN bytes: */
  97     p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);
  98
  99     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
 100     StgArrWords_bytes(p) = n;
 101     RET_P(p);
 102 }
 103
 104 stg_newAlignedPinnedByteArrayzh
 105 {
 106     W_ words, n, bytes, payload_words, p, alignment;
 107
 108     MAYBE_GC(NO_PTRS,stg_newAlignedPinnedByteArrayzh);
 109     n = R1;
 110     alignment = R2;
 111
 112     /* we always supply at least word-aligned memory, so there's no
 113        need to allow extra space for alignment if the requirement is less
 114        than a word.  This also prevents mischief with alignment == 0. */
 115     if (alignment <= SIZEOF_W) { alignment = 1; }
 116
 117     bytes = n;
 118
 119     /* payload_words is what we will tell the profiler we had to allocate */
 120     payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
 121
 122     /* When we actually allocate memory, we need to allow space for the
 123        header: */
 124     bytes = bytes + SIZEOF_StgArrWords;
 125     /* And we want to align to <alignment> bytes, so we need to allow space
 126        to shift up to <alignment - 1> bytes: */
 127     bytes = bytes + alignment - 1;
 128     /* Now we convert to a number of words: */
 129     words = ROUNDUP_BYTES_TO_WDS(bytes);
 130
 131     ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
 132     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
 133
 134     /* Now we need to move p forward so that the payload is aligned
 135        to <alignment> bytes. Note that we are assuming that
 136        <alignment> is a power of 2, which is technically not guaranteed */
 137     p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));
 138
 139     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
 140     StgArrWords_bytes(p) = n;
 141     RET_P(p);
 142 }
 143
 144 stg_newArrayzh
 145 {
 146     W_ words, n, init, arr, p, size;
 147     /* Args: R1 = words, R2 = initialisation value */
 148
 149     n = R1;
 150     MAYBE_GC(R2_PTR,stg_newArrayzh);
 151
 152     // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
 153     // in the array, making sure we round up, and then rounding up to a whole
 154     // number of words.
 155     size = n + mutArrPtrsCardWords(n);
 156     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
 157     ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2];
 158     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
 159
 160     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
 161     StgMutArrPtrs_ptrs(arr) = n;
 162     StgMutArrPtrs_size(arr) = size;
 163
 164     // Initialise all elements of the the array with the value in R2
 165     init = R2;
 166     p = arr + SIZEOF_StgMutArrPtrs;
 167   for:
 168     if (p < arr + WDS(words)) {
 169         W_[p] = init;
 170         p = p + WDS(1);
 171         goto for;
 172     }
 173     // Initialise the mark bits with 0
 174   for2:
 175     if (p < arr + WDS(size)) {
 176         W_[p] = 0;
 177         p = p + WDS(1);
 178         goto for2;
 179     }
 180
 181     RET_P(arr);
 182 }
 183
 184 stg_unsafeThawArrayzh
 185 {
 186   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
 187   //
 188   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
 189   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
 190   // it on the mutable list for the GC to remove (removing something from
 191   // the mutable list is not easy).
 192   //
 193   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
 194   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
 195   // to indicate that it is still on the mutable list.
 196   //
 197   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
 198   // either it is on a mut_list, or it isn't.  We adopt the convention that
 199   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
 200   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
 201   // we put it on the mutable list more than once, but it would get scavenged
 202   // multiple times during GC, which would be unnecessarily slow.
 203   //
 204   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
 205         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
 206         recordMutable(R1, R1);
 207         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
 208         RET_P(R1);
 209   } else {
 210         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
 211         RET_P(R1);
 212   }
 213 }
 214
 215 /* -----------------------------------------------------------------------------
 216    MutVar primitives
 217    -------------------------------------------------------------------------- */
 218
 219 stg_newMutVarzh
 220 {
 221     W_ mv;
 222     /* Args: R1 = initialisation value */
 223
 224     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, stg_newMutVarzh);
 225
 226     mv = Hp - SIZEOF_StgMutVar + WDS(1);
 227     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
 228     StgMutVar_var(mv) = R1;
 229
 230     RET_P(mv);
 231 }
 232
 233 stg_atomicModifyMutVarzh
 234 {
 235     W_ mv, f, z, x, y, r, h;
 236     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
 237
 238     /* If x is the current contents of the MutVar#, then
 239        We want to make the new contents point to
 240
 241          (sel_0 (f x))
 242
 243        and the return value is
 244
 245          (sel_1 (f x))
 246
 247         obviously we can share (f x).
 248
 249          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
 250          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
 251          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
 252     */
 253
 254 #if MIN_UPD_SIZE > 1
 255 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
 256 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
 257 #else
 258 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
 259 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
 260 #endif
 261
 262 #if MIN_UPD_SIZE > 2
 263 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
 264 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
 265 #else
 266 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
 267 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
 268 #endif
 269
 270 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
 271
 272    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, stg_atomicModifyMutVarzh);
 273
 274    mv = R1;
 275    f = R2;
 276
 277    TICK_ALLOC_THUNK_2();
 278    CCCS_ALLOC(THUNK_2_SIZE);
 279    z = Hp - THUNK_2_SIZE + WDS(1);
 280    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
 281    LDV_RECORD_CREATE(z);
 282    StgThunk_payload(z,0) = f;
 283
 284    TICK_ALLOC_THUNK_1();
 285    CCCS_ALLOC(THUNK_1_SIZE);
 286    y = z - THUNK_1_SIZE;
 287    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
 288    LDV_RECORD_CREATE(y);
 289    StgThunk_payload(y,0) = z;
 290
 291    TICK_ALLOC_THUNK_1();
 292    CCCS_ALLOC(THUNK_1_SIZE);
 293    r = y - THUNK_1_SIZE;
 294    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
 295    LDV_RECORD_CREATE(r);
 296    StgThunk_payload(r,0) = z;
 297
 298  retry:
 299    x = StgMutVar_var(mv);
 300    StgThunk_payload(z,1) = x;
 301 #ifdef THREADED_RTS
 302    (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) [];
 303    if (h != x) { goto retry; }
 304 #else
 305    StgMutVar_var(mv) = y;
 306 #endif
 307
 308    if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
 309      foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
 310    }
 311
 312    RET_P(r);
 313 }
 314
 315 /* -----------------------------------------------------------------------------
 316    Weak Pointer Primitives
 317    -------------------------------------------------------------------------- */
 318
 319 STRING(stg_weak_msg,"New weak pointer at %p\n")
 320
 321 stg_mkWeakzh
 322 {
 323   /* R1 = key
 324      R2 = value
 325      R3 = finalizer (or NULL)
 326   */
 327   W_ w;
 328
 329   if (R3 == NULL) {
 330     R3 = stg_NO_FINALIZER_closure;
 331   }
 332
 333   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, stg_mkWeakzh );
 334
 335   w = Hp - SIZEOF_StgWeak + WDS(1);
 336   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
 337
 338   // We don't care about cfinalizer here.
 339   // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
 340   // something else?
 341
 342   StgWeak_key(w)        = R1;
 343   StgWeak_value(w)      = R2;
 344   StgWeak_finalizer(w)  = R3;
 345   StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure;
 346
 347   ACQUIRE_LOCK(sm_mutex);
 348   StgWeak_link(w)       = W_[weak_ptr_list];
 349   W_[weak_ptr_list]     = w;
 350   RELEASE_LOCK(sm_mutex);
 351
 352   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
 353
 354   RET_P(w);
 355 }
 356
 357 stg_mkWeakForeignEnvzh
 358 {
 359   /* R1 = key
 360      R2 = value
 361      R3 = finalizer
 362      R4 = pointer
 363      R5 = has environment (0 or 1)
 364      R6 = environment
 365   */
 366   W_ w, payload_words, words, p;
 367
 368   W_ key, val, fptr, ptr, flag, eptr;
 369
 370   key  = R1;
 371   val  = R2;
 372   fptr = R3;
 373   ptr  = R4;
 374   flag = R5;
 375   eptr = R6;
 376
 377   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, stg_mkWeakForeignEnvzh );
 378
 379   w = Hp - SIZEOF_StgWeak + WDS(1);
 380   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
 381
 382   payload_words = 4;
 383   words         = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
 384   ("ptr" p)     = foreign "C" allocate(MyCapability() "ptr", words) [];
 385
 386   TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
 387   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
 388
 389   StgArrWords_bytes(p)     = WDS(payload_words);
 390   StgArrWords_payload(p,0) = fptr;
 391   StgArrWords_payload(p,1) = ptr;
 392   StgArrWords_payload(p,2) = eptr;
 393   StgArrWords_payload(p,3) = flag;
 394
 395   // We don't care about the value here.
 396   // Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else?
 397
 398   StgWeak_key(w)        = key;
 399   StgWeak_value(w)      = val;
 400   StgWeak_finalizer(w)  = stg_NO_FINALIZER_closure;
 401   StgWeak_cfinalizer(w) = p;
 402
 403   ACQUIRE_LOCK(sm_mutex);
 404   StgWeak_link(w)   = W_[weak_ptr_list];
 405   W_[weak_ptr_list] = w;
 406   RELEASE_LOCK(sm_mutex);
 407
 408   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
 409
 410   RET_P(w);
 411 }
 412
 413 stg_finalizzeWeakzh
 414 {
 415   /* R1 = weak ptr
 416    */
 417   W_ w, f, arr;
 418
 419   w = R1;
 420
 421   // already dead?
 422   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
 423       RET_NP(0,stg_NO_FINALIZER_closure);
 424   }
 425
 426   // kill it
 427 #ifdef PROFILING
 428   // @LDV profiling
 429   // A weak pointer is inherently used, so we do not need to call
 430   // LDV_recordDead_FILL_SLOP_DYNAMIC():
 431   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
 432   // or, LDV_recordDead():
 433   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
 434   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
 435   // large as weak pointers, so there is no need to fill the slop, either.
 436   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
 437 #endif
 438
 439   //
 440   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
 441   //
 442   SET_INFO(w,stg_DEAD_WEAK_info);
 443   LDV_RECORD_CREATE(w);
 444
 445   f   = StgWeak_finalizer(w);
 446   arr = StgWeak_cfinalizer(w);
 447
 448   StgDeadWeak_link(w) = StgWeak_link(w);
 449
 450   if (arr != stg_NO_FINALIZER_closure) {
 451     foreign "C" runCFinalizer(StgArrWords_payload(arr,0),
 452                               StgArrWords_payload(arr,1),
 453                               StgArrWords_payload(arr,2),
 454                               StgArrWords_payload(arr,3)) [];
 455   }
 456
 457   /* return the finalizer */
 458   if (f == stg_NO_FINALIZER_closure) {
 459       RET_NP(0,stg_NO_FINALIZER_closure);
 460   } else {
 461       RET_NP(1,f);
 462   }
 463 }
 464
 465 stg_deRefWeakzh
 466 {
 467   /* R1 = weak ptr */
 468   W_ w, code, val;
 469
 470   w = R1;
 471   if (GET_INFO(w) == stg_WEAK_info) {
 472     code = 1;
 473     val = StgWeak_value(w);
 474   } else {
 475     code = 0;
 476     val = w;
 477   }
 478   RET_NP(code,val);
 479 }
 480
 481 /* -----------------------------------------------------------------------------
 482    Floating point operations.
 483    -------------------------------------------------------------------------- */
 484
 485 stg_decodeFloatzuIntzh
 486 {
 487     W_ p;
 488     F_ arg;
 489     W_ mp_tmp1;
 490     W_ mp_tmp_w;
 491
 492     STK_CHK_GEN( WDS(2), NO_PTRS, stg_decodeFloatzuIntzh );
 493
 494     mp_tmp1  = Sp - WDS(1);
 495     mp_tmp_w = Sp - WDS(2);
 496
 497     /* arguments: F1 = Float# */
 498     arg = F1;
 499
 500     /* Perform the operation */
 501     foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
 502
 503     /* returns: (Int# (mantissa), Int# (exponent)) */
 504     RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
 505 }
 506
 507 stg_decodeDoublezu2Intzh
 508 {
 509     D_ arg;
 510     W_ p;
 511     W_ mp_tmp1;
 512     W_ mp_tmp2;
 513     W_ mp_result1;
 514     W_ mp_result2;
 515
 516     STK_CHK_GEN( WDS(4), NO_PTRS, stg_decodeDoublezu2Intzh );
 517
 518     mp_tmp1    = Sp - WDS(1);
 519     mp_tmp2    = Sp - WDS(2);
 520     mp_result1 = Sp - WDS(3);
 521     mp_result2 = Sp - WDS(4);
 522
 523     /* arguments: D1 = Double# */
 524     arg = D1;
 525
 526     /* Perform the operation */
 527     foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
 528                                     mp_result1 "ptr", mp_result2 "ptr",
 529                                     arg) [];
 530
 531     /* returns:
 532        (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
 533     RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
 534 }
 535
 536 /* -----------------------------------------------------------------------------
 537  * Concurrency primitives
 538  * -------------------------------------------------------------------------- */
 539
 540 stg_forkzh
 541 {
 542   /* args: R1 = closure to spark */
 543
 544   MAYBE_GC(R1_PTR, stg_forkzh);
 545
 546   W_ closure;
 547   W_ threadid;
 548   closure = R1;
 549
 550   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
 551                                 RtsFlags_GcFlags_initialStkSize(RtsFlags),
 552                                 closure "ptr") [];
 553
 554   /* start blocked if the current thread is blocked */
 555   StgTSO_flags(threadid) = %lobits16(
 556      TO_W_(StgTSO_flags(threadid)) |
 557      TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));
 558
 559   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
 560
 561   // context switch soon, but not immediately: we don't want every
 562   // forkIO to force a context-switch.
 563   Capability_context_switch(MyCapability()) = 1 :: CInt;
 564
 565   RET_P(threadid);
 566 }
 567
 568 stg_forkOnzh
 569 {
 570   /* args: R1 = cpu, R2 = closure to spark */
 571
 572   MAYBE_GC(R2_PTR, stg_forkOnzh);
 573
 574   W_ cpu;
 575   W_ closure;
 576   W_ threadid;
 577   cpu = R1;
 578   closure = R2;
 579
 580   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
 581                                 RtsFlags_GcFlags_initialStkSize(RtsFlags),
 582                                 closure "ptr") [];
 583
 584   /* start blocked if the current thread is blocked */
 585   StgTSO_flags(threadid) = %lobits16(
 586      TO_W_(StgTSO_flags(threadid)) |
 587      TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));
 588
 589   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
 590
 591   // context switch soon, but not immediately: we don't want every
 592   // forkIO to force a context-switch.
 593   Capability_context_switch(MyCapability()) = 1 :: CInt;
 594
 595   RET_P(threadid);
 596 }
 597
 598 stg_yieldzh
 599 {
 600   jump stg_yield_noregs;
 601 }
 602
 603 stg_myThreadIdzh
 604 {
 605   /* no args. */
 606   RET_P(CurrentTSO);
 607 }
 608
 609 stg_labelThreadzh
 610 {
 611   /* args:
 612         R1 = ThreadId#
 613         R2 = Addr# */
 614 #ifdef DEBUG
 615   foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
 616 #endif
 617   jump %ENTRY_CODE(Sp(0));
 618 }
 619
 620 stg_isCurrentThreadBoundzh
 621 {
 622   /* no args */
 623   W_ r;
 624   (r) = foreign "C" isThreadBound(CurrentTSO) [];
 625   RET_N(r);
 626 }
 627
 628 stg_threadStatuszh
 629 {
 630     /* args: R1 :: ThreadId# */
 631     W_ tso;
 632     W_ why_blocked;
 633     W_ what_next;
 634     W_ ret;
 635
 636     tso = R1;
 637     loop:
 638       if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
 639           tso = StgTSO__link(tso);
 640           goto loop;
 641       }
 642
 643     what_next   = TO_W_(StgTSO_what_next(tso));
 644     why_blocked = TO_W_(StgTSO_why_blocked(tso));
 645     // Note: these two reads are not atomic, so they might end up
 646     // being inconsistent.  It doesn't matter, since we
 647     // only return one or the other.  If we wanted to return the
 648     // contents of block_info too, then we'd have to do some synchronisation.
 649
 650     if (what_next == ThreadComplete) {
 651         ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
 652     } else {
 653         if (what_next == ThreadKilled) {
 654             ret = 17;
 655         } else {
 656             ret = why_blocked;
 657         }
 658     }
 659     RET_N(ret);
 660 }
 661
 662 /* -----------------------------------------------------------------------------
 663  * TVar primitives
 664  * -------------------------------------------------------------------------- */
 665
 666 #define SP_OFF 0
 667
 668 // Catch retry frame ------------------------------------------------------------
 669
 670 INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
 671 #if defined(PROFILING)
 672   W_ unused1, W_ unused2,
 673 #endif
 674   W_ unused3, P_ unused4, P_ unused5)
 675 {
 676    W_ r, frame, trec, outer;
 677
 678    frame = Sp;
 679    trec = StgTSO_trec(CurrentTSO);
 680    outer  = StgTRecHeader_enclosing_trec(trec);
 681    (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
 682    if (r != 0) {
 683      /* Succeeded (either first branch or second branch) */
 684      StgTSO_trec(CurrentTSO) = outer;
 685      Sp = Sp + SIZEOF_StgCatchRetryFrame;
 686      jump %ENTRY_CODE(Sp(SP_OFF));
 687    } else {
 688      /* Did not commit: re-execute */
 689      W_ new_trec;
 690      ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 691      StgTSO_trec(CurrentTSO) = new_trec;
 692      if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
 693        R1 = StgCatchRetryFrame_alt_code(frame);
 694      } else {
 695        R1 = StgCatchRetryFrame_first_code(frame);
 696      }
 697      jump stg_ap_v_fast;
 698    }
 699 }
 700
 701
 702 // Atomically frame ------------------------------------------------------------
 703
 704 INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
 705 #if defined(PROFILING)
 706   W_ unused1, W_ unused2,
 707 #endif
 708   P_ code, P_ next_invariant_to_check, P_ result)
 709 {
 710   W_ frame, trec, valid, next_invariant, q, outer;
 711
 712   frame  = Sp;
 713   trec   = StgTSO_trec(CurrentTSO);
 714   result = R1;
 715   outer  = StgTRecHeader_enclosing_trec(trec);
 716
 717   if (outer == NO_TREC) {
 718     /* First time back at the atomically frame -- pick up invariants */
 719     ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
 720     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
 721     StgAtomicallyFrame_result(frame) = result;
 722
 723   } else {
 724     /* Second/subsequent time back at the atomically frame -- abort the
 725      * tx that's checking the invariant and move on to the next one */
 726     StgTSO_trec(CurrentTSO) = outer;
 727     q = StgAtomicallyFrame_next_invariant_to_check(frame);
 728     StgInvariantCheckQueue_my_execution(q) = trec;
 729     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
 730     /* Don't free trec -- it's linked from q and will be stashed in the
 731      * invariant if we eventually commit. */
 732     q = StgInvariantCheckQueue_next_queue_entry(q);
 733     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
 734     trec = outer;
 735   }
 736
 737   q = StgAtomicallyFrame_next_invariant_to_check(frame);
 738
 739   if (q != END_INVARIANT_CHECK_QUEUE) {
 740     /* We can't commit yet: another invariant to check */
 741     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
 742     StgTSO_trec(CurrentTSO) = trec;
 743
 744     next_invariant = StgInvariantCheckQueue_invariant(q);
 745     R1 = StgAtomicInvariant_code(next_invariant);
 746     jump stg_ap_v_fast;
 747
 748   } else {
 749
 750     /* We've got no more invariants to check, try to commit */
 751     (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
 752     if (valid != 0) {
 753       /* Transaction was valid: commit succeeded */
 754       StgTSO_trec(CurrentTSO) = NO_TREC;
 755       R1 = StgAtomicallyFrame_result(frame);
 756       Sp = Sp + SIZEOF_StgAtomicallyFrame;
 757       jump %ENTRY_CODE(Sp(SP_OFF));
 758     } else {
 759       /* Transaction was not valid: try again */
 760       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
 761       StgTSO_trec(CurrentTSO) = trec;
 762       StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
 763       R1 = StgAtomicallyFrame_code(frame);
 764       jump stg_ap_v_fast;
 765     }
 766   }
 767 }
 768
 769 INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
 770 #if defined(PROFILING)
 771   W_ unused1, W_ unused2,
 772 #endif
 773   P_ code, P_ next_invariant_to_check, P_ result)
 774 {
 775   W_ frame, trec, valid;
 776
 777   frame = Sp;
 778
 779   /* The TSO is currently waiting: should we stop waiting? */
 780   (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
 781   if (valid != 0) {
 782     /* Previous attempt is still valid: no point trying again yet */
 783     jump stg_block_noregs;
 784   } else {
 785     /* Previous attempt is no longer valid: try again */
 786     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
 787     StgTSO_trec(CurrentTSO) = trec;
 788     StgHeader_info(frame) = stg_atomically_frame_info;
 789     R1 = StgAtomicallyFrame_code(frame);
 790     jump stg_ap_v_fast;
 791   }
 792 }
 793
 794 // STM catch frame --------------------------------------------------------------
 795
 796 #define SP_OFF 0
 797
 798 /* Catch frames are very similar to update frames, but when entering
 799  * one we just pop the frame off the stack and perform the correct
 800  * kind of return to the activation record underneath us on the stack.
 801  */
 802
 803 INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
 804 #if defined(PROFILING)
 805   W_ unused1, W_ unused2,
 806 #endif
 807   P_ unused3, P_ unused4)
 808    {
 809       W_ r, frame, trec, outer;
 810       frame = Sp;
 811       trec = StgTSO_trec(CurrentTSO);
 812       outer  = StgTRecHeader_enclosing_trec(trec);
 813       (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
 814       if (r != 0) {
 815         /* Commit succeeded */
 816         StgTSO_trec(CurrentTSO) = outer;
 817         Sp = Sp + SIZEOF_StgCatchSTMFrame;
 818         jump Sp(SP_OFF);
 819       } else {
 820         /* Commit failed */
 821         W_ new_trec;
 822         ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 823         StgTSO_trec(CurrentTSO) = new_trec;
 824         R1 = StgCatchSTMFrame_code(frame);
 825         jump stg_ap_v_fast;
 826       }
 827    }
 828
 829
 830 // Primop definition ------------------------------------------------------------
 831
 832 stg_atomicallyzh
 833 {
 834   W_ frame;
 835   W_ old_trec;
 836   W_ new_trec;
 837
 838   // stmStartTransaction may allocate
 839   MAYBE_GC (R1_PTR, stg_atomicallyzh);
 840
 841   /* Args: R1 = m :: STM a */
 842   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, stg_atomicallyzh);
 843
 844   old_trec = StgTSO_trec(CurrentTSO);
 845
 846   /* Nested transactions are not allowed; raise an exception */
 847   if (old_trec != NO_TREC) {
 848      R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
 849      jump stg_raisezh;
 850   }
 851
 852   /* Set up the atomically frame */
 853   Sp = Sp - SIZEOF_StgAtomicallyFrame;
 854   frame = Sp;
 855
 856   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
 857   StgAtomicallyFrame_code(frame) = R1;
 858   StgAtomicallyFrame_result(frame) = NO_TREC;
 859   StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
 860
 861   /* Start the memory transcation */
 862   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
 863   StgTSO_trec(CurrentTSO) = new_trec;
 864
 865   /* Apply R1 to the realworld token */
 866   jump stg_ap_v_fast;
 867 }
 868
 869
 870 stg_catchSTMzh
 871 {
 872   W_ frame;
 873
 874   /* Args: R1 :: STM a */
 875   /* Args: R2 :: Exception -> STM a */
 876   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, stg_catchSTMzh);
 877
 878   /* Set up the catch frame */
 879   Sp = Sp - SIZEOF_StgCatchSTMFrame;
 880   frame = Sp;
 881
 882   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
 883   StgCatchSTMFrame_handler(frame) = R2;
 884   StgCatchSTMFrame_code(frame) = R1;
 885
 886   /* Start a nested transaction to run the body of the try block in */
 887   W_ cur_trec;
 888   W_ new_trec;
 889   cur_trec = StgTSO_trec(CurrentTSO);
 890   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
 891   StgTSO_trec(CurrentTSO) = new_trec;
 892
 893   /* Apply R1 to the realworld token */
 894   jump stg_ap_v_fast;
 895 }
 896
 897
 898 stg_catchRetryzh
 899 {
 900   W_ frame;
 901   W_ new_trec;
 902   W_ trec;
 903
 904   // stmStartTransaction may allocate
 905   MAYBE_GC (R1_PTR & R2_PTR, stg_catchRetryzh);
 906
 907   /* Args: R1 :: STM a */
 908   /* Args: R2 :: STM a */
 909   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, stg_catchRetryzh);
 910
 911   /* Start a nested transaction within which to run the first code */
 912   trec = StgTSO_trec(CurrentTSO);
 913   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
 914   StgTSO_trec(CurrentTSO) = new_trec;
 915
 916   /* Set up the catch-retry frame */
 917   Sp = Sp - SIZEOF_StgCatchRetryFrame;
 918   frame = Sp;
 919
 920   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
 921   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
 922   StgCatchRetryFrame_first_code(frame) = R1;
 923   StgCatchRetryFrame_alt_code(frame) = R2;
 924
 925   /* Apply R1 to the realworld token */
 926   jump stg_ap_v_fast;
 927 }
 928
 929
 930 stg_retryzh
 931 {
 932   W_ frame_type;
 933   W_ frame;
 934   W_ trec;
 935   W_ outer;
 936   W_ r;
 937
 938   MAYBE_GC (NO_PTRS, stg_retryzh); // STM operations may allocate
 939
 940   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
 941 retry_pop_stack:
 942   StgTSO_sp(CurrentTSO) = Sp;
 943   (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
 944   Sp = StgTSO_sp(CurrentTSO);
 945   frame = Sp;
 946   trec = StgTSO_trec(CurrentTSO);
 947   outer  = StgTRecHeader_enclosing_trec(trec);
 948
 949   if (frame_type == CATCH_RETRY_FRAME) {
 950     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
 951     ASSERT(outer != NO_TREC);
 952     // Abort the transaction attempting the current branch
 953     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
 954     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
 955     if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
 956       // Retry in the first branch: try the alternative
 957       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 958       StgTSO_trec(CurrentTSO) = trec;
 959       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
 960       R1 = StgCatchRetryFrame_alt_code(frame);
 961       jump stg_ap_v_fast;
 962     } else {
 963       // Retry in the alternative code: propagate the retry
 964       StgTSO_trec(CurrentTSO) = outer;
 965       Sp = Sp + SIZEOF_StgCatchRetryFrame;
 966       goto retry_pop_stack;
 967     }
 968   }
 969
 970   // We've reached the ATOMICALLY_FRAME: attempt to wait
 971   ASSERT(frame_type == ATOMICALLY_FRAME);
 972   if (outer != NO_TREC) {
 973     // We called retry while checking invariants, so abort the current
 974     // invariant check (merging its TVar accesses into the parents read
 975     // set so we'll wait on them)
 976     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
 977     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
 978     trec = outer;
 979     StgTSO_trec(CurrentTSO) = trec;
 980     outer  = StgTRecHeader_enclosing_trec(trec);
 981   }
 982   ASSERT(outer == NO_TREC);
 983
 984   (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
 985   if (r != 0) {
 986     // Transaction was valid: stmWait put us on the TVars' queues, we now block
 987     StgHeader_info(frame) = stg_atomically_waiting_frame_info;
 988     Sp = frame;
 989     // Fix up the stack in the unregisterised case: the return convention is different.
 990     R3 = trec; // passing to stmWaitUnblock()
 991     jump stg_block_stmwait;
 992   } else {
 993     // Transaction was not valid: retry immediately
 994     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 995     StgTSO_trec(CurrentTSO) = trec;
 996     R1 = StgAtomicallyFrame_code(frame);
 997     Sp = frame;
 998     jump stg_ap_v_fast;
 999   }
1000 }
1001
1002
1003 stg_checkzh
1004 {
1005   W_ trec, closure;
1006
1007   /* Args: R1 = invariant closure */
1008   MAYBE_GC (R1_PTR, stg_checkzh);
1009
1010   trec = StgTSO_trec(CurrentTSO);
1011   closure = R1;
1012   foreign "C" stmAddInvariantToCheck(MyCapability() "ptr",
1013                                      trec "ptr",
1014                                      closure "ptr") [];
1015
1016   jump %ENTRY_CODE(Sp(0));
1017 }
1018
1019
1020 stg_newTVarzh
1021 {
1022   W_ tv;
1023   W_ new_value;
1024
1025   /* Args: R1 = initialisation value */
1026
1027   MAYBE_GC (R1_PTR, stg_newTVarzh);
1028   new_value = R1;
1029   ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
1030   RET_P(tv);
1031 }
1032
1033
1034 stg_readTVarzh
1035 {
1036   W_ trec;
1037   W_ tvar;
1038   W_ result;
1039
1040   /* Args: R1 = TVar closure */
1041
1042   MAYBE_GC (R1_PTR, stg_readTVarzh); // Call to stmReadTVar may allocate
1043   trec = StgTSO_trec(CurrentTSO);
1044   tvar = R1;
1045   ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
1046
1047   RET_P(result);
1048 }
1049
1050 stg_readTVarIOzh
1051 {
1052     W_ result;
1053
1054 again:
1055     result = StgTVar_current_value(R1);
1056     if (%INFO_PTR(result) == stg_TREC_HEADER_info) {
1057         goto again;
1058     }
1059     RET_P(result);
1060 }
1061
1062 stg_writeTVarzh
1063 {
1064   W_ trec;
1065   W_ tvar;
1066   W_ new_value;
1067
1068   /* Args: R1 = TVar closure */
1069   /*       R2 = New value    */
1070
1071   MAYBE_GC (R1_PTR & R2_PTR, stg_writeTVarzh); // Call to stmWriteTVar may allocate
1072   trec = StgTSO_trec(CurrentTSO);
1073   tvar = R1;
1074   new_value = R2;
1075   foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1076
1077   jump %ENTRY_CODE(Sp(0));
1078 }
1079
1080
1081 /* -----------------------------------------------------------------------------
1082  * MVar primitives
1083  *
1084  * take & putMVar work as follows.  Firstly, an important invariant:
1085  *
1086  *    If the MVar is full, then the blocking queue contains only
1087  *    threads blocked on putMVar, and if the MVar is empty then the
1088  *    blocking queue contains only threads blocked on takeMVar.
1089  *
1090  * takeMvar:
1091  *    MVar empty : then add ourselves to the blocking queue
1092  *    MVar full  : remove the value from the MVar, and
1093  *                 blocking queue empty     : return
1094  *                 blocking queue non-empty : perform the first blocked putMVar
1095  *                                            from the queue, and wake up the
1096  *                                            thread (MVar is now full again)
1097  *
1098  * putMVar is just the dual of the above algorithm.
1099  *
1100  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1101  * the stack of the thread waiting to do the putMVar.  See
1102  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1103  * the stack layout, and the PerformPut and PerformTake macros below.
1104  *
1105  * It is important that a blocked take or put is woken up with the
1106  * take/put already performed, because otherwise there would be a
1107  * small window of vulnerability where the thread could receive an
1108  * exception and never perform its take or put, and we'd end up with a
1109  * deadlock.
1110  *
1111  * -------------------------------------------------------------------------- */
1112
1113 stg_isEmptyMVarzh
1114 {
1115     /* args: R1 = MVar closure */
1116
1117     if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
1118         RET_N(1);
1119     } else {
1120         RET_N(0);
1121     }
1122 }
1123
1124 stg_newMVarzh
1125 {
1126     /* args: none */
1127     W_ mvar;
1128
1129     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, stg_newMVarzh );
1130
1131     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1132     SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
1133         // MVARs start dirty: generation 0 has no mutable list
1134     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1135     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1136     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1137     RET_P(mvar);
1138 }
1139
1140
1141 #define PerformTake(tso, value)                         \
1142     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1143     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1144
1145 #define PerformPut(tso,lval)                    \
1146     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1147     lval = W_[StgTSO_sp(tso) - WDS(1)];
1148
1149 stg_takeMVarzh
1150 {
1151     W_ mvar, val, info, tso, q;
1152
1153     /* args: R1 = MVar closure */
1154     mvar = R1;
1155
1156 #if defined(THREADED_RTS)
1157     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1158 #else
1159     info = GET_INFO(mvar);
1160 #endif
1161
1162     if (info == stg_MVAR_CLEAN_info) {
1163         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
1164     }
1165
1166     /* If the MVar is empty, put ourselves on its blocking queue,
1167      * and wait until we're woken up.
1168      */
1169     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1170
1171         // Note [mvar-heap-check] We want to do the heap check in the
1172         // branch here, to avoid the conditional in the common case.
1173         // However, we've already locked the MVar above, so we better
1174         // be careful to unlock it again if the the heap check fails.
1175         // Unfortunately we don't have an easy way to inject any code
1176         // into the heap check generated by the code generator, so we
1177         // have to do it in stg_gc_gen (see HeapStackCheck.cmm).
1178         HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR, stg_takeMVarzh);
1179
1180         q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
1181
1182         SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
1183         StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
1184         StgMVarTSOQueue_tso(q)  = CurrentTSO;
1185
1186         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1187             StgMVar_head(mvar) = q;
1188         } else {
1189             StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q;
1190             foreign "C" recordClosureMutated(MyCapability() "ptr",
1191                                              StgMVar_tail(mvar)) [];
1192         }
1193         StgTSO__link(CurrentTSO)       = q;
1194         StgTSO_block_info(CurrentTSO)  = mvar;
1195         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1196         StgMVar_tail(mvar)             = q;
1197
1198         R1 = mvar;
1199         jump stg_block_takemvar;
1200     }
1201
1202     /* we got the value... */
1203     val = StgMVar_value(mvar);
1204
1205     q = StgMVar_head(mvar);
1206 loop:
1207     if (q == stg_END_TSO_QUEUE_closure) {
1208         /* No further putMVars, MVar is now empty */
1209         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1210         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1211         RET_P(val);
1212     }
1213     if (StgHeader_info(q) == stg_IND_info ||
1214         StgHeader_info(q) == stg_MSG_NULL_info) {
1215         q = StgInd_indirectee(q);
1216         goto loop;
1217     }
1218
1219     // There are putMVar(s) waiting... wake up the first thread on the queue
1220
1221     tso = StgMVarTSOQueue_tso(q);
1222     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1223     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1224         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1225     }
1226
1227 loop2:
1228     if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
1229         tso = StgTSO__link(tso);
1230         goto loop2;
1231     }
1232
1233     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1234     ASSERT(StgTSO_block_info(tso) == mvar);
1235
1236     // actually perform the putMVar for the thread that we just woke up
1237     PerformPut(tso,StgMVar_value(mvar));
1238
1239     // indicate that the MVar operation has now completed.
1240     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1241
1242     // no need to mark the TSO dirty, we have only written END_TSO_QUEUE.
1243
1244     foreign "C" tryWakeupThread_(MyCapability() "ptr", tso) [];
1245
1246     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1247     RET_P(val);
1248 }
1249
1250
1251 stg_tryTakeMVarzh
1252 {
1253     W_ mvar, val, info, tso, q;
1254
1255     /* args: R1 = MVar closure */
1256     mvar = R1;
1257
1258 #if defined(THREADED_RTS)
1259     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1260 #else
1261     info = GET_INFO(mvar);
1262 #endif
1263
1264     /* If the MVar is empty, put ourselves on its blocking queue,
1265      * and wait until we're woken up.
1266      */
1267     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1268 #if defined(THREADED_RTS)
1269         unlockClosure(mvar, info);
1270 #endif
1271         /* HACK: we need a pointer to pass back,
1272          * so we abuse NO_FINALIZER_closure
1273          */
1274         RET_NP(0, stg_NO_FINALIZER_closure);
1275     }
1276
1277     if (info == stg_MVAR_CLEAN_info) {
1278         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
1279     }
1280
1281     /* we got the value... */
1282     val = StgMVar_value(mvar);
1283
1284     q = StgMVar_head(mvar);
1285 loop:
1286     if (q == stg_END_TSO_QUEUE_closure) {
1287         /* No further putMVars, MVar is now empty */
1288         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1289         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1290         RET_NP(1, val);
1291     }
1292     if (StgHeader_info(q) == stg_IND_info ||
1293         StgHeader_info(q) == stg_MSG_NULL_info) {
1294         q = StgInd_indirectee(q);
1295         goto loop;
1296     }
1297
1298     // There are putMVar(s) waiting... wake up the first thread on the queue
1299
1300     tso = StgMVarTSOQueue_tso(q);
1301     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1302     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1303         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1304     }
1305
1306 loop2:
1307     if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
1308         tso = StgTSO__link(tso);
1309         goto loop2;
1310     }
1311
1312     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1313     ASSERT(StgTSO_block_info(tso) == mvar);
1314
1315     // actually perform the putMVar for the thread that we just woke up
1316     PerformPut(tso,StgMVar_value(mvar));
1317
1318     // indicate that the MVar operation has now completed.
1319     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1320
1321     // no need to mark the TSO dirty, we have only written END_TSO_QUEUE.
1322
1323     foreign "C" tryWakeupThread_(MyCapability() "ptr", tso) [];
1324
1325     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1326     RET_P(val);
1327 }
1328
1329
1330 stg_putMVarzh
1331 {
1332     W_ mvar, val, info, tso, q;
1333
1334     /* args: R1 = MVar, R2 = value */
1335     mvar = R1;
1336     val  = R2;
1337
1338 #if defined(THREADED_RTS)
1339     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1340 #else
1341     info = GET_INFO(mvar);
1342 #endif
1343
1344     if (info == stg_MVAR_CLEAN_info) {
1345         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1346     }
1347
1348     if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
1349
1350         // see Note [mvar-heap-check] above
1351         HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR & R2_PTR, stg_putMVarzh);
1352
1353         q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
1354
1355         SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
1356         StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
1357         StgMVarTSOQueue_tso(q)  = CurrentTSO;
1358
1359         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1360             StgMVar_head(mvar) = q;
1361         } else {
1362             StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q;
1363             foreign "C" recordClosureMutated(MyCapability() "ptr",
1364                                              StgMVar_tail(mvar)) [];
1365         }
1366         StgTSO__link(CurrentTSO)       = q;
1367         StgTSO_block_info(CurrentTSO)  = mvar;
1368         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1369         StgMVar_tail(mvar)             = q;
1370
1371         R1 = mvar;
1372         R2 = val;
1373         jump stg_block_putmvar;
1374     }
1375
1376     q = StgMVar_head(mvar);
1377 loop:
1378     if (q == stg_END_TSO_QUEUE_closure) {
1379         /* No further takes, the MVar is now full. */
1380         StgMVar_value(mvar) = val;
1381         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1382         jump %ENTRY_CODE(Sp(0));
1383     }
1384     if (StgHeader_info(q) == stg_IND_info ||
1385         StgHeader_info(q) == stg_MSG_NULL_info) {
1386         q = StgInd_indirectee(q);
1387         goto loop;
1388     }
1389
1390     // There are takeMVar(s) waiting: wake up the first one
1391
1392     tso = StgMVarTSOQueue_tso(q);
1393     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1394     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1395         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1396     }
1397
1398 loop2:
1399     if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
1400         tso = StgTSO__link(tso);
1401         goto loop2;
1402     }
1403
1404     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1405     ASSERT(StgTSO_block_info(tso) == mvar);
1406
1407     // actually perform the takeMVar
1408     PerformTake(tso, val);
1409
1410     // indicate that the MVar operation has now completed.
1411     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1412
1413     if (TO_W_(StgTSO_dirty(tso)) == 0) {
1414         foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
1415     }
1416
1417     foreign "C" tryWakeupThread_(MyCapability() "ptr", tso) [];
1418
1419     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1420     jump %ENTRY_CODE(Sp(0));
1421 }
1422
1423
1424 stg_tryPutMVarzh
1425 {
1426     W_ mvar, val, info, tso, q;
1427
1428     /* args: R1 = MVar, R2 = value */
1429     mvar = R1;
1430     val  = R2;
1431
1432 #if defined(THREADED_RTS)
1433     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1434 #else
1435     info = GET_INFO(mvar);
1436 #endif
1437
1438     if (info == stg_MVAR_CLEAN_info) {
1439         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1440     }
1441
1442     if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
1443 #if defined(THREADED_RTS)
1444         unlockClosure(mvar, info);
1445 #endif
1446         RET_N(0);
1447     }
1448
1449     q = StgMVar_head(mvar);
1450 loop:
1451     if (q == stg_END_TSO_QUEUE_closure) {
1452         /* No further takes, the MVar is now full. */
1453         StgMVar_value(mvar) = val;
1454         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1455         jump %ENTRY_CODE(Sp(0));
1456     }
1457     if (StgHeader_info(q) == stg_IND_info ||
1458         StgHeader_info(q) == stg_MSG_NULL_info) {
1459         q = StgInd_indirectee(q);
1460         goto loop;
1461     }
1462
1463     // There are takeMVar(s) waiting: wake up the first one
1464
1465     tso = StgMVarTSOQueue_tso(q);
1466     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1467     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1468         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1469     }
1470
1471 loop2:
1472     if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
1473         tso = StgTSO__link(tso);
1474         goto loop2;
1475     }
1476
1477     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1478     ASSERT(StgTSO_block_info(tso) == mvar);
1479
1480     // actually perform the takeMVar
1481     PerformTake(tso, val);
1482
1483     // indicate that the MVar operation has now completed.
1484     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1485
1486     if (TO_W_(StgTSO_dirty(tso)) == 0) {
1487         foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
1488     }
1489
1490     foreign "C" tryWakeupThread_(MyCapability() "ptr", tso) [];
1491
1492     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1493     jump %ENTRY_CODE(Sp(0));
1494 }
1495
1496
1497 /* -----------------------------------------------------------------------------
1498    Stable pointer primitives
1499    -------------------------------------------------------------------------  */
1500
1501 stg_makeStableNamezh
1502 {
1503     W_ index, sn_obj;
1504
1505     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, stg_makeStableNamezh );
1506
1507     (index) = foreign "C" lookupStableName(R1 "ptr") [];
1508
1509     /* Is there already a StableName for this heap object?
1510      *  stable_ptr_table is a pointer to an array of snEntry structs.
1511      */
1512     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1513         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1514         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1515         StgStableName_sn(sn_obj) = index;
1516         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1517     } else {
1518         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1519     }
1520
1521     RET_P(sn_obj);
1522 }
1523
1524
1525 stg_makeStablePtrzh
1526 {
1527     /* Args: R1 = a */
1528     W_ sp;
1529     MAYBE_GC(R1_PTR, stg_makeStablePtrzh);
1530     ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
1531     RET_N(sp);
1532 }
1533
1534 stg_deRefStablePtrzh
1535 {
1536     /* Args: R1 = the stable ptr */
1537     W_ r, sp;
1538     sp = R1;
1539     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1540     RET_P(r);
1541 }
1542
1543 /* -----------------------------------------------------------------------------
1544    Bytecode object primitives
1545    -------------------------------------------------------------------------  */
1546
1547 stg_newBCOzh
1548 {
1549     /* R1 = instrs
1550        R2 = literals
1551        R3 = ptrs
1552        R4 = arity
1553        R5 = bitmap array
1554     */
1555     W_ bco, bitmap_arr, bytes, words;
1556
1557     bitmap_arr = R5;
1558
1559     words = BYTES_TO_WDS(SIZEOF_StgBCO) + BYTE_ARR_WDS(bitmap_arr);
1560     bytes = WDS(words);
1561
1562     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, stg_newBCOzh );
1563
1564     bco = Hp - bytes + WDS(1);
1565     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1566
1567     StgBCO_instrs(bco)     = R1;
1568     StgBCO_literals(bco)   = R2;
1569     StgBCO_ptrs(bco)       = R3;
1570     StgBCO_arity(bco)      = HALF_W_(R4);
1571     StgBCO_size(bco)       = HALF_W_(words);
1572
1573     // Copy the arity/bitmap info into the BCO
1574     W_ i;
1575     i = 0;
1576 for:
1577     if (i < BYTE_ARR_WDS(bitmap_arr)) {
1578         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1579         i = i + 1;
1580         goto for;
1581     }
1582
1583     RET_P(bco);
1584 }
1585
1586
1587 stg_mkApUpd0zh
1588 {
1589     // R1 = the BCO# for the AP
1590     //
1591     W_ ap;
1592
1593     // This function is *only* used to wrap zero-arity BCOs in an
1594     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1595     // saturated and always points directly to a FUN or BCO.
1596     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1597            StgBCO_arity(R1) == HALF_W_(0));
1598
1599     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, stg_mkApUpd0zh);
1600     TICK_ALLOC_UP_THK(0, 0);
1601     CCCS_ALLOC(SIZEOF_StgAP);
1602
1603     ap = Hp - SIZEOF_StgAP + WDS(1);
1604     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1605
1606     StgAP_n_args(ap) = HALF_W_(0);
1607     StgAP_fun(ap) = R1;
1608
1609     RET_P(ap);
1610 }
1611
1612 stg_unpackClosurezh
1613 {
1614 /* args: R1 = closure to analyze */
1615 // TODO: Consider the absence of ptrs or nonptrs as a special case ?
1616
1617     W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
1618     info  = %GET_STD_INFO(UNTAG(R1));
1619
1620     // Some closures have non-standard layout, so we omit those here.
1621     W_ type;
1622     type = TO_W_(%INFO_TYPE(info));
1623     switch [0 .. N_CLOSURE_TYPES] type {
1624     case THUNK_SELECTOR : {
1625         ptrs = 1;
1626         nptrs = 0;
1627         goto out;
1628     }
1629     case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1,
1630          THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
1631         ptrs = 0;
1632         nptrs = 0;
1633         goto out;
1634     }
1635     default: {
1636         ptrs  = TO_W_(%INFO_PTRS(info));
1637         nptrs = TO_W_(%INFO_NPTRS(info));
1638         goto out;
1639     }}
1640 out:
1641
1642     W_ ptrs_arr_sz, ptrs_arr_cards, nptrs_arr_sz;
1643     nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
1644     ptrs_arr_cards = mutArrPtrsCardWords(ptrs);
1645     ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs) + WDS(ptrs_arr_cards);
1646
1647     ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, stg_unpackClosurezh);
1648
1649     W_ clos;
1650     clos = UNTAG(R1);
1651
1652     ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
1653     nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
1654
1655     SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
1656     StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
1657     StgMutArrPtrs_size(ptrs_arr) = ptrs + ptrs_arr_cards;
1658
1659     p = 0;
1660 for:
1661     if(p < ptrs) {
1662          W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
1663          p = p + 1;
1664          goto for;
1665     }
1666     /* We can leave the card table uninitialised, since the array is
1667        allocated in the nursery.  The GC will fill it in if/when the array
1668        is promoted. */
1669
1670     SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
1671     StgArrWords_bytes(nptrs_arr) = WDS(nptrs);
1672     p = 0;
1673 for2:
1674     if(p < nptrs) {
1675          W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
1676          p = p + 1;
1677          goto for2;
1678     }
1679     RET_NPP(info, ptrs_arr, nptrs_arr);
1680 }
1681
1682 /* -----------------------------------------------------------------------------
1683    Thread I/O blocking primitives
1684    -------------------------------------------------------------------------- */
1685
1686 /* Add a thread to the end of the blocked queue. (C-- version of the C
1687  * macro in Schedule.h).
1688  */
1689 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1690     ASSERT(StgTSO__link(tso) == END_TSO_QUEUE);         \
1691     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1692       W_[blocked_queue_hd] = tso;                       \
1693     } else {                                            \
1694       foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \
1695     }                                                   \
1696     W_[blocked_queue_tl] = tso;
1697
1698 stg_waitReadzh
1699 {
1700     /* args: R1 */
1701 #ifdef THREADED_RTS
1702     foreign "C" barf("waitRead# on threaded RTS") never returns;
1703 #else
1704
1705     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1706     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1707     StgTSO_block_info(CurrentTSO) = R1;
1708     // No locking - we're not going to use this interface in the
1709     // threaded RTS anyway.
1710     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1711     jump stg_block_noregs;
1712 #endif
1713 }
1714
1715 stg_waitWritezh
1716 {
1717     /* args: R1 */
1718 #ifdef THREADED_RTS
1719     foreign "C" barf("waitWrite# on threaded RTS") never returns;
1720 #else
1721
1722     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1723     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1724     StgTSO_block_info(CurrentTSO) = R1;
1725     // No locking - we're not going to use this interface in the
1726     // threaded RTS anyway.
1727     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1728     jump stg_block_noregs;
1729 #endif
1730 }
1731
1732
1733 STRING(stg_delayzh_malloc_str, "stg_delayzh")
1734 stg_delayzh
1735 {
1736 #ifdef mingw32_HOST_OS
1737     W_ ares;
1738     CInt reqID;
1739 #else
1740     W_ t, prev, target;
1741 #endif
1742
1743 #ifdef THREADED_RTS
1744     foreign "C" barf("delay# on threaded RTS") never returns;
1745 #else
1746
1747     /* args: R1 (microsecond delay amount) */
1748     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1749     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
1750
1751 #ifdef mingw32_HOST_OS
1752
1753     /* could probably allocate this on the heap instead */
1754     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1755                                             stg_delayzh_malloc_str);
1756     (reqID) = foreign "C" addDelayRequest(R1);
1757     StgAsyncIOResult_reqID(ares)   = reqID;
1758     StgAsyncIOResult_len(ares)     = 0;
1759     StgAsyncIOResult_errCode(ares) = 0;
1760     StgTSO_block_info(CurrentTSO)  = ares;
1761
1762     /* Having all async-blocked threads reside on the blocked_queue
1763      * simplifies matters, so change the status to OnDoProc put the
1764      * delayed thread on the blocked_queue.
1765      */
1766     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1767     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1768     jump stg_block_async_void;
1769
1770 #else
1771
1772     W_ time;
1773     W_ divisor;
1774     (time) = foreign "C" getourtimeofday() [R1];
1775     divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
1776     if (divisor == 0) {
1777         divisor = 50;
1778     }
1779     divisor = divisor * 1000;
1780     target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
1781            + time + 1; /* Add 1 as getourtimeofday rounds down */
1782     StgTSO_block_info(CurrentTSO) = target;
1783
1784     /* Insert the new thread in the sleeping queue. */
1785     prev = NULL;
1786     t = W_[sleeping_queue];
1787 while:
1788     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
1789         prev = t;
1790         t = StgTSO__link(t);
1791         goto while;
1792     }
1793
1794     StgTSO__link(CurrentTSO) = t;
1795     if (prev == NULL) {
1796         W_[sleeping_queue] = CurrentTSO;
1797     } else {
1798         foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) [];
1799     }
1800     jump stg_block_noregs;
1801 #endif
1802 #endif /* !THREADED_RTS */
1803 }
1804
1805
1806 #ifdef mingw32_HOST_OS
1807 STRING(stg_asyncReadzh_malloc_str, "stg_asyncReadzh")
1808 stg_asyncReadzh
1809 {
1810     W_ ares;
1811     CInt reqID;
1812
1813 #ifdef THREADED_RTS
1814     foreign "C" barf("asyncRead# on threaded RTS") never returns;
1815 #else
1816
1817     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1818     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1819     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1820
1821     /* could probably allocate this on the heap instead */
1822     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1823                                             stg_asyncReadzh_malloc_str)
1824                         [R1,R2,R3,R4];
1825     (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
1826     StgAsyncIOResult_reqID(ares)   = reqID;
1827     StgAsyncIOResult_len(ares)     = 0;
1828     StgAsyncIOResult_errCode(ares) = 0;
1829     StgTSO_block_info(CurrentTSO)  = ares;
1830     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1831     jump stg_block_async;
1832 #endif
1833 }
1834
1835 STRING(stg_asyncWritezh_malloc_str, "stg_asyncWritezh")
1836 stg_asyncWritezh
1837 {
1838     W_ ares;
1839     CInt reqID;
1840
1841 #ifdef THREADED_RTS
1842     foreign "C" barf("asyncWrite# on threaded RTS") never returns;
1843 #else
1844
1845     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1846     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1847     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1848
1849     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1850                                             stg_asyncWritezh_malloc_str)
1851                         [R1,R2,R3,R4];
1852     (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
1853
1854     StgAsyncIOResult_reqID(ares)   = reqID;
1855     StgAsyncIOResult_len(ares)     = 0;
1856     StgAsyncIOResult_errCode(ares) = 0;
1857     StgTSO_block_info(CurrentTSO)  = ares;
1858     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1859     jump stg_block_async;
1860 #endif
1861 }
1862
1863 STRING(stg_asyncDoProczh_malloc_str, "stg_asyncDoProczh")
1864 stg_asyncDoProczh
1865 {
1866     W_ ares;
1867     CInt reqID;
1868
1869 #ifdef THREADED_RTS
1870     foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
1871 #else
1872
1873     /* args: R1 = proc, R2 = param */
1874     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1875     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1876
1877     /* could probably allocate this on the heap instead */
1878     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1879                                             stg_asyncDoProczh_malloc_str)
1880                                 [R1,R2];
1881     (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
1882     StgAsyncIOResult_reqID(ares)   = reqID;
1883     StgAsyncIOResult_len(ares)     = 0;
1884     StgAsyncIOResult_errCode(ares) = 0;
1885     StgTSO_block_info(CurrentTSO) = ares;
1886     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1887     jump stg_block_async;
1888 #endif
1889 }
1890 #endif
1891
1892 /* -----------------------------------------------------------------------------
1893  * noDuplicate#
1894  *
1895  * noDuplicate# tries to ensure that none of the thunks under
1896  * evaluation by the current thread are also under evaluation by
1897  * another thread.  It relies on *both* threads doing noDuplicate#;
1898  * the second one will get blocked if they are duplicating some work.
1899  *
1900  * The idea is that noDuplicate# is used within unsafePerformIO to
1901  * ensure that the IO operation is performed at most once.
1902  * noDuplicate# calls threadPaused which acquires an exclusive lock on
1903  * all the thunks currently under evaluation by the current thread.
1904  *
1905  * Consider the following scenario.  There is a thunk A, whose
1906  * evaluation requires evaluating thunk B, where thunk B is an
1907  * unsafePerformIO.  Two threads, 1 and 2, bother enter A.  Thread 2
1908  * is pre-empted before it enters B, and claims A by blackholing it
1909  * (in threadPaused).  Thread 1 now enters B, and calls noDuplicate#.
1910  *
1911  *      thread 1                      thread 2
1912  *   +-----------+                 +---------------+
1913  *   |    -------+-----> A <-------+-------        |
1914  *   |  update   |   BLACKHOLE     | marked_update |
1915  *   +-----------+                 +---------------+
1916  *   |           |                 |               |
1917  *        ...                             ...
1918  *   |           |                 +---------------+
1919  *   +-----------+
1920  *   |     ------+-----> B
1921  *   |  update   |   BLACKHOLE
1922  *   +-----------+
1923  *
1924  * At this point: A is a blackhole, owned by thread 2.  noDuplicate#
1925  * calls threadPaused, which walks up the stack and
1926  *  - claims B on behalf of thread 1
1927  *  - then it reaches the update frame for A, which it sees is already
1928  *    a BLACKHOLE and is therefore owned by another thread.  Since
1929  *    thread 1 is duplicating work, the computation up to the update
1930  *    frame for A is suspended, including thunk B.
1931  *  - thunk B, which is an unsafePerformIO, has now been reverted to
1932  *    an AP_STACK which could be duplicated - BAD!
1933  *  - The solution is as follows: before calling threadPaused, we
1934  *    leave a frame on the stack (stg_noDuplicate_info) that will call
1935  *    noDuplicate# again if the current computation is suspended and
1936  *    restarted.
1937  *
1938  * See the test program in concurrent/prog003 for a way to demonstrate
1939  * this.  It needs to be run with +RTS -N3 or greater, and the bug
1940  * only manifests occasionally (once very 10 runs or so).
1941  * -------------------------------------------------------------------------- */
1942
1943 INFO_TABLE_RET(stg_noDuplicate, RET_SMALL)
1944 {
1945     Sp_adj(1);
1946     jump stg_noDuplicatezh;
1947 }
1948
1949 stg_noDuplicatezh
1950 {
1951     STK_CHK_GEN( WDS(1), NO_PTRS, stg_noDuplicatezh );
1952     // leave noDuplicate frame in case the current
1953     // computation is suspended and restarted (see above).
1954     Sp_adj(-1);
1955     Sp(0) = stg_noDuplicate_info;
1956
1957     SAVE_THREAD_STATE();
1958     ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
1959     foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
1960
1961     if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
1962         jump stg_threadFinished;
1963     } else {
1964         LOAD_THREAD_STATE();
1965         ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
1966         // remove the stg_noDuplicate frame if it is still there.
1967         if (Sp(0) == stg_noDuplicate_info) {
1968             Sp_adj(1);
1969         }
1970         jump %ENTRY_CODE(Sp(0));
1971     }
1972 }
1973
1974 /* -----------------------------------------------------------------------------
1975    Misc. primitives
1976    -------------------------------------------------------------------------- */
1977
1978 stg_getApStackValzh
1979 {
1980    W_ ap_stack, offset, val, ok;
1981
1982    /* args: R1 = AP_STACK, R2 = offset */
1983    ap_stack = R1;
1984    offset   = R2;
1985
1986    if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
1987         ok = 1;
1988         val = StgAP_STACK_payload(ap_stack,offset);
1989    } else {
1990         ok = 0;
1991         val = R1;
1992    }
1993    RET_NP(ok,val);
1994 }
1995
1996 // Write the cost center stack of the first argument on stderr; return
1997 // the second.  Possibly only makes sense for already evaluated
1998 // things?
1999 stg_traceCcszh
2000 {
2001     W_ ccs;
2002
2003 #ifdef PROFILING
2004     ccs = StgHeader_ccs(UNTAG(R1));
2005     foreign "C" fprintCCS_stderr(ccs "ptr") [R2];
2006 #endif
2007
2008     R1 = R2;
2009     ENTER();
2010 }
2011
2012 stg_getSparkzh
2013 {
2014    W_ spark;
2015
2016 #ifndef THREADED_RTS
2017    RET_NP(0,ghczmprim_GHCziBool_False_closure);
2018 #else
2019    (spark) = foreign "C" findSpark(MyCapability());
2020    if (spark != 0) {
2021       RET_NP(1,spark);
2022    } else {
2023       RET_NP(0,ghczmprim_GHCziBool_False_closure);
2024    }
2025 #endif
2026 }
2027
2028 stg_numSparkszh
2029 {
2030   W_ n;
2031 #ifdef THREADED_RTS
2032   (n) = foreign "C" dequeElements(Capability_sparks(MyCapability()));
2033 #else
2034   n = 0;
2035 #endif
2036   RET_N(n);
2037 }
2038
2039 stg_traceEventzh
2040 {
2041    W_ msg;
2042    msg = R1;
2043
2044 #if defined(TRACING) || defined(DEBUG)
2045
2046    foreign "C" traceUserMsg(MyCapability() "ptr", msg "ptr") [];
2047
2048 #elif defined(DTRACE)
2049
2050    W_ enabled;
2051
2052    // We should go through the macro HASKELLEVENT_USER_MSG_ENABLED from
2053    // RtsProbes.h, but that header file includes unistd.h, which doesn't
2054    // work in Cmm
2055    (enabled) = foreign "C" __dtrace_isenabled$HaskellEvent$user__msg$v1() [];
2056    if (enabled != 0) {
2057      foreign "C" dtraceUserMsgWrapper(MyCapability() "ptr", msg "ptr") [];
2058    }
2059
2060 #endif
2061    jump %ENTRY_CODE(Sp(0));
2062 }