rts/PrimOps.cmm

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2011
   4  *
   5  * Out-of-line primitive operations
   6  *
   7  * This file contains the implementations of all the primitive
   8  * operations ("primops") which are not expanded inline.  See
   9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
  10  * this file contains code for most of those with the attribute
  11  * out_of_line=True.
  12  *
  13  * Entry convention: the entry convention for a primop is that all the
  14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
  15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
  16  *
  17  * Return convention: results from a primop are generally returned
  18  * using the ordinary unboxed tuple return convention.  The C-- parser
  19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
  20  * based on the prevailing return convention.
  21  *
  22  * This file is written in a subset of C--, extended with various
  23  * features specific to GHC.  It is compiled by GHC directly.  For the
  24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
  25  *
  26  * ---------------------------------------------------------------------------*/
  27
  28 #include "Cmm.h"
  29
  30 #ifdef __PIC__
  31 import pthread_mutex_lock;
  32 import pthread_mutex_unlock;
  33 #endif
  34 import base_ControlziExceptionziBase_nestedAtomically_closure;
  35 import EnterCriticalSection;
  36 import LeaveCriticalSection;
  37 import ghczmprim_GHCziTypes_False_closure;
  38 #if !defined(mingw32_HOST_OS)
  39 import sm_mutex;
  40 #endif
  41
  42 /*-----------------------------------------------------------------------------
  43   Array Primitives
  44
  45   Basically just new*Array - the others are all inline macros.
  46
  47   The size arg is always passed in R1, and the result returned in R1.
  48
  49   The slow entry point is for returning from a heap check, the saved
  50   size argument must be re-loaded from the stack.
  51   -------------------------------------------------------------------------- */
  52
  53 /* for objects that are *less* than the size of a word, make sure we
  54  * round up to the nearest word for the size of the array.
  55  */
  56
  57 stg_newByteArrayzh
  58 {
  59     W_ words, payload_words, n, p;
  60     MAYBE_GC(NO_PTRS,stg_newByteArrayzh);
  61     n = R1;
  62     payload_words = ROUNDUP_BYTES_TO_WDS(n);
  63     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
  64     ("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) [];
  65     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
  66     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
  67     StgArrWords_bytes(p) = n;
  68     RET_P(p);
  69 }
  70
  71 #define BA_ALIGN 16
  72 #define BA_MASK  (BA_ALIGN-1)
  73
  74 stg_newPinnedByteArrayzh
  75 {
  76     W_ words, n, bytes, payload_words, p;
  77
  78     MAYBE_GC(NO_PTRS,stg_newPinnedByteArrayzh);
  79     n = R1;
  80     bytes = n;
  81     /* payload_words is what we will tell the profiler we had to allocate */
  82     payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
  83     /* When we actually allocate memory, we need to allow space for the
  84        header: */
  85     bytes = bytes + SIZEOF_StgArrWords;
  86     /* And we want to align to BA_ALIGN bytes, so we need to allow space
  87        to shift up to BA_ALIGN - 1 bytes: */
  88     bytes = bytes + BA_ALIGN - 1;
  89     /* Now we convert to a number of words: */
  90     words = ROUNDUP_BYTES_TO_WDS(bytes);
  91
  92     ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
  93     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
  94
  95     /* Now we need to move p forward so that the payload is aligned
  96        to BA_ALIGN bytes: */
  97     p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);
  98
  99     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
 100     StgArrWords_bytes(p) = n;
 101     RET_P(p);
 102 }
 103
 104 stg_newAlignedPinnedByteArrayzh
 105 {
 106     W_ words, n, bytes, payload_words, p, alignment;
 107
 108     MAYBE_GC(NO_PTRS,stg_newAlignedPinnedByteArrayzh);
 109     n = R1;
 110     alignment = R2;
 111
 112     /* we always supply at least word-aligned memory, so there's no
 113        need to allow extra space for alignment if the requirement is less
 114        than a word.  This also prevents mischief with alignment == 0. */
 115     if (alignment <= SIZEOF_W) { alignment = 1; }
 116
 117     bytes = n;
 118
 119     /* payload_words is what we will tell the profiler we had to allocate */
 120     payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
 121
 122     /* When we actually allocate memory, we need to allow space for the
 123        header: */
 124     bytes = bytes + SIZEOF_StgArrWords;
 125     /* And we want to align to <alignment> bytes, so we need to allow space
 126        to shift up to <alignment - 1> bytes: */
 127     bytes = bytes + alignment - 1;
 128     /* Now we convert to a number of words: */
 129     words = ROUNDUP_BYTES_TO_WDS(bytes);
 130
 131     ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
 132     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
 133
 134     /* Now we need to move p forward so that the payload is aligned
 135        to <alignment> bytes. Note that we are assuming that
 136        <alignment> is a power of 2, which is technically not guaranteed */
 137     p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));
 138
 139     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
 140     StgArrWords_bytes(p) = n;
 141     RET_P(p);
 142 }
 143
 144 stg_newArrayzh
 145 {
 146     W_ words, n, init, arr, p, size;
 147     /* Args: R1 = words, R2 = initialisation value */
 148
 149     n = R1;
 150     MAYBE_GC(R2_PTR,stg_newArrayzh);
 151
 152     // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
 153     // in the array, making sure we round up, and then rounding up to a whole
 154     // number of words.
 155     size = n + mutArrPtrsCardWords(n);
 156     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
 157     ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2];
 158     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
 159
 160     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
 161     StgMutArrPtrs_ptrs(arr) = n;
 162     StgMutArrPtrs_size(arr) = size;
 163
 164     // Initialise all elements of the the array with the value in R2
 165     init = R2;
 166     p = arr + SIZEOF_StgMutArrPtrs;
 167   for:
 168     if (p < arr + WDS(words)) {
 169         W_[p] = init;
 170         p = p + WDS(1);
 171         goto for;
 172     }
 173     // Initialise the mark bits with 0
 174   for2:
 175     if (p < arr + WDS(size)) {
 176         W_[p] = 0;
 177         p = p + WDS(1);
 178         goto for2;
 179     }
 180
 181     RET_P(arr);
 182 }
 183
 184 stg_unsafeThawArrayzh
 185 {
 186   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
 187   //
 188   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
 189   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
 190   // it on the mutable list for the GC to remove (removing something from
 191   // the mutable list is not easy).
 192   //
 193   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
 194   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
 195   // to indicate that it is still on the mutable list.
 196   //
 197   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
 198   // either it is on a mut_list, or it isn't.  We adopt the convention that
 199   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
 200   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
 201   // we put it on the mutable list more than once, but it would get scavenged
 202   // multiple times during GC, which would be unnecessarily slow.
 203   //
 204   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
 205         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
 206         recordMutable(R1, R1);
 207         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
 208         RET_P(R1);
 209   } else {
 210         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
 211         RET_P(R1);
 212   }
 213 }
 214
 215 /* -----------------------------------------------------------------------------
 216    MutVar primitives
 217    -------------------------------------------------------------------------- */
 218
 219 stg_newMutVarzh
 220 {
 221     W_ mv;
 222     /* Args: R1 = initialisation value */
 223
 224     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, stg_newMutVarzh);
 225
 226     mv = Hp - SIZEOF_StgMutVar + WDS(1);
 227     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
 228     StgMutVar_var(mv) = R1;
 229
 230     RET_P(mv);
 231 }
 232
 233 stg_casMutVarzh
 234  /* MutVar# s a -> a -> a -> State# s -> (# State#, Int#, a #) */
 235 {
 236     W_ mv, old, new, h;
 237
 238     mv  = R1;
 239     old = R2;
 240     new = R3;
 241
 242     (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var,
 243                           old, new) [];
 244     if (h != old) {
 245         RET_NP(1,h);
 246     } else {
 247         RET_NP(0,h);
 248     }
 249 }
 250
 251
 252 stg_atomicModifyMutVarzh
 253 {
 254     W_ mv, f, z, x, y, r, h;
 255     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
 256
 257     /* If x is the current contents of the MutVar#, then
 258        We want to make the new contents point to
 259
 260          (sel_0 (f x))
 261
 262        and the return value is
 263
 264          (sel_1 (f x))
 265
 266         obviously we can share (f x).
 267
 268          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
 269          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
 270          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
 271     */
 272
 273 #if MIN_UPD_SIZE > 1
 274 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
 275 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
 276 #else
 277 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
 278 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
 279 #endif
 280
 281 #if MIN_UPD_SIZE > 2
 282 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
 283 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
 284 #else
 285 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
 286 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
 287 #endif
 288
 289 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
 290
 291    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, stg_atomicModifyMutVarzh);
 292
 293    mv = R1;
 294    f = R2;
 295
 296    TICK_ALLOC_THUNK_2();
 297    CCCS_ALLOC(THUNK_2_SIZE);
 298    z = Hp - THUNK_2_SIZE + WDS(1);
 299    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
 300    LDV_RECORD_CREATE(z);
 301    StgThunk_payload(z,0) = f;
 302
 303    TICK_ALLOC_THUNK_1();
 304    CCCS_ALLOC(THUNK_1_SIZE);
 305    y = z - THUNK_1_SIZE;
 306    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
 307    LDV_RECORD_CREATE(y);
 308    StgThunk_payload(y,0) = z;
 309
 310    TICK_ALLOC_THUNK_1();
 311    CCCS_ALLOC(THUNK_1_SIZE);
 312    r = y - THUNK_1_SIZE;
 313    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
 314    LDV_RECORD_CREATE(r);
 315    StgThunk_payload(r,0) = z;
 316
 317  retry:
 318    x = StgMutVar_var(mv);
 319    StgThunk_payload(z,1) = x;
 320 #ifdef THREADED_RTS
 321    (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) [];
 322    if (h != x) { goto retry; }
 323 #else
 324    StgMutVar_var(mv) = y;
 325 #endif
 326
 327    if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
 328      foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
 329    }
 330
 331    RET_P(r);
 332 }
 333
 334 /* -----------------------------------------------------------------------------
 335    Weak Pointer Primitives
 336    -------------------------------------------------------------------------- */
 337
 338 STRING(stg_weak_msg,"New weak pointer at %p\n")
 339
 340 stg_mkWeakzh
 341 {
 342   /* R1 = key
 343      R2 = value
 344      R3 = finalizer (or NULL)
 345   */
 346   W_ w;
 347
 348   if (R3 == NULL) {
 349     R3 = stg_NO_FINALIZER_closure;
 350   }
 351
 352   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, stg_mkWeakzh );
 353
 354   w = Hp - SIZEOF_StgWeak + WDS(1);
 355   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
 356
 357   // We don't care about cfinalizer here.
 358   // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
 359   // something else?
 360
 361   StgWeak_key(w)        = R1;
 362   StgWeak_value(w)      = R2;
 363   StgWeak_finalizer(w)  = R3;
 364   StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure;
 365
 366   ACQUIRE_LOCK(sm_mutex);
 367   StgWeak_link(w)       = W_[weak_ptr_list];
 368   W_[weak_ptr_list]     = w;
 369   RELEASE_LOCK(sm_mutex);
 370
 371   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
 372
 373   RET_P(w);
 374 }
 375
 376 stg_mkWeakForeignEnvzh
 377 {
 378   /* R1 = key
 379      R2 = value
 380      R3 = finalizer
 381      R4 = pointer
 382      R5 = has environment (0 or 1)
 383      R6 = environment
 384   */
 385   W_ w, payload_words, words, p;
 386
 387   W_ key, val, fptr, ptr, flag, eptr;
 388
 389   key  = R1;
 390   val  = R2;
 391   fptr = R3;
 392   ptr  = R4;
 393   flag = R5;
 394   eptr = R6;
 395
 396   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, stg_mkWeakForeignEnvzh );
 397
 398   w = Hp - SIZEOF_StgWeak + WDS(1);
 399   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
 400
 401   payload_words = 4;
 402   words         = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
 403   ("ptr" p)     = foreign "C" allocate(MyCapability() "ptr", words) [];
 404
 405   TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
 406   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
 407
 408   StgArrWords_bytes(p)     = WDS(payload_words);
 409   StgArrWords_payload(p,0) = fptr;
 410   StgArrWords_payload(p,1) = ptr;
 411   StgArrWords_payload(p,2) = eptr;
 412   StgArrWords_payload(p,3) = flag;
 413
 414   // We don't care about the value here.
 415   // Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else?
 416
 417   StgWeak_key(w)        = key;
 418   StgWeak_value(w)      = val;
 419   StgWeak_finalizer(w)  = stg_NO_FINALIZER_closure;
 420   StgWeak_cfinalizer(w) = p;
 421
 422   ACQUIRE_LOCK(sm_mutex);
 423   StgWeak_link(w)   = W_[weak_ptr_list];
 424   W_[weak_ptr_list] = w;
 425   RELEASE_LOCK(sm_mutex);
 426
 427   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
 428
 429   RET_P(w);
 430 }
 431
 432 stg_finalizzeWeakzh
 433 {
 434   /* R1 = weak ptr
 435    */
 436   W_ w, f, arr;
 437
 438   w = R1;
 439
 440   // already dead?
 441   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
 442       RET_NP(0,stg_NO_FINALIZER_closure);
 443   }
 444
 445   // kill it
 446 #ifdef PROFILING
 447   // @LDV profiling
 448   // A weak pointer is inherently used, so we do not need to call
 449   // LDV_recordDead_FILL_SLOP_DYNAMIC():
 450   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
 451   // or, LDV_recordDead():
 452   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
 453   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
 454   // large as weak pointers, so there is no need to fill the slop, either.
 455   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
 456 #endif
 457
 458   //
 459   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
 460   //
 461   SET_INFO(w,stg_DEAD_WEAK_info);
 462   LDV_RECORD_CREATE(w);
 463
 464   f   = StgWeak_finalizer(w);
 465   arr = StgWeak_cfinalizer(w);
 466
 467   StgDeadWeak_link(w) = StgWeak_link(w);
 468
 469   if (arr != stg_NO_FINALIZER_closure) {
 470     foreign "C" runCFinalizer(StgArrWords_payload(arr,0),
 471                               StgArrWords_payload(arr,1),
 472                               StgArrWords_payload(arr,2),
 473                               StgArrWords_payload(arr,3)) [];
 474   }
 475
 476   /* return the finalizer */
 477   if (f == stg_NO_FINALIZER_closure) {
 478       RET_NP(0,stg_NO_FINALIZER_closure);
 479   } else {
 480       RET_NP(1,f);
 481   }
 482 }
 483
 484 stg_deRefWeakzh
 485 {
 486   /* R1 = weak ptr */
 487   W_ w, code, val;
 488
 489   w = R1;
 490   if (GET_INFO(w) == stg_WEAK_info) {
 491     code = 1;
 492     val = StgWeak_value(w);
 493   } else {
 494     code = 0;
 495     val = w;
 496   }
 497   RET_NP(code,val);
 498 }
 499
 500 /* -----------------------------------------------------------------------------
 501    Floating point operations.
 502    -------------------------------------------------------------------------- */
 503
 504 stg_decodeFloatzuIntzh
 505 {
 506     W_ p;
 507     F_ arg;
 508     W_ mp_tmp1;
 509     W_ mp_tmp_w;
 510
 511     STK_CHK_GEN( WDS(2), NO_PTRS, stg_decodeFloatzuIntzh );
 512
 513     mp_tmp1  = Sp - WDS(1);
 514     mp_tmp_w = Sp - WDS(2);
 515
 516     /* arguments: F1 = Float# */
 517     arg = F1;
 518
 519     /* Perform the operation */
 520     foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
 521
 522     /* returns: (Int# (mantissa), Int# (exponent)) */
 523     RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
 524 }
 525
 526 stg_decodeDoublezu2Intzh
 527 {
 528     D_ arg;
 529     W_ p;
 530     W_ mp_tmp1;
 531     W_ mp_tmp2;
 532     W_ mp_result1;
 533     W_ mp_result2;
 534
 535     STK_CHK_GEN( WDS(4), NO_PTRS, stg_decodeDoublezu2Intzh );
 536
 537     mp_tmp1    = Sp - WDS(1);
 538     mp_tmp2    = Sp - WDS(2);
 539     mp_result1 = Sp - WDS(3);
 540     mp_result2 = Sp - WDS(4);
 541
 542     /* arguments: D1 = Double# */
 543     arg = D1;
 544
 545     /* Perform the operation */
 546     foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
 547                                     mp_result1 "ptr", mp_result2 "ptr",
 548                                     arg) [];
 549
 550     /* returns:
 551        (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
 552     RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
 553 }
 554
 555 /* -----------------------------------------------------------------------------
 556  * Concurrency primitives
 557  * -------------------------------------------------------------------------- */
 558
 559 stg_forkzh
 560 {
 561   /* args: R1 = closure to spark */
 562
 563   MAYBE_GC(R1_PTR, stg_forkzh);
 564
 565   W_ closure;
 566   W_ threadid;
 567   closure = R1;
 568
 569   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
 570                                 RtsFlags_GcFlags_initialStkSize(RtsFlags),
 571                                 closure "ptr") [];
 572
 573   /* start blocked if the current thread is blocked */
 574   StgTSO_flags(threadid) = %lobits16(
 575      TO_W_(StgTSO_flags(threadid)) |
 576      TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));
 577
 578   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
 579
 580   // context switch soon, but not immediately: we don't want every
 581   // forkIO to force a context-switch.
 582   Capability_context_switch(MyCapability()) = 1 :: CInt;
 583
 584   RET_P(threadid);
 585 }
 586
 587 stg_forkOnzh
 588 {
 589   /* args: R1 = cpu, R2 = closure to spark */
 590
 591   MAYBE_GC(R2_PTR, stg_forkOnzh);
 592
 593   W_ cpu;
 594   W_ closure;
 595   W_ threadid;
 596   cpu = R1;
 597   closure = R2;
 598
 599   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
 600                                 RtsFlags_GcFlags_initialStkSize(RtsFlags),
 601                                 closure "ptr") [];
 602
 603   /* start blocked if the current thread is blocked */
 604   StgTSO_flags(threadid) = %lobits16(
 605      TO_W_(StgTSO_flags(threadid)) |
 606      TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));
 607
 608   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
 609
 610   // context switch soon, but not immediately: we don't want every
 611   // forkIO to force a context-switch.
 612   Capability_context_switch(MyCapability()) = 1 :: CInt;
 613
 614   RET_P(threadid);
 615 }
 616
 617 stg_yieldzh
 618 {
 619   jump stg_yield_noregs;
 620 }
 621
 622 stg_myThreadIdzh
 623 {
 624   /* no args. */
 625   RET_P(CurrentTSO);
 626 }
 627
 628 stg_labelThreadzh
 629 {
 630   /* args:
 631         R1 = ThreadId#
 632         R2 = Addr# */
 633 #ifdef DEBUG
 634   foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
 635 #endif
 636   jump %ENTRY_CODE(Sp(0));
 637 }
 638
 639 stg_isCurrentThreadBoundzh
 640 {
 641   /* no args */
 642   W_ r;
 643   (r) = foreign "C" isThreadBound(CurrentTSO) [];
 644   RET_N(r);
 645 }
 646
 647 stg_threadStatuszh
 648 {
 649     /* args: R1 :: ThreadId# */
 650     W_ tso;
 651     W_ why_blocked;
 652     W_ what_next;
 653     W_ ret, cap, locked;
 654
 655     tso = R1;
 656
 657     what_next   = TO_W_(StgTSO_what_next(tso));
 658     why_blocked = TO_W_(StgTSO_why_blocked(tso));
 659     // Note: these two reads are not atomic, so they might end up
 660     // being inconsistent.  It doesn't matter, since we
 661     // only return one or the other.  If we wanted to return the
 662     // contents of block_info too, then we'd have to do some synchronisation.
 663
 664     if (what_next == ThreadComplete) {
 665         ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
 666     } else {
 667         if (what_next == ThreadKilled) {
 668             ret = 17;
 669         } else {
 670             ret = why_blocked;
 671         }
 672     }
 673
 674     cap = TO_W_(Capability_no(StgTSO_cap(tso)));
 675
 676     if ((TO_W_(StgTSO_flags(tso)) & TSO_LOCKED) != 0) {
 677         locked = 1;
 678     } else {
 679         locked = 0;
 680     }
 681
 682     RET_NNN(ret,cap,locked);
 683 }
 684
 685 /* -----------------------------------------------------------------------------
 686  * TVar primitives
 687  * -------------------------------------------------------------------------- */
 688
 689 #define SP_OFF 0
 690
 691 // Catch retry frame ------------------------------------------------------------
 692
 693 INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
 694 #if defined(PROFILING)
 695   W_ unused1, W_ unused2,
 696 #endif
 697   W_ unused3, P_ unused4, P_ unused5)
 698 {
 699    W_ r, frame, trec, outer;
 700
 701    frame = Sp;
 702    trec = StgTSO_trec(CurrentTSO);
 703    outer  = StgTRecHeader_enclosing_trec(trec);
 704    (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
 705    if (r != 0) {
 706      /* Succeeded (either first branch or second branch) */
 707      StgTSO_trec(CurrentTSO) = outer;
 708      Sp = Sp + SIZEOF_StgCatchRetryFrame;
 709      jump %ENTRY_CODE(Sp(SP_OFF));
 710    } else {
 711      /* Did not commit: re-execute */
 712      W_ new_trec;
 713      ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 714      StgTSO_trec(CurrentTSO) = new_trec;
 715      if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
 716        R1 = StgCatchRetryFrame_alt_code(frame);
 717      } else {
 718        R1 = StgCatchRetryFrame_first_code(frame);
 719      }
 720      jump stg_ap_v_fast;
 721    }
 722 }
 723
 724
 725 // Atomically frame ------------------------------------------------------------
 726
 727 INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
 728 #if defined(PROFILING)
 729   W_ unused1, W_ unused2,
 730 #endif
 731   P_ code, P_ next_invariant_to_check, P_ result)
 732 {
 733   W_ frame, trec, valid, next_invariant, q, outer;
 734
 735   frame  = Sp;
 736   trec   = StgTSO_trec(CurrentTSO);
 737   result = R1;
 738   outer  = StgTRecHeader_enclosing_trec(trec);
 739
 740   if (outer == NO_TREC) {
 741     /* First time back at the atomically frame -- pick up invariants */
 742     ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
 743     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
 744     StgAtomicallyFrame_result(frame) = result;
 745
 746   } else {
 747     /* Second/subsequent time back at the atomically frame -- abort the
 748      * tx that's checking the invariant and move on to the next one */
 749     StgTSO_trec(CurrentTSO) = outer;
 750     q = StgAtomicallyFrame_next_invariant_to_check(frame);
 751     StgInvariantCheckQueue_my_execution(q) = trec;
 752     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
 753     /* Don't free trec -- it's linked from q and will be stashed in the
 754      * invariant if we eventually commit. */
 755     q = StgInvariantCheckQueue_next_queue_entry(q);
 756     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
 757     trec = outer;
 758   }
 759
 760   q = StgAtomicallyFrame_next_invariant_to_check(frame);
 761
 762   if (q != END_INVARIANT_CHECK_QUEUE) {
 763     /* We can't commit yet: another invariant to check */
 764     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
 765     StgTSO_trec(CurrentTSO) = trec;
 766
 767     next_invariant = StgInvariantCheckQueue_invariant(q);
 768     R1 = StgAtomicInvariant_code(next_invariant);
 769     jump stg_ap_v_fast;
 770
 771   } else {
 772
 773     /* We've got no more invariants to check, try to commit */
 774     (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
 775     if (valid != 0) {
 776       /* Transaction was valid: commit succeeded */
 777       StgTSO_trec(CurrentTSO) = NO_TREC;
 778       R1 = StgAtomicallyFrame_result(frame);
 779       Sp = Sp + SIZEOF_StgAtomicallyFrame;
 780       jump %ENTRY_CODE(Sp(SP_OFF));
 781     } else {
 782       /* Transaction was not valid: try again */
 783       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
 784       StgTSO_trec(CurrentTSO) = trec;
 785       StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
 786       R1 = StgAtomicallyFrame_code(frame);
 787       jump stg_ap_v_fast;
 788     }
 789   }
 790 }
 791
 792 INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
 793 #if defined(PROFILING)
 794   W_ unused1, W_ unused2,
 795 #endif
 796   P_ code, P_ next_invariant_to_check, P_ result)
 797 {
 798   W_ frame, trec, valid;
 799
 800   frame = Sp;
 801
 802   /* The TSO is currently waiting: should we stop waiting? */
 803   (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
 804   if (valid != 0) {
 805     /* Previous attempt is still valid: no point trying again yet */
 806     jump stg_block_noregs;
 807   } else {
 808     /* Previous attempt is no longer valid: try again */
 809     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
 810     StgTSO_trec(CurrentTSO) = trec;
 811     StgHeader_info(frame) = stg_atomically_frame_info;
 812     R1 = StgAtomicallyFrame_code(frame);
 813     jump stg_ap_v_fast;
 814   }
 815 }
 816
 817 // STM catch frame --------------------------------------------------------------
 818
 819 #define SP_OFF 0
 820
 821 /* Catch frames are very similar to update frames, but when entering
 822  * one we just pop the frame off the stack and perform the correct
 823  * kind of return to the activation record underneath us on the stack.
 824  */
 825
 826 INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
 827 #if defined(PROFILING)
 828   W_ unused1, W_ unused2,
 829 #endif
 830   P_ unused3, P_ unused4)
 831    {
 832       W_ r, frame, trec, outer;
 833       frame = Sp;
 834       trec = StgTSO_trec(CurrentTSO);
 835       outer  = StgTRecHeader_enclosing_trec(trec);
 836       (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
 837       if (r != 0) {
 838         /* Commit succeeded */
 839         StgTSO_trec(CurrentTSO) = outer;
 840         Sp = Sp + SIZEOF_StgCatchSTMFrame;
 841         jump Sp(SP_OFF);
 842       } else {
 843         /* Commit failed */
 844         W_ new_trec;
 845         ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 846         StgTSO_trec(CurrentTSO) = new_trec;
 847         R1 = StgCatchSTMFrame_code(frame);
 848         jump stg_ap_v_fast;
 849       }
 850    }
 851
 852
 853 // Primop definition ------------------------------------------------------------
 854
 855 stg_atomicallyzh
 856 {
 857   W_ frame;
 858   W_ old_trec;
 859   W_ new_trec;
 860
 861   // stmStartTransaction may allocate
 862   MAYBE_GC (R1_PTR, stg_atomicallyzh);
 863
 864   /* Args: R1 = m :: STM a */
 865   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, stg_atomicallyzh);
 866
 867   old_trec = StgTSO_trec(CurrentTSO);
 868
 869   /* Nested transactions are not allowed; raise an exception */
 870   if (old_trec != NO_TREC) {
 871      R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
 872      jump stg_raisezh;
 873   }
 874
 875   /* Set up the atomically frame */
 876   Sp = Sp - SIZEOF_StgAtomicallyFrame;
 877   frame = Sp;
 878
 879   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
 880   StgAtomicallyFrame_code(frame) = R1;
 881   StgAtomicallyFrame_result(frame) = NO_TREC;
 882   StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
 883
 884   /* Start the memory transcation */
 885   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
 886   StgTSO_trec(CurrentTSO) = new_trec;
 887
 888   /* Apply R1 to the realworld token */
 889   jump stg_ap_v_fast;
 890 }
 891
 892
 893 stg_catchSTMzh
 894 {
 895   W_ frame;
 896
 897   /* Args: R1 :: STM a */
 898   /* Args: R2 :: Exception -> STM a */
 899   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, stg_catchSTMzh);
 900
 901   /* Set up the catch frame */
 902   Sp = Sp - SIZEOF_StgCatchSTMFrame;
 903   frame = Sp;
 904
 905   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
 906   StgCatchSTMFrame_handler(frame) = R2;
 907   StgCatchSTMFrame_code(frame) = R1;
 908
 909   /* Start a nested transaction to run the body of the try block in */
 910   W_ cur_trec;
 911   W_ new_trec;
 912   cur_trec = StgTSO_trec(CurrentTSO);
 913   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
 914   StgTSO_trec(CurrentTSO) = new_trec;
 915
 916   /* Apply R1 to the realworld token */
 917   jump stg_ap_v_fast;
 918 }
 919
 920
 921 stg_catchRetryzh
 922 {
 923   W_ frame;
 924   W_ new_trec;
 925   W_ trec;
 926
 927   // stmStartTransaction may allocate
 928   MAYBE_GC (R1_PTR & R2_PTR, stg_catchRetryzh);
 929
 930   /* Args: R1 :: STM a */
 931   /* Args: R2 :: STM a */
 932   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, stg_catchRetryzh);
 933
 934   /* Start a nested transaction within which to run the first code */
 935   trec = StgTSO_trec(CurrentTSO);
 936   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
 937   StgTSO_trec(CurrentTSO) = new_trec;
 938
 939   /* Set up the catch-retry frame */
 940   Sp = Sp - SIZEOF_StgCatchRetryFrame;
 941   frame = Sp;
 942
 943   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
 944   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
 945   StgCatchRetryFrame_first_code(frame) = R1;
 946   StgCatchRetryFrame_alt_code(frame) = R2;
 947
 948   /* Apply R1 to the realworld token */
 949   jump stg_ap_v_fast;
 950 }
 951
 952
 953 stg_retryzh
 954 {
 955   W_ frame_type;
 956   W_ frame;
 957   W_ trec;
 958   W_ outer;
 959   W_ r;
 960
 961   MAYBE_GC (NO_PTRS, stg_retryzh); // STM operations may allocate
 962
 963   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
 964 retry_pop_stack:
 965   SAVE_THREAD_STATE();
 966   (frame_type) = foreign "C" findRetryFrameHelper(MyCapability(), CurrentTSO "ptr") [];
 967   LOAD_THREAD_STATE();
 968   frame = Sp;
 969   trec = StgTSO_trec(CurrentTSO);
 970   outer  = StgTRecHeader_enclosing_trec(trec);
 971
 972   if (frame_type == CATCH_RETRY_FRAME) {
 973     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
 974     ASSERT(outer != NO_TREC);
 975     // Abort the transaction attempting the current branch
 976     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
 977     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
 978     if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
 979       // Retry in the first branch: try the alternative
 980       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 981       StgTSO_trec(CurrentTSO) = trec;
 982       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
 983       R1 = StgCatchRetryFrame_alt_code(frame);
 984       jump stg_ap_v_fast;
 985     } else {
 986       // Retry in the alternative code: propagate the retry
 987       StgTSO_trec(CurrentTSO) = outer;
 988       Sp = Sp + SIZEOF_StgCatchRetryFrame;
 989       goto retry_pop_stack;
 990     }
 991   }
 992
 993   // We've reached the ATOMICALLY_FRAME: attempt to wait
 994   ASSERT(frame_type == ATOMICALLY_FRAME);
 995   if (outer != NO_TREC) {
 996     // We called retry while checking invariants, so abort the current
 997     // invariant check (merging its TVar accesses into the parents read
 998     // set so we'll wait on them)
 999     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1000     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
1001     trec = outer;
1002     StgTSO_trec(CurrentTSO) = trec;
1003     outer  = StgTRecHeader_enclosing_trec(trec);
1004   }
1005   ASSERT(outer == NO_TREC);
1006
1007   (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
1008   if (r != 0) {
1009     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1010     StgHeader_info(frame) = stg_atomically_waiting_frame_info;
1011     Sp = frame;
1012     // Fix up the stack in the unregisterised case: the return convention is different.
1013     R3 = trec; // passing to stmWaitUnblock()
1014     jump stg_block_stmwait;
1015   } else {
1016     // Transaction was not valid: retry immediately
1017     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1018     StgTSO_trec(CurrentTSO) = trec;
1019     R1 = StgAtomicallyFrame_code(frame);
1020     Sp = frame;
1021     jump stg_ap_v_fast;
1022   }
1023 }
1024
1025
1026 stg_checkzh
1027 {
1028   W_ trec, closure;
1029
1030   /* Args: R1 = invariant closure */
1031   MAYBE_GC (R1_PTR, stg_checkzh);
1032
1033   trec = StgTSO_trec(CurrentTSO);
1034   closure = R1;
1035   foreign "C" stmAddInvariantToCheck(MyCapability() "ptr",
1036                                      trec "ptr",
1037                                      closure "ptr") [];
1038
1039   jump %ENTRY_CODE(Sp(0));
1040 }
1041
1042
1043 stg_newTVarzh
1044 {
1045   W_ tv;
1046   W_ new_value;
1047
1048   /* Args: R1 = initialisation value */
1049
1050   MAYBE_GC (R1_PTR, stg_newTVarzh);
1051   new_value = R1;
1052   ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
1053   RET_P(tv);
1054 }
1055
1056
1057 stg_readTVarzh
1058 {
1059   W_ trec;
1060   W_ tvar;
1061   W_ result;
1062
1063   /* Args: R1 = TVar closure */
1064
1065   MAYBE_GC (R1_PTR, stg_readTVarzh); // Call to stmReadTVar may allocate
1066   trec = StgTSO_trec(CurrentTSO);
1067   tvar = R1;
1068   ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
1069
1070   RET_P(result);
1071 }
1072
1073 stg_readTVarIOzh
1074 {
1075     W_ result;
1076
1077 again:
1078     result = StgTVar_current_value(R1);
1079     if (%INFO_PTR(result) == stg_TREC_HEADER_info) {
1080         goto again;
1081     }
1082     RET_P(result);
1083 }
1084
1085 stg_writeTVarzh
1086 {
1087   W_ trec;
1088   W_ tvar;
1089   W_ new_value;
1090
1091   /* Args: R1 = TVar closure */
1092   /*       R2 = New value    */
1093
1094   MAYBE_GC (R1_PTR & R2_PTR, stg_writeTVarzh); // Call to stmWriteTVar may allocate
1095   trec = StgTSO_trec(CurrentTSO);
1096   tvar = R1;
1097   new_value = R2;
1098   foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1099
1100   jump %ENTRY_CODE(Sp(0));
1101 }
1102
1103
1104 /* -----------------------------------------------------------------------------
1105  * MVar primitives
1106  *
1107  * take & putMVar work as follows.  Firstly, an important invariant:
1108  *
1109  *    If the MVar is full, then the blocking queue contains only
1110  *    threads blocked on putMVar, and if the MVar is empty then the
1111  *    blocking queue contains only threads blocked on takeMVar.
1112  *
1113  * takeMvar:
1114  *    MVar empty : then add ourselves to the blocking queue
1115  *    MVar full  : remove the value from the MVar, and
1116  *                 blocking queue empty     : return
1117  *                 blocking queue non-empty : perform the first blocked putMVar
1118  *                                            from the queue, and wake up the
1119  *                                            thread (MVar is now full again)
1120  *
1121  * putMVar is just the dual of the above algorithm.
1122  *
1123  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1124  * the stack of the thread waiting to do the putMVar.  See
1125  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1126  * the stack layout, and the PerformPut and PerformTake macros below.
1127  *
1128  * It is important that a blocked take or put is woken up with the
1129  * take/put already performed, because otherwise there would be a
1130  * small window of vulnerability where the thread could receive an
1131  * exception and never perform its take or put, and we'd end up with a
1132  * deadlock.
1133  *
1134  * -------------------------------------------------------------------------- */
1135
1136 stg_isEmptyMVarzh
1137 {
1138     /* args: R1 = MVar closure */
1139
1140     if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
1141         RET_N(1);
1142     } else {
1143         RET_N(0);
1144     }
1145 }
1146
1147 stg_newMVarzh
1148 {
1149     /* args: none */
1150     W_ mvar;
1151
1152     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, stg_newMVarzh );
1153
1154     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1155     SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
1156         // MVARs start dirty: generation 0 has no mutable list
1157     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1158     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1159     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1160     RET_P(mvar);
1161 }
1162
1163
1164 #define PerformTake(stack, value)               \
1165     W_ sp;                                      \
1166     sp = StgStack_sp(stack);                    \
1167     W_[sp + WDS(1)] = value;                    \
1168     W_[sp + WDS(0)] = stg_gc_unpt_r1_info;
1169
1170 #define PerformPut(stack,lval)                  \
1171     W_ sp;                                      \
1172     sp = StgStack_sp(stack) + WDS(3);           \
1173     StgStack_sp(stack) = sp;                    \
1174     lval = W_[sp - WDS(1)];
1175
1176 stg_takeMVarzh
1177 {
1178     W_ mvar, val, info, tso, q;
1179
1180     /* args: R1 = MVar closure */
1181     mvar = R1;
1182
1183 #if defined(THREADED_RTS)
1184     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1185 #else
1186     info = GET_INFO(mvar);
1187 #endif
1188
1189     if (info == stg_MVAR_CLEAN_info) {
1190         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
1191     }
1192
1193     /* If the MVar is empty, put ourselves on its blocking queue,
1194      * and wait until we're woken up.
1195      */
1196     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1197
1198         // Note [mvar-heap-check] We want to do the heap check in the
1199         // branch here, to avoid the conditional in the common case.
1200         // However, we've already locked the MVar above, so we better
1201         // be careful to unlock it again if the the heap check fails.
1202         // Unfortunately we don't have an easy way to inject any code
1203         // into the heap check generated by the code generator, so we
1204         // have to do it in stg_gc_gen (see HeapStackCheck.cmm).
1205         HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR, stg_takeMVarzh);
1206
1207         q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
1208
1209         SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
1210         StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
1211         StgMVarTSOQueue_tso(q)  = CurrentTSO;
1212
1213         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1214             StgMVar_head(mvar) = q;
1215         } else {
1216             StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q;
1217             foreign "C" recordClosureMutated(MyCapability() "ptr",
1218                                              StgMVar_tail(mvar)) [];
1219         }
1220         StgTSO__link(CurrentTSO)       = q;
1221         StgTSO_block_info(CurrentTSO)  = mvar;
1222         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1223         StgMVar_tail(mvar)             = q;
1224
1225         R1 = mvar;
1226         jump stg_block_takemvar;
1227     }
1228
1229     /* we got the value... */
1230     val = StgMVar_value(mvar);
1231
1232     q = StgMVar_head(mvar);
1233 loop:
1234     if (q == stg_END_TSO_QUEUE_closure) {
1235         /* No further putMVars, MVar is now empty */
1236         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1237         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1238         RET_P(val);
1239     }
1240     if (StgHeader_info(q) == stg_IND_info ||
1241         StgHeader_info(q) == stg_MSG_NULL_info) {
1242         q = StgInd_indirectee(q);
1243         goto loop;
1244     }
1245
1246     // There are putMVar(s) waiting... wake up the first thread on the queue
1247
1248     tso = StgMVarTSOQueue_tso(q);
1249     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1250     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1251         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1252     }
1253
1254     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1255     ASSERT(StgTSO_block_info(tso) == mvar);
1256
1257     // actually perform the putMVar for the thread that we just woke up
1258     W_ stack;
1259     stack = StgTSO_stackobj(tso);
1260     PerformPut(stack, StgMVar_value(mvar));
1261
1262     // indicate that the MVar operation has now completed.
1263     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1264
1265     // no need to mark the TSO dirty, we have only written END_TSO_QUEUE.
1266
1267     foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
1268
1269     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1270     RET_P(val);
1271 }
1272
1273
1274 stg_tryTakeMVarzh
1275 {
1276     W_ mvar, val, info, tso, q;
1277
1278     /* args: R1 = MVar closure */
1279     mvar = R1;
1280
1281 #if defined(THREADED_RTS)
1282     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1283 #else
1284     info = GET_INFO(mvar);
1285 #endif
1286
1287     /* If the MVar is empty, put ourselves on its blocking queue,
1288      * and wait until we're woken up.
1289      */
1290     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1291 #if defined(THREADED_RTS)
1292         unlockClosure(mvar, info);
1293 #endif
1294         /* HACK: we need a pointer to pass back,
1295          * so we abuse NO_FINALIZER_closure
1296          */
1297         RET_NP(0, stg_NO_FINALIZER_closure);
1298     }
1299
1300     if (info == stg_MVAR_CLEAN_info) {
1301         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
1302     }
1303
1304     /* we got the value... */
1305     val = StgMVar_value(mvar);
1306
1307     q = StgMVar_head(mvar);
1308 loop:
1309     if (q == stg_END_TSO_QUEUE_closure) {
1310         /* No further putMVars, MVar is now empty */
1311         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1312         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1313         RET_NP(1, val);
1314     }
1315     if (StgHeader_info(q) == stg_IND_info ||
1316         StgHeader_info(q) == stg_MSG_NULL_info) {
1317         q = StgInd_indirectee(q);
1318         goto loop;
1319     }
1320
1321     // There are putMVar(s) waiting... wake up the first thread on the queue
1322
1323     tso = StgMVarTSOQueue_tso(q);
1324     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1325     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1326         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1327     }
1328
1329     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1330     ASSERT(StgTSO_block_info(tso) == mvar);
1331
1332     // actually perform the putMVar for the thread that we just woke up
1333     W_ stack;
1334     stack = StgTSO_stackobj(tso);
1335     PerformPut(stack, StgMVar_value(mvar));
1336
1337     // indicate that the MVar operation has now completed.
1338     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1339
1340     // no need to mark the TSO dirty, we have only written END_TSO_QUEUE.
1341
1342     foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
1343
1344     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1345     RET_NP(1,val);
1346 }
1347
1348
1349 stg_putMVarzh
1350 {
1351     W_ mvar, val, info, tso, q;
1352
1353     /* args: R1 = MVar, R2 = value */
1354     mvar = R1;
1355     val  = R2;
1356
1357 #if defined(THREADED_RTS)
1358     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1359 #else
1360     info = GET_INFO(mvar);
1361 #endif
1362
1363     if (info == stg_MVAR_CLEAN_info) {
1364         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1365     }
1366
1367     if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
1368
1369         // see Note [mvar-heap-check] above
1370         HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR & R2_PTR, stg_putMVarzh);
1371
1372         q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
1373
1374         SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
1375         StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
1376         StgMVarTSOQueue_tso(q)  = CurrentTSO;
1377
1378         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1379             StgMVar_head(mvar) = q;
1380         } else {
1381             StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q;
1382             foreign "C" recordClosureMutated(MyCapability() "ptr",
1383                                              StgMVar_tail(mvar)) [];
1384         }
1385         StgTSO__link(CurrentTSO)       = q;
1386         StgTSO_block_info(CurrentTSO)  = mvar;
1387         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1388         StgMVar_tail(mvar)             = q;
1389
1390         R1 = mvar;
1391         R2 = val;
1392         jump stg_block_putmvar;
1393     }
1394
1395     q = StgMVar_head(mvar);
1396 loop:
1397     if (q == stg_END_TSO_QUEUE_closure) {
1398         /* No further takes, the MVar is now full. */
1399         StgMVar_value(mvar) = val;
1400         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1401         jump %ENTRY_CODE(Sp(0));
1402     }
1403     if (StgHeader_info(q) == stg_IND_info ||
1404         StgHeader_info(q) == stg_MSG_NULL_info) {
1405         q = StgInd_indirectee(q);
1406         goto loop;
1407     }
1408
1409     // There are takeMVar(s) waiting: wake up the first one
1410
1411     tso = StgMVarTSOQueue_tso(q);
1412     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1413     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1414         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1415     }
1416
1417     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1418     ASSERT(StgTSO_block_info(tso) == mvar);
1419
1420     // actually perform the takeMVar
1421     W_ stack;
1422     stack = StgTSO_stackobj(tso);
1423     PerformTake(stack, val);
1424
1425     // indicate that the MVar operation has now completed.
1426     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1427
1428     if (TO_W_(StgStack_dirty(stack)) == 0) {
1429         foreign "C" dirty_STACK(MyCapability() "ptr", stack "ptr") [];
1430     }
1431
1432     foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
1433
1434     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1435     jump %ENTRY_CODE(Sp(0));
1436 }
1437
1438
1439 stg_tryPutMVarzh
1440 {
1441     W_ mvar, val, info, tso, q;
1442
1443     /* args: R1 = MVar, R2 = value */
1444     mvar = R1;
1445     val  = R2;
1446
1447 #if defined(THREADED_RTS)
1448     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1449 #else
1450     info = GET_INFO(mvar);
1451 #endif
1452
1453     if (info == stg_MVAR_CLEAN_info) {
1454         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1455     }
1456
1457     if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
1458 #if defined(THREADED_RTS)
1459         unlockClosure(mvar, info);
1460 #endif
1461         RET_N(0);
1462     }
1463
1464     q = StgMVar_head(mvar);
1465 loop:
1466     if (q == stg_END_TSO_QUEUE_closure) {
1467         /* No further takes, the MVar is now full. */
1468         StgMVar_value(mvar) = val;
1469         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1470         RET_N(1);
1471     }
1472     if (StgHeader_info(q) == stg_IND_info ||
1473         StgHeader_info(q) == stg_MSG_NULL_info) {
1474         q = StgInd_indirectee(q);
1475         goto loop;
1476     }
1477
1478     // There are takeMVar(s) waiting: wake up the first one
1479
1480     tso = StgMVarTSOQueue_tso(q);
1481     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1482     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1483         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1484     }
1485
1486     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1487     ASSERT(StgTSO_block_info(tso) == mvar);
1488
1489     // actually perform the takeMVar
1490     W_ stack;
1491     stack = StgTSO_stackobj(tso);
1492     PerformTake(stack, val);
1493
1494     // indicate that the MVar operation has now completed.
1495     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1496
1497     if (TO_W_(StgStack_dirty(stack)) == 0) {
1498         foreign "C" dirty_STACK(MyCapability() "ptr", stack "ptr") [];
1499     }
1500
1501     foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
1502
1503     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1504     RET_N(1);
1505 }
1506
1507
1508 /* -----------------------------------------------------------------------------
1509    Stable pointer primitives
1510    -------------------------------------------------------------------------  */
1511
1512 stg_makeStableNamezh
1513 {
1514     W_ index, sn_obj;
1515
1516     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, stg_makeStableNamezh );
1517
1518     (index) = foreign "C" lookupStableName(R1 "ptr") [];
1519
1520     /* Is there already a StableName for this heap object?
1521      *  stable_ptr_table is a pointer to an array of snEntry structs.
1522      */
1523     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1524         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1525         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1526         StgStableName_sn(sn_obj) = index;
1527         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1528     } else {
1529         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1530     }
1531
1532     RET_P(sn_obj);
1533 }
1534
1535
1536 stg_makeStablePtrzh
1537 {
1538     /* Args: R1 = a */
1539     W_ sp;
1540     MAYBE_GC(R1_PTR, stg_makeStablePtrzh);
1541     ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
1542     RET_N(sp);
1543 }
1544
1545 stg_deRefStablePtrzh
1546 {
1547     /* Args: R1 = the stable ptr */
1548     W_ r, sp;
1549     sp = R1;
1550     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1551     RET_P(r);
1552 }
1553
1554 /* -----------------------------------------------------------------------------
1555    Bytecode object primitives
1556    -------------------------------------------------------------------------  */
1557
1558 stg_newBCOzh
1559 {
1560     /* R1 = instrs
1561        R2 = literals
1562        R3 = ptrs
1563        R4 = arity
1564        R5 = bitmap array
1565     */
1566     W_ bco, bitmap_arr, bytes, words;
1567
1568     bitmap_arr = R5;
1569
1570     words = BYTES_TO_WDS(SIZEOF_StgBCO) + BYTE_ARR_WDS(bitmap_arr);
1571     bytes = WDS(words);
1572
1573     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, stg_newBCOzh );
1574
1575     bco = Hp - bytes + WDS(1);
1576     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1577
1578     StgBCO_instrs(bco)     = R1;
1579     StgBCO_literals(bco)   = R2;
1580     StgBCO_ptrs(bco)       = R3;
1581     StgBCO_arity(bco)      = HALF_W_(R4);
1582     StgBCO_size(bco)       = HALF_W_(words);
1583
1584     // Copy the arity/bitmap info into the BCO
1585     W_ i;
1586     i = 0;
1587 for:
1588     if (i < BYTE_ARR_WDS(bitmap_arr)) {
1589         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1590         i = i + 1;
1591         goto for;
1592     }
1593
1594     RET_P(bco);
1595 }
1596
1597
1598 stg_mkApUpd0zh
1599 {
1600     // R1 = the BCO# for the AP
1601     //
1602     W_ ap;
1603
1604     // This function is *only* used to wrap zero-arity BCOs in an
1605     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1606     // saturated and always points directly to a FUN or BCO.
1607     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1608            StgBCO_arity(R1) == HALF_W_(0));
1609
1610     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, stg_mkApUpd0zh);
1611     TICK_ALLOC_UP_THK(0, 0);
1612     CCCS_ALLOC(SIZEOF_StgAP);
1613
1614     ap = Hp - SIZEOF_StgAP + WDS(1);
1615     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1616
1617     StgAP_n_args(ap) = HALF_W_(0);
1618     StgAP_fun(ap) = R1;
1619
1620     RET_P(ap);
1621 }
1622
1623 stg_unpackClosurezh
1624 {
1625 /* args: R1 = closure to analyze */
1626 // TODO: Consider the absence of ptrs or nonptrs as a special case ?
1627
1628     W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
1629     info  = %GET_STD_INFO(UNTAG(R1));
1630
1631     // Some closures have non-standard layout, so we omit those here.
1632     W_ type;
1633     type = TO_W_(%INFO_TYPE(info));
1634     switch [0 .. N_CLOSURE_TYPES] type {
1635     case THUNK_SELECTOR : {
1636         ptrs = 1;
1637         nptrs = 0;
1638         goto out;
1639     }
1640     case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1,
1641          THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
1642         ptrs = 0;
1643         nptrs = 0;
1644         goto out;
1645     }
1646     default: {
1647         ptrs  = TO_W_(%INFO_PTRS(info));
1648         nptrs = TO_W_(%INFO_NPTRS(info));
1649         goto out;
1650     }}
1651 out:
1652
1653     W_ ptrs_arr_sz, ptrs_arr_cards, nptrs_arr_sz;
1654     nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
1655     ptrs_arr_cards = mutArrPtrsCardWords(ptrs);
1656     ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs) + WDS(ptrs_arr_cards);
1657
1658     ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, stg_unpackClosurezh);
1659
1660     W_ clos;
1661     clos = UNTAG(R1);
1662
1663     ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
1664     nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
1665
1666     SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
1667     StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
1668     StgMutArrPtrs_size(ptrs_arr) = ptrs + ptrs_arr_cards;
1669
1670     p = 0;
1671 for:
1672     if(p < ptrs) {
1673          W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
1674          p = p + 1;
1675          goto for;
1676     }
1677     /* We can leave the card table uninitialised, since the array is
1678        allocated in the nursery.  The GC will fill it in if/when the array
1679        is promoted. */
1680
1681     SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
1682     StgArrWords_bytes(nptrs_arr) = WDS(nptrs);
1683     p = 0;
1684 for2:
1685     if(p < nptrs) {
1686          W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
1687          p = p + 1;
1688          goto for2;
1689     }
1690     RET_NPP(info, ptrs_arr, nptrs_arr);
1691 }
1692
1693 /* -----------------------------------------------------------------------------
1694    Thread I/O blocking primitives
1695    -------------------------------------------------------------------------- */
1696
1697 /* Add a thread to the end of the blocked queue. (C-- version of the C
1698  * macro in Schedule.h).
1699  */
1700 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1701     ASSERT(StgTSO__link(tso) == END_TSO_QUEUE);         \
1702     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1703       W_[blocked_queue_hd] = tso;                       \
1704     } else {                                            \
1705       foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \
1706     }                                                   \
1707     W_[blocked_queue_tl] = tso;
1708
1709 stg_waitReadzh
1710 {
1711     /* args: R1 */
1712 #ifdef THREADED_RTS
1713     foreign "C" barf("waitRead# on threaded RTS") never returns;
1714 #else
1715
1716     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1717     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1718     StgTSO_block_info(CurrentTSO) = R1;
1719     // No locking - we're not going to use this interface in the
1720     // threaded RTS anyway.
1721     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1722     jump stg_block_noregs;
1723 #endif
1724 }
1725
1726 stg_waitWritezh
1727 {
1728     /* args: R1 */
1729 #ifdef THREADED_RTS
1730     foreign "C" barf("waitWrite# on threaded RTS") never returns;
1731 #else
1732
1733     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1734     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1735     StgTSO_block_info(CurrentTSO) = R1;
1736     // No locking - we're not going to use this interface in the
1737     // threaded RTS anyway.
1738     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1739     jump stg_block_noregs;
1740 #endif
1741 }
1742
1743
1744 STRING(stg_delayzh_malloc_str, "stg_delayzh")
1745 stg_delayzh
1746 {
1747 #ifdef mingw32_HOST_OS
1748     W_ ares;
1749     CInt reqID;
1750 #else
1751     W_ t, prev, target;
1752 #endif
1753
1754 #ifdef THREADED_RTS
1755     foreign "C" barf("delay# on threaded RTS") never returns;
1756 #else
1757
1758     /* args: R1 (microsecond delay amount) */
1759     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1760     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
1761
1762 #ifdef mingw32_HOST_OS
1763
1764     /* could probably allocate this on the heap instead */
1765     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1766                                             stg_delayzh_malloc_str);
1767     (reqID) = foreign "C" addDelayRequest(R1);
1768     StgAsyncIOResult_reqID(ares)   = reqID;
1769     StgAsyncIOResult_len(ares)     = 0;
1770     StgAsyncIOResult_errCode(ares) = 0;
1771     StgTSO_block_info(CurrentTSO)  = ares;
1772
1773     /* Having all async-blocked threads reside on the blocked_queue
1774      * simplifies matters, so change the status to OnDoProc put the
1775      * delayed thread on the blocked_queue.
1776      */
1777     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1778     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1779     jump stg_block_async_void;
1780
1781 #else
1782
1783     W_ time;
1784     W_ divisor;
1785     (time) = foreign "C" getourtimeofday() [R1];
1786     divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
1787     if (divisor == 0) {
1788         divisor = 50;
1789     }
1790     divisor = divisor * 1000;
1791     target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
1792            + time + 1; /* Add 1 as getourtimeofday rounds down */
1793     StgTSO_block_info(CurrentTSO) = target;
1794
1795     /* Insert the new thread in the sleeping queue. */
1796     prev = NULL;
1797     t = W_[sleeping_queue];
1798 while:
1799     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
1800         prev = t;
1801         t = StgTSO__link(t);
1802         goto while;
1803     }
1804
1805     StgTSO__link(CurrentTSO) = t;
1806     if (prev == NULL) {
1807         W_[sleeping_queue] = CurrentTSO;
1808     } else {
1809         foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) [];
1810     }
1811     jump stg_block_noregs;
1812 #endif
1813 #endif /* !THREADED_RTS */
1814 }
1815
1816
1817 #ifdef mingw32_HOST_OS
1818 STRING(stg_asyncReadzh_malloc_str, "stg_asyncReadzh")
1819 stg_asyncReadzh
1820 {
1821     W_ ares;
1822     CInt reqID;
1823
1824 #ifdef THREADED_RTS
1825     foreign "C" barf("asyncRead# on threaded RTS") never returns;
1826 #else
1827
1828     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1829     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1830     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1831
1832     /* could probably allocate this on the heap instead */
1833     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1834                                             stg_asyncReadzh_malloc_str)
1835                         [R1,R2,R3,R4];
1836     (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
1837     StgAsyncIOResult_reqID(ares)   = reqID;
1838     StgAsyncIOResult_len(ares)     = 0;
1839     StgAsyncIOResult_errCode(ares) = 0;
1840     StgTSO_block_info(CurrentTSO)  = ares;
1841     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1842     jump stg_block_async;
1843 #endif
1844 }
1845
1846 STRING(stg_asyncWritezh_malloc_str, "stg_asyncWritezh")
1847 stg_asyncWritezh
1848 {
1849     W_ ares;
1850     CInt reqID;
1851
1852 #ifdef THREADED_RTS
1853     foreign "C" barf("asyncWrite# on threaded RTS") never returns;
1854 #else
1855
1856     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1857     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1858     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1859
1860     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1861                                             stg_asyncWritezh_malloc_str)
1862                         [R1,R2,R3,R4];
1863     (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
1864
1865     StgAsyncIOResult_reqID(ares)   = reqID;
1866     StgAsyncIOResult_len(ares)     = 0;
1867     StgAsyncIOResult_errCode(ares) = 0;
1868     StgTSO_block_info(CurrentTSO)  = ares;
1869     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1870     jump stg_block_async;
1871 #endif
1872 }
1873
1874 STRING(stg_asyncDoProczh_malloc_str, "stg_asyncDoProczh")
1875 stg_asyncDoProczh
1876 {
1877     W_ ares;
1878     CInt reqID;
1879
1880 #ifdef THREADED_RTS
1881     foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
1882 #else
1883
1884     /* args: R1 = proc, R2 = param */
1885     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1886     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1887
1888     /* could probably allocate this on the heap instead */
1889     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1890                                             stg_asyncDoProczh_malloc_str)
1891                                 [R1,R2];
1892     (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
1893     StgAsyncIOResult_reqID(ares)   = reqID;
1894     StgAsyncIOResult_len(ares)     = 0;
1895     StgAsyncIOResult_errCode(ares) = 0;
1896     StgTSO_block_info(CurrentTSO) = ares;
1897     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1898     jump stg_block_async;
1899 #endif
1900 }
1901 #endif
1902
1903 /* -----------------------------------------------------------------------------
1904  * noDuplicate#
1905  *
1906  * noDuplicate# tries to ensure that none of the thunks under
1907  * evaluation by the current thread are also under evaluation by
1908  * another thread.  It relies on *both* threads doing noDuplicate#;
1909  * the second one will get blocked if they are duplicating some work.
1910  *
1911  * The idea is that noDuplicate# is used within unsafePerformIO to
1912  * ensure that the IO operation is performed at most once.
1913  * noDuplicate# calls threadPaused which acquires an exclusive lock on
1914  * all the thunks currently under evaluation by the current thread.
1915  *
1916  * Consider the following scenario.  There is a thunk A, whose
1917  * evaluation requires evaluating thunk B, where thunk B is an
1918  * unsafePerformIO.  Two threads, 1 and 2, bother enter A.  Thread 2
1919  * is pre-empted before it enters B, and claims A by blackholing it
1920  * (in threadPaused).  Thread 1 now enters B, and calls noDuplicate#.
1921  *
1922  *      thread 1                      thread 2
1923  *   +-----------+                 +---------------+
1924  *   |    -------+-----> A <-------+-------        |
1925  *   |  update   |   BLACKHOLE     | marked_update |
1926  *   +-----------+                 +---------------+
1927  *   |           |                 |               |
1928  *        ...                             ...
1929  *   |           |                 +---------------+
1930  *   +-----------+
1931  *   |     ------+-----> B
1932  *   |  update   |   BLACKHOLE
1933  *   +-----------+
1934  *
1935  * At this point: A is a blackhole, owned by thread 2.  noDuplicate#
1936  * calls threadPaused, which walks up the stack and
1937  *  - claims B on behalf of thread 1
1938  *  - then it reaches the update frame for A, which it sees is already
1939  *    a BLACKHOLE and is therefore owned by another thread.  Since
1940  *    thread 1 is duplicating work, the computation up to the update
1941  *    frame for A is suspended, including thunk B.
1942  *  - thunk B, which is an unsafePerformIO, has now been reverted to
1943  *    an AP_STACK which could be duplicated - BAD!
1944  *  - The solution is as follows: before calling threadPaused, we
1945  *    leave a frame on the stack (stg_noDuplicate_info) that will call
1946  *    noDuplicate# again if the current computation is suspended and
1947  *    restarted.
1948  *
1949  * See the test program in concurrent/prog003 for a way to demonstrate
1950  * this.  It needs to be run with +RTS -N3 or greater, and the bug
1951  * only manifests occasionally (once very 10 runs or so).
1952  * -------------------------------------------------------------------------- */
1953
1954 INFO_TABLE_RET(stg_noDuplicate, RET_SMALL)
1955 {
1956     Sp_adj(1);
1957     jump stg_noDuplicatezh;
1958 }
1959
1960 stg_noDuplicatezh
1961 {
1962     STK_CHK_GEN( WDS(1), NO_PTRS, stg_noDuplicatezh );
1963     // leave noDuplicate frame in case the current
1964     // computation is suspended and restarted (see above).
1965     Sp_adj(-1);
1966     Sp(0) = stg_noDuplicate_info;
1967
1968     SAVE_THREAD_STATE();
1969     ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
1970     foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
1971
1972     if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
1973         jump stg_threadFinished;
1974     } else {
1975         LOAD_THREAD_STATE();
1976         ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
1977         // remove the stg_noDuplicate frame if it is still there.
1978         if (Sp(0) == stg_noDuplicate_info) {
1979             Sp_adj(1);
1980         }
1981         jump %ENTRY_CODE(Sp(0));
1982     }
1983 }
1984
1985 /* -----------------------------------------------------------------------------
1986    Misc. primitives
1987    -------------------------------------------------------------------------- */
1988
1989 stg_getApStackValzh
1990 {
1991    W_ ap_stack, offset, val, ok;
1992
1993    /* args: R1 = AP_STACK, R2 = offset */
1994    ap_stack = R1;
1995    offset   = R2;
1996
1997    if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
1998         ok = 1;
1999         val = StgAP_STACK_payload(ap_stack,offset);
2000    } else {
2001         ok = 0;
2002         val = R1;
2003    }
2004    RET_NP(ok,val);
2005 }
2006
2007 // Write the cost center stack of the first argument on stderr; return
2008 // the second.  Possibly only makes sense for already evaluated
2009 // things?
2010 stg_traceCcszh
2011 {
2012     W_ ccs;
2013
2014 #ifdef PROFILING
2015     ccs = StgHeader_ccs(UNTAG(R1));
2016     foreign "C" fprintCCS_stderr(ccs "ptr") [R2];
2017 #endif
2018
2019     R1 = R2;
2020     ENTER();
2021 }
2022
2023 stg_getSparkzh
2024 {
2025    W_ spark;
2026
2027 #ifndef THREADED_RTS
2028    RET_NP(0,ghczmprim_GHCziTypes_False_closure);
2029 #else
2030    (spark) = foreign "C" findSpark(MyCapability());
2031    if (spark != 0) {
2032       RET_NP(1,spark);
2033    } else {
2034       RET_NP(0,ghczmprim_GHCziTypes_False_closure);
2035    }
2036 #endif
2037 }
2038
2039 stg_numSparkszh
2040 {
2041   W_ n;
2042 #ifdef THREADED_RTS
2043   (n) = foreign "C" dequeElements(Capability_sparks(MyCapability()));
2044 #else
2045   n = 0;
2046 #endif
2047   RET_N(n);
2048 }
2049
2050 stg_traceEventzh
2051 {
2052    W_ msg;
2053    msg = R1;
2054
2055 #if defined(TRACING) || defined(DEBUG)
2056
2057    foreign "C" traceUserMsg(MyCapability() "ptr", msg "ptr") [];
2058
2059 #elif defined(DTRACE)
2060
2061    W_ enabled;
2062
2063    // We should go through the macro HASKELLEVENT_USER_MSG_ENABLED from
2064    // RtsProbes.h, but that header file includes unistd.h, which doesn't
2065    // work in Cmm
2066 #if !defined(solaris2_TARGET_OS)
2067    (enabled) = foreign "C" __dtrace_isenabled$HaskellEvent$user__msg$v1() [];
2068 #else
2069    // Solaris' DTrace can't handle the
2070    //     __dtrace_isenabled$HaskellEvent$user__msg$v1
2071    // call above. This call is just for testing whether the user__msg
2072    // probe is enabled, and is here for just performance optimization.
2073    // Since preparation for the probe is not that complex I disable usage of
2074    // this test above for Solaris and enable the probe usage manually
2075    // here. Please note that this does not mean that the probe will be
2076    // used during the runtime! You still need to enable it by consumption
2077    // in your dtrace script as you do with any other probe.
2078    enabled = 1;
2079 #endif
2080    if (enabled != 0) {
2081      foreign "C" dtraceUserMsgWrapper(MyCapability() "ptr", msg "ptr") [];
2082    }
2083
2084 #endif
2085    jump %ENTRY_CODE(Sp(0));
2086 }