rts/PrimOps.cmm

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2004
   4  *
   5  * Out-of-line primitive operations
   6  *
   7  * This file contains the implementations of all the primitive
   8  * operations ("primops") which are not expanded inline.  See
   9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
  10  * this file contains code for most of those with the attribute
  11  * out_of_line=True.
  12  *
  13  * Entry convention: the entry convention for a primop is that all the
  14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
  15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
  16  *
  17  * Return convention: results from a primop are generally returned
  18  * using the ordinary unboxed tuple return convention.  The C-- parser
  19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
  20  * based on the prevailing return convention.
  21  *
  22  * This file is written in a subset of C--, extended with various
  23  * features specific to GHC.  It is compiled by GHC directly.  For the
  24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
  25  *
  26  * ---------------------------------------------------------------------------*/
  27
  28 #include "Cmm.h"
  29
  30 #ifdef __PIC__
  31 import pthread_mutex_lock;
  32 import pthread_mutex_unlock;
  33 #endif
  34 import base_ControlziExceptionziBase_nestedAtomically_closure;
  35 import EnterCriticalSection;
  36 import LeaveCriticalSection;
  37 import ghczmprim_GHCziBool_False_closure;
  38 #if !defined(mingw32_HOST_OS)
  39 import sm_mutex;
  40 #endif
  41
  42 /*-----------------------------------------------------------------------------
  43   Array Primitives
  44
  45   Basically just new*Array - the others are all inline macros.
  46
  47   The size arg is always passed in R1, and the result returned in R1.
  48
  49   The slow entry point is for returning from a heap check, the saved
  50   size argument must be re-loaded from the stack.
  51   -------------------------------------------------------------------------- */
  52
  53 /* for objects that are *less* than the size of a word, make sure we
  54  * round up to the nearest word for the size of the array.
  55  */
  56
  57 stg_newByteArrayzh
  58 {
  59     W_ words, payload_words, n, p;
  60     MAYBE_GC(NO_PTRS,stg_newByteArrayzh);
  61     n = R1;
  62     payload_words = ROUNDUP_BYTES_TO_WDS(n);
  63     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
  64     ("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) [];
  65     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
  66     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
  67     StgArrWords_bytes(p) = n;
  68     RET_P(p);
  69 }
  70
  71 #define BA_ALIGN 16
  72 #define BA_MASK  (BA_ALIGN-1)
  73
  74 stg_newPinnedByteArrayzh
  75 {
  76     W_ words, n, bytes, payload_words, p;
  77
  78     MAYBE_GC(NO_PTRS,stg_newPinnedByteArrayzh);
  79     n = R1;
  80     bytes = n;
  81     /* payload_words is what we will tell the profiler we had to allocate */
  82     payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
  83     /* When we actually allocate memory, we need to allow space for the
  84        header: */
  85     bytes = bytes + SIZEOF_StgArrWords;
  86     /* And we want to align to BA_ALIGN bytes, so we need to allow space
  87        to shift up to BA_ALIGN - 1 bytes: */
  88     bytes = bytes + BA_ALIGN - 1;
  89     /* Now we convert to a number of words: */
  90     words = ROUNDUP_BYTES_TO_WDS(bytes);
  91
  92     ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
  93     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
  94
  95     /* Now we need to move p forward so that the payload is aligned
  96        to BA_ALIGN bytes: */
  97     p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);
  98
  99     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
 100     StgArrWords_bytes(p) = n;
 101     RET_P(p);
 102 }
 103
 104 stg_newAlignedPinnedByteArrayzh
 105 {
 106     W_ words, n, bytes, payload_words, p, alignment;
 107
 108     MAYBE_GC(NO_PTRS,stg_newAlignedPinnedByteArrayzh);
 109     n = R1;
 110     alignment = R2;
 111
 112     bytes = n;
 113
 114     /* payload_words is what we will tell the profiler we had to allocate */
 115     payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
 116
 117     /* When we actually allocate memory, we need to allow space for the
 118        header: */
 119     bytes = bytes + SIZEOF_StgArrWords;
 120     /* And we want to align to <alignment> bytes, so we need to allow space
 121        to shift up to <alignment - 1> bytes: */
 122     bytes = bytes + alignment - 1;
 123     /* Now we convert to a number of words: */
 124     words = ROUNDUP_BYTES_TO_WDS(bytes);
 125
 126     ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
 127     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
 128
 129     /* Now we need to move p forward so that the payload is aligned
 130        to <alignment> bytes. Note that we are assuming that
 131        <alignment> is a power of 2, which is technically not guaranteed */
 132     p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));
 133
 134     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
 135     StgArrWords_bytes(p) = n;
 136     RET_P(p);
 137 }
 138
 139 stg_newArrayzh
 140 {
 141     W_ words, n, init, arr, p, size;
 142     /* Args: R1 = words, R2 = initialisation value */
 143
 144     n = R1;
 145     MAYBE_GC(R2_PTR,stg_newArrayzh);
 146
 147     // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
 148     // in the array, making sure we round up, and then rounding up to a whole
 149     // number of words.
 150     size = n + mutArrPtrsCardWords(n);
 151     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
 152     ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2];
 153     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
 154
 155     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
 156     StgMutArrPtrs_ptrs(arr) = n;
 157     StgMutArrPtrs_size(arr) = size;
 158
 159     // Initialise all elements of the the array with the value in R2
 160     init = R2;
 161     p = arr + SIZEOF_StgMutArrPtrs;
 162   for:
 163     if (p < arr + WDS(words)) {
 164         W_[p] = init;
 165         p = p + WDS(1);
 166         goto for;
 167     }
 168     // Initialise the mark bits with 0
 169   for2:
 170     if (p < arr + WDS(size)) {
 171         W_[p] = 0;
 172         p = p + WDS(1);
 173         goto for2;
 174     }
 175
 176     RET_P(arr);
 177 }
 178
 179 stg_unsafeThawArrayzh
 180 {
 181   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
 182   //
 183   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
 184   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
 185   // it on the mutable list for the GC to remove (removing something from
 186   // the mutable list is not easy).
 187   //
 188   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
 189   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
 190   // to indicate that it is still on the mutable list.
 191   //
 192   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
 193   // either it is on a mut_list, or it isn't.  We adopt the convention that
 194   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
 195   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
 196   // we put it on the mutable list more than once, but it would get scavenged
 197   // multiple times during GC, which would be unnecessarily slow.
 198   //
 199   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
 200         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
 201         recordMutable(R1, R1);
 202         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
 203         RET_P(R1);
 204   } else {
 205         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
 206         RET_P(R1);
 207   }
 208 }
 209
 210 /* -----------------------------------------------------------------------------
 211    MutVar primitives
 212    -------------------------------------------------------------------------- */
 213
 214 stg_newMutVarzh
 215 {
 216     W_ mv;
 217     /* Args: R1 = initialisation value */
 218
 219     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, stg_newMutVarzh);
 220
 221     mv = Hp - SIZEOF_StgMutVar + WDS(1);
 222     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
 223     StgMutVar_var(mv) = R1;
 224
 225     RET_P(mv);
 226 }
 227
 228 stg_atomicModifyMutVarzh
 229 {
 230     W_ mv, f, z, x, y, r, h;
 231     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
 232
 233     /* If x is the current contents of the MutVar#, then
 234        We want to make the new contents point to
 235
 236          (sel_0 (f x))
 237
 238        and the return value is
 239
 240          (sel_1 (f x))
 241
 242         obviously we can share (f x).
 243
 244          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
 245          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
 246          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
 247     */
 248
 249 #if MIN_UPD_SIZE > 1
 250 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
 251 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
 252 #else
 253 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
 254 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
 255 #endif
 256
 257 #if MIN_UPD_SIZE > 2
 258 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
 259 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
 260 #else
 261 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
 262 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
 263 #endif
 264
 265 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
 266
 267    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, stg_atomicModifyMutVarzh);
 268
 269    mv = R1;
 270    f = R2;
 271
 272    TICK_ALLOC_THUNK_2();
 273    CCCS_ALLOC(THUNK_2_SIZE);
 274    z = Hp - THUNK_2_SIZE + WDS(1);
 275    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
 276    LDV_RECORD_CREATE(z);
 277    StgThunk_payload(z,0) = f;
 278
 279    TICK_ALLOC_THUNK_1();
 280    CCCS_ALLOC(THUNK_1_SIZE);
 281    y = z - THUNK_1_SIZE;
 282    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
 283    LDV_RECORD_CREATE(y);
 284    StgThunk_payload(y,0) = z;
 285
 286    TICK_ALLOC_THUNK_1();
 287    CCCS_ALLOC(THUNK_1_SIZE);
 288    r = y - THUNK_1_SIZE;
 289    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
 290    LDV_RECORD_CREATE(r);
 291    StgThunk_payload(r,0) = z;
 292
 293  retry:
 294    x = StgMutVar_var(mv);
 295    StgThunk_payload(z,1) = x;
 296 #ifdef THREADED_RTS
 297    (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) [];
 298    if (h != x) { goto retry; }
 299 #else
 300    StgMutVar_var(mv) = y;
 301 #endif
 302
 303    if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
 304      foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
 305    }
 306
 307    RET_P(r);
 308 }
 309
 310 /* -----------------------------------------------------------------------------
 311    Weak Pointer Primitives
 312    -------------------------------------------------------------------------- */
 313
 314 STRING(stg_weak_msg,"New weak pointer at %p\n")
 315
 316 stg_mkWeakzh
 317 {
 318   /* R1 = key
 319      R2 = value
 320      R3 = finalizer (or NULL)
 321   */
 322   W_ w;
 323
 324   if (R3 == NULL) {
 325     R3 = stg_NO_FINALIZER_closure;
 326   }
 327
 328   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, stg_mkWeakzh );
 329
 330   w = Hp - SIZEOF_StgWeak + WDS(1);
 331   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
 332
 333   // We don't care about cfinalizer here.
 334   // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
 335   // something else?
 336
 337   StgWeak_key(w)        = R1;
 338   StgWeak_value(w)      = R2;
 339   StgWeak_finalizer(w)  = R3;
 340   StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure;
 341
 342   ACQUIRE_LOCK(sm_mutex);
 343   StgWeak_link(w)       = W_[weak_ptr_list];
 344   W_[weak_ptr_list]     = w;
 345   RELEASE_LOCK(sm_mutex);
 346
 347   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
 348
 349   RET_P(w);
 350 }
 351
 352 stg_mkWeakForeignEnvzh
 353 {
 354   /* R1 = key
 355      R2 = value
 356      R3 = finalizer
 357      R4 = pointer
 358      R5 = has environment (0 or 1)
 359      R6 = environment
 360   */
 361   W_ w, payload_words, words, p;
 362
 363   W_ key, val, fptr, ptr, flag, eptr;
 364
 365   key  = R1;
 366   val  = R2;
 367   fptr = R3;
 368   ptr  = R4;
 369   flag = R5;
 370   eptr = R6;
 371
 372   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, stg_mkWeakForeignEnvzh );
 373
 374   w = Hp - SIZEOF_StgWeak + WDS(1);
 375   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
 376
 377   payload_words = 4;
 378   words         = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
 379   ("ptr" p)     = foreign "C" allocate(MyCapability() "ptr", words) [];
 380
 381   TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
 382   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
 383
 384   StgArrWords_bytes(p)     = WDS(payload_words);
 385   StgArrWords_payload(p,0) = fptr;
 386   StgArrWords_payload(p,1) = ptr;
 387   StgArrWords_payload(p,2) = eptr;
 388   StgArrWords_payload(p,3) = flag;
 389
 390   // We don't care about the value here.
 391   // Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else?
 392
 393   StgWeak_key(w)        = key;
 394   StgWeak_value(w)      = val;
 395   StgWeak_finalizer(w)  = stg_NO_FINALIZER_closure;
 396   StgWeak_cfinalizer(w) = p;
 397
 398   ACQUIRE_LOCK(sm_mutex);
 399   StgWeak_link(w)   = W_[weak_ptr_list];
 400   W_[weak_ptr_list] = w;
 401   RELEASE_LOCK(sm_mutex);
 402
 403   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
 404
 405   RET_P(w);
 406 }
 407
 408 stg_finalizzeWeakzh
 409 {
 410   /* R1 = weak ptr
 411    */
 412   W_ w, f, arr;
 413
 414   w = R1;
 415
 416   // already dead?
 417   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
 418       RET_NP(0,stg_NO_FINALIZER_closure);
 419   }
 420
 421   // kill it
 422 #ifdef PROFILING
 423   // @LDV profiling
 424   // A weak pointer is inherently used, so we do not need to call
 425   // LDV_recordDead_FILL_SLOP_DYNAMIC():
 426   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
 427   // or, LDV_recordDead():
 428   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
 429   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
 430   // large as weak pointers, so there is no need to fill the slop, either.
 431   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
 432 #endif
 433
 434   //
 435   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
 436   //
 437   SET_INFO(w,stg_DEAD_WEAK_info);
 438   LDV_RECORD_CREATE(w);
 439
 440   f   = StgWeak_finalizer(w);
 441   arr = StgWeak_cfinalizer(w);
 442
 443   StgDeadWeak_link(w) = StgWeak_link(w);
 444
 445   if (arr != stg_NO_FINALIZER_closure) {
 446     foreign "C" runCFinalizer(StgArrWords_payload(arr,0),
 447                               StgArrWords_payload(arr,1),
 448                               StgArrWords_payload(arr,2),
 449                               StgArrWords_payload(arr,3)) [];
 450   }
 451
 452   /* return the finalizer */
 453   if (f == stg_NO_FINALIZER_closure) {
 454       RET_NP(0,stg_NO_FINALIZER_closure);
 455   } else {
 456       RET_NP(1,f);
 457   }
 458 }
 459
 460 stg_deRefWeakzh
 461 {
 462   /* R1 = weak ptr */
 463   W_ w, code, val;
 464
 465   w = R1;
 466   if (GET_INFO(w) == stg_WEAK_info) {
 467     code = 1;
 468     val = StgWeak_value(w);
 469   } else {
 470     code = 0;
 471     val = w;
 472   }
 473   RET_NP(code,val);
 474 }
 475
 476 /* -----------------------------------------------------------------------------
 477    Floating point operations.
 478    -------------------------------------------------------------------------- */
 479
 480 stg_decodeFloatzuIntzh
 481 {
 482     W_ p;
 483     F_ arg;
 484     W_ mp_tmp1;
 485     W_ mp_tmp_w;
 486
 487     STK_CHK_GEN( WDS(2), NO_PTRS, stg_decodeFloatzuIntzh );
 488
 489     mp_tmp1  = Sp - WDS(1);
 490     mp_tmp_w = Sp - WDS(2);
 491
 492     /* arguments: F1 = Float# */
 493     arg = F1;
 494
 495     /* Perform the operation */
 496     foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
 497
 498     /* returns: (Int# (mantissa), Int# (exponent)) */
 499     RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
 500 }
 501
 502 stg_decodeDoublezu2Intzh
 503 {
 504     D_ arg;
 505     W_ p;
 506     W_ mp_tmp1;
 507     W_ mp_tmp2;
 508     W_ mp_result1;
 509     W_ mp_result2;
 510
 511     STK_CHK_GEN( WDS(4), NO_PTRS, stg_decodeDoublezu2Intzh );
 512
 513     mp_tmp1    = Sp - WDS(1);
 514     mp_tmp2    = Sp - WDS(2);
 515     mp_result1 = Sp - WDS(3);
 516     mp_result2 = Sp - WDS(4);
 517
 518     /* arguments: D1 = Double# */
 519     arg = D1;
 520
 521     /* Perform the operation */
 522     foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
 523                                     mp_result1 "ptr", mp_result2 "ptr",
 524                                     arg) [];
 525
 526     /* returns:
 527        (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
 528     RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
 529 }
 530
 531 /* -----------------------------------------------------------------------------
 532  * Concurrency primitives
 533  * -------------------------------------------------------------------------- */
 534
 535 stg_forkzh
 536 {
 537   /* args: R1 = closure to spark */
 538
 539   MAYBE_GC(R1_PTR, stg_forkzh);
 540
 541   W_ closure;
 542   W_ threadid;
 543   closure = R1;
 544
 545   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
 546                                 RtsFlags_GcFlags_initialStkSize(RtsFlags),
 547                                 closure "ptr") [];
 548
 549   /* start blocked if the current thread is blocked */
 550   StgTSO_flags(threadid) = %lobits16(
 551      TO_W_(StgTSO_flags(threadid)) |
 552      TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));
 553
 554   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
 555
 556   // context switch soon, but not immediately: we don't want every
 557   // forkIO to force a context-switch.
 558   Capability_context_switch(MyCapability()) = 1 :: CInt;
 559
 560   RET_P(threadid);
 561 }
 562
 563 stg_forkOnzh
 564 {
 565   /* args: R1 = cpu, R2 = closure to spark */
 566
 567   MAYBE_GC(R2_PTR, stg_forkOnzh);
 568
 569   W_ cpu;
 570   W_ closure;
 571   W_ threadid;
 572   cpu = R1;
 573   closure = R2;
 574
 575   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
 576                                 RtsFlags_GcFlags_initialStkSize(RtsFlags),
 577                                 closure "ptr") [];
 578
 579   /* start blocked if the current thread is blocked */
 580   StgTSO_flags(threadid) = %lobits16(
 581      TO_W_(StgTSO_flags(threadid)) |
 582      TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));
 583
 584   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
 585
 586   // context switch soon, but not immediately: we don't want every
 587   // forkIO to force a context-switch.
 588   Capability_context_switch(MyCapability()) = 1 :: CInt;
 589
 590   RET_P(threadid);
 591 }
 592
 593 stg_yieldzh
 594 {
 595   jump stg_yield_noregs;
 596 }
 597
 598 stg_myThreadIdzh
 599 {
 600   /* no args. */
 601   RET_P(CurrentTSO);
 602 }
 603
 604 stg_labelThreadzh
 605 {
 606   /* args:
 607         R1 = ThreadId#
 608         R2 = Addr# */
 609 #ifdef DEBUG
 610   foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
 611 #endif
 612   jump %ENTRY_CODE(Sp(0));
 613 }
 614
 615 stg_isCurrentThreadBoundzh
 616 {
 617   /* no args */
 618   W_ r;
 619   (r) = foreign "C" isThreadBound(CurrentTSO) [];
 620   RET_N(r);
 621 }
 622
 623 stg_threadStatuszh
 624 {
 625     /* args: R1 :: ThreadId# */
 626     W_ tso;
 627     W_ why_blocked;
 628     W_ what_next;
 629     W_ ret;
 630
 631     tso = R1;
 632     loop:
 633       if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
 634           tso = StgTSO__link(tso);
 635           goto loop;
 636       }
 637
 638     what_next   = TO_W_(StgTSO_what_next(tso));
 639     why_blocked = TO_W_(StgTSO_why_blocked(tso));
 640     // Note: these two reads are not atomic, so they might end up
 641     // being inconsistent.  It doesn't matter, since we
 642     // only return one or the other.  If we wanted to return the
 643     // contents of block_info too, then we'd have to do some synchronisation.
 644
 645     if (what_next == ThreadComplete) {
 646         ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
 647     } else {
 648         if (what_next == ThreadKilled) {
 649             ret = 17;
 650         } else {
 651             ret = why_blocked;
 652         }
 653     }
 654     RET_N(ret);
 655 }
 656
 657 /* -----------------------------------------------------------------------------
 658  * TVar primitives
 659  * -------------------------------------------------------------------------- */
 660
 661 #define SP_OFF 0
 662
 663 // Catch retry frame ------------------------------------------------------------
 664
 665 INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
 666 #if defined(PROFILING)
 667   W_ unused1, W_ unused2,
 668 #endif
 669   W_ unused3, P_ unused4, P_ unused5)
 670 {
 671    W_ r, frame, trec, outer;
 672
 673    frame = Sp;
 674    trec = StgTSO_trec(CurrentTSO);
 675    outer  = StgTRecHeader_enclosing_trec(trec);
 676    (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
 677    if (r != 0) {
 678      /* Succeeded (either first branch or second branch) */
 679      StgTSO_trec(CurrentTSO) = outer;
 680      Sp = Sp + SIZEOF_StgCatchRetryFrame;
 681      jump %ENTRY_CODE(Sp(SP_OFF));
 682    } else {
 683      /* Did not commit: re-execute */
 684      W_ new_trec;
 685      ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 686      StgTSO_trec(CurrentTSO) = new_trec;
 687      if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
 688        R1 = StgCatchRetryFrame_alt_code(frame);
 689      } else {
 690        R1 = StgCatchRetryFrame_first_code(frame);
 691      }
 692      jump stg_ap_v_fast;
 693    }
 694 }
 695
 696
 697 // Atomically frame ------------------------------------------------------------
 698
 699 INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
 700 #if defined(PROFILING)
 701   W_ unused1, W_ unused2,
 702 #endif
 703   P_ code, P_ next_invariant_to_check, P_ result)
 704 {
 705   W_ frame, trec, valid, next_invariant, q, outer;
 706
 707   frame  = Sp;
 708   trec   = StgTSO_trec(CurrentTSO);
 709   result = R1;
 710   outer  = StgTRecHeader_enclosing_trec(trec);
 711
 712   if (outer == NO_TREC) {
 713     /* First time back at the atomically frame -- pick up invariants */
 714     ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
 715     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
 716     StgAtomicallyFrame_result(frame) = result;
 717
 718   } else {
 719     /* Second/subsequent time back at the atomically frame -- abort the
 720      * tx that's checking the invariant and move on to the next one */
 721     StgTSO_trec(CurrentTSO) = outer;
 722     q = StgAtomicallyFrame_next_invariant_to_check(frame);
 723     StgInvariantCheckQueue_my_execution(q) = trec;
 724     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
 725     /* Don't free trec -- it's linked from q and will be stashed in the
 726      * invariant if we eventually commit. */
 727     q = StgInvariantCheckQueue_next_queue_entry(q);
 728     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
 729     trec = outer;
 730   }
 731
 732   q = StgAtomicallyFrame_next_invariant_to_check(frame);
 733
 734   if (q != END_INVARIANT_CHECK_QUEUE) {
 735     /* We can't commit yet: another invariant to check */
 736     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
 737     StgTSO_trec(CurrentTSO) = trec;
 738
 739     next_invariant = StgInvariantCheckQueue_invariant(q);
 740     R1 = StgAtomicInvariant_code(next_invariant);
 741     jump stg_ap_v_fast;
 742
 743   } else {
 744
 745     /* We've got no more invariants to check, try to commit */
 746     (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
 747     if (valid != 0) {
 748       /* Transaction was valid: commit succeeded */
 749       StgTSO_trec(CurrentTSO) = NO_TREC;
 750       R1 = StgAtomicallyFrame_result(frame);
 751       Sp = Sp + SIZEOF_StgAtomicallyFrame;
 752       jump %ENTRY_CODE(Sp(SP_OFF));
 753     } else {
 754       /* Transaction was not valid: try again */
 755       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
 756       StgTSO_trec(CurrentTSO) = trec;
 757       StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
 758       R1 = StgAtomicallyFrame_code(frame);
 759       jump stg_ap_v_fast;
 760     }
 761   }
 762 }
 763
 764 INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
 765 #if defined(PROFILING)
 766   W_ unused1, W_ unused2,
 767 #endif
 768   P_ code, P_ next_invariant_to_check, P_ result)
 769 {
 770   W_ frame, trec, valid;
 771
 772   frame = Sp;
 773
 774   /* The TSO is currently waiting: should we stop waiting? */
 775   (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
 776   if (valid != 0) {
 777     /* Previous attempt is still valid: no point trying again yet */
 778     jump stg_block_noregs;
 779   } else {
 780     /* Previous attempt is no longer valid: try again */
 781     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
 782     StgTSO_trec(CurrentTSO) = trec;
 783     StgHeader_info(frame) = stg_atomically_frame_info;
 784     R1 = StgAtomicallyFrame_code(frame);
 785     jump stg_ap_v_fast;
 786   }
 787 }
 788
 789 // STM catch frame --------------------------------------------------------------
 790
 791 #define SP_OFF 0
 792
 793 /* Catch frames are very similar to update frames, but when entering
 794  * one we just pop the frame off the stack and perform the correct
 795  * kind of return to the activation record underneath us on the stack.
 796  */
 797
 798 INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
 799 #if defined(PROFILING)
 800   W_ unused1, W_ unused2,
 801 #endif
 802   P_ unused3, P_ unused4)
 803    {
 804       W_ r, frame, trec, outer;
 805       frame = Sp;
 806       trec = StgTSO_trec(CurrentTSO);
 807       outer  = StgTRecHeader_enclosing_trec(trec);
 808       (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
 809       if (r != 0) {
 810         /* Commit succeeded */
 811         StgTSO_trec(CurrentTSO) = outer;
 812         Sp = Sp + SIZEOF_StgCatchSTMFrame;
 813         jump Sp(SP_OFF);
 814       } else {
 815         /* Commit failed */
 816         W_ new_trec;
 817         ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 818         StgTSO_trec(CurrentTSO) = new_trec;
 819         R1 = StgCatchSTMFrame_code(frame);
 820         jump stg_ap_v_fast;
 821       }
 822    }
 823
 824
 825 // Primop definition ------------------------------------------------------------
 826
 827 stg_atomicallyzh
 828 {
 829   W_ frame;
 830   W_ old_trec;
 831   W_ new_trec;
 832
 833   // stmStartTransaction may allocate
 834   MAYBE_GC (R1_PTR, stg_atomicallyzh);
 835
 836   /* Args: R1 = m :: STM a */
 837   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, stg_atomicallyzh);
 838
 839   old_trec = StgTSO_trec(CurrentTSO);
 840
 841   /* Nested transactions are not allowed; raise an exception */
 842   if (old_trec != NO_TREC) {
 843      R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
 844      jump stg_raisezh;
 845   }
 846
 847   /* Set up the atomically frame */
 848   Sp = Sp - SIZEOF_StgAtomicallyFrame;
 849   frame = Sp;
 850
 851   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
 852   StgAtomicallyFrame_code(frame) = R1;
 853   StgAtomicallyFrame_result(frame) = NO_TREC;
 854   StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
 855
 856   /* Start the memory transcation */
 857   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
 858   StgTSO_trec(CurrentTSO) = new_trec;
 859
 860   /* Apply R1 to the realworld token */
 861   jump stg_ap_v_fast;
 862 }
 863
 864
 865 stg_catchSTMzh
 866 {
 867   W_ frame;
 868
 869   /* Args: R1 :: STM a */
 870   /* Args: R2 :: Exception -> STM a */
 871   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, stg_catchSTMzh);
 872
 873   /* Set up the catch frame */
 874   Sp = Sp - SIZEOF_StgCatchSTMFrame;
 875   frame = Sp;
 876
 877   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
 878   StgCatchSTMFrame_handler(frame) = R2;
 879   StgCatchSTMFrame_code(frame) = R1;
 880
 881   /* Start a nested transaction to run the body of the try block in */
 882   W_ cur_trec;
 883   W_ new_trec;
 884   cur_trec = StgTSO_trec(CurrentTSO);
 885   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
 886   StgTSO_trec(CurrentTSO) = new_trec;
 887
 888   /* Apply R1 to the realworld token */
 889   jump stg_ap_v_fast;
 890 }
 891
 892
 893 stg_catchRetryzh
 894 {
 895   W_ frame;
 896   W_ new_trec;
 897   W_ trec;
 898
 899   // stmStartTransaction may allocate
 900   MAYBE_GC (R1_PTR & R2_PTR, stg_catchRetryzh);
 901
 902   /* Args: R1 :: STM a */
 903   /* Args: R2 :: STM a */
 904   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, stg_catchRetryzh);
 905
 906   /* Start a nested transaction within which to run the first code */
 907   trec = StgTSO_trec(CurrentTSO);
 908   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
 909   StgTSO_trec(CurrentTSO) = new_trec;
 910
 911   /* Set up the catch-retry frame */
 912   Sp = Sp - SIZEOF_StgCatchRetryFrame;
 913   frame = Sp;
 914
 915   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
 916   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
 917   StgCatchRetryFrame_first_code(frame) = R1;
 918   StgCatchRetryFrame_alt_code(frame) = R2;
 919
 920   /* Apply R1 to the realworld token */
 921   jump stg_ap_v_fast;
 922 }
 923
 924
 925 stg_retryzh
 926 {
 927   W_ frame_type;
 928   W_ frame;
 929   W_ trec;
 930   W_ outer;
 931   W_ r;
 932
 933   MAYBE_GC (NO_PTRS, stg_retryzh); // STM operations may allocate
 934
 935   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
 936 retry_pop_stack:
 937   StgTSO_sp(CurrentTSO) = Sp;
 938   (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
 939   Sp = StgTSO_sp(CurrentTSO);
 940   frame = Sp;
 941   trec = StgTSO_trec(CurrentTSO);
 942   outer  = StgTRecHeader_enclosing_trec(trec);
 943
 944   if (frame_type == CATCH_RETRY_FRAME) {
 945     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
 946     ASSERT(outer != NO_TREC);
 947     // Abort the transaction attempting the current branch
 948     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
 949     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
 950     if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
 951       // Retry in the first branch: try the alternative
 952       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 953       StgTSO_trec(CurrentTSO) = trec;
 954       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
 955       R1 = StgCatchRetryFrame_alt_code(frame);
 956       jump stg_ap_v_fast;
 957     } else {
 958       // Retry in the alternative code: propagate the retry
 959       StgTSO_trec(CurrentTSO) = outer;
 960       Sp = Sp + SIZEOF_StgCatchRetryFrame;
 961       goto retry_pop_stack;
 962     }
 963   }
 964
 965   // We've reached the ATOMICALLY_FRAME: attempt to wait
 966   ASSERT(frame_type == ATOMICALLY_FRAME);
 967   if (outer != NO_TREC) {
 968     // We called retry while checking invariants, so abort the current
 969     // invariant check (merging its TVar accesses into the parents read
 970     // set so we'll wait on them)
 971     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
 972     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
 973     trec = outer;
 974     StgTSO_trec(CurrentTSO) = trec;
 975     outer  = StgTRecHeader_enclosing_trec(trec);
 976   }
 977   ASSERT(outer == NO_TREC);
 978
 979   (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
 980   if (r != 0) {
 981     // Transaction was valid: stmWait put us on the TVars' queues, we now block
 982     StgHeader_info(frame) = stg_atomically_waiting_frame_info;
 983     Sp = frame;
 984     // Fix up the stack in the unregisterised case: the return convention is different.
 985     R3 = trec; // passing to stmWaitUnblock()
 986     jump stg_block_stmwait;
 987   } else {
 988     // Transaction was not valid: retry immediately
 989     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 990     StgTSO_trec(CurrentTSO) = trec;
 991     R1 = StgAtomicallyFrame_code(frame);
 992     Sp = frame;
 993     jump stg_ap_v_fast;
 994   }
 995 }
 996
 997
 998 stg_checkzh
 999 {
1000   W_ trec, closure;
1001
1002   /* Args: R1 = invariant closure */
1003   MAYBE_GC (R1_PTR, stg_checkzh);
1004
1005   trec = StgTSO_trec(CurrentTSO);
1006   closure = R1;
1007   foreign "C" stmAddInvariantToCheck(MyCapability() "ptr",
1008                                      trec "ptr",
1009                                      closure "ptr") [];
1010
1011   jump %ENTRY_CODE(Sp(0));
1012 }
1013
1014
1015 stg_newTVarzh
1016 {
1017   W_ tv;
1018   W_ new_value;
1019
1020   /* Args: R1 = initialisation value */
1021
1022   MAYBE_GC (R1_PTR, stg_newTVarzh);
1023   new_value = R1;
1024   ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
1025   RET_P(tv);
1026 }
1027
1028
1029 stg_readTVarzh
1030 {
1031   W_ trec;
1032   W_ tvar;
1033   W_ result;
1034
1035   /* Args: R1 = TVar closure */
1036
1037   MAYBE_GC (R1_PTR, stg_readTVarzh); // Call to stmReadTVar may allocate
1038   trec = StgTSO_trec(CurrentTSO);
1039   tvar = R1;
1040   ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
1041
1042   RET_P(result);
1043 }
1044
1045 stg_readTVarIOzh
1046 {
1047     W_ result;
1048
1049 again:
1050     result = StgTVar_current_value(R1);
1051     if (%INFO_PTR(result) == stg_TREC_HEADER_info) {
1052         goto again;
1053     }
1054     RET_P(result);
1055 }
1056
1057 stg_writeTVarzh
1058 {
1059   W_ trec;
1060   W_ tvar;
1061   W_ new_value;
1062
1063   /* Args: R1 = TVar closure */
1064   /*       R2 = New value    */
1065
1066   MAYBE_GC (R1_PTR & R2_PTR, stg_writeTVarzh); // Call to stmWriteTVar may allocate
1067   trec = StgTSO_trec(CurrentTSO);
1068   tvar = R1;
1069   new_value = R2;
1070   foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1071
1072   jump %ENTRY_CODE(Sp(0));
1073 }
1074
1075
1076 /* -----------------------------------------------------------------------------
1077  * MVar primitives
1078  *
1079  * take & putMVar work as follows.  Firstly, an important invariant:
1080  *
1081  *    If the MVar is full, then the blocking queue contains only
1082  *    threads blocked on putMVar, and if the MVar is empty then the
1083  *    blocking queue contains only threads blocked on takeMVar.
1084  *
1085  * takeMvar:
1086  *    MVar empty : then add ourselves to the blocking queue
1087  *    MVar full  : remove the value from the MVar, and
1088  *                 blocking queue empty     : return
1089  *                 blocking queue non-empty : perform the first blocked putMVar
1090  *                                            from the queue, and wake up the
1091  *                                            thread (MVar is now full again)
1092  *
1093  * putMVar is just the dual of the above algorithm.
1094  *
1095  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1096  * the stack of the thread waiting to do the putMVar.  See
1097  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1098  * the stack layout, and the PerformPut and PerformTake macros below.
1099  *
1100  * It is important that a blocked take or put is woken up with the
1101  * take/put already performed, because otherwise there would be a
1102  * small window of vulnerability where the thread could receive an
1103  * exception and never perform its take or put, and we'd end up with a
1104  * deadlock.
1105  *
1106  * -------------------------------------------------------------------------- */
1107
1108 stg_isEmptyMVarzh
1109 {
1110     /* args: R1 = MVar closure */
1111
1112     if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
1113         RET_N(1);
1114     } else {
1115         RET_N(0);
1116     }
1117 }
1118
1119 stg_newMVarzh
1120 {
1121     /* args: none */
1122     W_ mvar;
1123
1124     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, stg_newMVarzh );
1125
1126     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1127     SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
1128         // MVARs start dirty: generation 0 has no mutable list
1129     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1130     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1131     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1132     RET_P(mvar);
1133 }
1134
1135
1136 #define PerformTake(tso, value)                         \
1137     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1138     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1139
1140 #define PerformPut(tso,lval)                    \
1141     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1142     lval = W_[StgTSO_sp(tso) - WDS(1)];
1143
1144 stg_takeMVarzh
1145 {
1146     W_ mvar, val, info, tso, q;
1147
1148     /* args: R1 = MVar closure */
1149     mvar = R1;
1150
1151 #if defined(THREADED_RTS)
1152     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1153 #else
1154     info = GET_INFO(mvar);
1155 #endif
1156
1157     if (info == stg_MVAR_CLEAN_info) {
1158         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
1159     }
1160
1161     /* If the MVar is empty, put ourselves on its blocking queue,
1162      * and wait until we're woken up.
1163      */
1164     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1165
1166         // Note [mvar-heap-check] We want to do the heap check in the
1167         // branch here, to avoid the conditional in the common case.
1168         // However, we've already locked the MVar above, so we better
1169         // be careful to unlock it again if the the heap check fails.
1170         // Unfortunately we don't have an easy way to inject any code
1171         // into the heap check generated by the code generator, so we
1172         // have to do it in stg_gc_gen (see HeapStackCheck.cmm).
1173         HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR, stg_takeMVarzh);
1174
1175         q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
1176
1177         SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
1178         StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
1179         StgMVarTSOQueue_tso(q)  = CurrentTSO;
1180
1181         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1182             StgMVar_head(mvar) = q;
1183         } else {
1184             StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q;
1185             foreign "C" recordClosureMutated(MyCapability() "ptr",
1186                                              StgMVar_tail(mvar)) [];
1187         }
1188         StgTSO__link(CurrentTSO)       = q;
1189         StgTSO_block_info(CurrentTSO)  = mvar;
1190         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1191         StgMVar_tail(mvar)             = q;
1192
1193         R1 = mvar;
1194         jump stg_block_takemvar;
1195     }
1196
1197     /* we got the value... */
1198     val = StgMVar_value(mvar);
1199
1200     q = StgMVar_head(mvar);
1201 loop:
1202     if (q == stg_END_TSO_QUEUE_closure) {
1203         /* No further putMVars, MVar is now empty */
1204         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1205         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1206         RET_P(val);
1207     }
1208     if (StgHeader_info(q) == stg_IND_info ||
1209         StgHeader_info(q) == stg_MSG_NULL_info) {
1210         q = StgInd_indirectee(q);
1211         goto loop;
1212     }
1213
1214     // There are putMVar(s) waiting... wake up the first thread on the queue
1215
1216     tso = StgMVarTSOQueue_tso(q);
1217     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1218     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1219         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1220     }
1221
1222 loop2:
1223     if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
1224         tso = StgTSO__link(tso);
1225         goto loop2;
1226     }
1227
1228     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1229     ASSERT(StgTSO_block_info(tso) == mvar);
1230
1231     // actually perform the putMVar for the thread that we just woke up
1232     PerformPut(tso,StgMVar_value(mvar));
1233
1234     // indicate that the MVar operation has now completed.
1235     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1236
1237     // no need to mark the TSO dirty, we have only written END_TSO_QUEUE.
1238
1239     foreign "C" tryWakeupThread_(MyCapability() "ptr", tso) [];
1240
1241     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1242     RET_P(val);
1243 }
1244
1245
1246 stg_tryTakeMVarzh
1247 {
1248     W_ mvar, val, info, tso, q;
1249
1250     /* args: R1 = MVar closure */
1251     mvar = R1;
1252
1253 #if defined(THREADED_RTS)
1254     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1255 #else
1256     info = GET_INFO(mvar);
1257 #endif
1258
1259     /* If the MVar is empty, put ourselves on its blocking queue,
1260      * and wait until we're woken up.
1261      */
1262     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1263 #if defined(THREADED_RTS)
1264         unlockClosure(mvar, info);
1265 #endif
1266         /* HACK: we need a pointer to pass back,
1267          * so we abuse NO_FINALIZER_closure
1268          */
1269         RET_NP(0, stg_NO_FINALIZER_closure);
1270     }
1271
1272     if (info == stg_MVAR_CLEAN_info) {
1273         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
1274     }
1275
1276     /* we got the value... */
1277     val = StgMVar_value(mvar);
1278
1279     q = StgMVar_head(mvar);
1280 loop:
1281     if (q == stg_END_TSO_QUEUE_closure) {
1282         /* No further putMVars, MVar is now empty */
1283         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1284         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1285         RET_NP(1, val);
1286     }
1287     if (StgHeader_info(q) == stg_IND_info ||
1288         StgHeader_info(q) == stg_MSG_NULL_info) {
1289         q = StgInd_indirectee(q);
1290         goto loop;
1291     }
1292
1293     // There are putMVar(s) waiting... wake up the first thread on the queue
1294
1295     tso = StgMVarTSOQueue_tso(q);
1296     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1297     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1298         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1299     }
1300
1301 loop2:
1302     if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
1303         tso = StgTSO__link(tso);
1304         goto loop2;
1305     }
1306
1307     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1308     ASSERT(StgTSO_block_info(tso) == mvar);
1309
1310     // actually perform the putMVar for the thread that we just woke up
1311     PerformPut(tso,StgMVar_value(mvar));
1312
1313     // indicate that the MVar operation has now completed.
1314     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1315
1316     // no need to mark the TSO dirty, we have only written END_TSO_QUEUE.
1317
1318     foreign "C" tryWakeupThread_(MyCapability() "ptr", tso) [];
1319
1320     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1321     RET_P(val);
1322 }
1323
1324
1325 stg_putMVarzh
1326 {
1327     W_ mvar, val, info, tso, q;
1328
1329     /* args: R1 = MVar, R2 = value */
1330     mvar = R1;
1331     val  = R2;
1332
1333 #if defined(THREADED_RTS)
1334     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1335 #else
1336     info = GET_INFO(mvar);
1337 #endif
1338
1339     if (info == stg_MVAR_CLEAN_info) {
1340         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1341     }
1342
1343     if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
1344
1345         // see Note [mvar-heap-check] above
1346         HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR & R2_PTR, stg_putMVarzh);
1347
1348         q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
1349
1350         SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
1351         StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
1352         StgMVarTSOQueue_tso(q)  = CurrentTSO;
1353
1354         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1355             StgMVar_head(mvar) = q;
1356         } else {
1357             StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q;
1358             foreign "C" recordClosureMutated(MyCapability() "ptr",
1359                                              StgMVar_tail(mvar)) [];
1360         }
1361         StgTSO__link(CurrentTSO)       = q;
1362         StgTSO_block_info(CurrentTSO)  = mvar;
1363         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1364         StgMVar_tail(mvar)             = q;
1365
1366         R1 = mvar;
1367         R2 = val;
1368         jump stg_block_putmvar;
1369     }
1370
1371     q = StgMVar_head(mvar);
1372 loop:
1373     if (q == stg_END_TSO_QUEUE_closure) {
1374         /* No further takes, the MVar is now full. */
1375         StgMVar_value(mvar) = val;
1376         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1377         jump %ENTRY_CODE(Sp(0));
1378     }
1379     if (StgHeader_info(q) == stg_IND_info ||
1380         StgHeader_info(q) == stg_MSG_NULL_info) {
1381         q = StgInd_indirectee(q);
1382         goto loop;
1383     }
1384
1385     // There are takeMVar(s) waiting: wake up the first one
1386
1387     tso = StgMVarTSOQueue_tso(q);
1388     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1389     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1390         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1391     }
1392
1393 loop2:
1394     if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
1395         tso = StgTSO__link(tso);
1396         goto loop2;
1397     }
1398
1399     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1400     ASSERT(StgTSO_block_info(tso) == mvar);
1401
1402     // actually perform the takeMVar
1403     PerformTake(tso, val);
1404
1405     // indicate that the MVar operation has now completed.
1406     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1407
1408     if (TO_W_(StgTSO_dirty(tso)) == 0) {
1409         foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
1410     }
1411
1412     foreign "C" tryWakeupThread_(MyCapability() "ptr", tso) [];
1413
1414     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1415     jump %ENTRY_CODE(Sp(0));
1416 }
1417
1418
1419 stg_tryPutMVarzh
1420 {
1421     W_ mvar, val, info, tso, q;
1422
1423     /* args: R1 = MVar, R2 = value */
1424     mvar = R1;
1425     val  = R2;
1426
1427 #if defined(THREADED_RTS)
1428     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1429 #else
1430     info = GET_INFO(mvar);
1431 #endif
1432
1433     if (info == stg_MVAR_CLEAN_info) {
1434         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1435     }
1436
1437     if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
1438 #if defined(THREADED_RTS)
1439         unlockClosure(mvar, info);
1440 #endif
1441         RET_N(0);
1442     }
1443
1444     q = StgMVar_head(mvar);
1445 loop:
1446     if (q == stg_END_TSO_QUEUE_closure) {
1447         /* No further takes, the MVar is now full. */
1448         StgMVar_value(mvar) = val;
1449         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1450         jump %ENTRY_CODE(Sp(0));
1451     }
1452     if (StgHeader_info(q) == stg_IND_info ||
1453         StgHeader_info(q) == stg_MSG_NULL_info) {
1454         q = StgInd_indirectee(q);
1455         goto loop;
1456     }
1457
1458     // There are takeMVar(s) waiting: wake up the first one
1459
1460     tso = StgMVarTSOQueue_tso(q);
1461     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1462     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1463         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1464     }
1465
1466 loop2:
1467     if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
1468         tso = StgTSO__link(tso);
1469         goto loop2;
1470     }
1471
1472     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1473     ASSERT(StgTSO_block_info(tso) == mvar);
1474
1475     // actually perform the takeMVar
1476     PerformTake(tso, val);
1477
1478     // indicate that the MVar operation has now completed.
1479     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1480
1481     if (TO_W_(StgTSO_dirty(tso)) == 0) {
1482         foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
1483     }
1484
1485     foreign "C" tryWakeupThread_(MyCapability() "ptr", tso) [];
1486
1487     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1488     jump %ENTRY_CODE(Sp(0));
1489 }
1490
1491
1492 /* -----------------------------------------------------------------------------
1493    Stable pointer primitives
1494    -------------------------------------------------------------------------  */
1495
1496 stg_makeStableNamezh
1497 {
1498     W_ index, sn_obj;
1499
1500     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, stg_makeStableNamezh );
1501
1502     (index) = foreign "C" lookupStableName(R1 "ptr") [];
1503
1504     /* Is there already a StableName for this heap object?
1505      *  stable_ptr_table is a pointer to an array of snEntry structs.
1506      */
1507     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1508         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1509         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1510         StgStableName_sn(sn_obj) = index;
1511         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1512     } else {
1513         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1514     }
1515
1516     RET_P(sn_obj);
1517 }
1518
1519
1520 stg_makeStablePtrzh
1521 {
1522     /* Args: R1 = a */
1523     W_ sp;
1524     MAYBE_GC(R1_PTR, stg_makeStablePtrzh);
1525     ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
1526     RET_N(sp);
1527 }
1528
1529 stg_deRefStablePtrzh
1530 {
1531     /* Args: R1 = the stable ptr */
1532     W_ r, sp;
1533     sp = R1;
1534     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1535     RET_P(r);
1536 }
1537
1538 /* -----------------------------------------------------------------------------
1539    Bytecode object primitives
1540    -------------------------------------------------------------------------  */
1541
1542 stg_newBCOzh
1543 {
1544     /* R1 = instrs
1545        R2 = literals
1546        R3 = ptrs
1547        R4 = arity
1548        R5 = bitmap array
1549     */
1550     W_ bco, bitmap_arr, bytes, words;
1551
1552     bitmap_arr = R5;
1553
1554     words = BYTES_TO_WDS(SIZEOF_StgBCO) + BYTE_ARR_WDS(bitmap_arr);
1555     bytes = WDS(words);
1556
1557     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, stg_newBCOzh );
1558
1559     bco = Hp - bytes + WDS(1);
1560     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1561
1562     StgBCO_instrs(bco)     = R1;
1563     StgBCO_literals(bco)   = R2;
1564     StgBCO_ptrs(bco)       = R3;
1565     StgBCO_arity(bco)      = HALF_W_(R4);
1566     StgBCO_size(bco)       = HALF_W_(words);
1567
1568     // Copy the arity/bitmap info into the BCO
1569     W_ i;
1570     i = 0;
1571 for:
1572     if (i < BYTE_ARR_WDS(bitmap_arr)) {
1573         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1574         i = i + 1;
1575         goto for;
1576     }
1577
1578     RET_P(bco);
1579 }
1580
1581
1582 stg_mkApUpd0zh
1583 {
1584     // R1 = the BCO# for the AP
1585     //
1586     W_ ap;
1587
1588     // This function is *only* used to wrap zero-arity BCOs in an
1589     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1590     // saturated and always points directly to a FUN or BCO.
1591     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1592            StgBCO_arity(R1) == HALF_W_(0));
1593
1594     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, stg_mkApUpd0zh);
1595     TICK_ALLOC_UP_THK(0, 0);
1596     CCCS_ALLOC(SIZEOF_StgAP);
1597
1598     ap = Hp - SIZEOF_StgAP + WDS(1);
1599     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1600
1601     StgAP_n_args(ap) = HALF_W_(0);
1602     StgAP_fun(ap) = R1;
1603
1604     RET_P(ap);
1605 }
1606
1607 stg_unpackClosurezh
1608 {
1609 /* args: R1 = closure to analyze */
1610 // TODO: Consider the absence of ptrs or nonptrs as a special case ?
1611
1612     W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
1613     info  = %GET_STD_INFO(UNTAG(R1));
1614
1615     // Some closures have non-standard layout, so we omit those here.
1616     W_ type;
1617     type = TO_W_(%INFO_TYPE(info));
1618     switch [0 .. N_CLOSURE_TYPES] type {
1619     case THUNK_SELECTOR : {
1620         ptrs = 1;
1621         nptrs = 0;
1622         goto out;
1623     }
1624     case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1,
1625          THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
1626         ptrs = 0;
1627         nptrs = 0;
1628         goto out;
1629     }
1630     default: {
1631         ptrs  = TO_W_(%INFO_PTRS(info));
1632         nptrs = TO_W_(%INFO_NPTRS(info));
1633         goto out;
1634     }}
1635 out:
1636
1637     W_ ptrs_arr_sz, ptrs_arr_cards, nptrs_arr_sz;
1638     nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
1639     ptrs_arr_cards = mutArrPtrsCardWords(ptrs);
1640     ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs) + WDS(ptrs_arr_cards);
1641
1642     ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, stg_unpackClosurezh);
1643
1644     W_ clos;
1645     clos = UNTAG(R1);
1646
1647     ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
1648     nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
1649
1650     SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
1651     StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
1652     StgMutArrPtrs_size(ptrs_arr) = ptrs + ptrs_arr_cards;
1653
1654     p = 0;
1655 for:
1656     if(p < ptrs) {
1657          W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
1658          p = p + 1;
1659          goto for;
1660     }
1661     /* We can leave the card table uninitialised, since the array is
1662        allocated in the nursery.  The GC will fill it in if/when the array
1663        is promoted. */
1664
1665     SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
1666     StgArrWords_bytes(nptrs_arr) = WDS(nptrs);
1667     p = 0;
1668 for2:
1669     if(p < nptrs) {
1670          W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
1671          p = p + 1;
1672          goto for2;
1673     }
1674     RET_NPP(info, ptrs_arr, nptrs_arr);
1675 }
1676
1677 /* -----------------------------------------------------------------------------
1678    Thread I/O blocking primitives
1679    -------------------------------------------------------------------------- */
1680
1681 /* Add a thread to the end of the blocked queue. (C-- version of the C
1682  * macro in Schedule.h).
1683  */
1684 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1685     ASSERT(StgTSO__link(tso) == END_TSO_QUEUE);         \
1686     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1687       W_[blocked_queue_hd] = tso;                       \
1688     } else {                                            \
1689       foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \
1690     }                                                   \
1691     W_[blocked_queue_tl] = tso;
1692
1693 stg_waitReadzh
1694 {
1695     /* args: R1 */
1696 #ifdef THREADED_RTS
1697     foreign "C" barf("waitRead# on threaded RTS") never returns;
1698 #else
1699
1700     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1701     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1702     StgTSO_block_info(CurrentTSO) = R1;
1703     // No locking - we're not going to use this interface in the
1704     // threaded RTS anyway.
1705     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1706     jump stg_block_noregs;
1707 #endif
1708 }
1709
1710 stg_waitWritezh
1711 {
1712     /* args: R1 */
1713 #ifdef THREADED_RTS
1714     foreign "C" barf("waitWrite# on threaded RTS") never returns;
1715 #else
1716
1717     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1718     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1719     StgTSO_block_info(CurrentTSO) = R1;
1720     // No locking - we're not going to use this interface in the
1721     // threaded RTS anyway.
1722     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1723     jump stg_block_noregs;
1724 #endif
1725 }
1726
1727
1728 STRING(stg_delayzh_malloc_str, "stg_delayzh")
1729 stg_delayzh
1730 {
1731 #ifdef mingw32_HOST_OS
1732     W_ ares;
1733     CInt reqID;
1734 #else
1735     W_ t, prev, target;
1736 #endif
1737
1738 #ifdef THREADED_RTS
1739     foreign "C" barf("delay# on threaded RTS") never returns;
1740 #else
1741
1742     /* args: R1 (microsecond delay amount) */
1743     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1744     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
1745
1746 #ifdef mingw32_HOST_OS
1747
1748     /* could probably allocate this on the heap instead */
1749     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1750                                             stg_delayzh_malloc_str);
1751     (reqID) = foreign "C" addDelayRequest(R1);
1752     StgAsyncIOResult_reqID(ares)   = reqID;
1753     StgAsyncIOResult_len(ares)     = 0;
1754     StgAsyncIOResult_errCode(ares) = 0;
1755     StgTSO_block_info(CurrentTSO)  = ares;
1756
1757     /* Having all async-blocked threads reside on the blocked_queue
1758      * simplifies matters, so change the status to OnDoProc put the
1759      * delayed thread on the blocked_queue.
1760      */
1761     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1762     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1763     jump stg_block_async_void;
1764
1765 #else
1766
1767     W_ time;
1768     W_ divisor;
1769     (time) = foreign "C" getourtimeofday() [R1];
1770     divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
1771     if (divisor == 0) {
1772         divisor = 50;
1773     }
1774     divisor = divisor * 1000;
1775     target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
1776            + time + 1; /* Add 1 as getourtimeofday rounds down */
1777     StgTSO_block_info(CurrentTSO) = target;
1778
1779     /* Insert the new thread in the sleeping queue. */
1780     prev = NULL;
1781     t = W_[sleeping_queue];
1782 while:
1783     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
1784         prev = t;
1785         t = StgTSO__link(t);
1786         goto while;
1787     }
1788
1789     StgTSO__link(CurrentTSO) = t;
1790     if (prev == NULL) {
1791         W_[sleeping_queue] = CurrentTSO;
1792     } else {
1793         foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) [];
1794     }
1795     jump stg_block_noregs;
1796 #endif
1797 #endif /* !THREADED_RTS */
1798 }
1799
1800
1801 #ifdef mingw32_HOST_OS
1802 STRING(stg_asyncReadzh_malloc_str, "stg_asyncReadzh")
1803 stg_asyncReadzh
1804 {
1805     W_ ares;
1806     CInt reqID;
1807
1808 #ifdef THREADED_RTS
1809     foreign "C" barf("asyncRead# on threaded RTS") never returns;
1810 #else
1811
1812     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1813     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1814     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1815
1816     /* could probably allocate this on the heap instead */
1817     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1818                                             stg_asyncReadzh_malloc_str)
1819                         [R1,R2,R3,R4];
1820     (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
1821     StgAsyncIOResult_reqID(ares)   = reqID;
1822     StgAsyncIOResult_len(ares)     = 0;
1823     StgAsyncIOResult_errCode(ares) = 0;
1824     StgTSO_block_info(CurrentTSO)  = ares;
1825     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1826     jump stg_block_async;
1827 #endif
1828 }
1829
1830 STRING(stg_asyncWritezh_malloc_str, "stg_asyncWritezh")
1831 stg_asyncWritezh
1832 {
1833     W_ ares;
1834     CInt reqID;
1835
1836 #ifdef THREADED_RTS
1837     foreign "C" barf("asyncWrite# on threaded RTS") never returns;
1838 #else
1839
1840     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1841     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1842     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1843
1844     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1845                                             stg_asyncWritezh_malloc_str)
1846                         [R1,R2,R3,R4];
1847     (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
1848
1849     StgAsyncIOResult_reqID(ares)   = reqID;
1850     StgAsyncIOResult_len(ares)     = 0;
1851     StgAsyncIOResult_errCode(ares) = 0;
1852     StgTSO_block_info(CurrentTSO)  = ares;
1853     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1854     jump stg_block_async;
1855 #endif
1856 }
1857
1858 STRING(stg_asyncDoProczh_malloc_str, "stg_asyncDoProczh")
1859 stg_asyncDoProczh
1860 {
1861     W_ ares;
1862     CInt reqID;
1863
1864 #ifdef THREADED_RTS
1865     foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
1866 #else
1867
1868     /* args: R1 = proc, R2 = param */
1869     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1870     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1871
1872     /* could probably allocate this on the heap instead */
1873     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1874                                             stg_asyncDoProczh_malloc_str)
1875                                 [R1,R2];
1876     (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
1877     StgAsyncIOResult_reqID(ares)   = reqID;
1878     StgAsyncIOResult_len(ares)     = 0;
1879     StgAsyncIOResult_errCode(ares) = 0;
1880     StgTSO_block_info(CurrentTSO) = ares;
1881     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1882     jump stg_block_async;
1883 #endif
1884 }
1885 #endif
1886
1887 /* -----------------------------------------------------------------------------
1888  * noDuplicate#
1889  *
1890  * noDuplicate# tries to ensure that none of the thunks under
1891  * evaluation by the current thread are also under evaluation by
1892  * another thread.  It relies on *both* threads doing noDuplicate#;
1893  * the second one will get blocked if they are duplicating some work.
1894  *
1895  * The idea is that noDuplicate# is used within unsafePerformIO to
1896  * ensure that the IO operation is performed at most once.
1897  * noDuplicate# calls threadPaused which acquires an exclusive lock on
1898  * all the thunks currently under evaluation by the current thread.
1899  *
1900  * Consider the following scenario.  There is a thunk A, whose
1901  * evaluation requires evaluating thunk B, where thunk B is an
1902  * unsafePerformIO.  Two threads, 1 and 2, bother enter A.  Thread 2
1903  * is pre-empted before it enters B, and claims A by blackholing it
1904  * (in threadPaused).  Thread 1 now enters B, and calls noDuplicate#.
1905  *
1906  *      thread 1                      thread 2
1907  *   +-----------+                 +---------------+
1908  *   |    -------+-----> A <-------+-------        |
1909  *   |  update   |   BLACKHOLE     | marked_update |
1910  *   +-----------+                 +---------------+
1911  *   |           |                 |               |
1912  *        ...                             ...
1913  *   |           |                 +---------------+
1914  *   +-----------+
1915  *   |     ------+-----> B
1916  *   |  update   |   BLACKHOLE
1917  *   +-----------+
1918  *
1919  * At this point: A is a blackhole, owned by thread 2.  noDuplicate#
1920  * calls threadPaused, which walks up the stack and
1921  *  - claims B on behalf of thread 1
1922  *  - then it reaches the update frame for A, which it sees is already
1923  *    a BLACKHOLE and is therefore owned by another thread.  Since
1924  *    thread 1 is duplicating work, the computation up to the update
1925  *    frame for A is suspended, including thunk B.
1926  *  - thunk B, which is an unsafePerformIO, has now been reverted to
1927  *    an AP_STACK which could be duplicated - BAD!
1928  *  - The solution is as follows: before calling threadPaused, we
1929  *    leave a frame on the stack (stg_noDuplicate_info) that will call
1930  *    noDuplicate# again if the current computation is suspended and
1931  *    restarted.
1932  *
1933  * See the test program in concurrent/prog003 for a way to demonstrate
1934  * this.  It needs to be run with +RTS -N3 or greater, and the bug
1935  * only manifests occasionally (once very 10 runs or so).
1936  * -------------------------------------------------------------------------- */
1937
1938 INFO_TABLE_RET(stg_noDuplicate, RET_SMALL)
1939 {
1940     Sp_adj(1);
1941     jump stg_noDuplicatezh;
1942 }
1943
1944 stg_noDuplicatezh
1945 {
1946     STK_CHK_GEN( WDS(1), NO_PTRS, stg_noDuplicatezh );
1947     // leave noDuplicate frame in case the current
1948     // computation is suspended and restarted (see above).
1949     Sp_adj(-1);
1950     Sp(0) = stg_noDuplicate_info;
1951
1952     SAVE_THREAD_STATE();
1953     ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
1954     foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
1955
1956     if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
1957         jump stg_threadFinished;
1958     } else {
1959         LOAD_THREAD_STATE();
1960         ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
1961         // remove the stg_noDuplicate frame if it is still there.
1962         if (Sp(0) == stg_noDuplicate_info) {
1963             Sp_adj(1);
1964         }
1965         jump %ENTRY_CODE(Sp(0));
1966     }
1967 }
1968
1969 /* -----------------------------------------------------------------------------
1970    Misc. primitives
1971    -------------------------------------------------------------------------- */
1972
1973 stg_getApStackValzh
1974 {
1975    W_ ap_stack, offset, val, ok;
1976
1977    /* args: R1 = AP_STACK, R2 = offset */
1978    ap_stack = R1;
1979    offset   = R2;
1980
1981    if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
1982         ok = 1;
1983         val = StgAP_STACK_payload(ap_stack,offset);
1984    } else {
1985         ok = 0;
1986         val = R1;
1987    }
1988    RET_NP(ok,val);
1989 }
1990
1991 // Write the cost center stack of the first argument on stderr; return
1992 // the second.  Possibly only makes sense for already evaluated
1993 // things?
1994 stg_traceCcszh
1995 {
1996     W_ ccs;
1997
1998 #ifdef PROFILING
1999     ccs = StgHeader_ccs(UNTAG(R1));
2000     foreign "C" fprintCCS_stderr(ccs "ptr") [R2];
2001 #endif
2002
2003     R1 = R2;
2004     ENTER();
2005 }
2006
2007 stg_getSparkzh
2008 {
2009    W_ spark;
2010
2011 #ifndef THREADED_RTS
2012    RET_NP(0,ghczmprim_GHCziBool_False_closure);
2013 #else
2014    (spark) = foreign "C" findSpark(MyCapability());
2015    if (spark != 0) {
2016       RET_NP(1,spark);
2017    } else {
2018       RET_NP(0,ghczmprim_GHCziBool_False_closure);
2019    }
2020 #endif
2021 }
2022
2023 stg_numSparkszh
2024 {
2025   W_ n;
2026 #ifdef THREADED_RTS
2027   (n) = foreign "C" dequeElements(Capability_sparks(MyCapability()));
2028 #else
2029   n = 0;
2030 #endif
2031   RET_N(n);
2032 }
2033
2034 stg_traceEventzh
2035 {
2036    W_ msg;
2037    msg = R1;
2038
2039 #if defined(TRACING) || defined(DEBUG)
2040
2041    foreign "C" traceUserMsg(MyCapability() "ptr", msg "ptr") [];
2042
2043 #elif defined(DTRACE)
2044
2045    W_ enabled;
2046
2047    // We should go through the macro HASKELLEVENT_USER_MSG_ENABLED from
2048    // RtsProbes.h, but that header file includes unistd.h, which doesn't
2049    // work in Cmm
2050    (enabled) = foreign "C" __dtrace_isenabled$HaskellEvent$user__msg$v1() [];
2051    if (enabled != 0) {
2052      foreign "C" dtraceUserMsgWrapper(MyCapability() "ptr", msg "ptr") [];
2053    }
2054
2055 #endif
2056    jump %ENTRY_CODE(Sp(0));
2057 }