rts/PrimOps.cmm

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2004
   4  *
   5  * Out-of-line primitive operations
   6  *
   7  * This file contains the implementations of all the primitive
   8  * operations ("primops") which are not expanded inline.  See
   9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
  10  * this file contains code for most of those with the attribute
  11  * out_of_line=True.
  12  *
  13  * Entry convention: the entry convention for a primop is that all the
  14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
  15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
  16  *
  17  * Return convention: results from a primop are generally returned
  18  * using the ordinary unboxed tuple return convention.  The C-- parser
  19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
  20  * based on the prevailing return convention.
  21  *
  22  * This file is written in a subset of C--, extended with various
  23  * features specific to GHC.  It is compiled by GHC directly.  For the
  24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
  25  *
  26  * ---------------------------------------------------------------------------*/
  27
  28 #include "Cmm.h"
  29
  30 #ifdef __PIC__
  31 import pthread_mutex_lock;
  32 import pthread_mutex_unlock;
  33 #endif
  34 import base_ControlziExceptionziBase_nestedAtomically_closure;
  35 import EnterCriticalSection;
  36 import LeaveCriticalSection;
  37 import ghczmprim_GHCziBool_False_closure;
  38 #if !defined(mingw32_HOST_OS)
  39 import sm_mutex;
  40 #endif
  41
  42 /*-----------------------------------------------------------------------------
  43   Array Primitives
  44
  45   Basically just new*Array - the others are all inline macros.
  46
  47   The size arg is always passed in R1, and the result returned in R1.
  48
  49   The slow entry point is for returning from a heap check, the saved
  50   size argument must be re-loaded from the stack.
  51   -------------------------------------------------------------------------- */
  52
  53 /* for objects that are *less* than the size of a word, make sure we
  54  * round up to the nearest word for the size of the array.
  55  */
  56
  57 stg_newByteArrayzh
  58 {
  59     W_ words, payload_words, n, p;
  60     MAYBE_GC(NO_PTRS,stg_newByteArrayzh);
  61     n = R1;
  62     payload_words = ROUNDUP_BYTES_TO_WDS(n);
  63     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
  64     ("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) [];
  65     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
  66     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
  67     StgArrWords_words(p) = payload_words;
  68     RET_P(p);
  69 }
  70
  71 #define BA_ALIGN 16
  72 #define BA_MASK  (BA_ALIGN-1)
  73
  74 stg_newPinnedByteArrayzh
  75 {
  76     W_ words, bytes, payload_words, p;
  77
  78     MAYBE_GC(NO_PTRS,stg_newPinnedByteArrayzh);
  79     bytes = R1;
  80     /* payload_words is what we will tell the profiler we had to allocate */
  81     payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
  82     /* When we actually allocate memory, we need to allow space for the
  83        header: */
  84     bytes = bytes + SIZEOF_StgArrWords;
  85     /* And we want to align to BA_ALIGN bytes, so we need to allow space
  86        to shift up to BA_ALIGN - 1 bytes: */
  87     bytes = bytes + BA_ALIGN - 1;
  88     /* Now we convert to a number of words: */
  89     words = ROUNDUP_BYTES_TO_WDS(bytes);
  90
  91     ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
  92     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
  93
  94     /* Now we need to move p forward so that the payload is aligned
  95        to BA_ALIGN bytes: */
  96     p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);
  97
  98     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
  99     StgArrWords_words(p) = payload_words;
 100     RET_P(p);
 101 }
 102
 103 stg_newAlignedPinnedByteArrayzh
 104 {
 105     W_ words, bytes, payload_words, p, alignment;
 106
 107     MAYBE_GC(NO_PTRS,stg_newAlignedPinnedByteArrayzh);
 108     bytes = R1;
 109     alignment = R2;
 110
 111     /* payload_words is what we will tell the profiler we had to allocate */
 112     payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
 113
 114     /* When we actually allocate memory, we need to allow space for the
 115        header: */
 116     bytes = bytes + SIZEOF_StgArrWords;
 117     /* And we want to align to <alignment> bytes, so we need to allow space
 118        to shift up to <alignment - 1> bytes: */
 119     bytes = bytes + alignment - 1;
 120     /* Now we convert to a number of words: */
 121     words = ROUNDUP_BYTES_TO_WDS(bytes);
 122
 123     ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
 124     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
 125
 126     /* Now we need to move p forward so that the payload is aligned
 127        to <alignment> bytes. Note that we are assuming that
 128        <alignment> is a power of 2, which is technically not guaranteed */
 129     p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));
 130
 131     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
 132     StgArrWords_words(p) = payload_words;
 133     RET_P(p);
 134 }
 135
 136 stg_newArrayzh
 137 {
 138     W_ words, n, init, arr, p, size;
 139     /* Args: R1 = words, R2 = initialisation value */
 140
 141     n = R1;
 142     MAYBE_GC(R2_PTR,stg_newArrayzh);
 143
 144     // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
 145     // in the array, making sure we round up, and then rounding up to a whole
 146     // number of words.
 147     size = n + mutArrPtrsCardWords(n);
 148     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
 149     ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2];
 150     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
 151
 152     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
 153     StgMutArrPtrs_ptrs(arr) = n;
 154     StgMutArrPtrs_size(arr) = size;
 155
 156     // Initialise all elements of the the array with the value in R2
 157     init = R2;
 158     p = arr + SIZEOF_StgMutArrPtrs;
 159   for:
 160     if (p < arr + WDS(words)) {
 161         W_[p] = init;
 162         p = p + WDS(1);
 163         goto for;
 164     }
 165     // Initialise the mark bits with 0
 166   for2:
 167     if (p < arr + WDS(size)) {
 168         W_[p] = 0;
 169         p = p + WDS(1);
 170         goto for2;
 171     }
 172
 173     RET_P(arr);
 174 }
 175
 176 stg_unsafeThawArrayzh
 177 {
 178   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
 179   //
 180   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
 181   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
 182   // it on the mutable list for the GC to remove (removing something from
 183   // the mutable list is not easy).
 184   //
 185   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
 186   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
 187   // to indicate that it is still on the mutable list.
 188   //
 189   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
 190   // either it is on a mut_list, or it isn't.  We adopt the convention that
 191   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
 192   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
 193   // we put it on the mutable list more than once, but it would get scavenged
 194   // multiple times during GC, which would be unnecessarily slow.
 195   //
 196   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
 197         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
 198         recordMutable(R1, R1);
 199         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
 200         RET_P(R1);
 201   } else {
 202         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
 203         RET_P(R1);
 204   }
 205 }
 206
 207 /* -----------------------------------------------------------------------------
 208    MutVar primitives
 209    -------------------------------------------------------------------------- */
 210
 211 stg_newMutVarzh
 212 {
 213     W_ mv;
 214     /* Args: R1 = initialisation value */
 215
 216     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, stg_newMutVarzh);
 217
 218     mv = Hp - SIZEOF_StgMutVar + WDS(1);
 219     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
 220     StgMutVar_var(mv) = R1;
 221
 222     RET_P(mv);
 223 }
 224
 225 stg_atomicModifyMutVarzh
 226 {
 227     W_ mv, f, z, x, y, r, h;
 228     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
 229
 230     /* If x is the current contents of the MutVar#, then
 231        We want to make the new contents point to
 232
 233          (sel_0 (f x))
 234
 235        and the return value is
 236
 237          (sel_1 (f x))
 238
 239         obviously we can share (f x).
 240
 241          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
 242          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
 243          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
 244     */
 245
 246 #if MIN_UPD_SIZE > 1
 247 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
 248 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
 249 #else
 250 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
 251 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
 252 #endif
 253
 254 #if MIN_UPD_SIZE > 2
 255 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
 256 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
 257 #else
 258 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
 259 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
 260 #endif
 261
 262 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
 263
 264    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, stg_atomicModifyMutVarzh);
 265
 266    mv = R1;
 267    f = R2;
 268
 269    TICK_ALLOC_THUNK_2();
 270    CCCS_ALLOC(THUNK_2_SIZE);
 271    z = Hp - THUNK_2_SIZE + WDS(1);
 272    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
 273    LDV_RECORD_CREATE(z);
 274    StgThunk_payload(z,0) = f;
 275
 276    TICK_ALLOC_THUNK_1();
 277    CCCS_ALLOC(THUNK_1_SIZE);
 278    y = z - THUNK_1_SIZE;
 279    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
 280    LDV_RECORD_CREATE(y);
 281    StgThunk_payload(y,0) = z;
 282
 283    TICK_ALLOC_THUNK_1();
 284    CCCS_ALLOC(THUNK_1_SIZE);
 285    r = y - THUNK_1_SIZE;
 286    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
 287    LDV_RECORD_CREATE(r);
 288    StgThunk_payload(r,0) = z;
 289
 290  retry:
 291    x = StgMutVar_var(mv);
 292    StgThunk_payload(z,1) = x;
 293 #ifdef THREADED_RTS
 294    (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) [];
 295    if (h != x) { goto retry; }
 296 #else
 297    StgMutVar_var(mv) = y;
 298 #endif
 299
 300    if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
 301      foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
 302    }
 303
 304    RET_P(r);
 305 }
 306
 307 /* -----------------------------------------------------------------------------
 308    Weak Pointer Primitives
 309    -------------------------------------------------------------------------- */
 310
 311 STRING(stg_weak_msg,"New weak pointer at %p\n")
 312
 313 stg_mkWeakzh
 314 {
 315   /* R1 = key
 316      R2 = value
 317      R3 = finalizer (or NULL)
 318   */
 319   W_ w;
 320
 321   if (R3 == NULL) {
 322     R3 = stg_NO_FINALIZER_closure;
 323   }
 324
 325   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, stg_mkWeakzh );
 326
 327   w = Hp - SIZEOF_StgWeak + WDS(1);
 328   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
 329
 330   // We don't care about cfinalizer here.
 331   // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
 332   // something else?
 333
 334   StgWeak_key(w)        = R1;
 335   StgWeak_value(w)      = R2;
 336   StgWeak_finalizer(w)  = R3;
 337   StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure;
 338
 339   ACQUIRE_LOCK(sm_mutex);
 340   StgWeak_link(w)       = W_[weak_ptr_list];
 341   W_[weak_ptr_list]     = w;
 342   RELEASE_LOCK(sm_mutex);
 343
 344   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
 345
 346   RET_P(w);
 347 }
 348
 349 stg_mkWeakForeignEnvzh
 350 {
 351   /* R1 = key
 352      R2 = value
 353      R3 = finalizer
 354      R4 = pointer
 355      R5 = has environment (0 or 1)
 356      R6 = environment
 357   */
 358   W_ w, payload_words, words, p;
 359
 360   W_ key, val, fptr, ptr, flag, eptr;
 361
 362   key  = R1;
 363   val  = R2;
 364   fptr = R3;
 365   ptr  = R4;
 366   flag = R5;
 367   eptr = R6;
 368
 369   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, stg_mkWeakForeignEnvzh );
 370
 371   w = Hp - SIZEOF_StgWeak + WDS(1);
 372   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
 373
 374   payload_words = 4;
 375   words         = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
 376   ("ptr" p)     = foreign "C" allocate(MyCapability() "ptr", words) [];
 377
 378   TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
 379   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
 380
 381   StgArrWords_words(p)     = payload_words;
 382   StgArrWords_payload(p,0) = fptr;
 383   StgArrWords_payload(p,1) = ptr;
 384   StgArrWords_payload(p,2) = eptr;
 385   StgArrWords_payload(p,3) = flag;
 386
 387   // We don't care about the value here.
 388   // Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else?
 389
 390   StgWeak_key(w)        = key;
 391   StgWeak_value(w)      = val;
 392   StgWeak_finalizer(w)  = stg_NO_FINALIZER_closure;
 393   StgWeak_cfinalizer(w) = p;
 394
 395   ACQUIRE_LOCK(sm_mutex);
 396   StgWeak_link(w)   = W_[weak_ptr_list];
 397   W_[weak_ptr_list] = w;
 398   RELEASE_LOCK(sm_mutex);
 399
 400   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
 401
 402   RET_P(w);
 403 }
 404
 405 stg_finalizzeWeakzh
 406 {
 407   /* R1 = weak ptr
 408    */
 409   W_ w, f, arr;
 410
 411   w = R1;
 412
 413   // already dead?
 414   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
 415       RET_NP(0,stg_NO_FINALIZER_closure);
 416   }
 417
 418   // kill it
 419 #ifdef PROFILING
 420   // @LDV profiling
 421   // A weak pointer is inherently used, so we do not need to call
 422   // LDV_recordDead_FILL_SLOP_DYNAMIC():
 423   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
 424   // or, LDV_recordDead():
 425   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
 426   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
 427   // large as weak pointers, so there is no need to fill the slop, either.
 428   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
 429 #endif
 430
 431   //
 432   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
 433   //
 434   SET_INFO(w,stg_DEAD_WEAK_info);
 435   LDV_RECORD_CREATE(w);
 436
 437   f   = StgWeak_finalizer(w);
 438   arr = StgWeak_cfinalizer(w);
 439
 440   StgDeadWeak_link(w) = StgWeak_link(w);
 441
 442   if (arr != stg_NO_FINALIZER_closure) {
 443     foreign "C" runCFinalizer(StgArrWords_payload(arr,0),
 444                               StgArrWords_payload(arr,1),
 445                               StgArrWords_payload(arr,2),
 446                               StgArrWords_payload(arr,3)) [];
 447   }
 448
 449   /* return the finalizer */
 450   if (f == stg_NO_FINALIZER_closure) {
 451       RET_NP(0,stg_NO_FINALIZER_closure);
 452   } else {
 453       RET_NP(1,f);
 454   }
 455 }
 456
 457 stg_deRefWeakzh
 458 {
 459   /* R1 = weak ptr */
 460   W_ w, code, val;
 461
 462   w = R1;
 463   if (GET_INFO(w) == stg_WEAK_info) {
 464     code = 1;
 465     val = StgWeak_value(w);
 466   } else {
 467     code = 0;
 468     val = w;
 469   }
 470   RET_NP(code,val);
 471 }
 472
 473 /* -----------------------------------------------------------------------------
 474    Floating point operations.
 475    -------------------------------------------------------------------------- */
 476
 477 stg_decodeFloatzuIntzh
 478 {
 479     W_ p;
 480     F_ arg;
 481     W_ mp_tmp1;
 482     W_ mp_tmp_w;
 483
 484     STK_CHK_GEN( WDS(2), NO_PTRS, stg_decodeFloatzuIntzh );
 485
 486     mp_tmp1  = Sp - WDS(1);
 487     mp_tmp_w = Sp - WDS(2);
 488
 489     /* arguments: F1 = Float# */
 490     arg = F1;
 491
 492     /* Perform the operation */
 493     foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
 494
 495     /* returns: (Int# (mantissa), Int# (exponent)) */
 496     RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
 497 }
 498
 499 stg_decodeDoublezu2Intzh
 500 {
 501     D_ arg;
 502     W_ p;
 503     W_ mp_tmp1;
 504     W_ mp_tmp2;
 505     W_ mp_result1;
 506     W_ mp_result2;
 507
 508     STK_CHK_GEN( WDS(4), NO_PTRS, stg_decodeDoublezu2Intzh );
 509
 510     mp_tmp1    = Sp - WDS(1);
 511     mp_tmp2    = Sp - WDS(2);
 512     mp_result1 = Sp - WDS(3);
 513     mp_result2 = Sp - WDS(4);
 514
 515     /* arguments: D1 = Double# */
 516     arg = D1;
 517
 518     /* Perform the operation */
 519     foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
 520                                     mp_result1 "ptr", mp_result2 "ptr",
 521                                     arg) [];
 522
 523     /* returns:
 524        (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
 525     RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
 526 }
 527
 528 /* -----------------------------------------------------------------------------
 529  * Concurrency primitives
 530  * -------------------------------------------------------------------------- */
 531
 532 stg_forkzh
 533 {
 534   /* args: R1 = closure to spark */
 535
 536   MAYBE_GC(R1_PTR, stg_forkzh);
 537
 538   W_ closure;
 539   W_ threadid;
 540   closure = R1;
 541
 542   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
 543                                 RtsFlags_GcFlags_initialStkSize(RtsFlags),
 544                                 closure "ptr") [];
 545
 546   /* start blocked if the current thread is blocked */
 547   StgTSO_flags(threadid) = %lobits16(
 548      TO_W_(StgTSO_flags(threadid)) |
 549      TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));
 550
 551   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
 552
 553   // context switch soon, but not immediately: we don't want every
 554   // forkIO to force a context-switch.
 555   Capability_context_switch(MyCapability()) = 1 :: CInt;
 556
 557   RET_P(threadid);
 558 }
 559
 560 stg_forkOnzh
 561 {
 562   /* args: R1 = cpu, R2 = closure to spark */
 563
 564   MAYBE_GC(R2_PTR, stg_forkOnzh);
 565
 566   W_ cpu;
 567   W_ closure;
 568   W_ threadid;
 569   cpu = R1;
 570   closure = R2;
 571
 572   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
 573                                 RtsFlags_GcFlags_initialStkSize(RtsFlags),
 574                                 closure "ptr") [];
 575
 576   /* start blocked if the current thread is blocked */
 577   StgTSO_flags(threadid) = %lobits16(
 578      TO_W_(StgTSO_flags(threadid)) |
 579      TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));
 580
 581   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
 582
 583   // context switch soon, but not immediately: we don't want every
 584   // forkIO to force a context-switch.
 585   Capability_context_switch(MyCapability()) = 1 :: CInt;
 586
 587   RET_P(threadid);
 588 }
 589
 590 stg_yieldzh
 591 {
 592   jump stg_yield_noregs;
 593 }
 594
 595 stg_myThreadIdzh
 596 {
 597   /* no args. */
 598   RET_P(CurrentTSO);
 599 }
 600
 601 stg_labelThreadzh
 602 {
 603   /* args:
 604         R1 = ThreadId#
 605         R2 = Addr# */
 606 #ifdef DEBUG
 607   foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
 608 #endif
 609   jump %ENTRY_CODE(Sp(0));
 610 }
 611
 612 stg_isCurrentThreadBoundzh
 613 {
 614   /* no args */
 615   W_ r;
 616   (r) = foreign "C" isThreadBound(CurrentTSO) [];
 617   RET_N(r);
 618 }
 619
 620 stg_threadStatuszh
 621 {
 622     /* args: R1 :: ThreadId# */
 623     W_ tso;
 624     W_ why_blocked;
 625     W_ what_next;
 626     W_ ret;
 627
 628     tso = R1;
 629     loop:
 630       if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
 631           tso = StgTSO__link(tso);
 632           goto loop;
 633       }
 634
 635     what_next   = TO_W_(StgTSO_what_next(tso));
 636     why_blocked = TO_W_(StgTSO_why_blocked(tso));
 637     // Note: these two reads are not atomic, so they might end up
 638     // being inconsistent.  It doesn't matter, since we
 639     // only return one or the other.  If we wanted to return the
 640     // contents of block_info too, then we'd have to do some synchronisation.
 641
 642     if (what_next == ThreadComplete) {
 643         ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
 644     } else {
 645         if (what_next == ThreadKilled) {
 646             ret = 17;
 647         } else {
 648             ret = why_blocked;
 649         }
 650     }
 651     RET_N(ret);
 652 }
 653
 654 /* -----------------------------------------------------------------------------
 655  * TVar primitives
 656  * -------------------------------------------------------------------------- */
 657
 658 #define SP_OFF 0
 659
 660 // Catch retry frame ------------------------------------------------------------
 661
 662 INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
 663 #if defined(PROFILING)
 664   W_ unused1, W_ unused2,
 665 #endif
 666   W_ unused3, P_ unused4, P_ unused5)
 667 {
 668    W_ r, frame, trec, outer;
 669
 670    frame = Sp;
 671    trec = StgTSO_trec(CurrentTSO);
 672    outer  = StgTRecHeader_enclosing_trec(trec);
 673    (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
 674    if (r != 0) {
 675      /* Succeeded (either first branch or second branch) */
 676      StgTSO_trec(CurrentTSO) = outer;
 677      Sp = Sp + SIZEOF_StgCatchRetryFrame;
 678      jump %ENTRY_CODE(Sp(SP_OFF));
 679    } else {
 680      /* Did not commit: re-execute */
 681      W_ new_trec;
 682      ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 683      StgTSO_trec(CurrentTSO) = new_trec;
 684      if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
 685        R1 = StgCatchRetryFrame_alt_code(frame);
 686      } else {
 687        R1 = StgCatchRetryFrame_first_code(frame);
 688      }
 689      jump stg_ap_v_fast;
 690    }
 691 }
 692
 693
 694 // Atomically frame ------------------------------------------------------------
 695
 696 INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
 697 #if defined(PROFILING)
 698   W_ unused1, W_ unused2,
 699 #endif
 700   P_ code, P_ next_invariant_to_check, P_ result)
 701 {
 702   W_ frame, trec, valid, next_invariant, q, outer;
 703
 704   frame  = Sp;
 705   trec   = StgTSO_trec(CurrentTSO);
 706   result = R1;
 707   outer  = StgTRecHeader_enclosing_trec(trec);
 708
 709   if (outer == NO_TREC) {
 710     /* First time back at the atomically frame -- pick up invariants */
 711     ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
 712     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
 713     StgAtomicallyFrame_result(frame) = result;
 714
 715   } else {
 716     /* Second/subsequent time back at the atomically frame -- abort the
 717      * tx that's checking the invariant and move on to the next one */
 718     StgTSO_trec(CurrentTSO) = outer;
 719     q = StgAtomicallyFrame_next_invariant_to_check(frame);
 720     StgInvariantCheckQueue_my_execution(q) = trec;
 721     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
 722     /* Don't free trec -- it's linked from q and will be stashed in the
 723      * invariant if we eventually commit. */
 724     q = StgInvariantCheckQueue_next_queue_entry(q);
 725     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
 726     trec = outer;
 727   }
 728
 729   q = StgAtomicallyFrame_next_invariant_to_check(frame);
 730
 731   if (q != END_INVARIANT_CHECK_QUEUE) {
 732     /* We can't commit yet: another invariant to check */
 733     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
 734     StgTSO_trec(CurrentTSO) = trec;
 735
 736     next_invariant = StgInvariantCheckQueue_invariant(q);
 737     R1 = StgAtomicInvariant_code(next_invariant);
 738     jump stg_ap_v_fast;
 739
 740   } else {
 741
 742     /* We've got no more invariants to check, try to commit */
 743     (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
 744     if (valid != 0) {
 745       /* Transaction was valid: commit succeeded */
 746       StgTSO_trec(CurrentTSO) = NO_TREC;
 747       R1 = StgAtomicallyFrame_result(frame);
 748       Sp = Sp + SIZEOF_StgAtomicallyFrame;
 749       jump %ENTRY_CODE(Sp(SP_OFF));
 750     } else {
 751       /* Transaction was not valid: try again */
 752       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
 753       StgTSO_trec(CurrentTSO) = trec;
 754       StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
 755       R1 = StgAtomicallyFrame_code(frame);
 756       jump stg_ap_v_fast;
 757     }
 758   }
 759 }
 760
 761 INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
 762 #if defined(PROFILING)
 763   W_ unused1, W_ unused2,
 764 #endif
 765   P_ code, P_ next_invariant_to_check, P_ result)
 766 {
 767   W_ frame, trec, valid;
 768
 769   frame = Sp;
 770
 771   /* The TSO is currently waiting: should we stop waiting? */
 772   (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
 773   if (valid != 0) {
 774     /* Previous attempt is still valid: no point trying again yet */
 775     jump stg_block_noregs;
 776   } else {
 777     /* Previous attempt is no longer valid: try again */
 778     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
 779     StgTSO_trec(CurrentTSO) = trec;
 780     StgHeader_info(frame) = stg_atomically_frame_info;
 781     R1 = StgAtomicallyFrame_code(frame);
 782     jump stg_ap_v_fast;
 783   }
 784 }
 785
 786 // STM catch frame --------------------------------------------------------------
 787
 788 #define SP_OFF 0
 789
 790 /* Catch frames are very similar to update frames, but when entering
 791  * one we just pop the frame off the stack and perform the correct
 792  * kind of return to the activation record underneath us on the stack.
 793  */
 794
 795 INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
 796 #if defined(PROFILING)
 797   W_ unused1, W_ unused2,
 798 #endif
 799   P_ unused3, P_ unused4)
 800    {
 801       W_ r, frame, trec, outer;
 802       frame = Sp;
 803       trec = StgTSO_trec(CurrentTSO);
 804       outer  = StgTRecHeader_enclosing_trec(trec);
 805       (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
 806       if (r != 0) {
 807         /* Commit succeeded */
 808         StgTSO_trec(CurrentTSO) = outer;
 809         Sp = Sp + SIZEOF_StgCatchSTMFrame;
 810         jump Sp(SP_OFF);
 811       } else {
 812         /* Commit failed */
 813         W_ new_trec;
 814         ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 815         StgTSO_trec(CurrentTSO) = new_trec;
 816         R1 = StgCatchSTMFrame_code(frame);
 817         jump stg_ap_v_fast;
 818       }
 819    }
 820
 821
 822 // Primop definition ------------------------------------------------------------
 823
 824 stg_atomicallyzh
 825 {
 826   W_ frame;
 827   W_ old_trec;
 828   W_ new_trec;
 829
 830   // stmStartTransaction may allocate
 831   MAYBE_GC (R1_PTR, stg_atomicallyzh);
 832
 833   /* Args: R1 = m :: STM a */
 834   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, stg_atomicallyzh);
 835
 836   old_trec = StgTSO_trec(CurrentTSO);
 837
 838   /* Nested transactions are not allowed; raise an exception */
 839   if (old_trec != NO_TREC) {
 840      R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
 841      jump stg_raisezh;
 842   }
 843
 844   /* Set up the atomically frame */
 845   Sp = Sp - SIZEOF_StgAtomicallyFrame;
 846   frame = Sp;
 847
 848   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
 849   StgAtomicallyFrame_code(frame) = R1;
 850   StgAtomicallyFrame_result(frame) = NO_TREC;
 851   StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
 852
 853   /* Start the memory transcation */
 854   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
 855   StgTSO_trec(CurrentTSO) = new_trec;
 856
 857   /* Apply R1 to the realworld token */
 858   jump stg_ap_v_fast;
 859 }
 860
 861
 862 stg_catchSTMzh
 863 {
 864   W_ frame;
 865
 866   /* Args: R1 :: STM a */
 867   /* Args: R2 :: Exception -> STM a */
 868   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, stg_catchSTMzh);
 869
 870   /* Set up the catch frame */
 871   Sp = Sp - SIZEOF_StgCatchSTMFrame;
 872   frame = Sp;
 873
 874   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
 875   StgCatchSTMFrame_handler(frame) = R2;
 876   StgCatchSTMFrame_code(frame) = R1;
 877
 878   /* Start a nested transaction to run the body of the try block in */
 879   W_ cur_trec;
 880   W_ new_trec;
 881   cur_trec = StgTSO_trec(CurrentTSO);
 882   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
 883   StgTSO_trec(CurrentTSO) = new_trec;
 884
 885   /* Apply R1 to the realworld token */
 886   jump stg_ap_v_fast;
 887 }
 888
 889
 890 stg_catchRetryzh
 891 {
 892   W_ frame;
 893   W_ new_trec;
 894   W_ trec;
 895
 896   // stmStartTransaction may allocate
 897   MAYBE_GC (R1_PTR & R2_PTR, stg_catchRetryzh);
 898
 899   /* Args: R1 :: STM a */
 900   /* Args: R2 :: STM a */
 901   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, stg_catchRetryzh);
 902
 903   /* Start a nested transaction within which to run the first code */
 904   trec = StgTSO_trec(CurrentTSO);
 905   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
 906   StgTSO_trec(CurrentTSO) = new_trec;
 907
 908   /* Set up the catch-retry frame */
 909   Sp = Sp - SIZEOF_StgCatchRetryFrame;
 910   frame = Sp;
 911
 912   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
 913   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
 914   StgCatchRetryFrame_first_code(frame) = R1;
 915   StgCatchRetryFrame_alt_code(frame) = R2;
 916
 917   /* Apply R1 to the realworld token */
 918   jump stg_ap_v_fast;
 919 }
 920
 921
 922 stg_retryzh
 923 {
 924   W_ frame_type;
 925   W_ frame;
 926   W_ trec;
 927   W_ outer;
 928   W_ r;
 929
 930   MAYBE_GC (NO_PTRS, stg_retryzh); // STM operations may allocate
 931
 932   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
 933 retry_pop_stack:
 934   StgTSO_sp(CurrentTSO) = Sp;
 935   (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
 936   Sp = StgTSO_sp(CurrentTSO);
 937   frame = Sp;
 938   trec = StgTSO_trec(CurrentTSO);
 939   outer  = StgTRecHeader_enclosing_trec(trec);
 940
 941   if (frame_type == CATCH_RETRY_FRAME) {
 942     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
 943     ASSERT(outer != NO_TREC);
 944     // Abort the transaction attempting the current branch
 945     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
 946     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
 947     if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
 948       // Retry in the first branch: try the alternative
 949       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 950       StgTSO_trec(CurrentTSO) = trec;
 951       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
 952       R1 = StgCatchRetryFrame_alt_code(frame);
 953       jump stg_ap_v_fast;
 954     } else {
 955       // Retry in the alternative code: propagate the retry
 956       StgTSO_trec(CurrentTSO) = outer;
 957       Sp = Sp + SIZEOF_StgCatchRetryFrame;
 958       goto retry_pop_stack;
 959     }
 960   }
 961
 962   // We've reached the ATOMICALLY_FRAME: attempt to wait
 963   ASSERT(frame_type == ATOMICALLY_FRAME);
 964   if (outer != NO_TREC) {
 965     // We called retry while checking invariants, so abort the current
 966     // invariant check (merging its TVar accesses into the parents read
 967     // set so we'll wait on them)
 968     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
 969     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
 970     trec = outer;
 971     StgTSO_trec(CurrentTSO) = trec;
 972     outer  = StgTRecHeader_enclosing_trec(trec);
 973   }
 974   ASSERT(outer == NO_TREC);
 975
 976   (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
 977   if (r != 0) {
 978     // Transaction was valid: stmWait put us on the TVars' queues, we now block
 979     StgHeader_info(frame) = stg_atomically_waiting_frame_info;
 980     Sp = frame;
 981     // Fix up the stack in the unregisterised case: the return convention is different.
 982     R3 = trec; // passing to stmWaitUnblock()
 983     jump stg_block_stmwait;
 984   } else {
 985     // Transaction was not valid: retry immediately
 986     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
 987     StgTSO_trec(CurrentTSO) = trec;
 988     R1 = StgAtomicallyFrame_code(frame);
 989     Sp = frame;
 990     jump stg_ap_v_fast;
 991   }
 992 }
 993
 994
 995 stg_checkzh
 996 {
 997   W_ trec, closure;
 998
 999   /* Args: R1 = invariant closure */
1000   MAYBE_GC (R1_PTR, stg_checkzh);
1001
1002   trec = StgTSO_trec(CurrentTSO);
1003   closure = R1;
1004   foreign "C" stmAddInvariantToCheck(MyCapability() "ptr",
1005                                      trec "ptr",
1006                                      closure "ptr") [];
1007
1008   jump %ENTRY_CODE(Sp(0));
1009 }
1010
1011
1012 stg_newTVarzh
1013 {
1014   W_ tv;
1015   W_ new_value;
1016
1017   /* Args: R1 = initialisation value */
1018
1019   MAYBE_GC (R1_PTR, stg_newTVarzh);
1020   new_value = R1;
1021   ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
1022   RET_P(tv);
1023 }
1024
1025
1026 stg_readTVarzh
1027 {
1028   W_ trec;
1029   W_ tvar;
1030   W_ result;
1031
1032   /* Args: R1 = TVar closure */
1033
1034   MAYBE_GC (R1_PTR, stg_readTVarzh); // Call to stmReadTVar may allocate
1035   trec = StgTSO_trec(CurrentTSO);
1036   tvar = R1;
1037   ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
1038
1039   RET_P(result);
1040 }
1041
1042 stg_readTVarIOzh
1043 {
1044     W_ result;
1045
1046 again:
1047     result = StgTVar_current_value(R1);
1048     if (%INFO_PTR(result) == stg_TREC_HEADER_info) {
1049         goto again;
1050     }
1051     RET_P(result);
1052 }
1053
1054 stg_writeTVarzh
1055 {
1056   W_ trec;
1057   W_ tvar;
1058   W_ new_value;
1059
1060   /* Args: R1 = TVar closure */
1061   /*       R2 = New value    */
1062
1063   MAYBE_GC (R1_PTR & R2_PTR, stg_writeTVarzh); // Call to stmWriteTVar may allocate
1064   trec = StgTSO_trec(CurrentTSO);
1065   tvar = R1;
1066   new_value = R2;
1067   foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1068
1069   jump %ENTRY_CODE(Sp(0));
1070 }
1071
1072
1073 /* -----------------------------------------------------------------------------
1074  * MVar primitives
1075  *
1076  * take & putMVar work as follows.  Firstly, an important invariant:
1077  *
1078  *    If the MVar is full, then the blocking queue contains only
1079  *    threads blocked on putMVar, and if the MVar is empty then the
1080  *    blocking queue contains only threads blocked on takeMVar.
1081  *
1082  * takeMvar:
1083  *    MVar empty : then add ourselves to the blocking queue
1084  *    MVar full  : remove the value from the MVar, and
1085  *                 blocking queue empty     : return
1086  *                 blocking queue non-empty : perform the first blocked putMVar
1087  *                                            from the queue, and wake up the
1088  *                                            thread (MVar is now full again)
1089  *
1090  * putMVar is just the dual of the above algorithm.
1091  *
1092  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1093  * the stack of the thread waiting to do the putMVar.  See
1094  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1095  * the stack layout, and the PerformPut and PerformTake macros below.
1096  *
1097  * It is important that a blocked take or put is woken up with the
1098  * take/put already performed, because otherwise there would be a
1099  * small window of vulnerability where the thread could receive an
1100  * exception and never perform its take or put, and we'd end up with a
1101  * deadlock.
1102  *
1103  * -------------------------------------------------------------------------- */
1104
1105 stg_isEmptyMVarzh
1106 {
1107     /* args: R1 = MVar closure */
1108
1109     if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
1110         RET_N(1);
1111     } else {
1112         RET_N(0);
1113     }
1114 }
1115
1116 stg_newMVarzh
1117 {
1118     /* args: none */
1119     W_ mvar;
1120
1121     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, stg_newMVarzh );
1122
1123     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1124     SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
1125         // MVARs start dirty: generation 0 has no mutable list
1126     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1127     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1128     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1129     RET_P(mvar);
1130 }
1131
1132
1133 #define PerformTake(tso, value)                         \
1134     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1135     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1136
1137 #define PerformPut(tso,lval)                    \
1138     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1139     lval = W_[StgTSO_sp(tso) - WDS(1)];
1140
1141 stg_takeMVarzh
1142 {
1143     W_ mvar, val, info, tso, q;
1144
1145     /* args: R1 = MVar closure */
1146     mvar = R1;
1147
1148 #if defined(THREADED_RTS)
1149     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1150 #else
1151     info = GET_INFO(mvar);
1152 #endif
1153
1154     if (info == stg_MVAR_CLEAN_info) {
1155         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
1156     }
1157
1158     /* If the MVar is empty, put ourselves on its blocking queue,
1159      * and wait until we're woken up.
1160      */
1161     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1162
1163         // Note [mvar-heap-check] We want to do the heap check in the
1164         // branch here, to avoid the conditional in the common case.
1165         // However, we've already locked the MVar above, so we better
1166         // be careful to unlock it again if the the heap check fails.
1167         // Unfortunately we don't have an easy way to inject any code
1168         // into the heap check generated by the code generator, so we
1169         // have to do it in stg_gc_gen (see HeapStackCheck.cmm).
1170         HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR, stg_takeMVarzh);
1171
1172         q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
1173
1174         StgHeader_info(q) = stg_MVAR_TSO_QUEUE_info;
1175         StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
1176         StgMVarTSOQueue_tso(q)  = CurrentTSO;
1177
1178         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1179             StgMVar_head(mvar) = q;
1180         } else {
1181             StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q;
1182             foreign "C" recordClosureMutated(MyCapability() "ptr",
1183                                              StgMVar_tail(mvar)) [];
1184         }
1185         StgTSO__link(CurrentTSO)       = q;
1186         StgTSO_block_info(CurrentTSO)  = mvar;
1187         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1188         StgMVar_tail(mvar)             = q;
1189
1190         R1 = mvar;
1191         jump stg_block_takemvar;
1192     }
1193
1194     /* we got the value... */
1195     val = StgMVar_value(mvar);
1196
1197     q = StgMVar_head(mvar);
1198 loop:
1199     if (q == stg_END_TSO_QUEUE_closure) {
1200         /* No further putMVars, MVar is now empty */
1201         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1202         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1203         RET_P(val);
1204     }
1205     if (StgHeader_info(q) == stg_IND_info ||
1206         StgHeader_info(q) == stg_MSG_NULL_info) {
1207         q = StgInd_indirectee(q);
1208         goto loop;
1209     }
1210
1211     // There are putMVar(s) waiting... wake up the first thread on the queue
1212
1213     tso = StgMVarTSOQueue_tso(q);
1214     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1215     ASSERT(StgTSO_block_info(tso) == mvar);
1216     // actually perform the putMVar for the thread that we just woke up
1217     PerformPut(tso,StgMVar_value(mvar));
1218
1219     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1220     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1221         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1222     }
1223
1224     // indicate that the putMVar has now completed:
1225     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1226
1227     // no need to mark the TSO dirty, we have only written END_TSO_QUEUE.
1228
1229     foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
1230
1231     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1232     RET_P(val);
1233 }
1234
1235
1236 stg_tryTakeMVarzh
1237 {
1238     W_ mvar, val, info, tso, q;
1239
1240     /* args: R1 = MVar closure */
1241     mvar = R1;
1242
1243 #if defined(THREADED_RTS)
1244     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1245 #else
1246     info = GET_INFO(mvar);
1247 #endif
1248
1249     /* If the MVar is empty, put ourselves on its blocking queue,
1250      * and wait until we're woken up.
1251      */
1252     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1253 #if defined(THREADED_RTS)
1254         unlockClosure(mvar, info);
1255 #endif
1256         /* HACK: we need a pointer to pass back,
1257          * so we abuse NO_FINALIZER_closure
1258          */
1259         RET_NP(0, stg_NO_FINALIZER_closure);
1260     }
1261
1262     if (info == stg_MVAR_CLEAN_info) {
1263         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
1264     }
1265
1266     /* we got the value... */
1267     val = StgMVar_value(mvar);
1268
1269     q = StgMVar_head(mvar);
1270 loop:
1271     if (q == stg_END_TSO_QUEUE_closure) {
1272         /* No further putMVars, MVar is now empty */
1273         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1274         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1275         RET_NP(1, val);
1276     }
1277     if (StgHeader_info(q) == stg_IND_info ||
1278         StgHeader_info(q) == stg_MSG_NULL_info) {
1279         q = StgInd_indirectee(q);
1280         goto loop;
1281     }
1282
1283     // There are putMVar(s) waiting... wake up the first thread on the queue
1284
1285     tso = StgMVarTSOQueue_tso(q);
1286     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1287     ASSERT(StgTSO_block_info(tso) == mvar);
1288     // actually perform the putMVar for the thread that we just woke up
1289     PerformPut(tso,StgMVar_value(mvar));
1290
1291     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1292     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1293         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1294     }
1295
1296     // indicate that the putMVar has now completed:
1297     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1298
1299     // no need to mark the TSO dirty, we have only written END_TSO_QUEUE.
1300
1301     foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
1302
1303     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1304     RET_P(val);
1305 }
1306
1307
1308 stg_putMVarzh
1309 {
1310     W_ mvar, val, info, tso, q;
1311
1312     /* args: R1 = MVar, R2 = value */
1313     mvar = R1;
1314     val  = R2;
1315
1316 #if defined(THREADED_RTS)
1317     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1318 #else
1319     info = GET_INFO(mvar);
1320 #endif
1321
1322     if (info == stg_MVAR_CLEAN_info) {
1323         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1324     }
1325
1326     if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
1327
1328         // see Note [mvar-heap-check] above
1329         HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR|R2_PTR, stg_putMVarzh);
1330
1331         q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
1332
1333         StgHeader_info(q) = stg_MVAR_TSO_QUEUE_info;
1334         StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
1335         StgMVarTSOQueue_tso(q)  = CurrentTSO;
1336
1337         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1338             StgMVar_head(mvar) = q;
1339         } else {
1340             StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q;
1341             foreign "C" recordClosureMutated(MyCapability() "ptr",
1342                                              StgMVar_tail(mvar)) [];
1343         }
1344         StgTSO__link(CurrentTSO)       = q;
1345         StgTSO_block_info(CurrentTSO)  = mvar;
1346         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1347         StgMVar_tail(mvar)             = q;
1348
1349         R1 = mvar;
1350         R2 = val;
1351         jump stg_block_putmvar;
1352     }
1353
1354     q = StgMVar_head(mvar);
1355 loop:
1356     if (q == stg_END_TSO_QUEUE_closure) {
1357         /* No further takes, the MVar is now full. */
1358         StgMVar_value(mvar) = val;
1359         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1360         jump %ENTRY_CODE(Sp(0));
1361     }
1362     if (StgHeader_info(q) == stg_IND_info ||
1363         StgHeader_info(q) == stg_MSG_NULL_info) {
1364         q = StgInd_indirectee(q);
1365         goto loop;
1366     }
1367
1368     // There are takeMVar(s) waiting: wake up the first one
1369
1370     tso = StgMVarTSOQueue_tso(q);
1371     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1372     ASSERT(StgTSO_block_info(tso) == mvar);
1373     // actually perform the takeMVar
1374     PerformTake(tso, val);
1375
1376     if (TO_W_(StgTSO_dirty(tso)) == 0) {
1377         foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
1378     }
1379
1380     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1381     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1382         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1383     }
1384
1385     // indicate that the takeMVar has now completed:
1386     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1387
1388     foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
1389
1390     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1391     jump %ENTRY_CODE(Sp(0));
1392 }
1393
1394
1395 stg_tryPutMVarzh
1396 {
1397     W_ mvar, val, info, tso, q;
1398
1399     /* args: R1 = MVar, R2 = value */
1400     mvar = R1;
1401     val  = R2;
1402
1403 #if defined(THREADED_RTS)
1404     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1405 #else
1406     info = GET_INFO(mvar);
1407 #endif
1408
1409     if (info == stg_MVAR_CLEAN_info) {
1410         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1411     }
1412
1413     if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
1414 #if defined(THREADED_RTS)
1415         unlockClosure(mvar, info);
1416 #endif
1417         RET_N(0);
1418     }
1419
1420     q = StgMVar_head(mvar);
1421 loop:
1422     if (q == stg_END_TSO_QUEUE_closure) {
1423         /* No further takes, the MVar is now full. */
1424         StgMVar_value(mvar) = val;
1425         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1426         jump %ENTRY_CODE(Sp(0));
1427     }
1428     if (StgHeader_info(q) == stg_IND_info ||
1429         StgHeader_info(q) == stg_MSG_NULL_info) {
1430         q = StgInd_indirectee(q);
1431         goto loop;
1432     }
1433
1434     /* There are takeMVar(s) waiting: wake up the first one
1435      */
1436     // There are takeMVar(s) waiting: wake up the first one
1437
1438     tso = StgMVarTSOQueue_tso(q);
1439     ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
1440     ASSERT(StgTSO_block_info(tso) == mvar);
1441     // actually perform the takeMVar
1442     PerformTake(tso, val);
1443
1444     if (TO_W_(StgTSO_dirty(tso)) == 0) {
1445         foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
1446     }
1447
1448     StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
1449     if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1450         StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1451     }
1452
1453     // indicate that the takeMVar has now completed:
1454     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
1455
1456     foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
1457
1458     unlockClosure(mvar, stg_MVAR_DIRTY_info);
1459     jump %ENTRY_CODE(Sp(0));
1460 }
1461
1462
1463 /* -----------------------------------------------------------------------------
1464    Stable pointer primitives
1465    -------------------------------------------------------------------------  */
1466
1467 stg_makeStableNamezh
1468 {
1469     W_ index, sn_obj;
1470
1471     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, stg_makeStableNamezh );
1472
1473     (index) = foreign "C" lookupStableName(R1 "ptr") [];
1474
1475     /* Is there already a StableName for this heap object?
1476      *  stable_ptr_table is a pointer to an array of snEntry structs.
1477      */
1478     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1479         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1480         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1481         StgStableName_sn(sn_obj) = index;
1482         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1483     } else {
1484         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1485     }
1486
1487     RET_P(sn_obj);
1488 }
1489
1490
1491 stg_makeStablePtrzh
1492 {
1493     /* Args: R1 = a */
1494     W_ sp;
1495     MAYBE_GC(R1_PTR, stg_makeStablePtrzh);
1496     ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
1497     RET_N(sp);
1498 }
1499
1500 stg_deRefStablePtrzh
1501 {
1502     /* Args: R1 = the stable ptr */
1503     W_ r, sp;
1504     sp = R1;
1505     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1506     RET_P(r);
1507 }
1508
1509 /* -----------------------------------------------------------------------------
1510    Bytecode object primitives
1511    -------------------------------------------------------------------------  */
1512
1513 stg_newBCOzh
1514 {
1515     /* R1 = instrs
1516        R2 = literals
1517        R3 = ptrs
1518        R4 = arity
1519        R5 = bitmap array
1520     */
1521     W_ bco, bitmap_arr, bytes, words;
1522
1523     bitmap_arr = R5;
1524
1525     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1526     bytes = WDS(words);
1527
1528     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, stg_newBCOzh );
1529
1530     bco = Hp - bytes + WDS(1);
1531     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1532
1533     StgBCO_instrs(bco)     = R1;
1534     StgBCO_literals(bco)   = R2;
1535     StgBCO_ptrs(bco)       = R3;
1536     StgBCO_arity(bco)      = HALF_W_(R4);
1537     StgBCO_size(bco)       = HALF_W_(words);
1538
1539     // Copy the arity/bitmap info into the BCO
1540     W_ i;
1541     i = 0;
1542 for:
1543     if (i < StgArrWords_words(bitmap_arr)) {
1544         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1545         i = i + 1;
1546         goto for;
1547     }
1548
1549     RET_P(bco);
1550 }
1551
1552
1553 stg_mkApUpd0zh
1554 {
1555     // R1 = the BCO# for the AP
1556     //
1557     W_ ap;
1558
1559     // This function is *only* used to wrap zero-arity BCOs in an
1560     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1561     // saturated and always points directly to a FUN or BCO.
1562     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1563            StgBCO_arity(R1) == HALF_W_(0));
1564
1565     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, stg_mkApUpd0zh);
1566     TICK_ALLOC_UP_THK(0, 0);
1567     CCCS_ALLOC(SIZEOF_StgAP);
1568
1569     ap = Hp - SIZEOF_StgAP + WDS(1);
1570     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1571
1572     StgAP_n_args(ap) = HALF_W_(0);
1573     StgAP_fun(ap) = R1;
1574
1575     RET_P(ap);
1576 }
1577
1578 stg_unpackClosurezh
1579 {
1580 /* args: R1 = closure to analyze */
1581 // TODO: Consider the absence of ptrs or nonptrs as a special case ?
1582
1583     W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
1584     info  = %GET_STD_INFO(UNTAG(R1));
1585
1586     // Some closures have non-standard layout, so we omit those here.
1587     W_ type;
1588     type = TO_W_(%INFO_TYPE(info));
1589     switch [0 .. N_CLOSURE_TYPES] type {
1590     case THUNK_SELECTOR : {
1591         ptrs = 1;
1592         nptrs = 0;
1593         goto out;
1594     }
1595     case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1,
1596          THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
1597         ptrs = 0;
1598         nptrs = 0;
1599         goto out;
1600     }
1601     default: {
1602         ptrs  = TO_W_(%INFO_PTRS(info));
1603         nptrs = TO_W_(%INFO_NPTRS(info));
1604         goto out;
1605     }}
1606 out:
1607
1608     W_ ptrs_arr_sz, ptrs_arr_cards, nptrs_arr_sz;
1609     nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
1610     ptrs_arr_cards = mutArrPtrsCardWords(ptrs);
1611     ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs) + WDS(ptrs_arr_cards);
1612
1613     ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, stg_unpackClosurezh);
1614
1615     W_ clos;
1616     clos = UNTAG(R1);
1617
1618     ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
1619     nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
1620
1621     SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
1622     StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
1623     StgMutArrPtrs_size(ptrs_arr) = ptrs + ptrs_arr_cards;
1624
1625     p = 0;
1626 for:
1627     if(p < ptrs) {
1628          W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
1629          p = p + 1;
1630          goto for;
1631     }
1632     /* We can leave the card table uninitialised, since the array is
1633        allocated in the nursery.  The GC will fill it in if/when the array
1634        is promoted. */
1635
1636     SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
1637     StgArrWords_words(nptrs_arr) = nptrs;
1638     p = 0;
1639 for2:
1640     if(p < nptrs) {
1641          W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
1642          p = p + 1;
1643          goto for2;
1644     }
1645     RET_NPP(info, ptrs_arr, nptrs_arr);
1646 }
1647
1648 /* -----------------------------------------------------------------------------
1649    Thread I/O blocking primitives
1650    -------------------------------------------------------------------------- */
1651
1652 /* Add a thread to the end of the blocked queue. (C-- version of the C
1653  * macro in Schedule.h).
1654  */
1655 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1656     ASSERT(StgTSO__link(tso) == END_TSO_QUEUE);         \
1657     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1658       W_[blocked_queue_hd] = tso;                       \
1659     } else {                                            \
1660       foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \
1661     }                                                   \
1662     W_[blocked_queue_tl] = tso;
1663
1664 stg_waitReadzh
1665 {
1666     /* args: R1 */
1667 #ifdef THREADED_RTS
1668     foreign "C" barf("waitRead# on threaded RTS") never returns;
1669 #else
1670
1671     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1672     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1673     StgTSO_block_info(CurrentTSO) = R1;
1674     // No locking - we're not going to use this interface in the
1675     // threaded RTS anyway.
1676     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1677     jump stg_block_noregs;
1678 #endif
1679 }
1680
1681 stg_waitWritezh
1682 {
1683     /* args: R1 */
1684 #ifdef THREADED_RTS
1685     foreign "C" barf("waitWrite# on threaded RTS") never returns;
1686 #else
1687
1688     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1689     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1690     StgTSO_block_info(CurrentTSO) = R1;
1691     // No locking - we're not going to use this interface in the
1692     // threaded RTS anyway.
1693     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1694     jump stg_block_noregs;
1695 #endif
1696 }
1697
1698
1699 STRING(stg_delayzh_malloc_str, "stg_delayzh")
1700 stg_delayzh
1701 {
1702 #ifdef mingw32_HOST_OS
1703     W_ ares;
1704     CInt reqID;
1705 #else
1706     W_ t, prev, target;
1707 #endif
1708
1709 #ifdef THREADED_RTS
1710     foreign "C" barf("delay# on threaded RTS") never returns;
1711 #else
1712
1713     /* args: R1 (microsecond delay amount) */
1714     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1715     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
1716
1717 #ifdef mingw32_HOST_OS
1718
1719     /* could probably allocate this on the heap instead */
1720     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1721                                             stg_delayzh_malloc_str);
1722     (reqID) = foreign "C" addDelayRequest(R1);
1723     StgAsyncIOResult_reqID(ares)   = reqID;
1724     StgAsyncIOResult_len(ares)     = 0;
1725     StgAsyncIOResult_errCode(ares) = 0;
1726     StgTSO_block_info(CurrentTSO)  = ares;
1727
1728     /* Having all async-blocked threads reside on the blocked_queue
1729      * simplifies matters, so change the status to OnDoProc put the
1730      * delayed thread on the blocked_queue.
1731      */
1732     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1733     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1734     jump stg_block_async_void;
1735
1736 #else
1737
1738     W_ time;
1739     W_ divisor;
1740     (time) = foreign "C" getourtimeofday() [R1];
1741     divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
1742     if (divisor == 0) {
1743         divisor = 50;
1744     }
1745     divisor = divisor * 1000;
1746     target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
1747            + time + 1; /* Add 1 as getourtimeofday rounds down */
1748     StgTSO_block_info(CurrentTSO) = target;
1749
1750     /* Insert the new thread in the sleeping queue. */
1751     prev = NULL;
1752     t = W_[sleeping_queue];
1753 while:
1754     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
1755         prev = t;
1756         t = StgTSO__link(t);
1757         goto while;
1758     }
1759
1760     StgTSO__link(CurrentTSO) = t;
1761     if (prev == NULL) {
1762         W_[sleeping_queue] = CurrentTSO;
1763     } else {
1764         foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) [];
1765     }
1766     jump stg_block_noregs;
1767 #endif
1768 #endif /* !THREADED_RTS */
1769 }
1770
1771
1772 #ifdef mingw32_HOST_OS
1773 STRING(stg_asyncReadzh_malloc_str, "stg_asyncReadzh")
1774 stg_asyncReadzh
1775 {
1776     W_ ares;
1777     CInt reqID;
1778
1779 #ifdef THREADED_RTS
1780     foreign "C" barf("asyncRead# on threaded RTS") never returns;
1781 #else
1782
1783     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1784     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1785     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1786
1787     /* could probably allocate this on the heap instead */
1788     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1789                                             stg_asyncReadzh_malloc_str)
1790                         [R1,R2,R3,R4];
1791     (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
1792     StgAsyncIOResult_reqID(ares)   = reqID;
1793     StgAsyncIOResult_len(ares)     = 0;
1794     StgAsyncIOResult_errCode(ares) = 0;
1795     StgTSO_block_info(CurrentTSO)  = ares;
1796     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1797     jump stg_block_async;
1798 #endif
1799 }
1800
1801 STRING(stg_asyncWritezh_malloc_str, "stg_asyncWritezh")
1802 stg_asyncWritezh
1803 {
1804     W_ ares;
1805     CInt reqID;
1806
1807 #ifdef THREADED_RTS
1808     foreign "C" barf("asyncWrite# on threaded RTS") never returns;
1809 #else
1810
1811     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1812     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1813     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1814
1815     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1816                                             stg_asyncWritezh_malloc_str)
1817                         [R1,R2,R3,R4];
1818     (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
1819
1820     StgAsyncIOResult_reqID(ares)   = reqID;
1821     StgAsyncIOResult_len(ares)     = 0;
1822     StgAsyncIOResult_errCode(ares) = 0;
1823     StgTSO_block_info(CurrentTSO)  = ares;
1824     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1825     jump stg_block_async;
1826 #endif
1827 }
1828
1829 STRING(stg_asyncDoProczh_malloc_str, "stg_asyncDoProczh")
1830 stg_asyncDoProczh
1831 {
1832     W_ ares;
1833     CInt reqID;
1834
1835 #ifdef THREADED_RTS
1836     foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
1837 #else
1838
1839     /* args: R1 = proc, R2 = param */
1840     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1841     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1842
1843     /* could probably allocate this on the heap instead */
1844     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1845                                             stg_asyncDoProczh_malloc_str)
1846                                 [R1,R2];
1847     (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
1848     StgAsyncIOResult_reqID(ares)   = reqID;
1849     StgAsyncIOResult_len(ares)     = 0;
1850     StgAsyncIOResult_errCode(ares) = 0;
1851     StgTSO_block_info(CurrentTSO) = ares;
1852     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1853     jump stg_block_async;
1854 #endif
1855 }
1856 #endif
1857
1858 /* -----------------------------------------------------------------------------
1859  * noDuplicate#
1860  *
1861  * noDuplicate# tries to ensure that none of the thunks under
1862  * evaluation by the current thread are also under evaluation by
1863  * another thread.  It relies on *both* threads doing noDuplicate#;
1864  * the second one will get blocked if they are duplicating some work.
1865  *
1866  * The idea is that noDuplicate# is used within unsafePerformIO to
1867  * ensure that the IO operation is performed at most once.
1868  * noDuplicate# calls threadPaused which acquires an exclusive lock on
1869  * all the thunks currently under evaluation by the current thread.
1870  *
1871  * Consider the following scenario.  There is a thunk A, whose
1872  * evaluation requires evaluating thunk B, where thunk B is an
1873  * unsafePerformIO.  Two threads, 1 and 2, bother enter A.  Thread 2
1874  * is pre-empted before it enters B, and claims A by blackholing it
1875  * (in threadPaused).  Thread 1 now enters B, and calls noDuplicate#.
1876  *
1877  *      thread 1                      thread 2
1878  *   +-----------+                 +---------------+
1879  *   |    -------+-----> A <-------+-------        |
1880  *   |  update   |   BLACKHOLE     | marked_update |
1881  *   +-----------+                 +---------------+
1882  *   |           |                 |               |
1883  *        ...                             ...
1884  *   |           |                 +---------------+
1885  *   +-----------+
1886  *   |     ------+-----> B
1887  *   |  update   |   BLACKHOLE
1888  *   +-----------+
1889  *
1890  * At this point: A is a blackhole, owned by thread 2.  noDuplicate#
1891  * calls threadPaused, which walks up the stack and
1892  *  - claims B on behalf of thread 1
1893  *  - then it reaches the update frame for A, which it sees is already
1894  *    a BLACKHOLE and is therefore owned by another thread.  Since
1895  *    thread 1 is duplicating work, the computation up to the update
1896  *    frame for A is suspended, including thunk B.
1897  *  - thunk B, which is an unsafePerformIO, has now been reverted to
1898  *    an AP_STACK which could be duplicated - BAD!
1899  *  - The solution is as follows: before calling threadPaused, we
1900  *    leave a frame on the stack (stg_noDuplicate_info) that will call
1901  *    noDuplicate# again if the current computation is suspended and
1902  *    restarted.
1903  *
1904  * See the test program in concurrent/prog003 for a way to demonstrate
1905  * this.  It needs to be run with +RTS -N3 or greater, and the bug
1906  * only manifests occasionally (once very 10 runs or so).
1907  * -------------------------------------------------------------------------- */
1908
1909 INFO_TABLE_RET(stg_noDuplicate, RET_SMALL)
1910 {
1911     Sp_adj(1);
1912     jump stg_noDuplicatezh;
1913 }
1914
1915 stg_noDuplicatezh
1916 {
1917     STK_CHK_GEN( WDS(1), NO_PTRS, stg_noDuplicatezh );
1918     // leave noDuplicate frame in case the current
1919     // computation is suspended and restarted (see above).
1920     Sp_adj(-1);
1921     Sp(0) = stg_noDuplicate_info;
1922
1923     SAVE_THREAD_STATE();
1924     ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
1925     foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
1926
1927     if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
1928         jump stg_threadFinished;
1929     } else {
1930         LOAD_THREAD_STATE();
1931         ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
1932         // remove the stg_noDuplicate frame if it is still there.
1933         if (Sp(0) == stg_noDuplicate_info) {
1934             Sp_adj(1);
1935         }
1936         jump %ENTRY_CODE(Sp(0));
1937     }
1938 }
1939
1940 /* -----------------------------------------------------------------------------
1941    Misc. primitives
1942    -------------------------------------------------------------------------- */
1943
1944 stg_getApStackValzh
1945 {
1946    W_ ap_stack, offset, val, ok;
1947
1948    /* args: R1 = AP_STACK, R2 = offset */
1949    ap_stack = R1;
1950    offset   = R2;
1951
1952    if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
1953         ok = 1;
1954         val = StgAP_STACK_payload(ap_stack,offset);
1955    } else {
1956         ok = 0;
1957         val = R1;
1958    }
1959    RET_NP(ok,val);
1960 }
1961
1962 // Write the cost center stack of the first argument on stderr; return
1963 // the second.  Possibly only makes sense for already evaluated
1964 // things?
1965 stg_traceCcszh
1966 {
1967     W_ ccs;
1968
1969 #ifdef PROFILING
1970     ccs = StgHeader_ccs(UNTAG(R1));
1971     foreign "C" fprintCCS_stderr(ccs "ptr") [R2];
1972 #endif
1973
1974     R1 = R2;
1975     ENTER();
1976 }
1977
1978 stg_getSparkzh
1979 {
1980    W_ spark;
1981
1982 #ifndef THREADED_RTS
1983    RET_NP(0,ghczmprim_GHCziBool_False_closure);
1984 #else
1985    (spark) = foreign "C" findSpark(MyCapability());
1986    if (spark != 0) {
1987       RET_NP(1,spark);
1988    } else {
1989       RET_NP(0,ghczmprim_GHCziBool_False_closure);
1990    }
1991 #endif
1992 }
1993
1994 stg_traceEventzh
1995 {
1996    W_ msg;
1997    msg = R1;
1998
1999 #if defined(TRACING) || defined(DEBUG)
2000
2001    foreign "C" traceUserMsg(MyCapability() "ptr", msg "ptr") [];
2002
2003 #elif defined(DTRACE)
2004
2005    W_ enabled;
2006
2007    // We should go through the macro HASKELLEVENT_USER_MSG_ENABLED from
2008    // RtsProbes.h, but that header file includes unistd.h, which doesn't
2009    // work in Cmm
2010    (enabled) = foreign "C" __dtrace_isenabled$HaskellEvent$user__msg$v1() [];
2011    if (enabled != 0) {
2012      foreign "C" dtraceUserMsgWrapper(MyCapability() "ptr", msg "ptr") [];
2013    }
2014
2015 #endif
2016    jump %ENTRY_CODE(Sp(0));
2017 }