e5d286d66e575d5648a8e97ba102ae58b407287c
[ghc-hetmet.git] / ghc / rts / PrimOps.hc
1 /* -----------------------------------------------------------------------------
2  * $Id: PrimOps.hc,v 1.105 2003/02/22 04:51:51 sof Exp $
3  *
4  * (c) The GHC Team, 1998-2002
5  *
6  * Primitive functions / data
7  *
8  * ---------------------------------------------------------------------------*/
9
10 #include "Stg.h"
11 #include "Rts.h"
12
13 #include "RtsFlags.h"
14 #include "StgStartup.h"
15 #include "SchedAPI.h"
16 #include "Schedule.h"
17 #include "RtsUtils.h"
18 #include "Storage.h"
19 #include "BlockAlloc.h" /* tmp */
20 #include "StablePriv.h"
21 #include "StgRun.h"
22 #include "Timer.h"      /* TICK_MILLISECS */
23 #include "Prelude.h"
24 #ifndef mingw32_TARGET_OS
25 #include "Itimer.h"    /* getourtimeofday() */
26 #endif
27
28 #ifdef HAVE_SYS_TYPES_H
29 # include <sys/types.h>
30 #endif
31
32 #include <stdlib.h>
33
34 #ifdef mingw32_TARGET_OS
35 #include <windows.h>
36 #include "win32/AsyncIO.h"
37 #endif
38
39 /* ** temporary **
40
41    classes CCallable and CReturnable don't really exist, but the
42    compiler insists on generating dictionaries containing references
43    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
44    for these.  Some C compilers can't cope with zero-length static arrays,
45    so we have to make these one element long.
46 */
47
48 StgWord GHC_ZCCCallable_static_info[1];
49 StgWord GHC_ZCCReturnable_static_info[1];
50   
51 /* -----------------------------------------------------------------------------
52    Macros for Hand-written primitives.
53    -------------------------------------------------------------------------- */
54
55 /*
56  * Horrible macros for returning unboxed tuples.
57  *
58  * How an unboxed tuple is returned depends on two factors:
59  *    - the number of real registers we have available
60  *    - the boxedness of the returned fields.
61  *
62  * To return an unboxed tuple from a primitive operation, we have macros
63  * RET_<layout> where <layout> describes the boxedness of each field of the
64  * unboxed tuple:  N indicates a non-pointer field, and P indicates a pointer.
65  *
66  * We only define the cases actually used, to avoid having too much
67  * garbage in this section.  Warning: any bugs in here will be hard to
68  * track down.
69  *
70  * The return convention for an unboxed tuple is as follows:
71  *   - fit as many fields as possible in registers (as per the
72  *     function fast-entry point calling convention).
73  *   - sort the rest of the fields into pointers and non-pointers.
74  *     push the pointers on the stack, followed by the non-pointers.
75  *     (so the pointers have higher addresses).
76  */
77
78 /*------ All Regs available */
79 #if MAX_REAL_VANILLA_REG == 8
80 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
81 # define RET_N(a)     RET_P(a)
82
83 # define RET_PP(a,b)  R1.w = (W_)(a); R2.w = (W_)(b); JMP_(ENTRY_CODE(Sp[0]));
84 # define RET_NN(a,b)  RET_PP(a,b)
85 # define RET_NP(a,b)  RET_PP(a,b)
86
87 # define RET_PPP(a,b,c) \
88         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); JMP_(ENTRY_CODE(Sp[0]));
89 # define RET_NNP(a,b,c) RET_PPP(a,b,c)
90
91 # define RET_NNNP(a,b,c,d) \
92         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)d; \
93         JMP_(ENTRY_CODE(Sp[0]));
94
95 # define RET_NPNP(a,b,c,d) \
96         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)(d); \
97         JMP_(ENTRY_CODE(Sp[0]));
98
99 #elif MAX_REAL_VANILLA_REG > 2 && MAX_REAL_VANILLA_REG < 8
100 # error RET_n macros not defined for this setup.
101
102 /*------ 2 Registers available */
103 #elif MAX_REAL_VANILLA_REG == 2
104
105 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
106 # define RET_N(a)     RET_P(a)
107
108 # define RET_PP(a,b)   R1.w = (W_)(a); R2.w = (W_)(b); \
109                        JMP_(ENTRY_CODE(Sp[0]));
110 # define RET_NN(a,b)   RET_PP(a,b)
111 # define RET_NP(a,b)   RET_PP(a,b)
112
113 # define RET_PPP(a,b,c)                         \
114         R1.w = (W_)(a);                         \
115         R2.w = (W_)(b);                         \
116         Sp[-1] = (W_)(c);                       \
117         Sp -= 1;                                \
118         JMP_(ENTRY_CODE(Sp[1]));
119
120 # define RET_NNP(a,b,c)                         \
121         R1.w = (W_)(a);                         \
122         R2.w = (W_)(b);                         \
123         Sp[-1] = (W_)(c);                       \
124         Sp -= 1;                                \
125         JMP_(ENTRY_CODE(Sp[1]));
126
127 # define RET_NNNP(a,b,c,d)                      \
128         R1.w = (W_)(a);                         \
129         R2.w = (W_)(b);                         \
130         Sp[-2] = (W_)(c);                       \
131         Sp[-1] = (W_)(d);                       \
132         Sp -= 2;                                \
133         JMP_(ENTRY_CODE(Sp[2]));
134
135 # define RET_NPNP(a,b,c,d)                      \
136         R1.w = (W_)(a);                         \
137         R2.w = (W_)(b);                         \
138         Sp[-2] = (W_)(c);                       \
139         Sp[-1] = (W_)(d);                       \
140         Sp -= 2;                                \
141         JMP_(ENTRY_CODE(Sp[2]));
142
143 /*------ 1 Register available */
144 #elif MAX_REAL_VANILLA_REG == 1
145 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
146 # define RET_N(a)     RET_P(a)
147
148 # define RET_PP(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 1; \
149                        JMP_(ENTRY_CODE(Sp[1]));
150 # define RET_NN(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 2; \
151                        JMP_(ENTRY_CODE(Sp[2]));
152 # define RET_NP(a,b)   RET_PP(a,b)
153
154 # define RET_PPP(a,b,c)                         \
155         R1.w = (W_)(a);                         \
156         Sp[-2] = (W_)(b);                       \
157         Sp[-1] = (W_)(c);                       \
158         Sp -= 2;                                \
159         JMP_(ENTRY_CODE(Sp[2]));
160
161 # define RET_NNP(a,b,c)                         \
162         R1.w = (W_)(a);                         \
163         Sp[-2] = (W_)(b);                       \
164         Sp[-1] = (W_)(c);                       \
165         Sp -= 2;                                \
166         JMP_(ENTRY_CODE(Sp[2]));
167
168 # define RET_NNNP(a,b,c,d)                      \
169         R1.w = (W_)(a);                         \
170         Sp[-3] = (W_)(b);                       \
171         Sp[-2] = (W_)(c);                       \
172         Sp[-1] = (W_)(d);                       \
173         Sp -= 3;                                \
174         JMP_(ENTRY_CODE(Sp[3]));
175
176 # define RET_NPNP(a,b,c,d)                      \
177         R1.w = (W_)(a);                         \
178         Sp[-3] = (W_)(c);                       \
179         Sp[-2] = (W_)(b);                       \
180         Sp[-1] = (W_)(d);                       \
181         Sp -= 3;                                \
182         JMP_(ENTRY_CODE(Sp[3]));
183
184 #else /* 0 Regs available */
185
186 #define PUSH(o,x) Sp[-o] = (W_)(x)
187
188 #define PUSHED(m)   Sp -= (m); JMP_(ENTRY_CODE(Sp[m]));
189
190 # define RET_P(a)     PUSH(1,a); PUSHED(1)
191 # define RET_N(a)     PUSH(1,a); PUSHED(2)
192
193 # define RET_PP(a,b)   PUSH(2,a); PUSH(1,b); PUSHED(2)
194 # define RET_NN(a,b)   PUSH(2,a); PUSH(1,b); PUSHED(2)
195 # define RET_NP(a,b)   PUSH(2,a); PUSH(1,b); PUSHED(2)
196
197 # define RET_PPP(a,b,c) PUSH(3,a); PUSH(2,b); PUSH(1,c); PUSHED(3)
198 # define RET_NNP(a,b,c) PUSH(3,a); PUSH(2,b); PUSH(1,c); PUSHED(3)
199
200 # define RET_NNNP(a,b,c,d) PUSH(4,a); PUSH(3,b); PUSH(2,c); PUSH(1,d); PUSHED(4)        
201 # define RET_NPNP(a,b,c,d) PUSH(4,a); PUSH(3,c); PUSH(2,b); PUSH(1,d); PUSHED(4)        
202 #endif
203
204 /*-----------------------------------------------------------------------------
205   Array Primitives
206
207   Basically just new*Array - the others are all inline macros.
208
209   The size arg is always passed in R1, and the result returned in R1.
210
211   The slow entry point is for returning from a heap check, the saved
212   size argument must be re-loaded from the stack.
213   -------------------------------------------------------------------------- */
214
215 /* for objects that are *less* than the size of a word, make sure we
216  * round up to the nearest word for the size of the array.
217  */
218
219 #define BYTES_TO_STGWORDS(n) ((n) + sizeof(W_) - 1)/sizeof(W_)
220
221 FN_(newByteArrayzh_fast)
222  {
223    W_ size, stuff_size, n;
224    StgArrWords* p;
225    FB_
226      MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
227      n = R1.w;
228      stuff_size = BYTES_TO_STGWORDS(n);
229      size = sizeofW(StgArrWords)+ stuff_size;
230      p = (StgArrWords *)RET_STGCALL1(P_,allocate,size);
231      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
232      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
233      p->words = stuff_size;
234      TICK_RET_UNBOXED_TUP(1)
235      RET_P(p);
236    FE_
237  }
238
239 FN_(newPinnedByteArrayzh_fast)
240  {
241    W_ size, stuff_size, n;
242    StgArrWords* p;
243    FB_
244      MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
245      n = R1.w;
246      stuff_size = BYTES_TO_STGWORDS(n);
247
248      // We want an 8-byte aligned array.  allocatePinned() gives us
249      // 8-byte aligned memory by default, but we want to align the
250      // *goods* inside the ArrWords object, so we have to check the
251      // size of the ArrWords header and adjust our size accordingly.
252      size = sizeofW(StgArrWords)+ stuff_size;
253      if ((sizeof(StgArrWords) & 7) != 0) {
254          size++;
255      }
256
257      p = (StgArrWords *)RET_STGCALL1(P_,allocatePinned,size);
258      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
259
260      // Again, if the ArrWords header isn't a multiple of 8 bytes, we
261      // have to push the object forward one word so that the goods
262      // fall on an 8-byte boundary.
263      if ((sizeof(StgArrWords) & 7) != 0) {
264          ((StgPtr)p)++;
265      }
266
267      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
268      p->words = stuff_size;
269      TICK_RET_UNBOXED_TUP(1)
270      RET_P(p);
271    FE_
272  }
273
274 FN_(newArrayzh_fast)
275 {
276   W_ size, n, init;
277   StgMutArrPtrs* arr;
278   StgPtr p;
279   FB_
280     n = R1.w;
281
282     MAYBE_GC(R2_PTR,newArrayzh_fast);
283
284     size = sizeofW(StgMutArrPtrs) + n;
285     arr = (StgMutArrPtrs *)RET_STGCALL1(P_, allocate, size);
286     TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
287
288     SET_HDR(arr,&stg_MUT_ARR_PTRS_info,CCCS);
289     arr->ptrs = n;
290
291     init = R2.w;
292     for (p = (P_)arr + sizeofW(StgMutArrPtrs); 
293          p < (P_)arr + size; p++) {
294         *p = (W_)init;
295     }
296
297     TICK_RET_UNBOXED_TUP(1);
298     RET_P(arr);
299   FE_
300 }
301
302 FN_(newMutVarzh_fast)
303 {
304   StgMutVar* mv;
305   /* Args: R1.p = initialisation value */
306   FB_
307
308   HP_CHK_GEN_TICKY(sizeofW(StgMutVar), R1_PTR, newMutVarzh_fast);
309   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,1, 0); /* hack, dependent on rep. */
310   CCS_ALLOC(CCCS,sizeofW(StgMutVar));
311
312   mv = (StgMutVar *)(Hp-sizeofW(StgMutVar)+1);
313   SET_HDR(mv,&stg_MUT_VAR_info,CCCS);
314   mv->var = R1.cl;
315
316   TICK_RET_UNBOXED_TUP(1);
317   RET_P(mv);
318   FE_
319 }
320
321 FN_(atomicModifyMutVarzh_fast)
322 {
323    StgMutVar* mv;
324    StgClosure *z, *x, *y, *r;
325    FB_
326    /* Args: R1.p :: MutVar#,  R2.p :: a -> (a,b) */
327
328    /* If x is the current contents of the MutVar#, then 
329       We want to make the new contents point to
330
331          (sel_0 (f x))
332  
333       and the return value is
334
335          (sel_1 (f x))
336
337       obviously we can share (f x).
338
339          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
340          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
341          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
342    */
343
344 #define THUNK_SIZE(n) (sizeofW(StgHeader) + stg_max((n), MIN_UPD_SIZE))
345 #define SIZE (THUNK_SIZE(2) + THUNK_SIZE(1) + THUNK_SIZE(1))
346
347    HP_CHK_GEN_TICKY(SIZE, R1_PTR|R2_PTR, atomicModifyMutVarzh_fast);
348    CCS_ALLOC(CCCS,SIZE);
349
350    x = ((StgMutVar *)R1.cl)->var;
351
352    TICK_ALLOC_UP_THK(2,0); // XXX
353    z = (StgClosure *) Hp - THUNK_SIZE(2) + 1;
354    SET_HDR(z, (StgInfoTable *)&stg_ap_2_upd_info, CCCS);
355    z->payload[0] = R2.cl;
356    z->payload[1] = x;
357
358    TICK_ALLOC_UP_THK(1,1); // XXX
359    y = (StgClosure *) (StgPtr)z - THUNK_SIZE(1);
360    SET_HDR(y, &stg_sel_0_upd_info, CCCS);
361    y->payload[0] = z;
362
363    ((StgMutVar *)R1.cl)->var = y;
364
365    TICK_ALLOC_UP_THK(1,1); // XXX
366    r = (StgClosure *) (StgPtr)y - THUNK_SIZE(1);
367    SET_HDR(r, &stg_sel_1_upd_info, CCCS);
368    r->payload[0] = z;
369
370    RET_P(r);
371    JMP_(ENTRY_CODE(Sp[0]));
372    FE_
373 }
374
375 /* -----------------------------------------------------------------------------
376    Foreign Object Primitives
377    -------------------------------------------------------------------------- */
378
379 FN_(mkForeignObjzh_fast)
380 {
381   /* R1.p = ptr to foreign object,
382   */
383   StgForeignObj *result;
384   FB_
385
386   HP_CHK_GEN_TICKY(sizeofW(StgForeignObj), NO_PTRS, mkForeignObjzh_fast);
387   TICK_ALLOC_PRIM(sizeofW(StgHeader),
388                   sizeofW(StgForeignObj)-sizeofW(StgHeader), 0);
389   CCS_ALLOC(CCCS,sizeofW(StgForeignObj)); /* ccs prof */
390
391   result = (StgForeignObj *) (Hp + 1 - sizeofW(StgForeignObj));
392   SET_HDR(result,&stg_FOREIGN_info,CCCS);
393   result->data = R1.p;
394
395   /* returns (# s#, ForeignObj# #) */
396   TICK_RET_UNBOXED_TUP(1);
397   RET_P(result);
398   FE_
399 }
400
401 /* These two are out-of-line for the benefit of the NCG */
402 FN_(unsafeThawArrayzh_fast)
403 {
404   FB_
405   SET_INFO((StgClosure *)R1.cl,&stg_MUT_ARR_PTRS_info);
406   recordMutable((StgMutClosure*)R1.cl);
407
408   TICK_RET_UNBOXED_TUP(1);
409   RET_P(R1.p);
410   FE_
411 }
412
413 /* -----------------------------------------------------------------------------
414    Weak Pointer Primitives
415    -------------------------------------------------------------------------- */
416
417 FN_(mkWeakzh_fast)
418 {
419   /* R1.p = key
420      R2.p = value
421      R3.p = finalizer (or NULL)
422   */
423   StgWeak *w;
424   FB_
425
426   if (R3.cl == NULL) {
427     R3.cl = &stg_NO_FINALIZER_closure;
428   }
429
430   HP_CHK_GEN_TICKY(sizeofW(StgWeak),R1_PTR|R2_PTR|R3_PTR, mkWeakzh_fast);
431   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,  // +1 is for the link field
432                   sizeofW(StgWeak)-sizeofW(StgHeader)-1, 0);
433   CCS_ALLOC(CCCS,sizeofW(StgWeak)); /* ccs prof */
434
435   w = (StgWeak *) (Hp + 1 - sizeofW(StgWeak));
436   SET_HDR(w, &stg_WEAK_info, CCCS);
437
438   w->key        = R1.cl;
439   w->value      = R2.cl;
440   w->finalizer  = R3.cl;
441
442   w->link       = weak_ptr_list;
443   weak_ptr_list = w;
444   IF_DEBUG(weak, fprintf(stderr,"New weak pointer at %p\n",w));
445
446   TICK_RET_UNBOXED_TUP(1);
447   RET_P(w);
448   FE_
449 }
450
451 FN_(finalizzeWeakzh_fast)
452 {
453   /* R1.p = weak ptr
454    */
455   StgDeadWeak *w;
456   StgClosure *f;
457   FB_
458   TICK_RET_UNBOXED_TUP(0);
459   w = (StgDeadWeak *)R1.p;
460
461   /* already dead? */
462   if (w->header.info == &stg_DEAD_WEAK_info) {
463       RET_NP(0,&stg_NO_FINALIZER_closure);
464   }
465
466   /* kill it */
467 #ifdef PROFILING
468   // @LDV profiling
469   // A weak pointer is inherently used, so we do not need to call
470   // LDV_recordDead_FILL_SLOP_DYNAMIC():
471   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
472   // or, LDV_recordDead():
473   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
474   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
475   // large as weak pointers, so there is no need to fill the slop, either.
476   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
477 #endif
478   //
479   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
480   //
481   w->header.info = &stg_DEAD_WEAK_info;
482 #ifdef PROFILING
483   // @LDV profiling
484   LDV_recordCreate((StgClosure *)w);
485 #endif
486   f = ((StgWeak *)w)->finalizer;
487   w->link = ((StgWeak *)w)->link;
488
489   /* return the finalizer */
490   if (f == &stg_NO_FINALIZER_closure) {
491       RET_NP(0,&stg_NO_FINALIZER_closure);
492   } else {
493       RET_NP(1,f);
494   }
495   FE_
496 }
497
498 FN_(deRefWeakzh_fast)
499 {
500   /* R1.p = weak ptr */
501   StgWeak* w;
502   I_       code;
503   P_       val;
504   FB_
505   w = (StgWeak*)R1.p;
506   if (w->header.info == &stg_WEAK_info) {
507     code = 1;
508     val = (P_)((StgWeak *)w)->value;
509   } else {
510     code = 0;
511     val = (P_)w;
512   }
513   RET_NP(code,val);
514   FE_
515 }
516
517 /* -----------------------------------------------------------------------------
518    Arbitrary-precision Integer operations.
519    -------------------------------------------------------------------------- */
520
521 FN_(int2Integerzh_fast)
522 {
523    /* arguments: R1 = Int# */
524
525    I_ val, s;           /* to avoid aliasing */
526    StgArrWords* p;      /* address of array result */
527    FB_
528
529    val = R1.i;
530    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, int2Integerzh_fast);
531    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
532    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
533
534    p = (StgArrWords *)Hp - 1;
535    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
536
537    /* mpz_set_si is inlined here, makes things simpler */
538    if (val < 0) { 
539         s  = -1;
540         *Hp = -val;
541    } else if (val > 0) {
542         s = 1;
543         *Hp = val;
544    } else {
545         s = 0;
546    }
547
548    /* returns (# size  :: Int#, 
549                  data  :: ByteArray# 
550                #)
551    */
552    TICK_RET_UNBOXED_TUP(2);
553    RET_NP(s,p);
554    FE_
555 }
556
557 FN_(word2Integerzh_fast)
558 {
559    /* arguments: R1 = Word# */
560
561    W_ val;              /* to avoid aliasing */
562    I_  s;
563    StgArrWords* p;      /* address of array result */
564    FB_
565
566    val = R1.w;
567    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, word2Integerzh_fast)
568    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
569    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
570
571    p = (StgArrWords *)Hp - 1;
572    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
573
574    if (val != 0) {
575         s = 1;
576         *Hp = val;
577    } else {
578         s = 0;
579    }
580
581    /* returns (# size  :: Int#, 
582                  data  :: ByteArray# 
583                #)
584    */
585    TICK_RET_UNBOXED_TUP(2);
586    RET_NP(s,p);
587    FE_
588 }
589
590
591 /*
592  * 'long long' primops for converting to/from Integers.
593  */
594
595 #ifdef SUPPORT_LONG_LONGS
596
597 FN_(int64ToIntegerzh_fast)
598 {
599    /* arguments: L1 = Int64# */
600
601    StgInt64  val; /* to avoid aliasing */
602    W_ hi;
603    I_  s, neg, words_needed;
604    StgArrWords* p;      /* address of array result */
605    FB_
606
607    val = (LI_)L1;
608    neg = 0;
609
610    if ( val >= 0x100000000LL || val <= -0x100000000LL )  { 
611        words_needed = 2;
612    } else { 
613        /* minimum is one word */
614        words_needed = 1;
615    }
616    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, int64ToIntegerzh_fast)
617    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
618    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
619
620    p = (StgArrWords *)(Hp-words_needed+1) - 1;
621    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
622
623    if ( val < 0LL ) {
624      neg = 1;
625      val = -val;
626    }
627
628    hi = (W_)((LW_)val / 0x100000000ULL);
629
630    if ( words_needed == 2 )  { 
631       s = 2;
632       Hp[-1] = (W_)val;
633       Hp[0] = hi;
634    } else if ( val != 0 ) {
635       s = 1;
636       Hp[0] = (W_)val;
637    }  else /* val==0 */   {
638       s = 0;
639    }
640    s = ( neg ? -s : s );
641
642    /* returns (# size  :: Int#, 
643                  data  :: ByteArray# 
644                #)
645    */
646    TICK_RET_UNBOXED_TUP(2);
647    RET_NP(s,p);
648    FE_
649 }
650
651 FN_(word64ToIntegerzh_fast)
652 {
653    /* arguments: L1 = Word64# */
654
655    StgWord64 val; /* to avoid aliasing */
656    StgWord hi;
657    I_  s, words_needed;
658    StgArrWords* p;      /* address of array result */
659    FB_
660
661    val = (LW_)L1;
662    if ( val >= 0x100000000ULL ) {
663       words_needed = 2;
664    } else {
665       words_needed = 1;
666    }
667    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, word64ToIntegerzh_fast)
668    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
669    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
670
671    p = (StgArrWords *)(Hp-words_needed+1) - 1;
672    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
673
674    hi = (W_)((LW_)val / 0x100000000ULL);
675    if ( val >= 0x100000000ULL ) { 
676      s = 2;
677      Hp[-1] = ((W_)val);
678      Hp[0]  = (hi);
679    } else if ( val != 0 )      {
680       s = 1;
681       Hp[0] = ((W_)val);
682    } else /* val==0 */         {
683       s = 0;
684    }
685
686    /* returns (# size  :: Int#, 
687                  data  :: ByteArray# 
688                #)
689    */
690    TICK_RET_UNBOXED_TUP(2);
691    RET_NP(s,p);
692    FE_
693 }
694
695
696 #endif /* SUPPORT_LONG_LONGS */
697
698 /* ToDo: this is shockingly inefficient */
699
700 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
701 FN_(name)                                                               \
702 {                                                                       \
703   MP_INT arg1, arg2, result;                                            \
704   I_ s1, s2;                                                            \
705   StgArrWords* d1;                                                      \
706   StgArrWords* d2;                                                      \
707   FB_                                                                   \
708                                                                         \
709   /* call doYouWantToGC() */                                            \
710   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
711                                                                         \
712   d1 = (StgArrWords *)R2.p;                                             \
713   s1 = R1.i;                                                            \
714   d2 = (StgArrWords *)R4.p;                                             \
715   s2 = R3.i;                                                            \
716                                                                         \
717   arg1._mp_alloc        = d1->words;                                    \
718   arg1._mp_size         = (s1);                                         \
719   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
720   arg2._mp_alloc        = d2->words;                                    \
721   arg2._mp_size         = (s2);                                         \
722   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
723                                                                         \
724   STGCALL1(mpz_init,&result);                                           \
725                                                                         \
726   /* Perform the operation */                                           \
727   STGCALL3(mp_fun,&result,&arg1,&arg2);                                 \
728                                                                         \
729   TICK_RET_UNBOXED_TUP(2);                                              \
730   RET_NP(result._mp_size,                                               \
731          result._mp_d-sizeofW(StgArrWords));                            \
732   FE_                                                                   \
733 }
734
735 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
736 FN_(name)                                                               \
737 {                                                                       \
738   MP_INT arg1, result;                                                  \
739   I_ s1;                                                                \
740   StgArrWords* d1;                                                      \
741   FB_                                                                   \
742                                                                         \
743   /* call doYouWantToGC() */                                            \
744   MAYBE_GC(R2_PTR, name);                                               \
745                                                                         \
746   d1 = (StgArrWords *)R2.p;                                             \
747   s1 = R1.i;                                                            \
748                                                                         \
749   arg1._mp_alloc        = d1->words;                                    \
750   arg1._mp_size         = (s1);                                         \
751   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
752                                                                         \
753   STGCALL1(mpz_init,&result);                                           \
754                                                                         \
755   /* Perform the operation */                                           \
756   STGCALL2(mp_fun,&result,&arg1);                                       \
757                                                                         \
758   TICK_RET_UNBOXED_TUP(2);                                              \
759   RET_NP(result._mp_size,                                               \
760          result._mp_d-sizeofW(StgArrWords));                            \
761   FE_                                                                   \
762 }
763
764 #define GMP_TAKE2_RET2(name,mp_fun)                                     \
765 FN_(name)                                                               \
766 {                                                                       \
767   MP_INT arg1, arg2, result1, result2;                                  \
768   I_ s1, s2;                                                            \
769   StgArrWords* d1;                                                      \
770   StgArrWords* d2;                                                      \
771   FB_                                                                   \
772                                                                         \
773   /* call doYouWantToGC() */                                            \
774   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
775                                                                         \
776   d1 = (StgArrWords *)R2.p;                                             \
777   s1 = R1.i;                                                            \
778   d2 = (StgArrWords *)R4.p;                                             \
779   s2 = R3.i;                                                            \
780                                                                         \
781   arg1._mp_alloc        = d1->words;                                    \
782   arg1._mp_size         = (s1);                                         \
783   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
784   arg2._mp_alloc        = d2->words;                                    \
785   arg2._mp_size         = (s2);                                         \
786   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
787                                                                         \
788   STGCALL1(mpz_init,&result1);                                          \
789   STGCALL1(mpz_init,&result2);                                          \
790                                                                         \
791   /* Perform the operation */                                           \
792   STGCALL4(mp_fun,&result1,&result2,&arg1,&arg2);                       \
793                                                                         \
794   TICK_RET_UNBOXED_TUP(4);                                              \
795   RET_NPNP(result1._mp_size,                                            \
796            result1._mp_d-sizeofW(StgArrWords),                          \
797            result2._mp_size,                                            \
798            result2._mp_d-sizeofW(StgArrWords));                         \
799   FE_                                                                   \
800 }
801
802 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add);
803 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub);
804 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul);
805 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd);
806 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q);
807 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r);
808 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact);
809 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and);
810 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior);
811 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor);
812 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com);
813
814 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr);
815 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr);
816
817
818 FN_(gcdIntzh_fast)
819 {
820   /* R1 = the first Int#; R2 = the second Int# */
821   mp_limb_t aa;
822   I_ r;
823   FB_
824   aa = (mp_limb_t)(R1.i);
825   r = RET_STGCALL3(StgInt, mpn_gcd_1, (mp_limb_t *)(&aa), 1, (mp_limb_t)(R2.i));
826
827   R1.i = r;
828   /* Result parked in R1, return via info-pointer at TOS */
829   JMP_(ENTRY_CODE(Sp[0]));
830   FE_
831 }
832
833 FN_(gcdIntegerIntzh_fast)
834 {
835   /* R1 = s1; R2 = d1; R3 = the int */
836   I_ r;
837   FB_
838   r = RET_STGCALL3(StgInt,mpn_gcd_1,(mp_limb_t *)(BYTE_ARR_CTS(R2.p)), R1.i, R3.i);
839
840   R1.i = r;
841   /* Result parked in R1, return via info-pointer at TOS */
842   JMP_(ENTRY_CODE(Sp[0]));
843   FE_
844 }
845
846 FN_(cmpIntegerIntzh_fast)
847 {
848   /* R1 = s1; R2 = d1; R3 = the int */
849   I_ usize;
850   I_ vsize;
851   I_ v_digit;
852   mp_limb_t u_digit;
853   FB_
854
855   usize = R1.i;
856   vsize = 0;
857   v_digit = R3.i;
858
859   // paraphrased from mpz_cmp_si() in the GMP sources
860   if (v_digit > 0) {
861       vsize = 1;
862   } else if (v_digit < 0) {
863       vsize = -1;
864       v_digit = -v_digit;
865   }
866
867   if (usize != vsize) {
868     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
869   }
870
871   if (usize == 0) {
872     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
873   }
874
875   u_digit = *(mp_limb_t *)(BYTE_ARR_CTS(R2.p));
876
877   if (u_digit == (mp_limb_t) (unsigned long) v_digit) {
878     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
879   }
880
881   if (u_digit > (mp_limb_t) (unsigned long) v_digit) {
882     R1.i = usize; 
883   } else {
884     R1.i = -usize; 
885   }
886
887   JMP_(ENTRY_CODE(Sp[0]));
888   FE_
889 }
890
891 FN_(cmpIntegerzh_fast)
892 {
893   /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
894   I_ usize;
895   I_ vsize;
896   I_ size;
897   StgPtr up, vp;
898   int cmp;
899   FB_
900
901   // paraphrased from mpz_cmp() in the GMP sources
902   usize = R1.i;
903   vsize = R3.i;
904
905   if (usize != vsize) {
906     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
907   }
908
909   if (usize == 0) {
910     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
911   }
912
913   size = abs(usize);
914
915   up = BYTE_ARR_CTS(R2.p);
916   vp = BYTE_ARR_CTS(R4.p);
917
918   cmp = RET_STGCALL3(I_, mpn_cmp, (mp_limb_t *)up, (mp_limb_t *)vp, size);
919
920   if (cmp == 0) {
921     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
922   }
923
924   if ((cmp < 0) == (usize < 0)) {
925     R1.i = 1;
926   } else {
927     R1.i = (-1); 
928   }
929   /* Result parked in R1, return via info-pointer at TOS */
930   JMP_(ENTRY_CODE(Sp[0]));
931   FE_
932 }
933
934 FN_(integer2Intzh_fast)
935 {
936   /* R1 = s; R2 = d */
937   I_ r, s;
938   FB_
939   s = R1.i;
940   if (s == 0)
941     r = 0;
942   else {
943     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
944     if (s < 0) r = -r;
945   }
946   /* Result parked in R1, return via info-pointer at TOS */
947   R1.i = r;
948   JMP_(ENTRY_CODE(Sp[0]));
949   FE_
950 }
951
952 FN_(integer2Wordzh_fast)
953 {
954   /* R1 = s; R2 = d */
955   I_ s;
956   W_ r;
957   FB_
958   s = R1.i;
959   if (s == 0)
960     r = 0;
961   else {
962     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
963     if (s < 0) r = -r;
964   }
965   /* Result parked in R1, return via info-pointer at TOS */
966   R1.w = r;
967   JMP_(ENTRY_CODE(Sp[0]));
968   FE_
969 }
970
971
972 FN_(decodeFloatzh_fast)
973
974   MP_INT mantissa;
975   I_ exponent;
976   StgArrWords* p;
977   StgFloat arg;
978   FB_
979
980   /* arguments: F1 = Float# */
981   arg = F1;
982
983   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, decodeFloatzh_fast);
984   TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
985   CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
986
987   /* Be prepared to tell Lennart-coded __decodeFloat    */
988   /* where mantissa._mp_d can be put (it does not care about the rest) */
989   p = (StgArrWords *)Hp - 1;
990   SET_ARR_HDR(p,&stg_ARR_WORDS_info,CCCS,1)
991   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
992
993   /* Perform the operation */
994   STGCALL3(__decodeFloat,&mantissa,&exponent,arg);
995
996   /* returns: (Int# (expn), Int#, ByteArray#) */
997   TICK_RET_UNBOXED_TUP(3);
998   RET_NNP(exponent,mantissa._mp_size,p);
999   FE_
1000 }
1001
1002 #define DOUBLE_MANTISSA_SIZE (sizeofW(StgDouble))
1003 #define ARR_SIZE (sizeofW(StgArrWords) + DOUBLE_MANTISSA_SIZE)
1004
1005 FN_(decodeDoublezh_fast)
1006 { MP_INT mantissa;
1007   I_ exponent;
1008   StgDouble arg;
1009   StgArrWords* p;
1010   FB_
1011
1012   /* arguments: D1 = Double# */
1013   arg = D1;
1014
1015   HP_CHK_GEN_TICKY(ARR_SIZE, NO_PTRS, decodeDoublezh_fast);
1016   TICK_ALLOC_PRIM(sizeofW(StgArrWords),DOUBLE_MANTISSA_SIZE,0);
1017   CCS_ALLOC(CCCS,ARR_SIZE); /* ccs prof */
1018
1019   /* Be prepared to tell Lennart-coded __decodeDouble   */
1020   /* where mantissa.d can be put (it does not care about the rest) */
1021   p = (StgArrWords *)(Hp-ARR_SIZE+1);
1022   SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, DOUBLE_MANTISSA_SIZE);
1023   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
1024
1025   /* Perform the operation */
1026   STGCALL3(__decodeDouble,&mantissa,&exponent,arg);
1027
1028   /* returns: (Int# (expn), Int#, ByteArray#) */
1029   TICK_RET_UNBOXED_TUP(3);
1030   RET_NNP(exponent,mantissa._mp_size,p);
1031   FE_
1032 }
1033
1034 /* -----------------------------------------------------------------------------
1035  * Concurrency primitives
1036  * -------------------------------------------------------------------------- */
1037
1038 FN_(forkzh_fast)
1039 {
1040   FB_
1041   /* args: R1 = closure to spark */
1042   
1043   MAYBE_GC(R1_PTR, forkzh_fast);
1044
1045   /* create it right now, return ThreadID in R1 */
1046   R1.t = RET_STGCALL2(StgTSO *, createIOThread, 
1047                      RtsFlags.GcFlags.initialStkSize, R1.cl);
1048   STGCALL1(scheduleThread, R1.t);
1049       
1050   /* switch at the earliest opportunity */ 
1051   context_switch = 1;
1052   
1053   RET_P(R1.t);
1054   FE_
1055 }
1056
1057 FN_(forkProcesszh_fast)
1058 {
1059   pid_t pid;
1060
1061   FB_
1062   /* args: none */
1063   /* result: Pid */
1064
1065   R1.i = RET_STGCALL1(StgInt, forkProcess, CurrentTSO);
1066
1067   JMP_(ENTRY_CODE(Sp[0]));
1068
1069   FE_
1070 }
1071
1072 FN_(yieldzh_fast)
1073 {
1074   FB_
1075   JMP_(stg_yield_noregs);
1076   FE_
1077 }
1078
1079 FN_(myThreadIdzh_fast)
1080 {
1081   /* no args. */
1082   FB_
1083   RET_P((P_)CurrentTSO);
1084   FE_
1085 }
1086
1087 FN_(labelThreadzh_fast)
1088 {
1089   FB_
1090   /* args: 
1091         R1.p = ThreadId#
1092         R2.p = Addr# */
1093 #ifdef DEBUG
1094   STGCALL2(labelThread,R1.p,(char *)R2.p);
1095 #endif
1096   JMP_(ENTRY_CODE(Sp[0]));
1097   FE_
1098 }
1099
1100
1101 /* -----------------------------------------------------------------------------
1102  * MVar primitives
1103  *
1104  * take & putMVar work as follows.  Firstly, an important invariant:
1105  *
1106  *    If the MVar is full, then the blocking queue contains only
1107  *    threads blocked on putMVar, and if the MVar is empty then the
1108  *    blocking queue contains only threads blocked on takeMVar.
1109  *
1110  * takeMvar:
1111  *    MVar empty : then add ourselves to the blocking queue
1112  *    MVar full  : remove the value from the MVar, and
1113  *                 blocking queue empty     : return
1114  *                 blocking queue non-empty : perform the first blocked putMVar
1115  *                                            from the queue, and wake up the
1116  *                                            thread (MVar is now full again)
1117  *
1118  * putMVar is just the dual of the above algorithm.
1119  *
1120  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1121  * the stack of the thread waiting to do the putMVar.  See
1122  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1123  * the stack layout, and the PerformPut and PerformTake macros below.
1124  *
1125  * It is important that a blocked take or put is woken up with the
1126  * take/put already performed, because otherwise there would be a
1127  * small window of vulnerability where the thread could receive an
1128  * exception and never perform its take or put, and we'd end up with a
1129  * deadlock.
1130  *
1131  * -------------------------------------------------------------------------- */
1132
1133 FN_(isEmptyMVarzh_fast)
1134 {
1135   /* args: R1 = MVar closure */
1136   I_ r;
1137   FB_
1138   r = (I_)((GET_INFO((StgMVar*)(R1.p))) == &stg_EMPTY_MVAR_info);
1139   RET_N(r);
1140   FE_
1141 }
1142
1143
1144 FN_(newMVarzh_fast)
1145 {
1146   StgMVar *mvar;
1147
1148   FB_
1149   /* args: none */
1150
1151   HP_CHK_GEN_TICKY(sizeofW(StgMVar), NO_PTRS, newMVarzh_fast);
1152   TICK_ALLOC_PRIM(sizeofW(StgMutVar)-1, // consider head,tail,link as admin wds
1153                   1, 0);
1154   CCS_ALLOC(CCCS,sizeofW(StgMVar)); /* ccs prof */
1155   
1156   mvar = (StgMVar *) (Hp - sizeofW(StgMVar) + 1);
1157   SET_HDR(mvar,&stg_EMPTY_MVAR_info,CCCS);
1158   mvar->head = mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1159   mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1160
1161   TICK_RET_UNBOXED_TUP(1);
1162   RET_P(mvar);
1163   FE_
1164 }
1165
1166 /* If R1 isn't available, pass it on the stack */
1167 #ifdef REG_R1
1168 #define PerformTake(tso, value) ({              \
1169     (tso)->sp[1] = (W_)value;                   \
1170     (tso)->sp[0] = (W_)&stg_gc_unpt_r1_info;    \
1171   })
1172 #else
1173 #define PerformTake(tso, value) ({              \
1174     (tso)->sp[1] = (W_)value;                   \
1175     (tso)->sp[0] = (W_)&stg_ut_1_0_unreg_info;  \
1176   })
1177 #endif
1178
1179
1180 #define PerformPut(tso) ({                              \
1181     StgClosure *val = (StgClosure *)(tso)->sp[2];       \
1182     (tso)->sp += 3;                                     \
1183     val;                                                \
1184   })
1185
1186 FN_(takeMVarzh_fast)
1187 {
1188   StgMVar *mvar;
1189   StgClosure *val;
1190   const StgInfoTable *info;
1191
1192   FB_
1193   /* args: R1 = MVar closure */
1194
1195   mvar = (StgMVar *)R1.p;
1196
1197 #ifdef SMP
1198   info = LOCK_CLOSURE(mvar);
1199 #else
1200   info = GET_INFO(mvar);
1201 #endif
1202
1203   /* If the MVar is empty, put ourselves on its blocking queue,
1204    * and wait until we're woken up.
1205    */
1206   if (info == &stg_EMPTY_MVAR_info) {
1207     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1208       mvar->head = CurrentTSO;
1209     } else {
1210       mvar->tail->link = CurrentTSO;
1211     }
1212     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1213     CurrentTSO->why_blocked = BlockedOnMVar;
1214     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1215     mvar->tail = CurrentTSO;
1216
1217 #ifdef SMP
1218     /* unlock the MVar */
1219     mvar->header.info = &stg_EMPTY_MVAR_info;
1220 #endif
1221     JMP_(stg_block_takemvar);
1222   }
1223
1224   /* we got the value... */
1225   val = mvar->value;
1226
1227   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1228       /* There are putMVar(s) waiting... 
1229        * wake up the first thread on the queue
1230        */
1231       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1232
1233       /* actually perform the putMVar for the thread that we just woke up */
1234       mvar->value = PerformPut(mvar->head);
1235
1236 #if defined(GRAN) || defined(PAR)
1237       /* ToDo: check 2nd arg (mvar) is right */
1238       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1239 #else
1240       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1241 #endif
1242       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1243           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1244       }
1245 #ifdef SMP
1246       /* unlock in the SMP case */
1247       SET_INFO(mvar,&stg_FULL_MVAR_info);
1248 #endif
1249       TICK_RET_UNBOXED_TUP(1);
1250       RET_P(val);
1251   } else {
1252       /* No further putMVars, MVar is now empty */
1253
1254       /* do this last... we might have locked the MVar in the SMP case,
1255        * and writing the info pointer will unlock it.
1256        */
1257       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1258       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1259       TICK_RET_UNBOXED_TUP(1);
1260       RET_P(val);
1261   }
1262   FE_
1263 }
1264
1265 FN_(tryTakeMVarzh_fast)
1266 {
1267   StgMVar *mvar;
1268   StgClosure *val;
1269   const StgInfoTable *info;
1270
1271   FB_
1272   /* args: R1 = MVar closure */
1273
1274   mvar = (StgMVar *)R1.p;
1275
1276 #ifdef SMP
1277   info = LOCK_CLOSURE(mvar);
1278 #else
1279   info = GET_INFO(mvar);
1280 #endif
1281
1282   if (info == &stg_EMPTY_MVAR_info) {
1283
1284 #ifdef SMP
1285       /* unlock the MVar */
1286       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1287 #endif
1288
1289       /* HACK: we need a pointer to pass back, 
1290        * so we abuse NO_FINALIZER_closure
1291        */
1292       RET_NP(0, &stg_NO_FINALIZER_closure);
1293   }
1294
1295   /* we got the value... */
1296   val = mvar->value;
1297
1298   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1299       /* There are putMVar(s) waiting... 
1300        * wake up the first thread on the queue
1301        */
1302       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1303
1304       /* actually perform the putMVar for the thread that we just woke up */
1305       mvar->value = PerformPut(mvar->head);
1306
1307 #if defined(GRAN) || defined(PAR)
1308       /* ToDo: check 2nd arg (mvar) is right */
1309       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1310 #else
1311       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1312 #endif
1313       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1314           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1315       }
1316 #ifdef SMP
1317       /* unlock in the SMP case */
1318       SET_INFO(mvar,&stg_FULL_MVAR_info);
1319 #endif
1320   } else {
1321       /* No further putMVars, MVar is now empty */
1322       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1323
1324       /* do this last... we might have locked the MVar in the SMP case,
1325        * and writing the info pointer will unlock it.
1326        */
1327       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1328   }
1329
1330   TICK_RET_UNBOXED_TUP(1);
1331   RET_NP((I_)1, val);
1332   FE_
1333 }
1334
1335 FN_(putMVarzh_fast)
1336 {
1337   StgMVar *mvar;
1338   const StgInfoTable *info;
1339
1340   FB_
1341   /* args: R1 = MVar, R2 = value */
1342
1343   mvar = (StgMVar *)R1.p;
1344
1345 #ifdef SMP
1346   info = LOCK_CLOSURE(mvar);
1347 #else
1348   info = GET_INFO(mvar);
1349 #endif
1350
1351   if (info == &stg_FULL_MVAR_info) {
1352     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1353       mvar->head = CurrentTSO;
1354     } else {
1355       mvar->tail->link = CurrentTSO;
1356     }
1357     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1358     CurrentTSO->why_blocked = BlockedOnMVar;
1359     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1360     mvar->tail = CurrentTSO;
1361
1362 #ifdef SMP
1363     /* unlock the MVar */
1364     SET_INFO(mvar,&stg_FULL_MVAR_info);
1365 #endif
1366     JMP_(stg_block_putmvar);
1367   }
1368   
1369   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1370       /* There are takeMVar(s) waiting: wake up the first one
1371        */
1372       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1373
1374       /* actually perform the takeMVar */
1375       PerformTake(mvar->head, R2.cl);
1376       
1377 #if defined(GRAN) || defined(PAR)
1378       /* ToDo: check 2nd arg (mvar) is right */
1379       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1380 #else
1381       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1382 #endif
1383       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1384           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1385       }
1386 #ifdef SMP
1387       /* unlocks the MVar in the SMP case */
1388       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1389 #endif
1390       JMP_(ENTRY_CODE(Sp[0]));
1391   } else {
1392       /* No further takes, the MVar is now full. */
1393       mvar->value = R2.cl;
1394       /* unlocks the MVar in the SMP case */
1395       SET_INFO(mvar,&stg_FULL_MVAR_info);
1396       JMP_(ENTRY_CODE(Sp[0]));
1397   }
1398
1399   /* ToDo: yield afterward for better communication performance? */
1400   FE_
1401 }
1402
1403 FN_(tryPutMVarzh_fast)
1404 {
1405   StgMVar *mvar;
1406   const StgInfoTable *info;
1407
1408   FB_
1409   /* args: R1 = MVar, R2 = value */
1410
1411   mvar = (StgMVar *)R1.p;
1412
1413 #ifdef SMP
1414   info = LOCK_CLOSURE(mvar);
1415 #else
1416   info = GET_INFO(mvar);
1417 #endif
1418
1419   if (info == &stg_FULL_MVAR_info) {
1420
1421 #ifdef SMP
1422     /* unlock the MVar */
1423     mvar->header.info = &stg_FULL_MVAR_info;
1424 #endif
1425
1426     RET_N(0);
1427   }
1428   
1429   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1430       /* There are takeMVar(s) waiting: wake up the first one
1431        */
1432       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1433
1434       /* actually perform the takeMVar */
1435       PerformTake(mvar->head, R2.cl);
1436       
1437 #if defined(GRAN) || defined(PAR)
1438       /* ToDo: check 2nd arg (mvar) is right */
1439       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1440 #else
1441       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1442 #endif
1443       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1444           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1445       }
1446 #ifdef SMP
1447       /* unlocks the MVar in the SMP case */
1448       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1449 #endif
1450       JMP_(ENTRY_CODE(Sp[0]));
1451   } else {
1452       /* No further takes, the MVar is now full. */
1453       mvar->value = R2.cl;
1454       /* unlocks the MVar in the SMP case */
1455       SET_INFO(mvar,&stg_FULL_MVAR_info);
1456       JMP_(ENTRY_CODE(Sp[0]));
1457   }
1458
1459   /* ToDo: yield afterward for better communication performance? */
1460   FE_
1461 }
1462
1463 /* -----------------------------------------------------------------------------
1464    Stable pointer primitives
1465    -------------------------------------------------------------------------  */
1466
1467 FN_(makeStableNamezh_fast)
1468 {
1469   StgWord index;
1470   StgStableName *sn_obj;
1471   FB_
1472
1473   HP_CHK_GEN_TICKY(sizeofW(StgStableName), R1_PTR, makeStableNamezh_fast);
1474   TICK_ALLOC_PRIM(sizeofW(StgHeader), 
1475                   sizeofW(StgStableName)-sizeofW(StgHeader), 0);
1476   CCS_ALLOC(CCCS,sizeofW(StgStableName)); /* ccs prof */
1477   
1478   index = RET_STGCALL1(StgWord,lookupStableName,R1.p);
1479
1480   /* Is there already a StableName for this heap object? */
1481   if (stable_ptr_table[index].sn_obj == NULL) {
1482     sn_obj = (StgStableName *) (Hp - sizeofW(StgStableName) + 1);
1483     SET_HDR(sn_obj,&stg_STABLE_NAME_info,CCCS);
1484     sn_obj->sn = index;
1485     stable_ptr_table[index].sn_obj = (StgClosure *)sn_obj;
1486   } else {
1487     (StgClosure *)sn_obj = stable_ptr_table[index].sn_obj;
1488   }
1489
1490   TICK_RET_UNBOXED_TUP(1);
1491   RET_P(sn_obj);
1492 }
1493
1494
1495 FN_(makeStablePtrzh_fast)
1496 {
1497   /* Args: R1 = a */
1498   StgStablePtr sp;
1499   FB_
1500   MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1501   sp = RET_STGCALL1(StgStablePtr,getStablePtr,R1.p);
1502   RET_N(sp);
1503   FE_
1504 }
1505
1506 FN_(deRefStablePtrzh_fast)
1507 {
1508   /* Args: R1 = the stable ptr */
1509   P_ r;
1510   StgStablePtr sp;
1511   FB_
1512   sp = (StgStablePtr)R1.w;
1513   r = stable_ptr_table[(StgWord)sp].addr;
1514   RET_P(r);
1515   FE_
1516 }
1517
1518 /* -----------------------------------------------------------------------------
1519    Bytecode object primitives
1520    -------------------------------------------------------------------------  */
1521
1522 FN_(newBCOzh_fast)
1523 {
1524   /* R1.p = instrs
1525      R2.p = literals
1526      R3.p = ptrs
1527      R4.p = itbls
1528   */
1529   StgBCO *bco;
1530   FB_
1531
1532   HP_CHK_GEN_TICKY(sizeofW(StgBCO),R1_PTR|R2_PTR|R3_PTR|R4_PTR, newBCOzh_fast);
1533   TICK_ALLOC_PRIM(sizeofW(StgHeader), sizeofW(StgBCO)-sizeofW(StgHeader), 0);
1534   CCS_ALLOC(CCCS,sizeofW(StgBCO)); /* ccs prof */
1535   bco = (StgBCO *) (Hp + 1 - sizeofW(StgBCO));
1536   SET_HDR(bco, (const StgInfoTable *)&stg_BCO_info, CCCS);
1537
1538   bco->instrs     = (StgArrWords*)R1.cl;
1539   bco->literals   = (StgArrWords*)R2.cl;
1540   bco->ptrs       = (StgMutArrPtrs*)R3.cl;
1541   bco->itbls      = (StgArrWords*)R4.cl;
1542
1543   TICK_RET_UNBOXED_TUP(1);
1544   RET_P(bco);
1545   FE_
1546 }
1547
1548 FN_(mkApUpd0zh_fast)
1549 {
1550   // R1.p = the BCO# for the AP
1551   //
1552   StgPAP* ap;
1553   FB_
1554
1555   // This function is *only* used to wrap zero-arity BCOs in an
1556   // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1557   // saturated and always points directly to a FUN or BCO.
1558   ASSERT(get_itbl(R1.cl)->type == BCO && BCO_ARITY(R1.p) == 0);
1559
1560   HP_CHK_GEN_TICKY(PAP_sizeW(0), R1_PTR, mkApUpd0zh_fast);
1561   TICK_ALLOC_PRIM(sizeofW(StgHeader), PAP_sizeW(0)-sizeofW(StgHeader), 0);
1562   CCS_ALLOC(CCCS,PAP_sizeW(0)); /* ccs prof */
1563   ap = (StgPAP *) (Hp + 1 - PAP_sizeW(0));
1564   SET_HDR(ap, &stg_AP_info, CCCS);
1565
1566   ap->n_args = 0;
1567   ap->fun = R1.cl;
1568
1569   TICK_RET_UNBOXED_TUP(1);
1570   RET_P(ap);
1571   FE_
1572 }
1573
1574 /* -----------------------------------------------------------------------------
1575    Thread I/O blocking primitives
1576    -------------------------------------------------------------------------- */
1577
1578 FN_(waitReadzh_fast)
1579 {
1580   FB_
1581     /* args: R1.i */
1582     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1583     CurrentTSO->why_blocked = BlockedOnRead;
1584     CurrentTSO->block_info.fd = R1.i;
1585     ACQUIRE_LOCK(&sched_mutex);
1586     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1587     RELEASE_LOCK(&sched_mutex);
1588     JMP_(stg_block_noregs);
1589   FE_
1590 }
1591
1592 FN_(waitWritezh_fast)
1593 {
1594   FB_
1595     /* args: R1.i */
1596     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1597     CurrentTSO->why_blocked = BlockedOnWrite;
1598     CurrentTSO->block_info.fd = R1.i;
1599     ACQUIRE_LOCK(&sched_mutex);
1600     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1601     RELEASE_LOCK(&sched_mutex);
1602     JMP_(stg_block_noregs);
1603   FE_
1604 }
1605
1606 FN_(delayzh_fast)
1607 {
1608 #ifdef mingw32_TARGET_OS
1609   StgAsyncIOResult* ares;
1610   unsigned int reqID;
1611 #else
1612   StgTSO *t, *prev;
1613   nat target;
1614 #endif
1615   FB_
1616     /* args: R1.i */
1617     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1618     CurrentTSO->why_blocked = BlockedOnDelay;
1619
1620     ACQUIRE_LOCK(&sched_mutex);
1621 #ifdef mingw32_TARGET_OS
1622     /* could probably allocate this on the heap instead */
1623     ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncWritezh_fast");
1624     reqID = RET_STGCALL1(W_,addDelayRequest,R1.i);
1625     ares->reqID   = reqID;
1626     ares->len     = 0;
1627     ares->errCode = 0;
1628     CurrentTSO->block_info.async_result = ares;
1629     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1630 #else
1631     target = (R1.i / (TICK_MILLISECS*1000)) + getourtimeofday();
1632     CurrentTSO->block_info.target = target;
1633
1634     /* Insert the new thread in the sleeping queue. */
1635     prev = NULL;
1636     t = sleeping_queue;
1637     while (t != END_TSO_QUEUE && t->block_info.target < target) {
1638         prev = t;
1639         t = t->link;
1640     }
1641
1642     CurrentTSO->link = t;
1643     if (prev == NULL) {
1644         sleeping_queue = CurrentTSO;
1645     } else {
1646         prev->link = CurrentTSO;
1647     }
1648 #endif
1649     RELEASE_LOCK(&sched_mutex);
1650     JMP_(stg_block_noregs);
1651   FE_
1652 }
1653
1654 #ifdef mingw32_TARGET_OS
1655 FN_(asyncReadzh_fast)
1656 {
1657   StgAsyncIOResult* ares;
1658   unsigned int reqID;
1659   FB_
1660     /* args: R1.i = fd, R2.i = isSock, R3.i = len, R4.p = buf */
1661     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1662     CurrentTSO->why_blocked = BlockedOnRead;
1663     ACQUIRE_LOCK(&sched_mutex);
1664     /* could probably allocate this on the heap instead */
1665     ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncWritezh_fast");
1666     reqID = RET_STGCALL5(W_,addIORequest,R1.i,FALSE,R2.i,R3.i,(char*)R4.p);
1667     ares->reqID   = reqID;
1668     ares->len     = 0;
1669     ares->errCode = 0;
1670     CurrentTSO->block_info.async_result = ares;
1671     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1672     RELEASE_LOCK(&sched_mutex);
1673     JMP_(stg_block_async);
1674   FE_
1675 }
1676
1677 FN_(asyncWritezh_fast)
1678 {
1679   StgAsyncIOResult* ares;
1680   unsigned int reqID;
1681   FB_
1682     /* args: R1.i */
1683     /* args: R1.i = fd, R2.i = isSock, R3.i = len, R4.p = buf */
1684     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1685     CurrentTSO->why_blocked = BlockedOnWrite;
1686     ACQUIRE_LOCK(&sched_mutex);
1687     ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncWritezh_fast");
1688     reqID = RET_STGCALL5(W_,addIORequest,R1.i,TRUE,R2.i,R3.i,(char*)R4.p);
1689     ares->reqID   = reqID;
1690     ares->len     = 0;
1691     ares->errCode = 0;
1692     CurrentTSO->block_info.async_result = ares;
1693     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1694     RELEASE_LOCK(&sched_mutex);
1695     JMP_(stg_block_async);
1696   FE_
1697 }
1698 #endif