aba7ed7e708029be353f58fde1098594f4dfdf90
[ghc-hetmet.git] / ghc / rts / PrimOps.hc
1 /* -----------------------------------------------------------------------------
2  * $Id: PrimOps.hc,v 1.91 2002/01/29 16:24:08 simonmar Exp $
3  *
4  * (c) The GHC Team, 1998-2000
5  *
6  * Primitive functions / data
7  *
8  * ---------------------------------------------------------------------------*/
9
10 #include "Stg.h"
11 #include "Rts.h"
12
13 #include "RtsFlags.h"
14 #include "StgStartup.h"
15 #include "SchedAPI.h"
16 #include "Schedule.h"
17 #include "RtsUtils.h"
18 #include "Storage.h"
19 #include "BlockAlloc.h" /* tmp */
20 #include "StablePriv.h"
21 #include "StgRun.h"
22 #include "Itimer.h"
23 #include "Prelude.h"
24
25 /* ** temporary **
26
27    classes CCallable and CReturnable don't really exist, but the
28    compiler insists on generating dictionaries containing references
29    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
30    for these.  Some C compilers can't cope with zero-length static arrays,
31    so we have to make these one element long.
32 */
33
34 StgWord GHC_ZCCCallable_static_info[1];
35 StgWord GHC_ZCCReturnable_static_info[1];
36   
37 /* -----------------------------------------------------------------------------
38    Macros for Hand-written primitives.
39    -------------------------------------------------------------------------- */
40
41 /*
42  * Horrible macros for returning unboxed tuples.
43  *
44  * How an unboxed tuple is returned depends on two factors:
45  *    - the number of real registers we have available
46  *    - the boxedness of the returned fields.
47  *
48  * To return an unboxed tuple from a primitive operation, we have macros
49  * RET_<layout> where <layout> describes the boxedness of each field of the
50  * unboxed tuple:  N indicates a non-pointer field, and P indicates a pointer.
51  *
52  * We only define the cases actually used, to avoid having too much
53  * garbage in this section.  Warning: any bugs in here will be hard to
54  * track down.
55  */
56
57 /*------ All Regs available */
58 #if defined(REG_R8)
59 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
60 # define RET_N(a)     RET_P(a)
61
62 # define RET_PP(a,b)  R1.w = (W_)(a); R2.w = (W_)(b); JMP_(ENTRY_CODE(Sp[0]));
63 # define RET_NN(a,b)  RET_PP(a,b)
64 # define RET_NP(a,b)  RET_PP(a,b)
65
66 # define RET_PPP(a,b,c) \
67         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); JMP_(ENTRY_CODE(Sp[0]));
68 # define RET_NNP(a,b,c) RET_PPP(a,b,c)
69
70 # define RET_NNNP(a,b,c,d) \
71         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)d; \
72         JMP_(ENTRY_CODE(Sp[0]));
73
74 # define RET_NPNP(a,b,c,d) \
75         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)(d); \
76         JMP_(ENTRY_CODE(Sp[0]));
77
78 # define RET_NNPNNP(a,b,c,d,e,f) \
79         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); \
80         R4.w = (W_)(d); R5.w = (W_)(e); R6.w = (W_)(f); \
81         JMP_(ENTRY_CODE(Sp[0]));
82
83 #elif defined(REG_R7) || defined(REG_R6) || defined(REG_R5) || \
84       defined(REG_R4) || defined(REG_R3)
85 # error RET_n macros not defined for this setup.
86
87 /*------ 2 Registers available */
88 #elif defined(REG_R2)
89
90 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
91 # define RET_N(a)     RET_P(a)
92
93 # define RET_PP(a,b)   R1.w = (W_)(a); R2.w = (W_)(b); \
94                        JMP_(ENTRY_CODE(Sp[0]));
95 # define RET_NN(a,b)   RET_PP(a,b)
96 # define RET_NP(a,b)   RET_PP(a,b)
97
98 # define RET_PPP(a,b,c) \
99         R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \
100         JMP_(ENTRY_CODE(Sp[1]));
101 # define RET_NNP(a,b,c) \
102         R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \
103         JMP_(ENTRY_CODE(Sp[1]));
104
105 # define RET_NNNP(a,b,c,d)                      \
106         R1.w = (W_)(a);                         \
107         R2.w = (W_)(b);                         \
108     /*  Sp[-3] = ARGTAG(1); */                  \
109         Sp[-2] = (W_)(c);                       \
110         Sp[-1] = (W_)(d);                       \
111         Sp -= 3;                                \
112         JMP_(ENTRY_CODE(Sp[3]));
113
114 # define RET_NPNP(a,b,c,d)                      \
115         R1.w = (W_)(a);                         \
116         R2.w = (W_)(b);                         \
117     /*  Sp[-3] = ARGTAG(1); */                  \
118         Sp[-2] = (W_)(c);                       \
119         Sp[-1] = (W_)(d);                       \
120         Sp -= 3;                                \
121         JMP_(ENTRY_CODE(Sp[3]));
122
123 # define RET_NNPNNP(a,b,c,d,e,f)                \
124         R1.w = (W_)(a);                         \
125         R2.w = (W_)(b);                         \
126         Sp[-6] = (W_)(c);                       \
127         /* Sp[-5] = ARGTAG(1); */               \
128         Sp[-4] = (W_)(d);                       \
129         /* Sp[-3] = ARGTAG(1); */               \
130         Sp[-2] = (W_)(e);                       \
131         Sp[-1] = (W_)(f);                       \
132         Sp -= 6;                                \
133         JMP_(ENTRY_CODE(Sp[6]));
134
135 /*------ 1 Register available */
136 #elif defined(REG_R1)
137 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
138 # define RET_N(a)     RET_P(a)
139
140 # define RET_PP(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 1; \
141                        JMP_(ENTRY_CODE(Sp[1]));
142 # define RET_NN(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 2; \
143                        JMP_(ENTRY_CODE(Sp[2]));
144 # define RET_NP(a,b)   RET_PP(a,b)
145
146 # define RET_PPP(a,b,c) \
147         R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 2; \
148         JMP_(ENTRY_CODE(Sp[2]));
149 # define RET_NNP(a,b,c) \
150         R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 3; \
151         JMP_(ENTRY_CODE(Sp[3]));
152
153 # define RET_NNNP(a,b,c,d)                      \
154         R1.w = (W_)(a);                         \
155     /*  Sp[-5] = ARGTAG(1); */                  \
156         Sp[-4] = (W_)(b);                       \
157     /*  Sp[-3] = ARGTAG(1); */                  \
158         Sp[-2] = (W_)(c);                       \
159         Sp[-1] = (W_)(d);                       \
160         Sp -= 5;                                \
161         JMP_(ENTRY_CODE(Sp[5]));
162
163 # define RET_NPNP(a,b,c,d)                      \
164         R1.w = (W_)(a);                         \
165         Sp[-4] = (W_)(b);                       \
166     /*  Sp[-3] = ARGTAG(1); */                  \
167         Sp[-2] = (W_)(c);                       \
168         Sp[-1] = (W_)(d);                       \
169         Sp -= 4;                                \
170         JMP_(ENTRY_CODE(Sp[4]));
171
172 # define RET_NNPNNP(a,b,c,d,e,f)                \
173         R1.w = (W_)(a);                         \
174         Sp[-1] = (W_)(f);                       \
175         Sp[-2] = (W_)(e);                       \
176         /* Sp[-3] = ARGTAG(1); */               \
177         Sp[-4] = (W_)(d);                       \
178         /* Sp[-5] = ARGTAG(1); */               \
179         Sp[-6] = (W_)(c);                       \
180         Sp[-7] = (W_)(b);                       \
181         /* Sp[-8] = ARGTAG(1); */               \
182         Sp -= 8;                                \
183         JMP_(ENTRY_CODE(Sp[8]));
184
185 #else /* 0 Regs available */
186
187 #define PUSH_P(o,x) Sp[-o] = (W_)(x)
188
189 #ifdef DEBUG
190 #define PUSH_N(o,x) Sp[1-o] = (W_)(x);  Sp[-o] = ARG_TAG(1);
191 #else
192 #define PUSH_N(o,x) Sp[1-o] = (W_)(x);
193 #endif
194
195 #define PUSHED(m)   Sp -= (m); JMP_(ENTRY_CODE(Sp[m]));
196
197 /* Here's how to construct these macros:
198  *
199  *   N = number of N's in the name;
200  *   P = number of P's in the name;
201  *   s = N * 2 + P;
202  *   while (nonNull(name)) {
203  *     if (nextChar == 'P') {
204  *       PUSH_P(s,_);
205  *       s -= 1;
206  *     } else {
207  *       PUSH_N(s,_);
208  *       s -= 2
209  *     }
210  *   }
211  *   PUSHED(N * 2 + P);
212  */
213
214 # define RET_P(a)     PUSH_P(1,a); PUSHED(1)
215 # define RET_N(a)     PUSH_N(2,a); PUSHED(2)
216
217 # define RET_PP(a,b)   PUSH_P(2,a); PUSH_P(1,b); PUSHED(2)
218 # define RET_NN(a,b)   PUSH_N(4,a); PUSH_N(2,b); PUSHED(4)
219 # define RET_NP(a,b)   PUSH_N(3,a); PUSH_P(1,b); PUSHED(3)
220
221 # define RET_PPP(a,b,c) PUSH_P(3,a); PUSH_P(2,b); PUSH_P(1,c); PUSHED(3)
222 # define RET_NNP(a,b,c) PUSH_N(5,a); PUSH_N(3,b); PUSH_P(1,c); PUSHED(5)
223
224 # define RET_NNNP(a,b,c,d) PUSH_N(7,a); PUSH_N(5,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(7)        
225 # define RET_NPNP(a,b,c,d) PUSH_N(6,a); PUSH_P(4,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(6)        
226 # define RET_NNPNNP(a,b,c,d,e,f) PUSH_N(10,a); PUSH_N(8,b); PUSH_P(6,c); PUSH_N(5,d); PUSH_N(3,e); PUSH_P(1,f); PUSHED(10)
227
228 #endif
229
230 /*-----------------------------------------------------------------------------
231   Array Primitives
232
233   Basically just new*Array - the others are all inline macros.
234
235   The size arg is always passed in R1, and the result returned in R1.
236
237   The slow entry point is for returning from a heap check, the saved
238   size argument must be re-loaded from the stack.
239   -------------------------------------------------------------------------- */
240
241 /* for objects that are *less* than the size of a word, make sure we
242  * round up to the nearest word for the size of the array.
243  */
244
245 #define BYTES_TO_STGWORDS(n) ((n) + sizeof(W_) - 1)/sizeof(W_)
246
247 FN_(newByteArrayzh_fast)                                \
248  {                                                      \
249    W_ size, stuff_size, n;                              \
250    StgArrWords* p;                                      \
251    FB_                                                  \
252      MAYBE_GC(NO_PTRS,newByteArrayzh_fast);             \
253      n = R1.w;                                          \
254      stuff_size = BYTES_TO_STGWORDS(n);                 \
255      size = sizeofW(StgArrWords)+ stuff_size;           \
256      p = (StgArrWords *)RET_STGCALL1(P_,allocate,size); \
257      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0); \
258      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);             \
259      p->words = stuff_size;                             \
260      TICK_RET_UNBOXED_TUP(1)                            \
261      RET_P(p);                                          \
262    FE_                                                  \
263  }
264
265 FN_(newPinnedByteArrayzh_fast)                                  \
266  {                                                              \
267    W_ size, stuff_size, n;                                      \
268    StgArrWords* p;                                              \
269    FB_                                                          \
270      MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);               \
271      n = R1.w;                                                  \
272      stuff_size = BYTES_TO_STGWORDS(n);                         \
273      size = sizeofW(StgArrWords)+ stuff_size;                   \
274      p = (StgArrWords *)RET_STGCALL1(P_,allocatePinned,size);   \
275      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);        \
276      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);                     \
277      p->words = stuff_size;                                     \
278      TICK_RET_UNBOXED_TUP(1)                                    \
279      RET_P(p);                                                  \
280    FE_                                                          \
281  }
282
283 FN_(newArrayzh_fast)
284 {
285   W_ size, n, init;
286   StgMutArrPtrs* arr;
287   StgPtr p;
288   FB_
289     n = R1.w;
290
291     MAYBE_GC(R2_PTR,newArrayzh_fast);
292
293     size = sizeofW(StgMutArrPtrs) + n;
294     arr = (StgMutArrPtrs *)RET_STGCALL1(P_, allocate, size);
295     TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
296
297     SET_HDR(arr,&stg_MUT_ARR_PTRS_info,CCCS);
298     arr->ptrs = n;
299
300     init = R2.w;
301     for (p = (P_)arr + sizeofW(StgMutArrPtrs); 
302          p < (P_)arr + size; p++) {
303         *p = (W_)init;
304     }
305
306     TICK_RET_UNBOXED_TUP(1);
307     RET_P(arr);
308   FE_
309 }
310
311 FN_(newMutVarzh_fast)
312 {
313   StgMutVar* mv;
314   /* Args: R1.p = initialisation value */
315   FB_
316
317   HP_CHK_GEN_TICKY(sizeofW(StgMutVar), R1_PTR, newMutVarzh_fast,);
318   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,1, 0); /* hack, dependent on rep. */
319   CCS_ALLOC(CCCS,sizeofW(StgMutVar));
320
321   mv = (StgMutVar *)(Hp-sizeofW(StgMutVar)+1);
322   SET_HDR(mv,&stg_MUT_VAR_info,CCCS);
323   mv->var = R1.cl;
324
325   TICK_RET_UNBOXED_TUP(1);
326   RET_P(mv);
327   FE_
328 }
329
330 /* -----------------------------------------------------------------------------
331    Foreign Object Primitives
332    -------------------------------------------------------------------------- */
333
334 FN_(mkForeignObjzh_fast)
335 {
336   /* R1.p = ptr to foreign object,
337   */
338   StgForeignObj *result;
339   FB_
340
341   HP_CHK_GEN_TICKY(sizeofW(StgForeignObj), NO_PTRS, mkForeignObjzh_fast,);
342   TICK_ALLOC_PRIM(sizeofW(StgHeader),
343                   sizeofW(StgForeignObj)-sizeofW(StgHeader), 0);
344   CCS_ALLOC(CCCS,sizeofW(StgForeignObj)); /* ccs prof */
345
346   result = (StgForeignObj *) (Hp + 1 - sizeofW(StgForeignObj));
347   SET_HDR(result,&stg_FOREIGN_info,CCCS);
348   result->data = R1.p;
349
350   /* returns (# s#, ForeignObj# #) */
351   TICK_RET_UNBOXED_TUP(1);
352   RET_P(result);
353   FE_
354 }
355
356 /* These two are out-of-line for the benefit of the NCG */
357 FN_(unsafeThawArrayzh_fast)
358 {
359   FB_
360   SET_INFO((StgClosure *)R1.cl,&stg_MUT_ARR_PTRS_info);
361   recordMutable((StgMutClosure*)R1.cl);
362
363   TICK_RET_UNBOXED_TUP(1);
364   RET_P(R1.p);
365   FE_
366 }
367
368 /* -----------------------------------------------------------------------------
369    Weak Pointer Primitives
370    -------------------------------------------------------------------------- */
371
372 FN_(mkWeakzh_fast)
373 {
374   /* R1.p = key
375      R2.p = value
376      R3.p = finalizer (or NULL)
377   */
378   StgWeak *w;
379   FB_
380
381   if (R3.cl == NULL) {
382     R3.cl = &stg_NO_FINALIZER_closure;
383   }
384
385   HP_CHK_GEN_TICKY(sizeofW(StgWeak),R1_PTR|R2_PTR|R3_PTR, mkWeakzh_fast,);
386   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,  // +1 is for the link field
387                   sizeofW(StgWeak)-sizeofW(StgHeader)-1, 0);
388   CCS_ALLOC(CCCS,sizeofW(StgWeak)); /* ccs prof */
389
390   w = (StgWeak *) (Hp + 1 - sizeofW(StgWeak));
391   SET_HDR(w, &stg_WEAK_info, CCCS);
392
393   w->key        = R1.cl;
394   w->value      = R2.cl;
395   w->finalizer  = R3.cl;
396
397   w->link       = weak_ptr_list;
398   weak_ptr_list = w;
399   IF_DEBUG(weak, fprintf(stderr,"New weak pointer at %p\n",w));
400
401   TICK_RET_UNBOXED_TUP(1);
402   RET_P(w);
403   FE_
404 }
405
406 FN_(finalizzeWeakzh_fast)
407 {
408   /* R1.p = weak ptr
409    */
410   StgDeadWeak *w;
411   StgClosure *f;
412   FB_
413   TICK_RET_UNBOXED_TUP(0);
414   w = (StgDeadWeak *)R1.p;
415
416   /* already dead? */
417   if (w->header.info == &stg_DEAD_WEAK_info) {
418       RET_NP(0,&stg_NO_FINALIZER_closure);
419   }
420
421   /* kill it */
422 #ifdef PROFILING
423   // @LDV profiling
424   // A weak pointer is inherently used, so we do not need to call
425   // LDV_recordDead_FILL_SLOP_DYNAMIC():
426   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
427   // or, LDV_recordDead():
428   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
429   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
430   // large as weak pointers, so there is no need to fill the slop, either.
431   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
432 #endif
433   //
434   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
435   //
436   w->header.info = &stg_DEAD_WEAK_info;
437 #ifdef PROFILING
438   // @LDV profiling
439   LDV_recordCreate((StgClosure *)w);
440 #endif
441   f = ((StgWeak *)w)->finalizer;
442   w->link = ((StgWeak *)w)->link;
443
444   /* return the finalizer */
445   if (f == &stg_NO_FINALIZER_closure) {
446       RET_NP(0,&stg_NO_FINALIZER_closure);
447   } else {
448       RET_NP(1,f);
449   }
450   FE_
451 }
452
453 FN_(deRefWeakzh_fast)
454 {
455   /* R1.p = weak ptr */
456   StgWeak* w;
457   I_       code;
458   P_       val;
459   FB_
460   w = (StgWeak*)R1.p;
461   if (w->header.info == &stg_WEAK_info) {
462     code = 1;
463     val = (P_)((StgWeak *)w)->value;
464   } else {
465     code = 0;
466     val = (P_)w;
467   }
468   RET_NP(code,val);
469   FE_
470 }
471
472 /* -----------------------------------------------------------------------------
473    Arbitrary-precision Integer operations.
474    -------------------------------------------------------------------------- */
475
476 FN_(int2Integerzh_fast)
477 {
478    /* arguments: R1 = Int# */
479
480    I_ val, s;           /* to avoid aliasing */
481    StgArrWords* p;      /* address of array result */
482    FB_
483
484    val = R1.i;
485    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, int2Integerzh_fast,);
486    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
487    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
488
489    p = (StgArrWords *)Hp - 1;
490    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
491
492    /* mpz_set_si is inlined here, makes things simpler */
493    if (val < 0) { 
494         s  = -1;
495         *Hp = -val;
496    } else if (val > 0) {
497         s = 1;
498         *Hp = val;
499    } else {
500         s = 0;
501    }
502
503    /* returns (# size  :: Int#, 
504                  data  :: ByteArray# 
505                #)
506    */
507    TICK_RET_UNBOXED_TUP(2);
508    RET_NP(s,p);
509    FE_
510 }
511
512 FN_(word2Integerzh_fast)
513 {
514    /* arguments: R1 = Word# */
515
516    W_ val;              /* to avoid aliasing */
517    I_  s;
518    StgArrWords* p;      /* address of array result */
519    FB_
520
521    val = R1.w;
522    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, word2Integerzh_fast,)
523    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
524    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
525
526    p = (StgArrWords *)Hp - 1;
527    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
528
529    if (val != 0) {
530         s = 1;
531         *Hp = val;
532    } else {
533         s = 0;
534    }
535
536    /* returns (# size  :: Int#, 
537                  data  :: ByteArray# 
538                #)
539    */
540    TICK_RET_UNBOXED_TUP(2);
541    RET_NP(s,p);
542    FE_
543 }
544
545
546 /*
547  * 'long long' primops for converting to/from Integers.
548  */
549
550 #ifdef SUPPORT_LONG_LONGS
551
552 FN_(int64ToIntegerzh_fast)
553 {
554    /* arguments: L1 = Int64# */
555
556    StgInt64  val; /* to avoid aliasing */
557    W_ hi;
558    I_  s, neg, words_needed;
559    StgArrWords* p;      /* address of array result */
560    FB_
561
562    val = (LI_)L1;
563    neg = 0;
564
565    if ( val >= 0x100000000LL || val <= -0x100000000LL )  { 
566        words_needed = 2;
567    } else { 
568        /* minimum is one word */
569        words_needed = 1;
570    }
571    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, int64ToIntegerzh_fast,)
572    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
573    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
574
575    p = (StgArrWords *)(Hp-words_needed+1) - 1;
576    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
577
578    if ( val < 0LL ) {
579      neg = 1;
580      val = -val;
581    }
582
583    hi = (W_)((LW_)val / 0x100000000ULL);
584
585    if ( words_needed == 2 )  { 
586       s = 2;
587       Hp[-1] = (W_)val;
588       Hp[0] = hi;
589    } else if ( val != 0 ) {
590       s = 1;
591       Hp[0] = (W_)val;
592    }  else /* val==0 */   {
593       s = 0;
594    }
595    s = ( neg ? -s : s );
596
597    /* returns (# size  :: Int#, 
598                  data  :: ByteArray# 
599                #)
600    */
601    TICK_RET_UNBOXED_TUP(2);
602    RET_NP(s,p);
603    FE_
604 }
605
606 FN_(word64ToIntegerzh_fast)
607 {
608    /* arguments: L1 = Word64# */
609
610    StgWord64 val; /* to avoid aliasing */
611    StgWord hi;
612    I_  s, words_needed;
613    StgArrWords* p;      /* address of array result */
614    FB_
615
616    val = (LW_)L1;
617    if ( val >= 0x100000000ULL ) {
618       words_needed = 2;
619    } else {
620       words_needed = 1;
621    }
622    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, word64ToIntegerzh_fast,)
623    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
624    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
625
626    p = (StgArrWords *)(Hp-words_needed+1) - 1;
627    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
628
629    hi = (W_)((LW_)val / 0x100000000ULL);
630    if ( val >= 0x100000000ULL ) { 
631      s = 2;
632      Hp[-1] = ((W_)val);
633      Hp[0]  = (hi);
634    } else if ( val != 0 )      {
635       s = 1;
636       Hp[0] = ((W_)val);
637    } else /* val==0 */         {
638       s = 0;
639    }
640
641    /* returns (# size  :: Int#, 
642                  data  :: ByteArray# 
643                #)
644    */
645    TICK_RET_UNBOXED_TUP(2);
646    RET_NP(s,p);
647    FE_
648 }
649
650
651 #endif /* SUPPORT_LONG_LONGS */
652
653 /* ToDo: this is shockingly inefficient */
654
655 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
656 FN_(name)                                                               \
657 {                                                                       \
658   MP_INT arg1, arg2, result;                                            \
659   I_ s1, s2;                                                            \
660   StgArrWords* d1;                                                      \
661   StgArrWords* d2;                                                      \
662   FB_                                                                   \
663                                                                         \
664   /* call doYouWantToGC() */                                            \
665   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
666                                                                         \
667   d1 = (StgArrWords *)R2.p;                                             \
668   s1 = R1.i;                                                            \
669   d2 = (StgArrWords *)R4.p;                                             \
670   s2 = R3.i;                                                            \
671                                                                         \
672   arg1._mp_alloc        = d1->words;                                    \
673   arg1._mp_size         = (s1);                                         \
674   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
675   arg2._mp_alloc        = d2->words;                                    \
676   arg2._mp_size         = (s2);                                         \
677   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
678                                                                         \
679   STGCALL1(mpz_init,&result);                                           \
680                                                                         \
681   /* Perform the operation */                                           \
682   STGCALL3(mp_fun,&result,&arg1,&arg2);                                 \
683                                                                         \
684   TICK_RET_UNBOXED_TUP(2);                                              \
685   RET_NP(result._mp_size,                                               \
686          result._mp_d-sizeofW(StgArrWords));                            \
687   FE_                                                                   \
688 }
689
690 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
691 FN_(name)                                                               \
692 {                                                                       \
693   MP_INT arg1, result;                                                  \
694   I_ s1;                                                                \
695   StgArrWords* d1;                                                      \
696   FB_                                                                   \
697                                                                         \
698   /* call doYouWantToGC() */                                            \
699   MAYBE_GC(R2_PTR, name);                                               \
700                                                                         \
701   d1 = (StgArrWords *)R2.p;                                             \
702   s1 = R1.i;                                                            \
703                                                                         \
704   arg1._mp_alloc        = d1->words;                                    \
705   arg1._mp_size         = (s1);                                         \
706   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
707                                                                         \
708   STGCALL1(mpz_init,&result);                                           \
709                                                                         \
710   /* Perform the operation */                                           \
711   STGCALL2(mp_fun,&result,&arg1);                                       \
712                                                                         \
713   TICK_RET_UNBOXED_TUP(2);                                              \
714   RET_NP(result._mp_size,                                               \
715          result._mp_d-sizeofW(StgArrWords));                            \
716   FE_                                                                   \
717 }
718
719 #define GMP_TAKE2_RET2(name,mp_fun)                                     \
720 FN_(name)                                                               \
721 {                                                                       \
722   MP_INT arg1, arg2, result1, result2;                                  \
723   I_ s1, s2;                                                            \
724   StgArrWords* d1;                                                      \
725   StgArrWords* d2;                                                      \
726   FB_                                                                   \
727                                                                         \
728   /* call doYouWantToGC() */                                            \
729   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
730                                                                         \
731   d1 = (StgArrWords *)R2.p;                                             \
732   s1 = R1.i;                                                            \
733   d2 = (StgArrWords *)R4.p;                                             \
734   s2 = R3.i;                                                            \
735                                                                         \
736   arg1._mp_alloc        = d1->words;                                    \
737   arg1._mp_size         = (s1);                                         \
738   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
739   arg2._mp_alloc        = d2->words;                                    \
740   arg2._mp_size         = (s2);                                         \
741   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
742                                                                         \
743   STGCALL1(mpz_init,&result1);                                          \
744   STGCALL1(mpz_init,&result2);                                          \
745                                                                         \
746   /* Perform the operation */                                           \
747   STGCALL4(mp_fun,&result1,&result2,&arg1,&arg2);                       \
748                                                                         \
749   TICK_RET_UNBOXED_TUP(4);                                              \
750   RET_NPNP(result1._mp_size,                                            \
751            result1._mp_d-sizeofW(StgArrWords),                          \
752            result2._mp_size,                                            \
753            result2._mp_d-sizeofW(StgArrWords));                         \
754   FE_                                                                   \
755 }
756
757 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add);
758 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub);
759 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul);
760 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd);
761 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q);
762 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r);
763 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact);
764 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and);
765 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior);
766 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor);
767 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com);
768
769 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr);
770 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr);
771
772
773 FN_(gcdIntzh_fast)
774 {
775   /* R1 = the first Int#; R2 = the second Int# */
776   mp_limb_t aa;
777   I_        r;
778   FB_
779   aa = (mp_limb_t)(R1.i);
780   r = RET_STGCALL3(StgInt, mpn_gcd_1, (mp_limb_t *)(&aa), 1, (mp_limb_t)(R2.i));
781   RET_N(r);
782   FE_
783 }
784
785 FN_(gcdIntegerIntzh_fast)
786 {
787   /* R1 = s1; R2 = d1; R3 = the int */
788   I_ r;
789   FB_
790   r = RET_STGCALL3(I_,mpn_gcd_1,(mp_limb_t *)(BYTE_ARR_CTS(R2.p)), R1.i, R3.i);
791   RET_N(r);
792   FE_
793 }
794
795 FN_(cmpIntegerIntzh_fast)
796 {
797   /* R1 = s1; R2 = d1; R3 = the int */
798   I_ usize;
799   I_ vsize;
800   I_ v_digit;
801   mp_limb_t u_digit;
802   FB_
803
804   usize = R1.i;
805   vsize = 0;
806   v_digit = R3.i;
807
808   // paraphrased from mpz_cmp_si() in the GMP sources
809   if (v_digit > 0) {
810       vsize = 1;
811   } else if (v_digit < 0) {
812       vsize = -1;
813       v_digit = -v_digit;
814   }
815
816   if (usize != vsize) {
817     RET_N(usize - vsize);
818   }
819
820   if (usize == 0) {
821     RET_N(0);
822   }
823
824   u_digit = *(mp_limb_t *)(BYTE_ARR_CTS(R2.p));
825
826   if (u_digit == (mp_limb_t) (unsigned long) v_digit) {
827     RET_N(0);
828   }
829
830   if (u_digit > (mp_limb_t) (unsigned long) v_digit) {
831     RET_N(usize);
832   } else {
833     RET_N(-usize);
834   }
835   FE_
836 }
837
838 FN_(cmpIntegerzh_fast)
839 {
840   /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
841   I_ usize;
842   I_ vsize;
843   I_ size;
844   StgPtr up, vp;
845   int cmp;
846   FB_
847
848   // paraphrased from mpz_cmp() in the GMP sources
849   usize = R1.i;
850   vsize = R3.i;
851
852   if (usize != vsize) {
853       RET_N(usize - vsize);
854   }
855
856   if (usize == 0) {
857       RET_N(0);
858   }
859
860   size = abs(usize);
861
862   up = BYTE_ARR_CTS(R2.p);
863   vp = BYTE_ARR_CTS(R4.p);
864
865   cmp = RET_STGCALL3(I_, mpn_cmp, (mp_limb_t *)up, (mp_limb_t *)vp, size);
866
867   if (cmp == 0) {
868       RET_N(0);
869   }
870
871   if ((cmp < 0) == (usize < 0)) {
872       RET_N(1);
873   } else {
874       RET_N(-1);
875   }
876   FE_
877 }
878
879 FN_(integer2Intzh_fast)
880 {
881   /* R1 = s; R2 = d */
882   I_ r, s;
883   FB_
884   s = R1.i;
885   if (s == 0)
886     r = 0;
887   else {
888     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
889     if (s < 0) r = -r;
890   }
891   RET_N(r);
892   FE_
893 }
894
895 FN_(integer2Wordzh_fast)
896 {
897   /* R1 = s; R2 = d */
898   I_ s;
899   W_ r;
900   FB_
901   s = R1.i;
902   if (s == 0)
903     r = 0;
904   else {
905     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
906     if (s < 0) r = -r;
907   }
908   RET_N(r);
909   FE_
910 }
911
912
913 FN_(decodeFloatzh_fast)
914
915   MP_INT mantissa;
916   I_ exponent;
917   StgArrWords* p;
918   StgFloat arg;
919   FB_
920
921   /* arguments: F1 = Float# */
922   arg = F1;
923
924   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, decodeFloatzh_fast,);
925   TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
926   CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
927
928   /* Be prepared to tell Lennart-coded __decodeFloat    */
929   /* where mantissa._mp_d can be put (it does not care about the rest) */
930   p = (StgArrWords *)Hp - 1;
931   SET_ARR_HDR(p,&stg_ARR_WORDS_info,CCCS,1)
932   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
933
934   /* Perform the operation */
935   STGCALL3(__decodeFloat,&mantissa,&exponent,arg);
936
937   /* returns: (Int# (expn), Int#, ByteArray#) */
938   TICK_RET_UNBOXED_TUP(3);
939   RET_NNP(exponent,mantissa._mp_size,p);
940   FE_
941 }
942
943 #define DOUBLE_MANTISSA_SIZE (sizeofW(StgDouble))
944 #define ARR_SIZE (sizeofW(StgArrWords) + DOUBLE_MANTISSA_SIZE)
945
946 FN_(decodeDoublezh_fast)
947 { MP_INT mantissa;
948   I_ exponent;
949   StgDouble arg;
950   StgArrWords* p;
951   FB_
952
953   /* arguments: D1 = Double# */
954   arg = D1;
955
956   HP_CHK_GEN_TICKY(ARR_SIZE, NO_PTRS, decodeDoublezh_fast,);
957   TICK_ALLOC_PRIM(sizeofW(StgArrWords),DOUBLE_MANTISSA_SIZE,0);
958   CCS_ALLOC(CCCS,ARR_SIZE); /* ccs prof */
959
960   /* Be prepared to tell Lennart-coded __decodeDouble   */
961   /* where mantissa.d can be put (it does not care about the rest) */
962   p = (StgArrWords *)(Hp-ARR_SIZE+1);
963   SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, DOUBLE_MANTISSA_SIZE);
964   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
965
966   /* Perform the operation */
967   STGCALL3(__decodeDouble,&mantissa,&exponent,arg);
968
969   /* returns: (Int# (expn), Int#, ByteArray#) */
970   TICK_RET_UNBOXED_TUP(3);
971   RET_NNP(exponent,mantissa._mp_size,p);
972   FE_
973 }
974
975 /* -----------------------------------------------------------------------------
976  * Concurrency primitives
977  * -------------------------------------------------------------------------- */
978
979 FN_(forkzh_fast)
980 {
981   FB_
982   /* args: R1 = closure to spark */
983   
984   MAYBE_GC(R1_PTR, forkzh_fast);
985
986   /* create it right now, return ThreadID in R1 */
987   R1.t = RET_STGCALL2(StgTSO *, createIOThread, 
988                       RtsFlags.GcFlags.initialStkSize, R1.cl);
989   STGCALL1(scheduleThread, R1.t);
990       
991   /* switch at the earliest opportunity */ 
992   context_switch = 1;
993   
994   JMP_(ENTRY_CODE(Sp[0]));
995   FE_
996 }
997
998 FN_(yieldzh_fast)
999 {
1000   FB_
1001   JMP_(stg_yield_noregs);
1002   FE_
1003 }
1004
1005 /* -----------------------------------------------------------------------------
1006  * MVar primitives
1007  *
1008  * take & putMVar work as follows.  Firstly, an important invariant:
1009  *
1010  *    If the MVar is full, then the blocking queue contains only
1011  *    threads blocked on putMVar, and if the MVar is empty then the
1012  *    blocking queue contains only threads blocked on takeMVar.
1013  *
1014  * takeMvar:
1015  *    MVar empty : then add ourselves to the blocking queue
1016  *    MVar full  : remove the value from the MVar, and
1017  *                 blocking queue empty     : return
1018  *                 blocking queue non-empty : perform the first blocked putMVar
1019  *                                            from the queue, and wake up the
1020  *                                            thread (MVar is now full again)
1021  *
1022  * putMVar is just the dual of the above algorithm.
1023  *
1024  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1025  * the stack of the thread waiting to do the putMVar.  See
1026  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1027  * the stack layout, and the PerformPut and PerformTake macros below.
1028  *
1029  * It is important that a blocked take or put is woken up with the
1030  * take/put already performed, because otherwise there would be a
1031  * small window of vulnerability where the thread could receive an
1032  * exception and never perform its take or put, and we'd end up with a
1033  * deadlock.
1034  *
1035  * -------------------------------------------------------------------------- */
1036
1037 FN_(isEmptyMVarzh_fast)
1038 {
1039   /* args: R1 = MVar closure */
1040   I_ r;
1041   FB_
1042   r = (I_)((GET_INFO((StgMVar*)(R1.p))) == &stg_EMPTY_MVAR_info);
1043   RET_N(r);
1044   FE_
1045 }
1046
1047
1048 FN_(newMVarzh_fast)
1049 {
1050   StgMVar *mvar;
1051
1052   FB_
1053   /* args: none */
1054
1055   HP_CHK_GEN_TICKY(sizeofW(StgMVar), NO_PTRS, newMVarzh_fast,);
1056   TICK_ALLOC_PRIM(sizeofW(StgMutVar)-1, // consider head,tail,link as admin wds
1057                   1, 0);
1058   CCS_ALLOC(CCCS,sizeofW(StgMVar)); /* ccs prof */
1059   
1060   mvar = (StgMVar *) (Hp - sizeofW(StgMVar) + 1);
1061   SET_HDR(mvar,&stg_EMPTY_MVAR_info,CCCS);
1062   mvar->head = mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1063   mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1064
1065   TICK_RET_UNBOXED_TUP(1);
1066   RET_P(mvar);
1067   FE_
1068 }
1069
1070 #define PerformTake(tso, value) ({                      \
1071     (tso)->sp[1] = (W_)value;                           \
1072     (tso)->sp[0] = (W_)&stg_gc_unpt_r1_info;    \
1073   })
1074
1075 #define PerformPut(tso) ({                              \
1076     StgClosure *val = (StgClosure *)(tso)->sp[2];       \
1077     (tso)->sp[2] = (W_)&stg_gc_noregs_info;             \
1078     (tso)->sp += 2;                                     \
1079     val;                                                \
1080   })
1081
1082 FN_(takeMVarzh_fast)
1083 {
1084   StgMVar *mvar;
1085   StgClosure *val;
1086   const StgInfoTable *info;
1087
1088   FB_
1089   /* args: R1 = MVar closure */
1090
1091   mvar = (StgMVar *)R1.p;
1092
1093 #ifdef SMP
1094   info = LOCK_CLOSURE(mvar);
1095 #else
1096   info = GET_INFO(mvar);
1097 #endif
1098
1099   /* If the MVar is empty, put ourselves on its blocking queue,
1100    * and wait until we're woken up.
1101    */
1102   if (info == &stg_EMPTY_MVAR_info) {
1103     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1104       mvar->head = CurrentTSO;
1105     } else {
1106       mvar->tail->link = CurrentTSO;
1107     }
1108     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1109     CurrentTSO->why_blocked = BlockedOnMVar;
1110     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1111     mvar->tail = CurrentTSO;
1112
1113 #ifdef SMP
1114     /* unlock the MVar */
1115     mvar->header.info = &stg_EMPTY_MVAR_info;
1116 #endif
1117     JMP_(stg_block_takemvar);
1118   }
1119
1120   /* we got the value... */
1121   val = mvar->value;
1122
1123   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1124       /* There are putMVar(s) waiting... 
1125        * wake up the first thread on the queue
1126        */
1127       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1128
1129       /* actually perform the putMVar for the thread that we just woke up */
1130       mvar->value = PerformPut(mvar->head);
1131
1132 #if defined(GRAN) || defined(PAR)
1133       /* ToDo: check 2nd arg (mvar) is right */
1134       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1135 #else
1136       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1137 #endif
1138       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1139           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1140       }
1141 #ifdef SMP
1142       /* unlock in the SMP case */
1143       SET_INFO(mvar,&stg_FULL_MVAR_info);
1144 #endif
1145       TICK_RET_UNBOXED_TUP(1);
1146       RET_P(val);
1147   } else {
1148       /* No further putMVars, MVar is now empty */
1149
1150       /* do this last... we might have locked the MVar in the SMP case,
1151        * and writing the info pointer will unlock it.
1152        */
1153       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1154       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1155       TICK_RET_UNBOXED_TUP(1);
1156       RET_P(val);
1157   }
1158   FE_
1159 }
1160
1161 FN_(tryTakeMVarzh_fast)
1162 {
1163   StgMVar *mvar;
1164   StgClosure *val;
1165   const StgInfoTable *info;
1166
1167   FB_
1168   /* args: R1 = MVar closure */
1169
1170   mvar = (StgMVar *)R1.p;
1171
1172 #ifdef SMP
1173   info = LOCK_CLOSURE(mvar);
1174 #else
1175   info = GET_INFO(mvar);
1176 #endif
1177
1178   if (info == &stg_EMPTY_MVAR_info) {
1179
1180 #ifdef SMP
1181       /* unlock the MVar */
1182       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1183 #endif
1184
1185       /* HACK: we need a pointer to pass back, 
1186        * so we abuse NO_FINALIZER_closure
1187        */
1188       RET_NP(0, &stg_NO_FINALIZER_closure);
1189   }
1190
1191   /* we got the value... */
1192   val = mvar->value;
1193
1194   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1195       /* There are putMVar(s) waiting... 
1196        * wake up the first thread on the queue
1197        */
1198       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1199
1200       /* actually perform the putMVar for the thread that we just woke up */
1201       mvar->value = PerformPut(mvar->head);
1202
1203 #if defined(GRAN) || defined(PAR)
1204       /* ToDo: check 2nd arg (mvar) is right */
1205       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1206 #else
1207       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1208 #endif
1209       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1210           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1211       }
1212 #ifdef SMP
1213       /* unlock in the SMP case */
1214       SET_INFO(mvar,&stg_FULL_MVAR_info);
1215 #endif
1216   } else {
1217       /* No further putMVars, MVar is now empty */
1218       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1219
1220       /* do this last... we might have locked the MVar in the SMP case,
1221        * and writing the info pointer will unlock it.
1222        */
1223       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1224   }
1225
1226   TICK_RET_UNBOXED_TUP(1);
1227   RET_NP((I_)1, val);
1228   FE_
1229 }
1230
1231 FN_(putMVarzh_fast)
1232 {
1233   StgMVar *mvar;
1234   const StgInfoTable *info;
1235
1236   FB_
1237   /* args: R1 = MVar, R2 = value */
1238
1239   mvar = (StgMVar *)R1.p;
1240
1241 #ifdef SMP
1242   info = LOCK_CLOSURE(mvar);
1243 #else
1244   info = GET_INFO(mvar);
1245 #endif
1246
1247   if (info == &stg_FULL_MVAR_info) {
1248     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1249       mvar->head = CurrentTSO;
1250     } else {
1251       mvar->tail->link = CurrentTSO;
1252     }
1253     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1254     CurrentTSO->why_blocked = BlockedOnMVar;
1255     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1256     mvar->tail = CurrentTSO;
1257
1258 #ifdef SMP
1259     /* unlock the MVar */
1260     SET_INFO(mvar,&stg_FULL_MVAR_info);
1261 #endif
1262     JMP_(stg_block_putmvar);
1263   }
1264   
1265   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1266       /* There are takeMVar(s) waiting: wake up the first one
1267        */
1268       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1269
1270       /* actually perform the takeMVar */
1271       PerformTake(mvar->head, R2.cl);
1272       
1273 #if defined(GRAN) || defined(PAR)
1274       /* ToDo: check 2nd arg (mvar) is right */
1275       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1276 #else
1277       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1278 #endif
1279       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1280           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1281       }
1282 #ifdef SMP
1283       /* unlocks the MVar in the SMP case */
1284       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1285 #endif
1286       JMP_(ENTRY_CODE(Sp[0]));
1287   } else {
1288       /* No further takes, the MVar is now full. */
1289       mvar->value = R2.cl;
1290       /* unlocks the MVar in the SMP case */
1291       SET_INFO(mvar,&stg_FULL_MVAR_info);
1292       JMP_(ENTRY_CODE(Sp[0]));
1293   }
1294
1295   /* ToDo: yield afterward for better communication performance? */
1296   FE_
1297 }
1298
1299 FN_(tryPutMVarzh_fast)
1300 {
1301   StgMVar *mvar;
1302   const StgInfoTable *info;
1303
1304   FB_
1305   /* args: R1 = MVar, R2 = value */
1306
1307   mvar = (StgMVar *)R1.p;
1308
1309 #ifdef SMP
1310   info = LOCK_CLOSURE(mvar);
1311 #else
1312   info = GET_INFO(mvar);
1313 #endif
1314
1315   if (info == &stg_FULL_MVAR_info) {
1316
1317 #ifdef SMP
1318     /* unlock the MVar */
1319     mvar->header.info = &stg_FULL_MVAR_info;
1320 #endif
1321
1322     RET_N(0);
1323   }
1324   
1325   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1326       /* There are takeMVar(s) waiting: wake up the first one
1327        */
1328       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1329
1330       /* actually perform the takeMVar */
1331       PerformTake(mvar->head, R2.cl);
1332       
1333 #if defined(GRAN) || defined(PAR)
1334       /* ToDo: check 2nd arg (mvar) is right */
1335       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1336 #else
1337       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1338 #endif
1339       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1340           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1341       }
1342 #ifdef SMP
1343       /* unlocks the MVar in the SMP case */
1344       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1345 #endif
1346       JMP_(ENTRY_CODE(Sp[0]));
1347   } else {
1348       /* No further takes, the MVar is now full. */
1349       mvar->value = R2.cl;
1350       /* unlocks the MVar in the SMP case */
1351       SET_INFO(mvar,&stg_FULL_MVAR_info);
1352       JMP_(ENTRY_CODE(Sp[0]));
1353   }
1354
1355   /* ToDo: yield afterward for better communication performance? */
1356   FE_
1357 }
1358
1359 /* -----------------------------------------------------------------------------
1360    Stable pointer primitives
1361    -------------------------------------------------------------------------  */
1362
1363 FN_(makeStableNamezh_fast)
1364 {
1365   StgWord index;
1366   StgStableName *sn_obj;
1367   FB_
1368
1369   HP_CHK_GEN_TICKY(sizeofW(StgStableName), R1_PTR, makeStableNamezh_fast,);
1370   TICK_ALLOC_PRIM(sizeofW(StgHeader), 
1371                   sizeofW(StgStableName)-sizeofW(StgHeader), 0);
1372   CCS_ALLOC(CCCS,sizeofW(StgStableName)); /* ccs prof */
1373   
1374   index = RET_STGCALL1(StgWord,lookupStableName,R1.p);
1375
1376   /* Is there already a StableName for this heap object? */
1377   if (stable_ptr_table[index].sn_obj == NULL) {
1378     sn_obj = (StgStableName *) (Hp - sizeofW(StgStableName) + 1);
1379     SET_HDR(sn_obj,&stg_STABLE_NAME_info,CCCS);
1380     sn_obj->sn = index;
1381     stable_ptr_table[index].sn_obj = (StgClosure *)sn_obj;
1382   } else {
1383     (StgClosure *)sn_obj = stable_ptr_table[index].sn_obj;
1384   }
1385
1386   TICK_RET_UNBOXED_TUP(1);
1387   RET_P(sn_obj);
1388 }
1389
1390
1391 FN_(makeStablePtrzh_fast)
1392 {
1393   /* Args: R1 = a */
1394   StgStablePtr sp;
1395   FB_
1396   MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1397   sp = RET_STGCALL1(StgStablePtr,getStablePtr,R1.p);
1398   RET_N(sp);
1399   FE_
1400 }
1401
1402 FN_(deRefStablePtrzh_fast)
1403 {
1404   /* Args: R1 = the stable ptr */
1405   P_ r;
1406   StgStablePtr sp;
1407   FB_
1408   sp = (StgStablePtr)R1.w;
1409   r = stable_ptr_table[(StgWord)sp].addr;
1410   RET_P(r);
1411   FE_
1412 }
1413
1414 /* -----------------------------------------------------------------------------
1415    Bytecode object primitives
1416    -------------------------------------------------------------------------  */
1417
1418 FN_(newBCOzh_fast)
1419 {
1420   /* R1.p = instrs
1421      R2.p = literals
1422      R3.p = ptrs
1423      R4.p = itbls
1424   */
1425   StgBCO *bco;
1426   FB_
1427
1428   HP_CHK_GEN_TICKY(sizeofW(StgBCO),R1_PTR|R2_PTR|R3_PTR|R4_PTR, newBCOzh_fast,);
1429   TICK_ALLOC_PRIM(sizeofW(StgHeader), sizeofW(StgBCO)-sizeofW(StgHeader), 0);
1430   CCS_ALLOC(CCCS,sizeofW(StgBCO)); /* ccs prof */
1431   bco = (StgBCO *) (Hp + 1 - sizeofW(StgBCO));
1432   SET_HDR(bco, &stg_BCO_info, CCCS);
1433
1434   bco->instrs     = (StgArrWords*)R1.cl;
1435   bco->literals   = (StgArrWords*)R2.cl;
1436   bco->ptrs       = (StgMutArrPtrs*)R3.cl;
1437   bco->itbls      = (StgArrWords*)R4.cl;
1438
1439   TICK_RET_UNBOXED_TUP(1);
1440   RET_P(bco);
1441   FE_
1442 }
1443
1444 FN_(mkApUpd0zh_fast)
1445 {
1446   /* R1.p = the fn for the AP_UPD
1447   */
1448   StgAP_UPD* ap;
1449   FB_
1450   HP_CHK_GEN_TICKY(AP_sizeW(0), R1_PTR, mkApUpd0zh_fast,);
1451   TICK_ALLOC_PRIM(sizeofW(StgHeader), AP_sizeW(0)-sizeofW(StgHeader), 0);
1452   CCS_ALLOC(CCCS,AP_sizeW(0)); /* ccs prof */
1453   ap = (StgAP_UPD *) (Hp + 1 - AP_sizeW(0));
1454   SET_HDR(ap, &stg_AP_UPD_info, CCCS);
1455
1456   ap->n_args = 0;
1457   ap->fun = R1.cl;
1458
1459   TICK_RET_UNBOXED_TUP(1);
1460   RET_P(ap);
1461   FE_
1462 }
1463
1464 /* -----------------------------------------------------------------------------
1465    Thread I/O blocking primitives
1466    -------------------------------------------------------------------------- */
1467
1468 FN_(waitReadzh_fast)
1469 {
1470   FB_
1471     /* args: R1.i */
1472     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1473     CurrentTSO->why_blocked = BlockedOnRead;
1474     CurrentTSO->block_info.fd = R1.i;
1475     ACQUIRE_LOCK(&sched_mutex);
1476     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1477     RELEASE_LOCK(&sched_mutex);
1478     JMP_(stg_block_noregs);
1479   FE_
1480 }
1481
1482 FN_(waitWritezh_fast)
1483 {
1484   FB_
1485     /* args: R1.i */
1486     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1487     CurrentTSO->why_blocked = BlockedOnWrite;
1488     CurrentTSO->block_info.fd = R1.i;
1489     ACQUIRE_LOCK(&sched_mutex);
1490     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1491     RELEASE_LOCK(&sched_mutex);
1492     JMP_(stg_block_noregs);
1493   FE_
1494 }
1495
1496 FN_(delayzh_fast)
1497 {
1498   StgTSO *t, *prev;
1499   nat target;
1500   FB_
1501     /* args: R1.i */
1502     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1503     CurrentTSO->why_blocked = BlockedOnDelay;
1504
1505     ACQUIRE_LOCK(&sched_mutex);
1506
1507     target = (R1.i / (TICK_MILLISECS*1000)) + getourtimeofday();
1508     CurrentTSO->block_info.target = target;
1509
1510     /* Insert the new thread in the sleeping queue. */
1511     prev = NULL;
1512     t = sleeping_queue;
1513     while (t != END_TSO_QUEUE && t->block_info.target < target) {
1514         prev = t;
1515         t = t->link;
1516     }
1517
1518     CurrentTSO->link = t;
1519     if (prev == NULL) {
1520         sleeping_queue = CurrentTSO;
1521     } else {
1522         prev->link = CurrentTSO;
1523     }
1524
1525     RELEASE_LOCK(&sched_mutex);
1526     JMP_(stg_block_noregs);
1527   FE_
1528 }
1529