[project @ 2002-04-23 09:56:28 by stolz]
[ghc-hetmet.git] / ghc / rts / PrimOps.hc
1 /* -----------------------------------------------------------------------------
2  * $Id: PrimOps.hc,v 1.96 2002/04/23 09:56:29 stolz Exp $
3  *
4  * (c) The GHC Team, 1998-2000
5  *
6  * Primitive functions / data
7  *
8  * ---------------------------------------------------------------------------*/
9
10 #include "Stg.h"
11 #include "Rts.h"
12
13 #include "RtsFlags.h"
14 #include "StgStartup.h"
15 #include "SchedAPI.h"
16 #include "Schedule.h"
17 #include "RtsUtils.h"
18 #include "Storage.h"
19 #include "BlockAlloc.h" /* tmp */
20 #include "StablePriv.h"
21 #include "StgRun.h"
22 #include "Itimer.h"
23 #include "Prelude.h"
24
25 /* ** temporary **
26
27    classes CCallable and CReturnable don't really exist, but the
28    compiler insists on generating dictionaries containing references
29    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
30    for these.  Some C compilers can't cope with zero-length static arrays,
31    so we have to make these one element long.
32 */
33
34 StgWord GHC_ZCCCallable_static_info[1];
35 StgWord GHC_ZCCReturnable_static_info[1];
36   
37 /* -----------------------------------------------------------------------------
38    Macros for Hand-written primitives.
39    -------------------------------------------------------------------------- */
40
41 /*
42  * Horrible macros for returning unboxed tuples.
43  *
44  * How an unboxed tuple is returned depends on two factors:
45  *    - the number of real registers we have available
46  *    - the boxedness of the returned fields.
47  *
48  * To return an unboxed tuple from a primitive operation, we have macros
49  * RET_<layout> where <layout> describes the boxedness of each field of the
50  * unboxed tuple:  N indicates a non-pointer field, and P indicates a pointer.
51  *
52  * We only define the cases actually used, to avoid having too much
53  * garbage in this section.  Warning: any bugs in here will be hard to
54  * track down.
55  */
56
57 /*------ All Regs available */
58 #if defined(REG_R8)
59 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
60 # define RET_N(a)     RET_P(a)
61
62 # define RET_PP(a,b)  R1.w = (W_)(a); R2.w = (W_)(b); JMP_(ENTRY_CODE(Sp[0]));
63 # define RET_NN(a,b)  RET_PP(a,b)
64 # define RET_NP(a,b)  RET_PP(a,b)
65
66 # define RET_PPP(a,b,c) \
67         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); JMP_(ENTRY_CODE(Sp[0]));
68 # define RET_NNP(a,b,c) RET_PPP(a,b,c)
69
70 # define RET_NNNP(a,b,c,d) \
71         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)d; \
72         JMP_(ENTRY_CODE(Sp[0]));
73
74 # define RET_NPNP(a,b,c,d) \
75         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)(d); \
76         JMP_(ENTRY_CODE(Sp[0]));
77
78 # define RET_NNPNNP(a,b,c,d,e,f) \
79         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); \
80         R4.w = (W_)(d); R5.w = (W_)(e); R6.w = (W_)(f); \
81         JMP_(ENTRY_CODE(Sp[0]));
82
83 #elif defined(REG_R7) || defined(REG_R6) || defined(REG_R5) || \
84       defined(REG_R4) || defined(REG_R3)
85 # error RET_n macros not defined for this setup.
86
87 /*------ 2 Registers available */
88 #elif defined(REG_R2)
89
90 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
91 # define RET_N(a)     RET_P(a)
92
93 # define RET_PP(a,b)   R1.w = (W_)(a); R2.w = (W_)(b); \
94                        JMP_(ENTRY_CODE(Sp[0]));
95 # define RET_NN(a,b)   RET_PP(a,b)
96 # define RET_NP(a,b)   RET_PP(a,b)
97
98 # define RET_PPP(a,b,c) \
99         R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \
100         JMP_(ENTRY_CODE(Sp[1]));
101 # define RET_NNP(a,b,c) \
102         R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \
103         JMP_(ENTRY_CODE(Sp[1]));
104
105 # define RET_NNNP(a,b,c,d)                      \
106         R1.w = (W_)(a);                         \
107         R2.w = (W_)(b);                         \
108     /*  Sp[-3] = ARGTAG(1); */                  \
109         Sp[-2] = (W_)(c);                       \
110         Sp[-1] = (W_)(d);                       \
111         Sp -= 3;                                \
112         JMP_(ENTRY_CODE(Sp[3]));
113
114 # define RET_NPNP(a,b,c,d)                      \
115         R1.w = (W_)(a);                         \
116         R2.w = (W_)(b);                         \
117     /*  Sp[-3] = ARGTAG(1); */                  \
118         Sp[-2] = (W_)(c);                       \
119         Sp[-1] = (W_)(d);                       \
120         Sp -= 3;                                \
121         JMP_(ENTRY_CODE(Sp[3]));
122
123 # define RET_NNPNNP(a,b,c,d,e,f)                \
124         R1.w = (W_)(a);                         \
125         R2.w = (W_)(b);                         \
126         Sp[-6] = (W_)(c);                       \
127         /* Sp[-5] = ARGTAG(1); */               \
128         Sp[-4] = (W_)(d);                       \
129         /* Sp[-3] = ARGTAG(1); */               \
130         Sp[-2] = (W_)(e);                       \
131         Sp[-1] = (W_)(f);                       \
132         Sp -= 6;                                \
133         JMP_(ENTRY_CODE(Sp[6]));
134
135 /*------ 1 Register available */
136 #elif defined(REG_R1)
137 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
138 # define RET_N(a)     RET_P(a)
139
140 # define RET_PP(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 1; \
141                        JMP_(ENTRY_CODE(Sp[1]));
142 # define RET_NN(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 2; \
143                        JMP_(ENTRY_CODE(Sp[2]));
144 # define RET_NP(a,b)   RET_PP(a,b)
145
146 # define RET_PPP(a,b,c) \
147         R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 2; \
148         JMP_(ENTRY_CODE(Sp[2]));
149 # define RET_NNP(a,b,c) \
150         R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 3; \
151         JMP_(ENTRY_CODE(Sp[3]));
152
153 # define RET_NNNP(a,b,c,d)                      \
154         R1.w = (W_)(a);                         \
155     /*  Sp[-5] = ARGTAG(1); */                  \
156         Sp[-4] = (W_)(b);                       \
157     /*  Sp[-3] = ARGTAG(1); */                  \
158         Sp[-2] = (W_)(c);                       \
159         Sp[-1] = (W_)(d);                       \
160         Sp -= 5;                                \
161         JMP_(ENTRY_CODE(Sp[5]));
162
163 # define RET_NPNP(a,b,c,d)                      \
164         R1.w = (W_)(a);                         \
165         Sp[-4] = (W_)(b);                       \
166     /*  Sp[-3] = ARGTAG(1); */                  \
167         Sp[-2] = (W_)(c);                       \
168         Sp[-1] = (W_)(d);                       \
169         Sp -= 4;                                \
170         JMP_(ENTRY_CODE(Sp[4]));
171
172 # define RET_NNPNNP(a,b,c,d,e,f)                \
173         R1.w = (W_)(a);                         \
174         Sp[-1] = (W_)(f);                       \
175         Sp[-2] = (W_)(e);                       \
176         /* Sp[-3] = ARGTAG(1); */               \
177         Sp[-4] = (W_)(d);                       \
178         /* Sp[-5] = ARGTAG(1); */               \
179         Sp[-6] = (W_)(c);                       \
180         Sp[-7] = (W_)(b);                       \
181         /* Sp[-8] = ARGTAG(1); */               \
182         Sp -= 8;                                \
183         JMP_(ENTRY_CODE(Sp[8]));
184
185 #else /* 0 Regs available */
186
187 #define PUSH_P(o,x) Sp[-o] = (W_)(x)
188
189 #ifdef DEBUG
190 #define PUSH_N(o,x) Sp[1-o] = (W_)(x);  Sp[-o] = ARG_TAG(1);
191 #else
192 #define PUSH_N(o,x) Sp[1-o] = (W_)(x);
193 #endif
194
195 #define PUSHED(m)   Sp -= (m); JMP_(ENTRY_CODE(Sp[m]));
196
197 /* Here's how to construct these macros:
198  *
199  *   N = number of N's in the name;
200  *   P = number of P's in the name;
201  *   s = N * 2 + P;
202  *   while (nonNull(name)) {
203  *     if (nextChar == 'P') {
204  *       PUSH_P(s,_);
205  *       s -= 1;
206  *     } else {
207  *       PUSH_N(s,_);
208  *       s -= 2
209  *     }
210  *   }
211  *   PUSHED(N * 2 + P);
212  */
213
214 # define RET_P(a)     PUSH_P(1,a); PUSHED(1)
215 # define RET_N(a)     PUSH_N(2,a); PUSHED(2)
216
217 # define RET_PP(a,b)   PUSH_P(2,a); PUSH_P(1,b); PUSHED(2)
218 # define RET_NN(a,b)   PUSH_N(4,a); PUSH_N(2,b); PUSHED(4)
219 # define RET_NP(a,b)   PUSH_N(3,a); PUSH_P(1,b); PUSHED(3)
220
221 # define RET_PPP(a,b,c) PUSH_P(3,a); PUSH_P(2,b); PUSH_P(1,c); PUSHED(3)
222 # define RET_NNP(a,b,c) PUSH_N(5,a); PUSH_N(3,b); PUSH_P(1,c); PUSHED(5)
223
224 # define RET_NNNP(a,b,c,d) PUSH_N(7,a); PUSH_N(5,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(7)        
225 # define RET_NPNP(a,b,c,d) PUSH_N(6,a); PUSH_P(4,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(6)        
226 # define RET_NNPNNP(a,b,c,d,e,f) PUSH_N(10,a); PUSH_N(8,b); PUSH_P(6,c); PUSH_N(5,d); PUSH_N(3,e); PUSH_P(1,f); PUSHED(10)
227
228 #endif
229
230 /*-----------------------------------------------------------------------------
231   Array Primitives
232
233   Basically just new*Array - the others are all inline macros.
234
235   The size arg is always passed in R1, and the result returned in R1.
236
237   The slow entry point is for returning from a heap check, the saved
238   size argument must be re-loaded from the stack.
239   -------------------------------------------------------------------------- */
240
241 /* for objects that are *less* than the size of a word, make sure we
242  * round up to the nearest word for the size of the array.
243  */
244
245 #define BYTES_TO_STGWORDS(n) ((n) + sizeof(W_) - 1)/sizeof(W_)
246
247 FN_(newByteArrayzh_fast)                                \
248  {                                                      \
249    W_ size, stuff_size, n;                              \
250    StgArrWords* p;                                      \
251    FB_                                                  \
252      MAYBE_GC(NO_PTRS,newByteArrayzh_fast);             \
253      n = R1.w;                                          \
254      stuff_size = BYTES_TO_STGWORDS(n);                 \
255      size = sizeofW(StgArrWords)+ stuff_size;           \
256      p = (StgArrWords *)RET_STGCALL1(P_,allocate,size); \
257      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0); \
258      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);             \
259      p->words = stuff_size;                             \
260      TICK_RET_UNBOXED_TUP(1)                            \
261      RET_P(p);                                          \
262    FE_                                                  \
263  }
264
265 FN_(newPinnedByteArrayzh_fast)                                  \
266  {                                                              \
267    W_ size, stuff_size, n;                                      \
268    StgArrWords* p;                                              \
269    FB_                                                          \
270      MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);               \
271      n = R1.w;                                                  \
272      stuff_size = BYTES_TO_STGWORDS(n);                         \
273      size = sizeofW(StgArrWords)+ stuff_size;                   \
274      p = (StgArrWords *)RET_STGCALL1(P_,allocatePinned,size);   \
275      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);        \
276      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);                     \
277      p->words = stuff_size;                                     \
278      TICK_RET_UNBOXED_TUP(1)                                    \
279      RET_P(p);                                                  \
280    FE_                                                          \
281  }
282
283 FN_(newArrayzh_fast)
284 {
285   W_ size, n, init;
286   StgMutArrPtrs* arr;
287   StgPtr p;
288   FB_
289     n = R1.w;
290
291     MAYBE_GC(R2_PTR,newArrayzh_fast);
292
293     size = sizeofW(StgMutArrPtrs) + n;
294     arr = (StgMutArrPtrs *)RET_STGCALL1(P_, allocate, size);
295     TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
296
297     SET_HDR(arr,&stg_MUT_ARR_PTRS_info,CCCS);
298     arr->ptrs = n;
299
300     init = R2.w;
301     for (p = (P_)arr + sizeofW(StgMutArrPtrs); 
302          p < (P_)arr + size; p++) {
303         *p = (W_)init;
304     }
305
306     TICK_RET_UNBOXED_TUP(1);
307     RET_P(arr);
308   FE_
309 }
310
311 FN_(newMutVarzh_fast)
312 {
313   StgMutVar* mv;
314   /* Args: R1.p = initialisation value */
315   FB_
316
317   HP_CHK_GEN_TICKY(sizeofW(StgMutVar), R1_PTR, newMutVarzh_fast,);
318   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,1, 0); /* hack, dependent on rep. */
319   CCS_ALLOC(CCCS,sizeofW(StgMutVar));
320
321   mv = (StgMutVar *)(Hp-sizeofW(StgMutVar)+1);
322   SET_HDR(mv,&stg_MUT_VAR_info,CCCS);
323   mv->var = R1.cl;
324
325   TICK_RET_UNBOXED_TUP(1);
326   RET_P(mv);
327   FE_
328 }
329
330 /* -----------------------------------------------------------------------------
331    Foreign Object Primitives
332    -------------------------------------------------------------------------- */
333
334 FN_(mkForeignObjzh_fast)
335 {
336   /* R1.p = ptr to foreign object,
337   */
338   StgForeignObj *result;
339   FB_
340
341   HP_CHK_GEN_TICKY(sizeofW(StgForeignObj), NO_PTRS, mkForeignObjzh_fast,);
342   TICK_ALLOC_PRIM(sizeofW(StgHeader),
343                   sizeofW(StgForeignObj)-sizeofW(StgHeader), 0);
344   CCS_ALLOC(CCCS,sizeofW(StgForeignObj)); /* ccs prof */
345
346   result = (StgForeignObj *) (Hp + 1 - sizeofW(StgForeignObj));
347   SET_HDR(result,&stg_FOREIGN_info,CCCS);
348   result->data = R1.p;
349
350   /* returns (# s#, ForeignObj# #) */
351   TICK_RET_UNBOXED_TUP(1);
352   RET_P(result);
353   FE_
354 }
355
356 /* These two are out-of-line for the benefit of the NCG */
357 FN_(unsafeThawArrayzh_fast)
358 {
359   FB_
360   SET_INFO((StgClosure *)R1.cl,&stg_MUT_ARR_PTRS_info);
361   recordMutable((StgMutClosure*)R1.cl);
362
363   TICK_RET_UNBOXED_TUP(1);
364   RET_P(R1.p);
365   FE_
366 }
367
368 /* -----------------------------------------------------------------------------
369    Weak Pointer Primitives
370    -------------------------------------------------------------------------- */
371
372 FN_(mkWeakzh_fast)
373 {
374   /* R1.p = key
375      R2.p = value
376      R3.p = finalizer (or NULL)
377   */
378   StgWeak *w;
379   FB_
380
381   if (R3.cl == NULL) {
382     R3.cl = &stg_NO_FINALIZER_closure;
383   }
384
385   HP_CHK_GEN_TICKY(sizeofW(StgWeak),R1_PTR|R2_PTR|R3_PTR, mkWeakzh_fast,);
386   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,  // +1 is for the link field
387                   sizeofW(StgWeak)-sizeofW(StgHeader)-1, 0);
388   CCS_ALLOC(CCCS,sizeofW(StgWeak)); /* ccs prof */
389
390   w = (StgWeak *) (Hp + 1 - sizeofW(StgWeak));
391   SET_HDR(w, &stg_WEAK_info, CCCS);
392
393   w->key        = R1.cl;
394   w->value      = R2.cl;
395   w->finalizer  = R3.cl;
396
397   w->link       = weak_ptr_list;
398   weak_ptr_list = w;
399   IF_DEBUG(weak, fprintf(stderr,"New weak pointer at %p\n",w));
400
401   TICK_RET_UNBOXED_TUP(1);
402   RET_P(w);
403   FE_
404 }
405
406 FN_(finalizzeWeakzh_fast)
407 {
408   /* R1.p = weak ptr
409    */
410   StgDeadWeak *w;
411   StgClosure *f;
412   FB_
413   TICK_RET_UNBOXED_TUP(0);
414   w = (StgDeadWeak *)R1.p;
415
416   /* already dead? */
417   if (w->header.info == &stg_DEAD_WEAK_info) {
418       RET_NP(0,&stg_NO_FINALIZER_closure);
419   }
420
421   /* kill it */
422 #ifdef PROFILING
423   // @LDV profiling
424   // A weak pointer is inherently used, so we do not need to call
425   // LDV_recordDead_FILL_SLOP_DYNAMIC():
426   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
427   // or, LDV_recordDead():
428   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
429   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
430   // large as weak pointers, so there is no need to fill the slop, either.
431   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
432 #endif
433   //
434   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
435   //
436   w->header.info = &stg_DEAD_WEAK_info;
437 #ifdef PROFILING
438   // @LDV profiling
439   LDV_recordCreate((StgClosure *)w);
440 #endif
441   f = ((StgWeak *)w)->finalizer;
442   w->link = ((StgWeak *)w)->link;
443
444   /* return the finalizer */
445   if (f == &stg_NO_FINALIZER_closure) {
446       RET_NP(0,&stg_NO_FINALIZER_closure);
447   } else {
448       RET_NP(1,f);
449   }
450   FE_
451 }
452
453 FN_(deRefWeakzh_fast)
454 {
455   /* R1.p = weak ptr */
456   StgWeak* w;
457   I_       code;
458   P_       val;
459   FB_
460   w = (StgWeak*)R1.p;
461   if (w->header.info == &stg_WEAK_info) {
462     code = 1;
463     val = (P_)((StgWeak *)w)->value;
464   } else {
465     code = 0;
466     val = (P_)w;
467   }
468   RET_NP(code,val);
469   FE_
470 }
471
472 /* -----------------------------------------------------------------------------
473    Arbitrary-precision Integer operations.
474    -------------------------------------------------------------------------- */
475
476 FN_(int2Integerzh_fast)
477 {
478    /* arguments: R1 = Int# */
479
480    I_ val, s;           /* to avoid aliasing */
481    StgArrWords* p;      /* address of array result */
482    FB_
483
484    val = R1.i;
485    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, int2Integerzh_fast,);
486    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
487    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
488
489    p = (StgArrWords *)Hp - 1;
490    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
491
492    /* mpz_set_si is inlined here, makes things simpler */
493    if (val < 0) { 
494         s  = -1;
495         *Hp = -val;
496    } else if (val > 0) {
497         s = 1;
498         *Hp = val;
499    } else {
500         s = 0;
501    }
502
503    /* returns (# size  :: Int#, 
504                  data  :: ByteArray# 
505                #)
506    */
507    TICK_RET_UNBOXED_TUP(2);
508    RET_NP(s,p);
509    FE_
510 }
511
512 FN_(word2Integerzh_fast)
513 {
514    /* arguments: R1 = Word# */
515
516    W_ val;              /* to avoid aliasing */
517    I_  s;
518    StgArrWords* p;      /* address of array result */
519    FB_
520
521    val = R1.w;
522    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, word2Integerzh_fast,)
523    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
524    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
525
526    p = (StgArrWords *)Hp - 1;
527    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
528
529    if (val != 0) {
530         s = 1;
531         *Hp = val;
532    } else {
533         s = 0;
534    }
535
536    /* returns (# size  :: Int#, 
537                  data  :: ByteArray# 
538                #)
539    */
540    TICK_RET_UNBOXED_TUP(2);
541    RET_NP(s,p);
542    FE_
543 }
544
545
546 /*
547  * 'long long' primops for converting to/from Integers.
548  */
549
550 #ifdef SUPPORT_LONG_LONGS
551
552 FN_(int64ToIntegerzh_fast)
553 {
554    /* arguments: L1 = Int64# */
555
556    StgInt64  val; /* to avoid aliasing */
557    W_ hi;
558    I_  s, neg, words_needed;
559    StgArrWords* p;      /* address of array result */
560    FB_
561
562    val = (LI_)L1;
563    neg = 0;
564
565    if ( val >= 0x100000000LL || val <= -0x100000000LL )  { 
566        words_needed = 2;
567    } else { 
568        /* minimum is one word */
569        words_needed = 1;
570    }
571    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, int64ToIntegerzh_fast,)
572    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
573    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
574
575    p = (StgArrWords *)(Hp-words_needed+1) - 1;
576    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
577
578    if ( val < 0LL ) {
579      neg = 1;
580      val = -val;
581    }
582
583    hi = (W_)((LW_)val / 0x100000000ULL);
584
585    if ( words_needed == 2 )  { 
586       s = 2;
587       Hp[-1] = (W_)val;
588       Hp[0] = hi;
589    } else if ( val != 0 ) {
590       s = 1;
591       Hp[0] = (W_)val;
592    }  else /* val==0 */   {
593       s = 0;
594    }
595    s = ( neg ? -s : s );
596
597    /* returns (# size  :: Int#, 
598                  data  :: ByteArray# 
599                #)
600    */
601    TICK_RET_UNBOXED_TUP(2);
602    RET_NP(s,p);
603    FE_
604 }
605
606 FN_(word64ToIntegerzh_fast)
607 {
608    /* arguments: L1 = Word64# */
609
610    StgWord64 val; /* to avoid aliasing */
611    StgWord hi;
612    I_  s, words_needed;
613    StgArrWords* p;      /* address of array result */
614    FB_
615
616    val = (LW_)L1;
617    if ( val >= 0x100000000ULL ) {
618       words_needed = 2;
619    } else {
620       words_needed = 1;
621    }
622    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, word64ToIntegerzh_fast,)
623    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
624    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
625
626    p = (StgArrWords *)(Hp-words_needed+1) - 1;
627    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
628
629    hi = (W_)((LW_)val / 0x100000000ULL);
630    if ( val >= 0x100000000ULL ) { 
631      s = 2;
632      Hp[-1] = ((W_)val);
633      Hp[0]  = (hi);
634    } else if ( val != 0 )      {
635       s = 1;
636       Hp[0] = ((W_)val);
637    } else /* val==0 */         {
638       s = 0;
639    }
640
641    /* returns (# size  :: Int#, 
642                  data  :: ByteArray# 
643                #)
644    */
645    TICK_RET_UNBOXED_TUP(2);
646    RET_NP(s,p);
647    FE_
648 }
649
650
651 #endif /* SUPPORT_LONG_LONGS */
652
653 /* ToDo: this is shockingly inefficient */
654
655 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
656 FN_(name)                                                               \
657 {                                                                       \
658   MP_INT arg1, arg2, result;                                            \
659   I_ s1, s2;                                                            \
660   StgArrWords* d1;                                                      \
661   StgArrWords* d2;                                                      \
662   FB_                                                                   \
663                                                                         \
664   /* call doYouWantToGC() */                                            \
665   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
666                                                                         \
667   d1 = (StgArrWords *)R2.p;                                             \
668   s1 = R1.i;                                                            \
669   d2 = (StgArrWords *)R4.p;                                             \
670   s2 = R3.i;                                                            \
671                                                                         \
672   arg1._mp_alloc        = d1->words;                                    \
673   arg1._mp_size         = (s1);                                         \
674   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
675   arg2._mp_alloc        = d2->words;                                    \
676   arg2._mp_size         = (s2);                                         \
677   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
678                                                                         \
679   STGCALL1(mpz_init,&result);                                           \
680                                                                         \
681   /* Perform the operation */                                           \
682   STGCALL3(mp_fun,&result,&arg1,&arg2);                                 \
683                                                                         \
684   TICK_RET_UNBOXED_TUP(2);                                              \
685   RET_NP(result._mp_size,                                               \
686          result._mp_d-sizeofW(StgArrWords));                            \
687   FE_                                                                   \
688 }
689
690 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
691 FN_(name)                                                               \
692 {                                                                       \
693   MP_INT arg1, result;                                                  \
694   I_ s1;                                                                \
695   StgArrWords* d1;                                                      \
696   FB_                                                                   \
697                                                                         \
698   /* call doYouWantToGC() */                                            \
699   MAYBE_GC(R2_PTR, name);                                               \
700                                                                         \
701   d1 = (StgArrWords *)R2.p;                                             \
702   s1 = R1.i;                                                            \
703                                                                         \
704   arg1._mp_alloc        = d1->words;                                    \
705   arg1._mp_size         = (s1);                                         \
706   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
707                                                                         \
708   STGCALL1(mpz_init,&result);                                           \
709                                                                         \
710   /* Perform the operation */                                           \
711   STGCALL2(mp_fun,&result,&arg1);                                       \
712                                                                         \
713   TICK_RET_UNBOXED_TUP(2);                                              \
714   RET_NP(result._mp_size,                                               \
715          result._mp_d-sizeofW(StgArrWords));                            \
716   FE_                                                                   \
717 }
718
719 #define GMP_TAKE2_RET2(name,mp_fun)                                     \
720 FN_(name)                                                               \
721 {                                                                       \
722   MP_INT arg1, arg2, result1, result2;                                  \
723   I_ s1, s2;                                                            \
724   StgArrWords* d1;                                                      \
725   StgArrWords* d2;                                                      \
726   FB_                                                                   \
727                                                                         \
728   /* call doYouWantToGC() */                                            \
729   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
730                                                                         \
731   d1 = (StgArrWords *)R2.p;                                             \
732   s1 = R1.i;                                                            \
733   d2 = (StgArrWords *)R4.p;                                             \
734   s2 = R3.i;                                                            \
735                                                                         \
736   arg1._mp_alloc        = d1->words;                                    \
737   arg1._mp_size         = (s1);                                         \
738   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
739   arg2._mp_alloc        = d2->words;                                    \
740   arg2._mp_size         = (s2);                                         \
741   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
742                                                                         \
743   STGCALL1(mpz_init,&result1);                                          \
744   STGCALL1(mpz_init,&result2);                                          \
745                                                                         \
746   /* Perform the operation */                                           \
747   STGCALL4(mp_fun,&result1,&result2,&arg1,&arg2);                       \
748                                                                         \
749   TICK_RET_UNBOXED_TUP(4);                                              \
750   RET_NPNP(result1._mp_size,                                            \
751            result1._mp_d-sizeofW(StgArrWords),                          \
752            result2._mp_size,                                            \
753            result2._mp_d-sizeofW(StgArrWords));                         \
754   FE_                                                                   \
755 }
756
757 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add);
758 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub);
759 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul);
760 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd);
761 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q);
762 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r);
763 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact);
764 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and);
765 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior);
766 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor);
767 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com);
768
769 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr);
770 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr);
771
772
773 FN_(gcdIntzh_fast)
774 {
775   /* R1 = the first Int#; R2 = the second Int# */
776   mp_limb_t aa;
777   I_ r;
778   FB_
779   aa = (mp_limb_t)(R1.i);
780   r = RET_STGCALL3(StgInt, mpn_gcd_1, (mp_limb_t *)(&aa), 1, (mp_limb_t)(R2.i));
781
782   R1.i = r;
783   /* Result parked in R1, return via info-pointer at TOS */
784   JMP_(ENTRY_CODE(Sp[0]));
785   FE_
786 }
787
788 FN_(gcdIntegerIntzh_fast)
789 {
790   /* R1 = s1; R2 = d1; R3 = the int */
791   I_ r;
792   FB_
793   r = RET_STGCALL3(StgInt,mpn_gcd_1,(mp_limb_t *)(BYTE_ARR_CTS(R2.p)), R1.i, R3.i);
794
795   R1.i = r;
796   /* Result parked in R1, return via info-pointer at TOS */
797   JMP_(ENTRY_CODE(Sp[0]));
798   FE_
799 }
800
801 FN_(cmpIntegerIntzh_fast)
802 {
803   /* R1 = s1; R2 = d1; R3 = the int */
804   I_ usize;
805   I_ vsize;
806   I_ v_digit;
807   mp_limb_t u_digit;
808   FB_
809
810   usize = R1.i;
811   vsize = 0;
812   v_digit = R3.i;
813
814   // paraphrased from mpz_cmp_si() in the GMP sources
815   if (v_digit > 0) {
816       vsize = 1;
817   } else if (v_digit < 0) {
818       vsize = -1;
819       v_digit = -v_digit;
820   }
821
822   if (usize != vsize) {
823     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
824   }
825
826   if (usize == 0) {
827     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
828   }
829
830   u_digit = *(mp_limb_t *)(BYTE_ARR_CTS(R2.p));
831
832   if (u_digit == (mp_limb_t) (unsigned long) v_digit) {
833     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
834   }
835
836   if (u_digit > (mp_limb_t) (unsigned long) v_digit) {
837     R1.i = usize; 
838   } else {
839     R1.i = -usize; 
840   }
841
842   JMP_(ENTRY_CODE(Sp[0]));
843   FE_
844 }
845
846 FN_(cmpIntegerzh_fast)
847 {
848   /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
849   I_ usize;
850   I_ vsize;
851   I_ size;
852   StgPtr up, vp;
853   int cmp;
854   FB_
855
856   // paraphrased from mpz_cmp() in the GMP sources
857   usize = R1.i;
858   vsize = R3.i;
859
860   if (usize != vsize) {
861     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
862   }
863
864   if (usize == 0) {
865     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
866   }
867
868   size = abs(usize);
869
870   up = BYTE_ARR_CTS(R2.p);
871   vp = BYTE_ARR_CTS(R4.p);
872
873   cmp = RET_STGCALL3(I_, mpn_cmp, (mp_limb_t *)up, (mp_limb_t *)vp, size);
874
875   if (cmp == 0) {
876     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
877   }
878
879   if ((cmp < 0) == (usize < 0)) {
880     R1.i = 1;
881   } else {
882     R1.i = (-1); 
883   }
884   /* Result parked in R1, return via info-pointer at TOS */
885   JMP_(ENTRY_CODE(Sp[0]));
886   FE_
887 }
888
889 FN_(integer2Intzh_fast)
890 {
891   /* R1 = s; R2 = d */
892   I_ r, s;
893   FB_
894   s = R1.i;
895   if (s == 0)
896     r = 0;
897   else {
898     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
899     if (s < 0) r = -r;
900   }
901   /* Result parked in R1, return via info-pointer at TOS */
902   R1.i = r;
903   JMP_(ENTRY_CODE(Sp[0]));
904   FE_
905 }
906
907 FN_(integer2Wordzh_fast)
908 {
909   /* R1 = s; R2 = d */
910   I_ s;
911   W_ r;
912   FB_
913   s = R1.i;
914   if (s == 0)
915     r = 0;
916   else {
917     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
918     if (s < 0) r = -r;
919   }
920   /* Result parked in R1, return via info-pointer at TOS */
921   R1.w = r;
922   JMP_(ENTRY_CODE(Sp[0]));
923   FE_
924 }
925
926
927 FN_(decodeFloatzh_fast)
928
929   MP_INT mantissa;
930   I_ exponent;
931   StgArrWords* p;
932   StgFloat arg;
933   FB_
934
935   /* arguments: F1 = Float# */
936   arg = F1;
937
938   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, decodeFloatzh_fast,);
939   TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
940   CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
941
942   /* Be prepared to tell Lennart-coded __decodeFloat    */
943   /* where mantissa._mp_d can be put (it does not care about the rest) */
944   p = (StgArrWords *)Hp - 1;
945   SET_ARR_HDR(p,&stg_ARR_WORDS_info,CCCS,1)
946   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
947
948   /* Perform the operation */
949   STGCALL3(__decodeFloat,&mantissa,&exponent,arg);
950
951   /* returns: (Int# (expn), Int#, ByteArray#) */
952   TICK_RET_UNBOXED_TUP(3);
953   RET_NNP(exponent,mantissa._mp_size,p);
954   FE_
955 }
956
957 #define DOUBLE_MANTISSA_SIZE (sizeofW(StgDouble))
958 #define ARR_SIZE (sizeofW(StgArrWords) + DOUBLE_MANTISSA_SIZE)
959
960 FN_(decodeDoublezh_fast)
961 { MP_INT mantissa;
962   I_ exponent;
963   StgDouble arg;
964   StgArrWords* p;
965   FB_
966
967   /* arguments: D1 = Double# */
968   arg = D1;
969
970   HP_CHK_GEN_TICKY(ARR_SIZE, NO_PTRS, decodeDoublezh_fast,);
971   TICK_ALLOC_PRIM(sizeofW(StgArrWords),DOUBLE_MANTISSA_SIZE,0);
972   CCS_ALLOC(CCCS,ARR_SIZE); /* ccs prof */
973
974   /* Be prepared to tell Lennart-coded __decodeDouble   */
975   /* where mantissa.d can be put (it does not care about the rest) */
976   p = (StgArrWords *)(Hp-ARR_SIZE+1);
977   SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, DOUBLE_MANTISSA_SIZE);
978   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
979
980   /* Perform the operation */
981   STGCALL3(__decodeDouble,&mantissa,&exponent,arg);
982
983   /* returns: (Int# (expn), Int#, ByteArray#) */
984   TICK_RET_UNBOXED_TUP(3);
985   RET_NNP(exponent,mantissa._mp_size,p);
986   FE_
987 }
988
989 /* -----------------------------------------------------------------------------
990  * Concurrency primitives
991  * -------------------------------------------------------------------------- */
992
993 FN_(forkzh_fast)
994 {
995   FB_
996   /* args: R1 = closure to spark */
997   
998   MAYBE_GC(R1_PTR, forkzh_fast);
999
1000   /* create it right now, return ThreadID in R1 */
1001   R1.t = RET_STGCALL2(StgTSO *, createIOThread, 
1002                      RtsFlags.GcFlags.initialStkSize, R1.cl);
1003   STGCALL1(scheduleThread, R1.t);
1004       
1005   /* switch at the earliest opportunity */ 
1006   context_switch = 1;
1007   
1008   RET_P(R1.t);
1009   FE_
1010 }
1011
1012 FN_(forkProcesszh_fast)
1013 {
1014   pid_t pid;
1015
1016   FB_
1017   /* args: none */
1018   /* result: Pid */
1019
1020   R1.i = RET_STGCALL1(StgInt, forkProcess, CurrentTSO);
1021
1022   JMP_(ENTRY_CODE(Sp[0]));
1023
1024   FE_
1025 }
1026
1027 FN_(yieldzh_fast)
1028 {
1029   FB_
1030   JMP_(stg_yield_noregs);
1031   FE_
1032 }
1033
1034 FN_(myThreadIdzh_fast)
1035 {
1036   /* no args. */
1037   FB_
1038   RET_P((P_)CurrentTSO);
1039   FE_
1040 }
1041
1042 FN_(labelThreadzh_fast)
1043 {
1044   FB_
1045   /* args: R1.p = Addr# */
1046 #ifdef DEBUG
1047   STGCALL2(labelThread,CurrentTSO,(char *)R1.p);
1048 #endif
1049   JMP_(ENTRY_CODE(Sp[0]));
1050   FE_
1051 }
1052
1053
1054 /* -----------------------------------------------------------------------------
1055  * MVar primitives
1056  *
1057  * take & putMVar work as follows.  Firstly, an important invariant:
1058  *
1059  *    If the MVar is full, then the blocking queue contains only
1060  *    threads blocked on putMVar, and if the MVar is empty then the
1061  *    blocking queue contains only threads blocked on takeMVar.
1062  *
1063  * takeMvar:
1064  *    MVar empty : then add ourselves to the blocking queue
1065  *    MVar full  : remove the value from the MVar, and
1066  *                 blocking queue empty     : return
1067  *                 blocking queue non-empty : perform the first blocked putMVar
1068  *                                            from the queue, and wake up the
1069  *                                            thread (MVar is now full again)
1070  *
1071  * putMVar is just the dual of the above algorithm.
1072  *
1073  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1074  * the stack of the thread waiting to do the putMVar.  See
1075  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1076  * the stack layout, and the PerformPut and PerformTake macros below.
1077  *
1078  * It is important that a blocked take or put is woken up with the
1079  * take/put already performed, because otherwise there would be a
1080  * small window of vulnerability where the thread could receive an
1081  * exception and never perform its take or put, and we'd end up with a
1082  * deadlock.
1083  *
1084  * -------------------------------------------------------------------------- */
1085
1086 FN_(isEmptyMVarzh_fast)
1087 {
1088   /* args: R1 = MVar closure */
1089   I_ r;
1090   FB_
1091   r = (I_)((GET_INFO((StgMVar*)(R1.p))) == &stg_EMPTY_MVAR_info);
1092   RET_N(r);
1093   FE_
1094 }
1095
1096
1097 FN_(newMVarzh_fast)
1098 {
1099   StgMVar *mvar;
1100
1101   FB_
1102   /* args: none */
1103
1104   HP_CHK_GEN_TICKY(sizeofW(StgMVar), NO_PTRS, newMVarzh_fast,);
1105   TICK_ALLOC_PRIM(sizeofW(StgMutVar)-1, // consider head,tail,link as admin wds
1106                   1, 0);
1107   CCS_ALLOC(CCCS,sizeofW(StgMVar)); /* ccs prof */
1108   
1109   mvar = (StgMVar *) (Hp - sizeofW(StgMVar) + 1);
1110   SET_HDR(mvar,&stg_EMPTY_MVAR_info,CCCS);
1111   mvar->head = mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1112   mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1113
1114   TICK_RET_UNBOXED_TUP(1);
1115   RET_P(mvar);
1116   FE_
1117 }
1118
1119 /* If R1 isn't available, pass it on the stack */
1120 #ifdef REG_R1
1121 #define PerformTake(tso, value) ({              \
1122     (tso)->sp[1] = (W_)value;                   \
1123     (tso)->sp[0] = (W_)&stg_gc_unpt_r1_info;    \
1124   })
1125 #else
1126 #define PerformTake(tso, value) ({              \
1127     (tso)->sp[1] = (W_)value;                   \
1128     (tso)->sp[0] = (W_)&stg_ut_1_0_unreg_info;  \
1129   })
1130 #endif
1131
1132
1133 #define PerformPut(tso) ({                              \
1134     StgClosure *val = (StgClosure *)(tso)->sp[2];       \
1135     (tso)->sp[2] = (W_)&stg_gc_noregs_info;             \
1136     (tso)->sp += 2;                                     \
1137     val;                                                \
1138   })
1139
1140 FN_(takeMVarzh_fast)
1141 {
1142   StgMVar *mvar;
1143   StgClosure *val;
1144   const StgInfoTable *info;
1145
1146   FB_
1147   /* args: R1 = MVar closure */
1148
1149   mvar = (StgMVar *)R1.p;
1150
1151 #ifdef SMP
1152   info = LOCK_CLOSURE(mvar);
1153 #else
1154   info = GET_INFO(mvar);
1155 #endif
1156
1157   /* If the MVar is empty, put ourselves on its blocking queue,
1158    * and wait until we're woken up.
1159    */
1160   if (info == &stg_EMPTY_MVAR_info) {
1161     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1162       mvar->head = CurrentTSO;
1163     } else {
1164       mvar->tail->link = CurrentTSO;
1165     }
1166     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1167     CurrentTSO->why_blocked = BlockedOnMVar;
1168     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1169     mvar->tail = CurrentTSO;
1170
1171 #ifdef SMP
1172     /* unlock the MVar */
1173     mvar->header.info = &stg_EMPTY_MVAR_info;
1174 #endif
1175     JMP_(stg_block_takemvar);
1176   }
1177
1178   /* we got the value... */
1179   val = mvar->value;
1180
1181   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1182       /* There are putMVar(s) waiting... 
1183        * wake up the first thread on the queue
1184        */
1185       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1186
1187       /* actually perform the putMVar for the thread that we just woke up */
1188       mvar->value = PerformPut(mvar->head);
1189
1190 #if defined(GRAN) || defined(PAR)
1191       /* ToDo: check 2nd arg (mvar) is right */
1192       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1193 #else
1194       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1195 #endif
1196       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1197           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1198       }
1199 #ifdef SMP
1200       /* unlock in the SMP case */
1201       SET_INFO(mvar,&stg_FULL_MVAR_info);
1202 #endif
1203       TICK_RET_UNBOXED_TUP(1);
1204       RET_P(val);
1205   } else {
1206       /* No further putMVars, MVar is now empty */
1207
1208       /* do this last... we might have locked the MVar in the SMP case,
1209        * and writing the info pointer will unlock it.
1210        */
1211       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1212       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1213       TICK_RET_UNBOXED_TUP(1);
1214       RET_P(val);
1215   }
1216   FE_
1217 }
1218
1219 FN_(tryTakeMVarzh_fast)
1220 {
1221   StgMVar *mvar;
1222   StgClosure *val;
1223   const StgInfoTable *info;
1224
1225   FB_
1226   /* args: R1 = MVar closure */
1227
1228   mvar = (StgMVar *)R1.p;
1229
1230 #ifdef SMP
1231   info = LOCK_CLOSURE(mvar);
1232 #else
1233   info = GET_INFO(mvar);
1234 #endif
1235
1236   if (info == &stg_EMPTY_MVAR_info) {
1237
1238 #ifdef SMP
1239       /* unlock the MVar */
1240       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1241 #endif
1242
1243       /* HACK: we need a pointer to pass back, 
1244        * so we abuse NO_FINALIZER_closure
1245        */
1246       RET_NP(0, &stg_NO_FINALIZER_closure);
1247   }
1248
1249   /* we got the value... */
1250   val = mvar->value;
1251
1252   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1253       /* There are putMVar(s) waiting... 
1254        * wake up the first thread on the queue
1255        */
1256       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1257
1258       /* actually perform the putMVar for the thread that we just woke up */
1259       mvar->value = PerformPut(mvar->head);
1260
1261 #if defined(GRAN) || defined(PAR)
1262       /* ToDo: check 2nd arg (mvar) is right */
1263       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1264 #else
1265       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1266 #endif
1267       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1268           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1269       }
1270 #ifdef SMP
1271       /* unlock in the SMP case */
1272       SET_INFO(mvar,&stg_FULL_MVAR_info);
1273 #endif
1274   } else {
1275       /* No further putMVars, MVar is now empty */
1276       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1277
1278       /* do this last... we might have locked the MVar in the SMP case,
1279        * and writing the info pointer will unlock it.
1280        */
1281       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1282   }
1283
1284   TICK_RET_UNBOXED_TUP(1);
1285   RET_NP((I_)1, val);
1286   FE_
1287 }
1288
1289 FN_(putMVarzh_fast)
1290 {
1291   StgMVar *mvar;
1292   const StgInfoTable *info;
1293
1294   FB_
1295   /* args: R1 = MVar, R2 = value */
1296
1297   mvar = (StgMVar *)R1.p;
1298
1299 #ifdef SMP
1300   info = LOCK_CLOSURE(mvar);
1301 #else
1302   info = GET_INFO(mvar);
1303 #endif
1304
1305   if (info == &stg_FULL_MVAR_info) {
1306     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1307       mvar->head = CurrentTSO;
1308     } else {
1309       mvar->tail->link = CurrentTSO;
1310     }
1311     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1312     CurrentTSO->why_blocked = BlockedOnMVar;
1313     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1314     mvar->tail = CurrentTSO;
1315
1316 #ifdef SMP
1317     /* unlock the MVar */
1318     SET_INFO(mvar,&stg_FULL_MVAR_info);
1319 #endif
1320     JMP_(stg_block_putmvar);
1321   }
1322   
1323   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1324       /* There are takeMVar(s) waiting: wake up the first one
1325        */
1326       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1327
1328       /* actually perform the takeMVar */
1329       PerformTake(mvar->head, R2.cl);
1330       
1331 #if defined(GRAN) || defined(PAR)
1332       /* ToDo: check 2nd arg (mvar) is right */
1333       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1334 #else
1335       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1336 #endif
1337       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1338           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1339       }
1340 #ifdef SMP
1341       /* unlocks the MVar in the SMP case */
1342       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1343 #endif
1344       JMP_(ENTRY_CODE(Sp[0]));
1345   } else {
1346       /* No further takes, the MVar is now full. */
1347       mvar->value = R2.cl;
1348       /* unlocks the MVar in the SMP case */
1349       SET_INFO(mvar,&stg_FULL_MVAR_info);
1350       JMP_(ENTRY_CODE(Sp[0]));
1351   }
1352
1353   /* ToDo: yield afterward for better communication performance? */
1354   FE_
1355 }
1356
1357 FN_(tryPutMVarzh_fast)
1358 {
1359   StgMVar *mvar;
1360   const StgInfoTable *info;
1361
1362   FB_
1363   /* args: R1 = MVar, R2 = value */
1364
1365   mvar = (StgMVar *)R1.p;
1366
1367 #ifdef SMP
1368   info = LOCK_CLOSURE(mvar);
1369 #else
1370   info = GET_INFO(mvar);
1371 #endif
1372
1373   if (info == &stg_FULL_MVAR_info) {
1374
1375 #ifdef SMP
1376     /* unlock the MVar */
1377     mvar->header.info = &stg_FULL_MVAR_info;
1378 #endif
1379
1380     RET_N(0);
1381   }
1382   
1383   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1384       /* There are takeMVar(s) waiting: wake up the first one
1385        */
1386       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1387
1388       /* actually perform the takeMVar */
1389       PerformTake(mvar->head, R2.cl);
1390       
1391 #if defined(GRAN) || defined(PAR)
1392       /* ToDo: check 2nd arg (mvar) is right */
1393       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1394 #else
1395       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1396 #endif
1397       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1398           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1399       }
1400 #ifdef SMP
1401       /* unlocks the MVar in the SMP case */
1402       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1403 #endif
1404       JMP_(ENTRY_CODE(Sp[0]));
1405   } else {
1406       /* No further takes, the MVar is now full. */
1407       mvar->value = R2.cl;
1408       /* unlocks the MVar in the SMP case */
1409       SET_INFO(mvar,&stg_FULL_MVAR_info);
1410       JMP_(ENTRY_CODE(Sp[0]));
1411   }
1412
1413   /* ToDo: yield afterward for better communication performance? */
1414   FE_
1415 }
1416
1417 /* -----------------------------------------------------------------------------
1418    Stable pointer primitives
1419    -------------------------------------------------------------------------  */
1420
1421 FN_(makeStableNamezh_fast)
1422 {
1423   StgWord index;
1424   StgStableName *sn_obj;
1425   FB_
1426
1427   HP_CHK_GEN_TICKY(sizeofW(StgStableName), R1_PTR, makeStableNamezh_fast,);
1428   TICK_ALLOC_PRIM(sizeofW(StgHeader), 
1429                   sizeofW(StgStableName)-sizeofW(StgHeader), 0);
1430   CCS_ALLOC(CCCS,sizeofW(StgStableName)); /* ccs prof */
1431   
1432   index = RET_STGCALL1(StgWord,lookupStableName,R1.p);
1433
1434   /* Is there already a StableName for this heap object? */
1435   if (stable_ptr_table[index].sn_obj == NULL) {
1436     sn_obj = (StgStableName *) (Hp - sizeofW(StgStableName) + 1);
1437     SET_HDR(sn_obj,&stg_STABLE_NAME_info,CCCS);
1438     sn_obj->sn = index;
1439     stable_ptr_table[index].sn_obj = (StgClosure *)sn_obj;
1440   } else {
1441     (StgClosure *)sn_obj = stable_ptr_table[index].sn_obj;
1442   }
1443
1444   TICK_RET_UNBOXED_TUP(1);
1445   RET_P(sn_obj);
1446 }
1447
1448
1449 FN_(makeStablePtrzh_fast)
1450 {
1451   /* Args: R1 = a */
1452   StgStablePtr sp;
1453   FB_
1454   MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1455   sp = RET_STGCALL1(StgStablePtr,getStablePtr,R1.p);
1456   RET_N(sp);
1457   FE_
1458 }
1459
1460 FN_(deRefStablePtrzh_fast)
1461 {
1462   /* Args: R1 = the stable ptr */
1463   P_ r;
1464   StgStablePtr sp;
1465   FB_
1466   sp = (StgStablePtr)R1.w;
1467   r = stable_ptr_table[(StgWord)sp].addr;
1468   RET_P(r);
1469   FE_
1470 }
1471
1472 /* -----------------------------------------------------------------------------
1473    Bytecode object primitives
1474    -------------------------------------------------------------------------  */
1475
1476 FN_(newBCOzh_fast)
1477 {
1478   /* R1.p = instrs
1479      R2.p = literals
1480      R3.p = ptrs
1481      R4.p = itbls
1482   */
1483   StgBCO *bco;
1484   FB_
1485
1486   HP_CHK_GEN_TICKY(sizeofW(StgBCO),R1_PTR|R2_PTR|R3_PTR|R4_PTR, newBCOzh_fast,);
1487   TICK_ALLOC_PRIM(sizeofW(StgHeader), sizeofW(StgBCO)-sizeofW(StgHeader), 0);
1488   CCS_ALLOC(CCCS,sizeofW(StgBCO)); /* ccs prof */
1489   bco = (StgBCO *) (Hp + 1 - sizeofW(StgBCO));
1490   SET_HDR(bco, &stg_BCO_info, CCCS);
1491
1492   bco->instrs     = (StgArrWords*)R1.cl;
1493   bco->literals   = (StgArrWords*)R2.cl;
1494   bco->ptrs       = (StgMutArrPtrs*)R3.cl;
1495   bco->itbls      = (StgArrWords*)R4.cl;
1496
1497   TICK_RET_UNBOXED_TUP(1);
1498   RET_P(bco);
1499   FE_
1500 }
1501
1502 FN_(mkApUpd0zh_fast)
1503 {
1504   /* R1.p = the fn for the AP_UPD
1505   */
1506   StgAP_UPD* ap;
1507   FB_
1508   HP_CHK_GEN_TICKY(AP_sizeW(0), R1_PTR, mkApUpd0zh_fast,);
1509   TICK_ALLOC_PRIM(sizeofW(StgHeader), AP_sizeW(0)-sizeofW(StgHeader), 0);
1510   CCS_ALLOC(CCCS,AP_sizeW(0)); /* ccs prof */
1511   ap = (StgAP_UPD *) (Hp + 1 - AP_sizeW(0));
1512   SET_HDR(ap, &stg_AP_UPD_info, CCCS);
1513
1514   ap->n_args = 0;
1515   ap->fun = R1.cl;
1516
1517   TICK_RET_UNBOXED_TUP(1);
1518   RET_P(ap);
1519   FE_
1520 }
1521
1522 /* -----------------------------------------------------------------------------
1523    Thread I/O blocking primitives
1524    -------------------------------------------------------------------------- */
1525
1526 FN_(waitReadzh_fast)
1527 {
1528   FB_
1529     /* args: R1.i */
1530     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1531     CurrentTSO->why_blocked = BlockedOnRead;
1532     CurrentTSO->block_info.fd = R1.i;
1533     ACQUIRE_LOCK(&sched_mutex);
1534     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1535     RELEASE_LOCK(&sched_mutex);
1536     JMP_(stg_block_noregs);
1537   FE_
1538 }
1539
1540 FN_(waitWritezh_fast)
1541 {
1542   FB_
1543     /* args: R1.i */
1544     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1545     CurrentTSO->why_blocked = BlockedOnWrite;
1546     CurrentTSO->block_info.fd = R1.i;
1547     ACQUIRE_LOCK(&sched_mutex);
1548     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1549     RELEASE_LOCK(&sched_mutex);
1550     JMP_(stg_block_noregs);
1551   FE_
1552 }
1553
1554 FN_(delayzh_fast)
1555 {
1556   StgTSO *t, *prev;
1557   nat target;
1558   FB_
1559     /* args: R1.i */
1560     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1561     CurrentTSO->why_blocked = BlockedOnDelay;
1562
1563     ACQUIRE_LOCK(&sched_mutex);
1564
1565     target = (R1.i / (TICK_MILLISECS*1000)) + getourtimeofday();
1566     CurrentTSO->block_info.target = target;
1567
1568     /* Insert the new thread in the sleeping queue. */
1569     prev = NULL;
1570     t = sleeping_queue;
1571     while (t != END_TSO_QUEUE && t->block_info.target < target) {
1572         prev = t;
1573         t = t->link;
1574     }
1575
1576     CurrentTSO->link = t;
1577     if (prev == NULL) {
1578         sleeping_queue = CurrentTSO;
1579     } else {
1580         prev->link = CurrentTSO;
1581     }
1582
1583     RELEASE_LOCK(&sched_mutex);
1584     JMP_(stg_block_noregs);
1585   FE_
1586 }
1587