[project @ 2002-02-28 18:44:28 by sof]
[ghc-hetmet.git] / ghc / rts / PrimOps.hc
1 /* -----------------------------------------------------------------------------
2  * $Id: PrimOps.hc,v 1.93 2002/02/28 18:44:29 sof Exp $
3  *
4  * (c) The GHC Team, 1998-2000
5  *
6  * Primitive functions / data
7  *
8  * ---------------------------------------------------------------------------*/
9
10 #include "Stg.h"
11 #include "Rts.h"
12
13 #include "RtsFlags.h"
14 #include "StgStartup.h"
15 #include "SchedAPI.h"
16 #include "Schedule.h"
17 #include "RtsUtils.h"
18 #include "Storage.h"
19 #include "BlockAlloc.h" /* tmp */
20 #include "StablePriv.h"
21 #include "StgRun.h"
22 #include "Itimer.h"
23 #include "Prelude.h"
24
25 /* ** temporary **
26
27    classes CCallable and CReturnable don't really exist, but the
28    compiler insists on generating dictionaries containing references
29    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
30    for these.  Some C compilers can't cope with zero-length static arrays,
31    so we have to make these one element long.
32 */
33
34 StgWord GHC_ZCCCallable_static_info[1];
35 StgWord GHC_ZCCReturnable_static_info[1];
36   
37 /* -----------------------------------------------------------------------------
38    Macros for Hand-written primitives.
39    -------------------------------------------------------------------------- */
40
41 /*
42  * Horrible macros for returning unboxed tuples.
43  *
44  * How an unboxed tuple is returned depends on two factors:
45  *    - the number of real registers we have available
46  *    - the boxedness of the returned fields.
47  *
48  * To return an unboxed tuple from a primitive operation, we have macros
49  * RET_<layout> where <layout> describes the boxedness of each field of the
50  * unboxed tuple:  N indicates a non-pointer field, and P indicates a pointer.
51  *
52  * We only define the cases actually used, to avoid having too much
53  * garbage in this section.  Warning: any bugs in here will be hard to
54  * track down.
55  */
56
57 /*------ All Regs available */
58 #if defined(REG_R8)
59 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
60 # define RET_N(a)     RET_P(a)
61
62 # define RET_PP(a,b)  R1.w = (W_)(a); R2.w = (W_)(b); JMP_(ENTRY_CODE(Sp[0]));
63 # define RET_NN(a,b)  RET_PP(a,b)
64 # define RET_NP(a,b)  RET_PP(a,b)
65
66 # define RET_PPP(a,b,c) \
67         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); JMP_(ENTRY_CODE(Sp[0]));
68 # define RET_NNP(a,b,c) RET_PPP(a,b,c)
69
70 # define RET_NNNP(a,b,c,d) \
71         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)d; \
72         JMP_(ENTRY_CODE(Sp[0]));
73
74 # define RET_NPNP(a,b,c,d) \
75         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)(d); \
76         JMP_(ENTRY_CODE(Sp[0]));
77
78 # define RET_NNPNNP(a,b,c,d,e,f) \
79         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); \
80         R4.w = (W_)(d); R5.w = (W_)(e); R6.w = (W_)(f); \
81         JMP_(ENTRY_CODE(Sp[0]));
82
83 #elif defined(REG_R7) || defined(REG_R6) || defined(REG_R5) || \
84       defined(REG_R4) || defined(REG_R3)
85 # error RET_n macros not defined for this setup.
86
87 /*------ 2 Registers available */
88 #elif defined(REG_R2)
89
90 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
91 # define RET_N(a)     RET_P(a)
92
93 # define RET_PP(a,b)   R1.w = (W_)(a); R2.w = (W_)(b); \
94                        JMP_(ENTRY_CODE(Sp[0]));
95 # define RET_NN(a,b)   RET_PP(a,b)
96 # define RET_NP(a,b)   RET_PP(a,b)
97
98 # define RET_PPP(a,b,c) \
99         R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \
100         JMP_(ENTRY_CODE(Sp[1]));
101 # define RET_NNP(a,b,c) \
102         R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \
103         JMP_(ENTRY_CODE(Sp[1]));
104
105 # define RET_NNNP(a,b,c,d)                      \
106         R1.w = (W_)(a);                         \
107         R2.w = (W_)(b);                         \
108     /*  Sp[-3] = ARGTAG(1); */                  \
109         Sp[-2] = (W_)(c);                       \
110         Sp[-1] = (W_)(d);                       \
111         Sp -= 3;                                \
112         JMP_(ENTRY_CODE(Sp[3]));
113
114 # define RET_NPNP(a,b,c,d)                      \
115         R1.w = (W_)(a);                         \
116         R2.w = (W_)(b);                         \
117     /*  Sp[-3] = ARGTAG(1); */                  \
118         Sp[-2] = (W_)(c);                       \
119         Sp[-1] = (W_)(d);                       \
120         Sp -= 3;                                \
121         JMP_(ENTRY_CODE(Sp[3]));
122
123 # define RET_NNPNNP(a,b,c,d,e,f)                \
124         R1.w = (W_)(a);                         \
125         R2.w = (W_)(b);                         \
126         Sp[-6] = (W_)(c);                       \
127         /* Sp[-5] = ARGTAG(1); */               \
128         Sp[-4] = (W_)(d);                       \
129         /* Sp[-3] = ARGTAG(1); */               \
130         Sp[-2] = (W_)(e);                       \
131         Sp[-1] = (W_)(f);                       \
132         Sp -= 6;                                \
133         JMP_(ENTRY_CODE(Sp[6]));
134
135 /*------ 1 Register available */
136 #elif defined(REG_R1)
137 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
138 # define RET_N(a)     RET_P(a)
139
140 # define RET_PP(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 1; \
141                        JMP_(ENTRY_CODE(Sp[1]));
142 # define RET_NN(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 2; \
143                        JMP_(ENTRY_CODE(Sp[2]));
144 # define RET_NP(a,b)   RET_PP(a,b)
145
146 # define RET_PPP(a,b,c) \
147         R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 2; \
148         JMP_(ENTRY_CODE(Sp[2]));
149 # define RET_NNP(a,b,c) \
150         R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 3; \
151         JMP_(ENTRY_CODE(Sp[3]));
152
153 # define RET_NNNP(a,b,c,d)                      \
154         R1.w = (W_)(a);                         \
155     /*  Sp[-5] = ARGTAG(1); */                  \
156         Sp[-4] = (W_)(b);                       \
157     /*  Sp[-3] = ARGTAG(1); */                  \
158         Sp[-2] = (W_)(c);                       \
159         Sp[-1] = (W_)(d);                       \
160         Sp -= 5;                                \
161         JMP_(ENTRY_CODE(Sp[5]));
162
163 # define RET_NPNP(a,b,c,d)                      \
164         R1.w = (W_)(a);                         \
165         Sp[-4] = (W_)(b);                       \
166     /*  Sp[-3] = ARGTAG(1); */                  \
167         Sp[-2] = (W_)(c);                       \
168         Sp[-1] = (W_)(d);                       \
169         Sp -= 4;                                \
170         JMP_(ENTRY_CODE(Sp[4]));
171
172 # define RET_NNPNNP(a,b,c,d,e,f)                \
173         R1.w = (W_)(a);                         \
174         Sp[-1] = (W_)(f);                       \
175         Sp[-2] = (W_)(e);                       \
176         /* Sp[-3] = ARGTAG(1); */               \
177         Sp[-4] = (W_)(d);                       \
178         /* Sp[-5] = ARGTAG(1); */               \
179         Sp[-6] = (W_)(c);                       \
180         Sp[-7] = (W_)(b);                       \
181         /* Sp[-8] = ARGTAG(1); */               \
182         Sp -= 8;                                \
183         JMP_(ENTRY_CODE(Sp[8]));
184
185 #else /* 0 Regs available */
186
187 #define PUSH_P(o,x) Sp[-o] = (W_)(x)
188
189 #ifdef DEBUG
190 #define PUSH_N(o,x) Sp[1-o] = (W_)(x);  Sp[-o] = ARG_TAG(1);
191 #else
192 #define PUSH_N(o,x) Sp[1-o] = (W_)(x);
193 #endif
194
195 #define PUSHED(m)   Sp -= (m); JMP_(ENTRY_CODE(Sp[m]));
196
197 /* Here's how to construct these macros:
198  *
199  *   N = number of N's in the name;
200  *   P = number of P's in the name;
201  *   s = N * 2 + P;
202  *   while (nonNull(name)) {
203  *     if (nextChar == 'P') {
204  *       PUSH_P(s,_);
205  *       s -= 1;
206  *     } else {
207  *       PUSH_N(s,_);
208  *       s -= 2
209  *     }
210  *   }
211  *   PUSHED(N * 2 + P);
212  */
213
214 # define RET_P(a)     PUSH_P(1,a); PUSHED(1)
215 # define RET_N(a)     PUSH_N(2,a); PUSHED(2)
216
217 # define RET_PP(a,b)   PUSH_P(2,a); PUSH_P(1,b); PUSHED(2)
218 # define RET_NN(a,b)   PUSH_N(4,a); PUSH_N(2,b); PUSHED(4)
219 # define RET_NP(a,b)   PUSH_N(3,a); PUSH_P(1,b); PUSHED(3)
220
221 # define RET_PPP(a,b,c) PUSH_P(3,a); PUSH_P(2,b); PUSH_P(1,c); PUSHED(3)
222 # define RET_NNP(a,b,c) PUSH_N(5,a); PUSH_N(3,b); PUSH_P(1,c); PUSHED(5)
223
224 # define RET_NNNP(a,b,c,d) PUSH_N(7,a); PUSH_N(5,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(7)        
225 # define RET_NPNP(a,b,c,d) PUSH_N(6,a); PUSH_P(4,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(6)        
226 # define RET_NNPNNP(a,b,c,d,e,f) PUSH_N(10,a); PUSH_N(8,b); PUSH_P(6,c); PUSH_N(5,d); PUSH_N(3,e); PUSH_P(1,f); PUSHED(10)
227
228 #endif
229
230 /*-----------------------------------------------------------------------------
231   Array Primitives
232
233   Basically just new*Array - the others are all inline macros.
234
235   The size arg is always passed in R1, and the result returned in R1.
236
237   The slow entry point is for returning from a heap check, the saved
238   size argument must be re-loaded from the stack.
239   -------------------------------------------------------------------------- */
240
241 /* for objects that are *less* than the size of a word, make sure we
242  * round up to the nearest word for the size of the array.
243  */
244
245 #define BYTES_TO_STGWORDS(n) ((n) + sizeof(W_) - 1)/sizeof(W_)
246
247 FN_(newByteArrayzh_fast)                                \
248  {                                                      \
249    W_ size, stuff_size, n;                              \
250    StgArrWords* p;                                      \
251    FB_                                                  \
252      MAYBE_GC(NO_PTRS,newByteArrayzh_fast);             \
253      n = R1.w;                                          \
254      stuff_size = BYTES_TO_STGWORDS(n);                 \
255      size = sizeofW(StgArrWords)+ stuff_size;           \
256      p = (StgArrWords *)RET_STGCALL1(P_,allocate,size); \
257      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0); \
258      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);             \
259      p->words = stuff_size;                             \
260      TICK_RET_UNBOXED_TUP(1)                            \
261      RET_P(p);                                          \
262    FE_                                                  \
263  }
264
265 FN_(newPinnedByteArrayzh_fast)                                  \
266  {                                                              \
267    W_ size, stuff_size, n;                                      \
268    StgArrWords* p;                                              \
269    FB_                                                          \
270      MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);               \
271      n = R1.w;                                                  \
272      stuff_size = BYTES_TO_STGWORDS(n);                         \
273      size = sizeofW(StgArrWords)+ stuff_size;                   \
274      p = (StgArrWords *)RET_STGCALL1(P_,allocatePinned,size);   \
275      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);        \
276      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);                     \
277      p->words = stuff_size;                                     \
278      TICK_RET_UNBOXED_TUP(1)                                    \
279      RET_P(p);                                                  \
280    FE_                                                          \
281  }
282
283 FN_(newArrayzh_fast)
284 {
285   W_ size, n, init;
286   StgMutArrPtrs* arr;
287   StgPtr p;
288   FB_
289     n = R1.w;
290
291     MAYBE_GC(R2_PTR,newArrayzh_fast);
292
293     size = sizeofW(StgMutArrPtrs) + n;
294     arr = (StgMutArrPtrs *)RET_STGCALL1(P_, allocate, size);
295     TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
296
297     SET_HDR(arr,&stg_MUT_ARR_PTRS_info,CCCS);
298     arr->ptrs = n;
299
300     init = R2.w;
301     for (p = (P_)arr + sizeofW(StgMutArrPtrs); 
302          p < (P_)arr + size; p++) {
303         *p = (W_)init;
304     }
305
306     TICK_RET_UNBOXED_TUP(1);
307     RET_P(arr);
308   FE_
309 }
310
311 FN_(newMutVarzh_fast)
312 {
313   StgMutVar* mv;
314   /* Args: R1.p = initialisation value */
315   FB_
316
317   HP_CHK_GEN_TICKY(sizeofW(StgMutVar), R1_PTR, newMutVarzh_fast,);
318   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,1, 0); /* hack, dependent on rep. */
319   CCS_ALLOC(CCCS,sizeofW(StgMutVar));
320
321   mv = (StgMutVar *)(Hp-sizeofW(StgMutVar)+1);
322   SET_HDR(mv,&stg_MUT_VAR_info,CCCS);
323   mv->var = R1.cl;
324
325   TICK_RET_UNBOXED_TUP(1);
326   RET_P(mv);
327   FE_
328 }
329
330 /* -----------------------------------------------------------------------------
331    Foreign Object Primitives
332    -------------------------------------------------------------------------- */
333
334 FN_(mkForeignObjzh_fast)
335 {
336   /* R1.p = ptr to foreign object,
337   */
338   StgForeignObj *result;
339   FB_
340
341   HP_CHK_GEN_TICKY(sizeofW(StgForeignObj), NO_PTRS, mkForeignObjzh_fast,);
342   TICK_ALLOC_PRIM(sizeofW(StgHeader),
343                   sizeofW(StgForeignObj)-sizeofW(StgHeader), 0);
344   CCS_ALLOC(CCCS,sizeofW(StgForeignObj)); /* ccs prof */
345
346   result = (StgForeignObj *) (Hp + 1 - sizeofW(StgForeignObj));
347   SET_HDR(result,&stg_FOREIGN_info,CCCS);
348   result->data = R1.p;
349
350   /* returns (# s#, ForeignObj# #) */
351   TICK_RET_UNBOXED_TUP(1);
352   RET_P(result);
353   FE_
354 }
355
356 /* These two are out-of-line for the benefit of the NCG */
357 FN_(unsafeThawArrayzh_fast)
358 {
359   FB_
360   SET_INFO((StgClosure *)R1.cl,&stg_MUT_ARR_PTRS_info);
361   recordMutable((StgMutClosure*)R1.cl);
362
363   TICK_RET_UNBOXED_TUP(1);
364   RET_P(R1.p);
365   FE_
366 }
367
368 /* -----------------------------------------------------------------------------
369    Weak Pointer Primitives
370    -------------------------------------------------------------------------- */
371
372 FN_(mkWeakzh_fast)
373 {
374   /* R1.p = key
375      R2.p = value
376      R3.p = finalizer (or NULL)
377   */
378   StgWeak *w;
379   FB_
380
381   if (R3.cl == NULL) {
382     R3.cl = &stg_NO_FINALIZER_closure;
383   }
384
385   HP_CHK_GEN_TICKY(sizeofW(StgWeak),R1_PTR|R2_PTR|R3_PTR, mkWeakzh_fast,);
386   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,  // +1 is for the link field
387                   sizeofW(StgWeak)-sizeofW(StgHeader)-1, 0);
388   CCS_ALLOC(CCCS,sizeofW(StgWeak)); /* ccs prof */
389
390   w = (StgWeak *) (Hp + 1 - sizeofW(StgWeak));
391   SET_HDR(w, &stg_WEAK_info, CCCS);
392
393   w->key        = R1.cl;
394   w->value      = R2.cl;
395   w->finalizer  = R3.cl;
396
397   w->link       = weak_ptr_list;
398   weak_ptr_list = w;
399   IF_DEBUG(weak, fprintf(stderr,"New weak pointer at %p\n",w));
400
401   TICK_RET_UNBOXED_TUP(1);
402   RET_P(w);
403   FE_
404 }
405
406 FN_(finalizzeWeakzh_fast)
407 {
408   /* R1.p = weak ptr
409    */
410   StgDeadWeak *w;
411   StgClosure *f;
412   FB_
413   TICK_RET_UNBOXED_TUP(0);
414   w = (StgDeadWeak *)R1.p;
415
416   /* already dead? */
417   if (w->header.info == &stg_DEAD_WEAK_info) {
418       RET_NP(0,&stg_NO_FINALIZER_closure);
419   }
420
421   /* kill it */
422 #ifdef PROFILING
423   // @LDV profiling
424   // A weak pointer is inherently used, so we do not need to call
425   // LDV_recordDead_FILL_SLOP_DYNAMIC():
426   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
427   // or, LDV_recordDead():
428   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
429   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
430   // large as weak pointers, so there is no need to fill the slop, either.
431   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
432 #endif
433   //
434   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
435   //
436   w->header.info = &stg_DEAD_WEAK_info;
437 #ifdef PROFILING
438   // @LDV profiling
439   LDV_recordCreate((StgClosure *)w);
440 #endif
441   f = ((StgWeak *)w)->finalizer;
442   w->link = ((StgWeak *)w)->link;
443
444   /* return the finalizer */
445   if (f == &stg_NO_FINALIZER_closure) {
446       RET_NP(0,&stg_NO_FINALIZER_closure);
447   } else {
448       RET_NP(1,f);
449   }
450   FE_
451 }
452
453 FN_(deRefWeakzh_fast)
454 {
455   /* R1.p = weak ptr */
456   StgWeak* w;
457   I_       code;
458   P_       val;
459   FB_
460   w = (StgWeak*)R1.p;
461   if (w->header.info == &stg_WEAK_info) {
462     code = 1;
463     val = (P_)((StgWeak *)w)->value;
464   } else {
465     code = 0;
466     val = (P_)w;
467   }
468   RET_NP(code,val);
469   FE_
470 }
471
472 /* -----------------------------------------------------------------------------
473    Arbitrary-precision Integer operations.
474    -------------------------------------------------------------------------- */
475
476 FN_(int2Integerzh_fast)
477 {
478    /* arguments: R1 = Int# */
479
480    I_ val, s;           /* to avoid aliasing */
481    StgArrWords* p;      /* address of array result */
482    FB_
483
484    val = R1.i;
485    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, int2Integerzh_fast,);
486    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
487    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
488
489    p = (StgArrWords *)Hp - 1;
490    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
491
492    /* mpz_set_si is inlined here, makes things simpler */
493    if (val < 0) { 
494         s  = -1;
495         *Hp = -val;
496    } else if (val > 0) {
497         s = 1;
498         *Hp = val;
499    } else {
500         s = 0;
501    }
502
503    /* returns (# size  :: Int#, 
504                  data  :: ByteArray# 
505                #)
506    */
507    TICK_RET_UNBOXED_TUP(2);
508    RET_NP(s,p);
509    FE_
510 }
511
512 FN_(word2Integerzh_fast)
513 {
514    /* arguments: R1 = Word# */
515
516    W_ val;              /* to avoid aliasing */
517    I_  s;
518    StgArrWords* p;      /* address of array result */
519    FB_
520
521    val = R1.w;
522    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, word2Integerzh_fast,)
523    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
524    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
525
526    p = (StgArrWords *)Hp - 1;
527    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
528
529    if (val != 0) {
530         s = 1;
531         *Hp = val;
532    } else {
533         s = 0;
534    }
535
536    /* returns (# size  :: Int#, 
537                  data  :: ByteArray# 
538                #)
539    */
540    TICK_RET_UNBOXED_TUP(2);
541    RET_NP(s,p);
542    FE_
543 }
544
545
546 /*
547  * 'long long' primops for converting to/from Integers.
548  */
549
550 #ifdef SUPPORT_LONG_LONGS
551
552 FN_(int64ToIntegerzh_fast)
553 {
554    /* arguments: L1 = Int64# */
555
556    StgInt64  val; /* to avoid aliasing */
557    W_ hi;
558    I_  s, neg, words_needed;
559    StgArrWords* p;      /* address of array result */
560    FB_
561
562    val = (LI_)L1;
563    neg = 0;
564
565    if ( val >= 0x100000000LL || val <= -0x100000000LL )  { 
566        words_needed = 2;
567    } else { 
568        /* minimum is one word */
569        words_needed = 1;
570    }
571    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, int64ToIntegerzh_fast,)
572    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
573    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
574
575    p = (StgArrWords *)(Hp-words_needed+1) - 1;
576    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
577
578    if ( val < 0LL ) {
579      neg = 1;
580      val = -val;
581    }
582
583    hi = (W_)((LW_)val / 0x100000000ULL);
584
585    if ( words_needed == 2 )  { 
586       s = 2;
587       Hp[-1] = (W_)val;
588       Hp[0] = hi;
589    } else if ( val != 0 ) {
590       s = 1;
591       Hp[0] = (W_)val;
592    }  else /* val==0 */   {
593       s = 0;
594    }
595    s = ( neg ? -s : s );
596
597    /* returns (# size  :: Int#, 
598                  data  :: ByteArray# 
599                #)
600    */
601    TICK_RET_UNBOXED_TUP(2);
602    RET_NP(s,p);
603    FE_
604 }
605
606 FN_(word64ToIntegerzh_fast)
607 {
608    /* arguments: L1 = Word64# */
609
610    StgWord64 val; /* to avoid aliasing */
611    StgWord hi;
612    I_  s, words_needed;
613    StgArrWords* p;      /* address of array result */
614    FB_
615
616    val = (LW_)L1;
617    if ( val >= 0x100000000ULL ) {
618       words_needed = 2;
619    } else {
620       words_needed = 1;
621    }
622    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, word64ToIntegerzh_fast,)
623    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
624    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
625
626    p = (StgArrWords *)(Hp-words_needed+1) - 1;
627    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
628
629    hi = (W_)((LW_)val / 0x100000000ULL);
630    if ( val >= 0x100000000ULL ) { 
631      s = 2;
632      Hp[-1] = ((W_)val);
633      Hp[0]  = (hi);
634    } else if ( val != 0 )      {
635       s = 1;
636       Hp[0] = ((W_)val);
637    } else /* val==0 */         {
638       s = 0;
639    }
640
641    /* returns (# size  :: Int#, 
642                  data  :: ByteArray# 
643                #)
644    */
645    TICK_RET_UNBOXED_TUP(2);
646    RET_NP(s,p);
647    FE_
648 }
649
650
651 #endif /* SUPPORT_LONG_LONGS */
652
653 /* ToDo: this is shockingly inefficient */
654
655 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
656 FN_(name)                                                               \
657 {                                                                       \
658   MP_INT arg1, arg2, result;                                            \
659   I_ s1, s2;                                                            \
660   StgArrWords* d1;                                                      \
661   StgArrWords* d2;                                                      \
662   FB_                                                                   \
663                                                                         \
664   /* call doYouWantToGC() */                                            \
665   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
666                                                                         \
667   d1 = (StgArrWords *)R2.p;                                             \
668   s1 = R1.i;                                                            \
669   d2 = (StgArrWords *)R4.p;                                             \
670   s2 = R3.i;                                                            \
671                                                                         \
672   arg1._mp_alloc        = d1->words;                                    \
673   arg1._mp_size         = (s1);                                         \
674   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
675   arg2._mp_alloc        = d2->words;                                    \
676   arg2._mp_size         = (s2);                                         \
677   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
678                                                                         \
679   STGCALL1(mpz_init,&result);                                           \
680                                                                         \
681   /* Perform the operation */                                           \
682   STGCALL3(mp_fun,&result,&arg1,&arg2);                                 \
683                                                                         \
684   TICK_RET_UNBOXED_TUP(2);                                              \
685   RET_NP(result._mp_size,                                               \
686          result._mp_d-sizeofW(StgArrWords));                            \
687   FE_                                                                   \
688 }
689
690 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
691 FN_(name)                                                               \
692 {                                                                       \
693   MP_INT arg1, result;                                                  \
694   I_ s1;                                                                \
695   StgArrWords* d1;                                                      \
696   FB_                                                                   \
697                                                                         \
698   /* call doYouWantToGC() */                                            \
699   MAYBE_GC(R2_PTR, name);                                               \
700                                                                         \
701   d1 = (StgArrWords *)R2.p;                                             \
702   s1 = R1.i;                                                            \
703                                                                         \
704   arg1._mp_alloc        = d1->words;                                    \
705   arg1._mp_size         = (s1);                                         \
706   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
707                                                                         \
708   STGCALL1(mpz_init,&result);                                           \
709                                                                         \
710   /* Perform the operation */                                           \
711   STGCALL2(mp_fun,&result,&arg1);                                       \
712                                                                         \
713   TICK_RET_UNBOXED_TUP(2);                                              \
714   RET_NP(result._mp_size,                                               \
715          result._mp_d-sizeofW(StgArrWords));                            \
716   FE_                                                                   \
717 }
718
719 #define GMP_TAKE2_RET2(name,mp_fun)                                     \
720 FN_(name)                                                               \
721 {                                                                       \
722   MP_INT arg1, arg2, result1, result2;                                  \
723   I_ s1, s2;                                                            \
724   StgArrWords* d1;                                                      \
725   StgArrWords* d2;                                                      \
726   FB_                                                                   \
727                                                                         \
728   /* call doYouWantToGC() */                                            \
729   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
730                                                                         \
731   d1 = (StgArrWords *)R2.p;                                             \
732   s1 = R1.i;                                                            \
733   d2 = (StgArrWords *)R4.p;                                             \
734   s2 = R3.i;                                                            \
735                                                                         \
736   arg1._mp_alloc        = d1->words;                                    \
737   arg1._mp_size         = (s1);                                         \
738   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
739   arg2._mp_alloc        = d2->words;                                    \
740   arg2._mp_size         = (s2);                                         \
741   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
742                                                                         \
743   STGCALL1(mpz_init,&result1);                                          \
744   STGCALL1(mpz_init,&result2);                                          \
745                                                                         \
746   /* Perform the operation */                                           \
747   STGCALL4(mp_fun,&result1,&result2,&arg1,&arg2);                       \
748                                                                         \
749   TICK_RET_UNBOXED_TUP(4);                                              \
750   RET_NPNP(result1._mp_size,                                            \
751            result1._mp_d-sizeofW(StgArrWords),                          \
752            result2._mp_size,                                            \
753            result2._mp_d-sizeofW(StgArrWords));                         \
754   FE_                                                                   \
755 }
756
757 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add);
758 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub);
759 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul);
760 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd);
761 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q);
762 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r);
763 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact);
764 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and);
765 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior);
766 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor);
767 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com);
768
769 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr);
770 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr);
771
772
773 FN_(gcdIntzh_fast)
774 {
775   /* R1 = the first Int#; R2 = the second Int# */
776   mp_limb_t aa;
777   I_ r;
778   FB_
779   aa = (mp_limb_t)(R1.i);
780   r = RET_STGCALL3(StgInt, mpn_gcd_1, (mp_limb_t *)(&aa), 1, (mp_limb_t)(R2.i));
781
782   R1.i = r;
783   /* Result parked in R1, return via info-pointer at TOS */
784   JMP_(ENTRY_CODE(Sp[0]));
785   FE_
786 }
787
788 FN_(gcdIntegerIntzh_fast)
789 {
790   /* R1 = s1; R2 = d1; R3 = the int */
791   I_ r;
792   FB_
793   r = RET_STGCALL3(StgInt,mpn_gcd_1,(mp_limb_t *)(BYTE_ARR_CTS(R2.p)), R1.i, R3.i);
794   RET_N(r);
795   FE_
796 }
797
798 FN_(cmpIntegerIntzh_fast)
799 {
800   /* R1 = s1; R2 = d1; R3 = the int */
801   I_ usize;
802   I_ vsize;
803   I_ v_digit;
804   mp_limb_t u_digit;
805   FB_
806
807   usize = R1.i;
808   vsize = 0;
809   v_digit = R3.i;
810
811   // paraphrased from mpz_cmp_si() in the GMP sources
812   if (v_digit > 0) {
813       vsize = 1;
814   } else if (v_digit < 0) {
815       vsize = -1;
816       v_digit = -v_digit;
817   }
818
819   if (usize != vsize) {
820     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
821   }
822
823   if (usize == 0) {
824     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
825   }
826
827   u_digit = *(mp_limb_t *)(BYTE_ARR_CTS(R2.p));
828
829   if (u_digit == (mp_limb_t) (unsigned long) v_digit) {
830     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
831   }
832
833   if (u_digit > (mp_limb_t) (unsigned long) v_digit) {
834     R1.i = usize; 
835   } else {
836     R1.i = -usize; 
837   }
838
839   JMP_(ENTRY_CODE(Sp[0]));
840   FE_
841 }
842
843 FN_(cmpIntegerzh_fast)
844 {
845   /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
846   I_ usize;
847   I_ vsize;
848   I_ size;
849   StgPtr up, vp;
850   int cmp;
851   FB_
852
853   // paraphrased from mpz_cmp() in the GMP sources
854   usize = R1.i;
855   vsize = R3.i;
856
857   if (usize != vsize) {
858     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
859   }
860
861   if (usize == 0) {
862     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
863   }
864
865   size = abs(usize);
866
867   up = BYTE_ARR_CTS(R2.p);
868   vp = BYTE_ARR_CTS(R4.p);
869
870   cmp = RET_STGCALL3(I_, mpn_cmp, (mp_limb_t *)up, (mp_limb_t *)vp, size);
871
872   if (cmp == 0) {
873     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
874   }
875
876   if ((cmp < 0) == (usize < 0)) {
877     R1.i = 1;
878   } else {
879     R1.i = (-1); 
880   }
881   /* Result parked in R1, return via info-pointer at TOS */
882   JMP_(ENTRY_CODE(Sp[0]));
883   FE_
884 }
885
886 FN_(integer2Intzh_fast)
887 {
888   /* R1 = s; R2 = d */
889   I_ r, s;
890   FB_
891   s = R1.i;
892   if (s == 0)
893     r = 0;
894   else {
895     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
896     if (s < 0) r = -r;
897   }
898   /* Result parked in R1, return via info-pointer at TOS */
899   R1.i = r;
900   JMP_(ENTRY_CODE(Sp[0]));
901   FE_
902 }
903
904 FN_(integer2Wordzh_fast)
905 {
906   /* R1 = s; R2 = d */
907   I_ s;
908   W_ r;
909   FB_
910   s = R1.i;
911   if (s == 0)
912     r = 0;
913   else {
914     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
915     if (s < 0) r = -r;
916   }
917   /* Result parked in R1, return via info-pointer at TOS */
918   R1.w = r;
919   JMP_(ENTRY_CODE(Sp[0]));
920   FE_
921 }
922
923
924 FN_(decodeFloatzh_fast)
925
926   MP_INT mantissa;
927   I_ exponent;
928   StgArrWords* p;
929   StgFloat arg;
930   FB_
931
932   /* arguments: F1 = Float# */
933   arg = F1;
934
935   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, decodeFloatzh_fast,);
936   TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
937   CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
938
939   /* Be prepared to tell Lennart-coded __decodeFloat    */
940   /* where mantissa._mp_d can be put (it does not care about the rest) */
941   p = (StgArrWords *)Hp - 1;
942   SET_ARR_HDR(p,&stg_ARR_WORDS_info,CCCS,1)
943   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
944
945   /* Perform the operation */
946   STGCALL3(__decodeFloat,&mantissa,&exponent,arg);
947
948   /* returns: (Int# (expn), Int#, ByteArray#) */
949   TICK_RET_UNBOXED_TUP(3);
950   RET_NNP(exponent,mantissa._mp_size,p);
951   FE_
952 }
953
954 #define DOUBLE_MANTISSA_SIZE (sizeofW(StgDouble))
955 #define ARR_SIZE (sizeofW(StgArrWords) + DOUBLE_MANTISSA_SIZE)
956
957 FN_(decodeDoublezh_fast)
958 { MP_INT mantissa;
959   I_ exponent;
960   StgDouble arg;
961   StgArrWords* p;
962   FB_
963
964   /* arguments: D1 = Double# */
965   arg = D1;
966
967   HP_CHK_GEN_TICKY(ARR_SIZE, NO_PTRS, decodeDoublezh_fast,);
968   TICK_ALLOC_PRIM(sizeofW(StgArrWords),DOUBLE_MANTISSA_SIZE,0);
969   CCS_ALLOC(CCCS,ARR_SIZE); /* ccs prof */
970
971   /* Be prepared to tell Lennart-coded __decodeDouble   */
972   /* where mantissa.d can be put (it does not care about the rest) */
973   p = (StgArrWords *)(Hp-ARR_SIZE+1);
974   SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, DOUBLE_MANTISSA_SIZE);
975   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
976
977   /* Perform the operation */
978   STGCALL3(__decodeDouble,&mantissa,&exponent,arg);
979
980   /* returns: (Int# (expn), Int#, ByteArray#) */
981   TICK_RET_UNBOXED_TUP(3);
982   RET_NNP(exponent,mantissa._mp_size,p);
983   FE_
984 }
985
986 /* -----------------------------------------------------------------------------
987  * Concurrency primitives
988  * -------------------------------------------------------------------------- */
989
990 FN_(forkzh_fast)
991 {
992   FB_
993   /* args: R1 = closure to spark */
994   
995   MAYBE_GC(R1_PTR, forkzh_fast);
996
997   /* create it right now, return ThreadID in R1 */
998   R1.t = RET_STGCALL2(StgTSO *, createIOThread, 
999                      RtsFlags.GcFlags.initialStkSize, R1.cl);
1000   STGCALL1(scheduleThread, R1.t);
1001       
1002   /* switch at the earliest opportunity */ 
1003   context_switch = 1;
1004   
1005   RET_N(R1.t);
1006   FE_
1007 }
1008
1009 FN_(yieldzh_fast)
1010 {
1011   FB_
1012   JMP_(stg_yield_noregs);
1013   FE_
1014 }
1015
1016 FN_(myThreadIdzh_fast)
1017 {
1018   /* no args. */
1019   FB_
1020   RET_N((P_)CurrentTSO);
1021   FE_
1022 }
1023
1024
1025
1026
1027 /* -----------------------------------------------------------------------------
1028  * MVar primitives
1029  *
1030  * take & putMVar work as follows.  Firstly, an important invariant:
1031  *
1032  *    If the MVar is full, then the blocking queue contains only
1033  *    threads blocked on putMVar, and if the MVar is empty then the
1034  *    blocking queue contains only threads blocked on takeMVar.
1035  *
1036  * takeMvar:
1037  *    MVar empty : then add ourselves to the blocking queue
1038  *    MVar full  : remove the value from the MVar, and
1039  *                 blocking queue empty     : return
1040  *                 blocking queue non-empty : perform the first blocked putMVar
1041  *                                            from the queue, and wake up the
1042  *                                            thread (MVar is now full again)
1043  *
1044  * putMVar is just the dual of the above algorithm.
1045  *
1046  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1047  * the stack of the thread waiting to do the putMVar.  See
1048  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1049  * the stack layout, and the PerformPut and PerformTake macros below.
1050  *
1051  * It is important that a blocked take or put is woken up with the
1052  * take/put already performed, because otherwise there would be a
1053  * small window of vulnerability where the thread could receive an
1054  * exception and never perform its take or put, and we'd end up with a
1055  * deadlock.
1056  *
1057  * -------------------------------------------------------------------------- */
1058
1059 FN_(isEmptyMVarzh_fast)
1060 {
1061   /* args: R1 = MVar closure */
1062   I_ r;
1063   FB_
1064   r = (I_)((GET_INFO((StgMVar*)(R1.p))) == &stg_EMPTY_MVAR_info);
1065   RET_N(r);
1066   FE_
1067 }
1068
1069
1070 FN_(newMVarzh_fast)
1071 {
1072   StgMVar *mvar;
1073
1074   FB_
1075   /* args: none */
1076
1077   HP_CHK_GEN_TICKY(sizeofW(StgMVar), NO_PTRS, newMVarzh_fast,);
1078   TICK_ALLOC_PRIM(sizeofW(StgMutVar)-1, // consider head,tail,link as admin wds
1079                   1, 0);
1080   CCS_ALLOC(CCCS,sizeofW(StgMVar)); /* ccs prof */
1081   
1082   mvar = (StgMVar *) (Hp - sizeofW(StgMVar) + 1);
1083   SET_HDR(mvar,&stg_EMPTY_MVAR_info,CCCS);
1084   mvar->head = mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1085   mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1086
1087   TICK_RET_UNBOXED_TUP(1);
1088   RET_P(mvar);
1089   FE_
1090 }
1091
1092 #define PerformTake(tso, value) ({                      \
1093     (tso)->sp[1] = (W_)value;                           \
1094     (tso)->sp[0] = (W_)&stg_gc_unpt_r1_info;    \
1095   })
1096
1097 #define PerformPut(tso) ({                              \
1098     StgClosure *val = (StgClosure *)(tso)->sp[2];       \
1099     (tso)->sp[2] = (W_)&stg_gc_noregs_info;             \
1100     (tso)->sp += 2;                                     \
1101     val;                                                \
1102   })
1103
1104 FN_(takeMVarzh_fast)
1105 {
1106   StgMVar *mvar;
1107   StgClosure *val;
1108   const StgInfoTable *info;
1109
1110   FB_
1111   /* args: R1 = MVar closure */
1112
1113   mvar = (StgMVar *)R1.p;
1114
1115 #ifdef SMP
1116   info = LOCK_CLOSURE(mvar);
1117 #else
1118   info = GET_INFO(mvar);
1119 #endif
1120
1121   /* If the MVar is empty, put ourselves on its blocking queue,
1122    * and wait until we're woken up.
1123    */
1124   if (info == &stg_EMPTY_MVAR_info) {
1125     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1126       mvar->head = CurrentTSO;
1127     } else {
1128       mvar->tail->link = CurrentTSO;
1129     }
1130     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1131     CurrentTSO->why_blocked = BlockedOnMVar;
1132     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1133     mvar->tail = CurrentTSO;
1134
1135 #ifdef SMP
1136     /* unlock the MVar */
1137     mvar->header.info = &stg_EMPTY_MVAR_info;
1138 #endif
1139     JMP_(stg_block_takemvar);
1140   }
1141
1142   /* we got the value... */
1143   val = mvar->value;
1144
1145   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1146       /* There are putMVar(s) waiting... 
1147        * wake up the first thread on the queue
1148        */
1149       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1150
1151       /* actually perform the putMVar for the thread that we just woke up */
1152       mvar->value = PerformPut(mvar->head);
1153
1154 #if defined(GRAN) || defined(PAR)
1155       /* ToDo: check 2nd arg (mvar) is right */
1156       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1157 #else
1158       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1159 #endif
1160       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1161           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1162       }
1163 #ifdef SMP
1164       /* unlock in the SMP case */
1165       SET_INFO(mvar,&stg_FULL_MVAR_info);
1166 #endif
1167       TICK_RET_UNBOXED_TUP(1);
1168       RET_P(val);
1169   } else {
1170       /* No further putMVars, MVar is now empty */
1171
1172       /* do this last... we might have locked the MVar in the SMP case,
1173        * and writing the info pointer will unlock it.
1174        */
1175       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1176       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1177       TICK_RET_UNBOXED_TUP(1);
1178       RET_P(val);
1179   }
1180   FE_
1181 }
1182
1183 FN_(tryTakeMVarzh_fast)
1184 {
1185   StgMVar *mvar;
1186   StgClosure *val;
1187   const StgInfoTable *info;
1188
1189   FB_
1190   /* args: R1 = MVar closure */
1191
1192   mvar = (StgMVar *)R1.p;
1193
1194 #ifdef SMP
1195   info = LOCK_CLOSURE(mvar);
1196 #else
1197   info = GET_INFO(mvar);
1198 #endif
1199
1200   if (info == &stg_EMPTY_MVAR_info) {
1201
1202 #ifdef SMP
1203       /* unlock the MVar */
1204       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1205 #endif
1206
1207       /* HACK: we need a pointer to pass back, 
1208        * so we abuse NO_FINALIZER_closure
1209        */
1210       RET_NP(0, &stg_NO_FINALIZER_closure);
1211   }
1212
1213   /* we got the value... */
1214   val = mvar->value;
1215
1216   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1217       /* There are putMVar(s) waiting... 
1218        * wake up the first thread on the queue
1219        */
1220       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1221
1222       /* actually perform the putMVar for the thread that we just woke up */
1223       mvar->value = PerformPut(mvar->head);
1224
1225 #if defined(GRAN) || defined(PAR)
1226       /* ToDo: check 2nd arg (mvar) is right */
1227       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1228 #else
1229       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1230 #endif
1231       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1232           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1233       }
1234 #ifdef SMP
1235       /* unlock in the SMP case */
1236       SET_INFO(mvar,&stg_FULL_MVAR_info);
1237 #endif
1238   } else {
1239       /* No further putMVars, MVar is now empty */
1240       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1241
1242       /* do this last... we might have locked the MVar in the SMP case,
1243        * and writing the info pointer will unlock it.
1244        */
1245       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1246   }
1247
1248   TICK_RET_UNBOXED_TUP(1);
1249   RET_NP((I_)1, val);
1250   FE_
1251 }
1252
1253 FN_(putMVarzh_fast)
1254 {
1255   StgMVar *mvar;
1256   const StgInfoTable *info;
1257
1258   FB_
1259   /* args: R1 = MVar, R2 = value */
1260
1261   mvar = (StgMVar *)R1.p;
1262
1263 #ifdef SMP
1264   info = LOCK_CLOSURE(mvar);
1265 #else
1266   info = GET_INFO(mvar);
1267 #endif
1268
1269   if (info == &stg_FULL_MVAR_info) {
1270     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1271       mvar->head = CurrentTSO;
1272     } else {
1273       mvar->tail->link = CurrentTSO;
1274     }
1275     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1276     CurrentTSO->why_blocked = BlockedOnMVar;
1277     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1278     mvar->tail = CurrentTSO;
1279
1280 #ifdef SMP
1281     /* unlock the MVar */
1282     SET_INFO(mvar,&stg_FULL_MVAR_info);
1283 #endif
1284     JMP_(stg_block_putmvar);
1285   }
1286   
1287   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1288       /* There are takeMVar(s) waiting: wake up the first one
1289        */
1290       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1291
1292       /* actually perform the takeMVar */
1293       PerformTake(mvar->head, R2.cl);
1294       
1295 #if defined(GRAN) || defined(PAR)
1296       /* ToDo: check 2nd arg (mvar) is right */
1297       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1298 #else
1299       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1300 #endif
1301       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1302           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1303       }
1304 #ifdef SMP
1305       /* unlocks the MVar in the SMP case */
1306       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1307 #endif
1308       JMP_(ENTRY_CODE(Sp[0]));
1309   } else {
1310       /* No further takes, the MVar is now full. */
1311       mvar->value = R2.cl;
1312       /* unlocks the MVar in the SMP case */
1313       SET_INFO(mvar,&stg_FULL_MVAR_info);
1314       JMP_(ENTRY_CODE(Sp[0]));
1315   }
1316
1317   /* ToDo: yield afterward for better communication performance? */
1318   FE_
1319 }
1320
1321 FN_(tryPutMVarzh_fast)
1322 {
1323   StgMVar *mvar;
1324   const StgInfoTable *info;
1325
1326   FB_
1327   /* args: R1 = MVar, R2 = value */
1328
1329   mvar = (StgMVar *)R1.p;
1330
1331 #ifdef SMP
1332   info = LOCK_CLOSURE(mvar);
1333 #else
1334   info = GET_INFO(mvar);
1335 #endif
1336
1337   if (info == &stg_FULL_MVAR_info) {
1338
1339 #ifdef SMP
1340     /* unlock the MVar */
1341     mvar->header.info = &stg_FULL_MVAR_info;
1342 #endif
1343
1344     RET_N(0);
1345   }
1346   
1347   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1348       /* There are takeMVar(s) waiting: wake up the first one
1349        */
1350       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1351
1352       /* actually perform the takeMVar */
1353       PerformTake(mvar->head, R2.cl);
1354       
1355 #if defined(GRAN) || defined(PAR)
1356       /* ToDo: check 2nd arg (mvar) is right */
1357       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1358 #else
1359       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1360 #endif
1361       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1362           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1363       }
1364 #ifdef SMP
1365       /* unlocks the MVar in the SMP case */
1366       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1367 #endif
1368       JMP_(ENTRY_CODE(Sp[0]));
1369   } else {
1370       /* No further takes, the MVar is now full. */
1371       mvar->value = R2.cl;
1372       /* unlocks the MVar in the SMP case */
1373       SET_INFO(mvar,&stg_FULL_MVAR_info);
1374       JMP_(ENTRY_CODE(Sp[0]));
1375   }
1376
1377   /* ToDo: yield afterward for better communication performance? */
1378   FE_
1379 }
1380
1381 /* -----------------------------------------------------------------------------
1382    Stable pointer primitives
1383    -------------------------------------------------------------------------  */
1384
1385 FN_(makeStableNamezh_fast)
1386 {
1387   StgWord index;
1388   StgStableName *sn_obj;
1389   FB_
1390
1391   HP_CHK_GEN_TICKY(sizeofW(StgStableName), R1_PTR, makeStableNamezh_fast,);
1392   TICK_ALLOC_PRIM(sizeofW(StgHeader), 
1393                   sizeofW(StgStableName)-sizeofW(StgHeader), 0);
1394   CCS_ALLOC(CCCS,sizeofW(StgStableName)); /* ccs prof */
1395   
1396   index = RET_STGCALL1(StgWord,lookupStableName,R1.p);
1397
1398   /* Is there already a StableName for this heap object? */
1399   if (stable_ptr_table[index].sn_obj == NULL) {
1400     sn_obj = (StgStableName *) (Hp - sizeofW(StgStableName) + 1);
1401     SET_HDR(sn_obj,&stg_STABLE_NAME_info,CCCS);
1402     sn_obj->sn = index;
1403     stable_ptr_table[index].sn_obj = (StgClosure *)sn_obj;
1404   } else {
1405     (StgClosure *)sn_obj = stable_ptr_table[index].sn_obj;
1406   }
1407
1408   TICK_RET_UNBOXED_TUP(1);
1409   RET_P(sn_obj);
1410 }
1411
1412
1413 FN_(makeStablePtrzh_fast)
1414 {
1415   /* Args: R1 = a */
1416   StgStablePtr sp;
1417   FB_
1418   MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1419   sp = RET_STGCALL1(StgStablePtr,getStablePtr,R1.p);
1420   RET_N(sp);
1421   FE_
1422 }
1423
1424 FN_(deRefStablePtrzh_fast)
1425 {
1426   /* Args: R1 = the stable ptr */
1427   P_ r;
1428   StgStablePtr sp;
1429   FB_
1430   sp = (StgStablePtr)R1.w;
1431   r = stable_ptr_table[(StgWord)sp].addr;
1432   RET_P(r);
1433   FE_
1434 }
1435
1436 /* -----------------------------------------------------------------------------
1437    Bytecode object primitives
1438    -------------------------------------------------------------------------  */
1439
1440 FN_(newBCOzh_fast)
1441 {
1442   /* R1.p = instrs
1443      R2.p = literals
1444      R3.p = ptrs
1445      R4.p = itbls
1446   */
1447   StgBCO *bco;
1448   FB_
1449
1450   HP_CHK_GEN_TICKY(sizeofW(StgBCO),R1_PTR|R2_PTR|R3_PTR|R4_PTR, newBCOzh_fast,);
1451   TICK_ALLOC_PRIM(sizeofW(StgHeader), sizeofW(StgBCO)-sizeofW(StgHeader), 0);
1452   CCS_ALLOC(CCCS,sizeofW(StgBCO)); /* ccs prof */
1453   bco = (StgBCO *) (Hp + 1 - sizeofW(StgBCO));
1454   SET_HDR(bco, &stg_BCO_info, CCCS);
1455
1456   bco->instrs     = (StgArrWords*)R1.cl;
1457   bco->literals   = (StgArrWords*)R2.cl;
1458   bco->ptrs       = (StgMutArrPtrs*)R3.cl;
1459   bco->itbls      = (StgArrWords*)R4.cl;
1460
1461   TICK_RET_UNBOXED_TUP(1);
1462   RET_P(bco);
1463   FE_
1464 }
1465
1466 FN_(mkApUpd0zh_fast)
1467 {
1468   /* R1.p = the fn for the AP_UPD
1469   */
1470   StgAP_UPD* ap;
1471   FB_
1472   HP_CHK_GEN_TICKY(AP_sizeW(0), R1_PTR, mkApUpd0zh_fast,);
1473   TICK_ALLOC_PRIM(sizeofW(StgHeader), AP_sizeW(0)-sizeofW(StgHeader), 0);
1474   CCS_ALLOC(CCCS,AP_sizeW(0)); /* ccs prof */
1475   ap = (StgAP_UPD *) (Hp + 1 - AP_sizeW(0));
1476   SET_HDR(ap, &stg_AP_UPD_info, CCCS);
1477
1478   ap->n_args = 0;
1479   ap->fun = R1.cl;
1480
1481   TICK_RET_UNBOXED_TUP(1);
1482   RET_P(ap);
1483   FE_
1484 }
1485
1486 /* -----------------------------------------------------------------------------
1487    Thread I/O blocking primitives
1488    -------------------------------------------------------------------------- */
1489
1490 FN_(waitReadzh_fast)
1491 {
1492   FB_
1493     /* args: R1.i */
1494     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1495     CurrentTSO->why_blocked = BlockedOnRead;
1496     CurrentTSO->block_info.fd = R1.i;
1497     ACQUIRE_LOCK(&sched_mutex);
1498     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1499     RELEASE_LOCK(&sched_mutex);
1500     JMP_(stg_block_noregs);
1501   FE_
1502 }
1503
1504 FN_(waitWritezh_fast)
1505 {
1506   FB_
1507     /* args: R1.i */
1508     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1509     CurrentTSO->why_blocked = BlockedOnWrite;
1510     CurrentTSO->block_info.fd = R1.i;
1511     ACQUIRE_LOCK(&sched_mutex);
1512     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1513     RELEASE_LOCK(&sched_mutex);
1514     JMP_(stg_block_noregs);
1515   FE_
1516 }
1517
1518 FN_(delayzh_fast)
1519 {
1520   StgTSO *t, *prev;
1521   nat target;
1522   FB_
1523     /* args: R1.i */
1524     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1525     CurrentTSO->why_blocked = BlockedOnDelay;
1526
1527     ACQUIRE_LOCK(&sched_mutex);
1528
1529     target = (R1.i / (TICK_MILLISECS*1000)) + getourtimeofday();
1530     CurrentTSO->block_info.target = target;
1531
1532     /* Insert the new thread in the sleeping queue. */
1533     prev = NULL;
1534     t = sleeping_queue;
1535     while (t != END_TSO_QUEUE && t->block_info.target < target) {
1536         prev = t;
1537         t = t->link;
1538     }
1539
1540     CurrentTSO->link = t;
1541     if (prev == NULL) {
1542         sleeping_queue = CurrentTSO;
1543     } else {
1544         prev->link = CurrentTSO;
1545     }
1546
1547     RELEASE_LOCK(&sched_mutex);
1548     JMP_(stg_block_noregs);
1549   FE_
1550 }
1551