0d2e75298b452d84f3b3632eff6a9ae2e9cf00a5
[ghc-hetmet.git] / ghc / rts / PrimOps.hc
1 /* -----------------------------------------------------------------------------
2  * $Id: PrimOps.hc,v 1.98 2002/04/23 11:22:12 simonmar Exp $
3  *
4  * (c) The GHC Team, 1998-2000
5  *
6  * Primitive functions / data
7  *
8  * ---------------------------------------------------------------------------*/
9
10 #include "Stg.h"
11 #include "Rts.h"
12
13 #include "RtsFlags.h"
14 #include "StgStartup.h"
15 #include "SchedAPI.h"
16 #include "Schedule.h"
17 #include "RtsUtils.h"
18 #include "Storage.h"
19 #include "BlockAlloc.h" /* tmp */
20 #include "StablePriv.h"
21 #include "StgRun.h"
22 #include "Itimer.h"
23 #include "Prelude.h"
24
25 /* ** temporary **
26
27    classes CCallable and CReturnable don't really exist, but the
28    compiler insists on generating dictionaries containing references
29    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
30    for these.  Some C compilers can't cope with zero-length static arrays,
31    so we have to make these one element long.
32 */
33
34 StgWord GHC_ZCCCallable_static_info[1];
35 StgWord GHC_ZCCReturnable_static_info[1];
36   
37 /* -----------------------------------------------------------------------------
38    Macros for Hand-written primitives.
39    -------------------------------------------------------------------------- */
40
41 /*
42  * Horrible macros for returning unboxed tuples.
43  *
44  * How an unboxed tuple is returned depends on two factors:
45  *    - the number of real registers we have available
46  *    - the boxedness of the returned fields.
47  *
48  * To return an unboxed tuple from a primitive operation, we have macros
49  * RET_<layout> where <layout> describes the boxedness of each field of the
50  * unboxed tuple:  N indicates a non-pointer field, and P indicates a pointer.
51  *
52  * We only define the cases actually used, to avoid having too much
53  * garbage in this section.  Warning: any bugs in here will be hard to
54  * track down.
55  */
56
57 /*------ All Regs available */
58 #if defined(REG_R8)
59 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
60 # define RET_N(a)     RET_P(a)
61
62 # define RET_PP(a,b)  R1.w = (W_)(a); R2.w = (W_)(b); JMP_(ENTRY_CODE(Sp[0]));
63 # define RET_NN(a,b)  RET_PP(a,b)
64 # define RET_NP(a,b)  RET_PP(a,b)
65
66 # define RET_PPP(a,b,c) \
67         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); JMP_(ENTRY_CODE(Sp[0]));
68 # define RET_NNP(a,b,c) RET_PPP(a,b,c)
69
70 # define RET_NNNP(a,b,c,d) \
71         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)d; \
72         JMP_(ENTRY_CODE(Sp[0]));
73
74 # define RET_NPNP(a,b,c,d) \
75         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)(d); \
76         JMP_(ENTRY_CODE(Sp[0]));
77
78 # define RET_NNPNNP(a,b,c,d,e,f) \
79         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); \
80         R4.w = (W_)(d); R5.w = (W_)(e); R6.w = (W_)(f); \
81         JMP_(ENTRY_CODE(Sp[0]));
82
83 #elif defined(REG_R7) || defined(REG_R6) || defined(REG_R5) || \
84       defined(REG_R4) || defined(REG_R3)
85 # error RET_n macros not defined for this setup.
86
87 /*------ 2 Registers available */
88 #elif defined(REG_R2)
89
90 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
91 # define RET_N(a)     RET_P(a)
92
93 # define RET_PP(a,b)   R1.w = (W_)(a); R2.w = (W_)(b); \
94                        JMP_(ENTRY_CODE(Sp[0]));
95 # define RET_NN(a,b)   RET_PP(a,b)
96 # define RET_NP(a,b)   RET_PP(a,b)
97
98 # define RET_PPP(a,b,c) \
99         R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \
100         JMP_(ENTRY_CODE(Sp[1]));
101 # define RET_NNP(a,b,c) \
102         R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \
103         JMP_(ENTRY_CODE(Sp[1]));
104
105 # define RET_NNNP(a,b,c,d)                      \
106         R1.w = (W_)(a);                         \
107         R2.w = (W_)(b);                         \
108     /*  Sp[-3] = ARGTAG(1); */                  \
109         Sp[-2] = (W_)(c);                       \
110         Sp[-1] = (W_)(d);                       \
111         Sp -= 3;                                \
112         JMP_(ENTRY_CODE(Sp[3]));
113
114 # define RET_NPNP(a,b,c,d)                      \
115         R1.w = (W_)(a);                         \
116         R2.w = (W_)(b);                         \
117     /*  Sp[-3] = ARGTAG(1); */                  \
118         Sp[-2] = (W_)(c);                       \
119         Sp[-1] = (W_)(d);                       \
120         Sp -= 3;                                \
121         JMP_(ENTRY_CODE(Sp[3]));
122
123 # define RET_NNPNNP(a,b,c,d,e,f)                \
124         R1.w = (W_)(a);                         \
125         R2.w = (W_)(b);                         \
126         Sp[-6] = (W_)(c);                       \
127         /* Sp[-5] = ARGTAG(1); */               \
128         Sp[-4] = (W_)(d);                       \
129         /* Sp[-3] = ARGTAG(1); */               \
130         Sp[-2] = (W_)(e);                       \
131         Sp[-1] = (W_)(f);                       \
132         Sp -= 6;                                \
133         JMP_(ENTRY_CODE(Sp[6]));
134
135 /*------ 1 Register available */
136 #elif defined(REG_R1)
137 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
138 # define RET_N(a)     RET_P(a)
139
140 # define RET_PP(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 1; \
141                        JMP_(ENTRY_CODE(Sp[1]));
142 # define RET_NN(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 2; \
143                        JMP_(ENTRY_CODE(Sp[2]));
144 # define RET_NP(a,b)   RET_PP(a,b)
145
146 # define RET_PPP(a,b,c) \
147         R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 2; \
148         JMP_(ENTRY_CODE(Sp[2]));
149 # define RET_NNP(a,b,c) \
150         R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 3; \
151         JMP_(ENTRY_CODE(Sp[3]));
152
153 # define RET_NNNP(a,b,c,d)                      \
154         R1.w = (W_)(a);                         \
155     /*  Sp[-5] = ARGTAG(1); */                  \
156         Sp[-4] = (W_)(b);                       \
157     /*  Sp[-3] = ARGTAG(1); */                  \
158         Sp[-2] = (W_)(c);                       \
159         Sp[-1] = (W_)(d);                       \
160         Sp -= 5;                                \
161         JMP_(ENTRY_CODE(Sp[5]));
162
163 # define RET_NPNP(a,b,c,d)                      \
164         R1.w = (W_)(a);                         \
165         Sp[-4] = (W_)(b);                       \
166     /*  Sp[-3] = ARGTAG(1); */                  \
167         Sp[-2] = (W_)(c);                       \
168         Sp[-1] = (W_)(d);                       \
169         Sp -= 4;                                \
170         JMP_(ENTRY_CODE(Sp[4]));
171
172 # define RET_NNPNNP(a,b,c,d,e,f)                \
173         R1.w = (W_)(a);                         \
174         Sp[-1] = (W_)(f);                       \
175         Sp[-2] = (W_)(e);                       \
176         /* Sp[-3] = ARGTAG(1); */               \
177         Sp[-4] = (W_)(d);                       \
178         /* Sp[-5] = ARGTAG(1); */               \
179         Sp[-6] = (W_)(c);                       \
180         Sp[-7] = (W_)(b);                       \
181         /* Sp[-8] = ARGTAG(1); */               \
182         Sp -= 8;                                \
183         JMP_(ENTRY_CODE(Sp[8]));
184
185 #else /* 0 Regs available */
186
187 #define PUSH_P(o,x) Sp[-o] = (W_)(x)
188
189 #ifdef DEBUG
190 #define PUSH_N(o,x) Sp[1-o] = (W_)(x);  Sp[-o] = ARG_TAG(1);
191 #else
192 #define PUSH_N(o,x) Sp[1-o] = (W_)(x);
193 #endif
194
195 #define PUSHED(m)   Sp -= (m); JMP_(ENTRY_CODE(Sp[m]));
196
197 /* Here's how to construct these macros:
198  *
199  *   N = number of N's in the name;
200  *   P = number of P's in the name;
201  *   s = N * 2 + P;
202  *   while (nonNull(name)) {
203  *     if (nextChar == 'P') {
204  *       PUSH_P(s,_);
205  *       s -= 1;
206  *     } else {
207  *       PUSH_N(s,_);
208  *       s -= 2
209  *     }
210  *   }
211  *   PUSHED(N * 2 + P);
212  */
213
214 # define RET_P(a)     PUSH_P(1,a); PUSHED(1)
215 # define RET_N(a)     PUSH_N(2,a); PUSHED(2)
216
217 # define RET_PP(a,b)   PUSH_P(2,a); PUSH_P(1,b); PUSHED(2)
218 # define RET_NN(a,b)   PUSH_N(4,a); PUSH_N(2,b); PUSHED(4)
219 # define RET_NP(a,b)   PUSH_N(3,a); PUSH_P(1,b); PUSHED(3)
220
221 # define RET_PPP(a,b,c) PUSH_P(3,a); PUSH_P(2,b); PUSH_P(1,c); PUSHED(3)
222 # define RET_NNP(a,b,c) PUSH_N(5,a); PUSH_N(3,b); PUSH_P(1,c); PUSHED(5)
223
224 # define RET_NNNP(a,b,c,d) PUSH_N(7,a); PUSH_N(5,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(7)        
225 # define RET_NPNP(a,b,c,d) PUSH_N(6,a); PUSH_P(4,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(6)        
226 # define RET_NNPNNP(a,b,c,d,e,f) PUSH_N(10,a); PUSH_N(8,b); PUSH_P(6,c); PUSH_N(5,d); PUSH_N(3,e); PUSH_P(1,f); PUSHED(10)
227
228 #endif
229
230 /*-----------------------------------------------------------------------------
231   Array Primitives
232
233   Basically just new*Array - the others are all inline macros.
234
235   The size arg is always passed in R1, and the result returned in R1.
236
237   The slow entry point is for returning from a heap check, the saved
238   size argument must be re-loaded from the stack.
239   -------------------------------------------------------------------------- */
240
241 /* for objects that are *less* than the size of a word, make sure we
242  * round up to the nearest word for the size of the array.
243  */
244
245 #define BYTES_TO_STGWORDS(n) ((n) + sizeof(W_) - 1)/sizeof(W_)
246
247 FN_(newByteArrayzh_fast)
248  {
249    W_ size, stuff_size, n;
250    StgArrWords* p;
251    FB_
252      MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
253      n = R1.w;
254      stuff_size = BYTES_TO_STGWORDS(n);
255      size = sizeofW(StgArrWords)+ stuff_size;
256      p = (StgArrWords *)RET_STGCALL1(P_,allocate,size);
257      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
258      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
259      p->words = stuff_size;
260      TICK_RET_UNBOXED_TUP(1)
261      RET_P(p);
262    FE_
263  }
264
265 FN_(newPinnedByteArrayzh_fast)
266  {
267    W_ size, stuff_size, n;
268    StgArrWords* p;
269    FB_
270      MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
271      n = R1.w;
272      stuff_size = BYTES_TO_STGWORDS(n);
273
274      // We want an 8-byte aligned array.  allocatePinned() gives us
275      // 8-byte aligned memory by default, but we want to align the
276      // *goods* inside the ArrWords object, so we have to check the
277      // size of the ArrWords header and adjust our size accordingly.
278      size = sizeofW(StgArrWords)+ stuff_size;
279      if ((sizeof(StgArrWords) & 7) != 0) {
280          size++;
281      }
282
283      p = (StgArrWords *)RET_STGCALL1(P_,allocatePinned,size);
284      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
285
286      // Again, if the ArrWords header isn't a multiple of 8 bytes, we
287      // have to push the object forward one word so that the goods
288      // fall on an 8-byte boundary.
289      if ((sizeof(StgArrWords) & 7) != 0) {
290          ((StgPtr)p)++;
291      }
292
293      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
294      p->words = stuff_size;
295      TICK_RET_UNBOXED_TUP(1)
296      RET_P(p);
297    FE_
298  }
299
300 FN_(newArrayzh_fast)
301 {
302   W_ size, n, init;
303   StgMutArrPtrs* arr;
304   StgPtr p;
305   FB_
306     n = R1.w;
307
308     MAYBE_GC(R2_PTR,newArrayzh_fast);
309
310     size = sizeofW(StgMutArrPtrs) + n;
311     arr = (StgMutArrPtrs *)RET_STGCALL1(P_, allocate, size);
312     TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
313
314     SET_HDR(arr,&stg_MUT_ARR_PTRS_info,CCCS);
315     arr->ptrs = n;
316
317     init = R2.w;
318     for (p = (P_)arr + sizeofW(StgMutArrPtrs); 
319          p < (P_)arr + size; p++) {
320         *p = (W_)init;
321     }
322
323     TICK_RET_UNBOXED_TUP(1);
324     RET_P(arr);
325   FE_
326 }
327
328 FN_(newMutVarzh_fast)
329 {
330   StgMutVar* mv;
331   /* Args: R1.p = initialisation value */
332   FB_
333
334   HP_CHK_GEN_TICKY(sizeofW(StgMutVar), R1_PTR, newMutVarzh_fast,);
335   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,1, 0); /* hack, dependent on rep. */
336   CCS_ALLOC(CCCS,sizeofW(StgMutVar));
337
338   mv = (StgMutVar *)(Hp-sizeofW(StgMutVar)+1);
339   SET_HDR(mv,&stg_MUT_VAR_info,CCCS);
340   mv->var = R1.cl;
341
342   TICK_RET_UNBOXED_TUP(1);
343   RET_P(mv);
344   FE_
345 }
346
347 /* -----------------------------------------------------------------------------
348    Foreign Object Primitives
349    -------------------------------------------------------------------------- */
350
351 FN_(mkForeignObjzh_fast)
352 {
353   /* R1.p = ptr to foreign object,
354   */
355   StgForeignObj *result;
356   FB_
357
358   HP_CHK_GEN_TICKY(sizeofW(StgForeignObj), NO_PTRS, mkForeignObjzh_fast,);
359   TICK_ALLOC_PRIM(sizeofW(StgHeader),
360                   sizeofW(StgForeignObj)-sizeofW(StgHeader), 0);
361   CCS_ALLOC(CCCS,sizeofW(StgForeignObj)); /* ccs prof */
362
363   result = (StgForeignObj *) (Hp + 1 - sizeofW(StgForeignObj));
364   SET_HDR(result,&stg_FOREIGN_info,CCCS);
365   result->data = R1.p;
366
367   /* returns (# s#, ForeignObj# #) */
368   TICK_RET_UNBOXED_TUP(1);
369   RET_P(result);
370   FE_
371 }
372
373 /* These two are out-of-line for the benefit of the NCG */
374 FN_(unsafeThawArrayzh_fast)
375 {
376   FB_
377   SET_INFO((StgClosure *)R1.cl,&stg_MUT_ARR_PTRS_info);
378   recordMutable((StgMutClosure*)R1.cl);
379
380   TICK_RET_UNBOXED_TUP(1);
381   RET_P(R1.p);
382   FE_
383 }
384
385 /* -----------------------------------------------------------------------------
386    Weak Pointer Primitives
387    -------------------------------------------------------------------------- */
388
389 FN_(mkWeakzh_fast)
390 {
391   /* R1.p = key
392      R2.p = value
393      R3.p = finalizer (or NULL)
394   */
395   StgWeak *w;
396   FB_
397
398   if (R3.cl == NULL) {
399     R3.cl = &stg_NO_FINALIZER_closure;
400   }
401
402   HP_CHK_GEN_TICKY(sizeofW(StgWeak),R1_PTR|R2_PTR|R3_PTR, mkWeakzh_fast,);
403   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,  // +1 is for the link field
404                   sizeofW(StgWeak)-sizeofW(StgHeader)-1, 0);
405   CCS_ALLOC(CCCS,sizeofW(StgWeak)); /* ccs prof */
406
407   w = (StgWeak *) (Hp + 1 - sizeofW(StgWeak));
408   SET_HDR(w, &stg_WEAK_info, CCCS);
409
410   w->key        = R1.cl;
411   w->value      = R2.cl;
412   w->finalizer  = R3.cl;
413
414   w->link       = weak_ptr_list;
415   weak_ptr_list = w;
416   IF_DEBUG(weak, fprintf(stderr,"New weak pointer at %p\n",w));
417
418   TICK_RET_UNBOXED_TUP(1);
419   RET_P(w);
420   FE_
421 }
422
423 FN_(finalizzeWeakzh_fast)
424 {
425   /* R1.p = weak ptr
426    */
427   StgDeadWeak *w;
428   StgClosure *f;
429   FB_
430   TICK_RET_UNBOXED_TUP(0);
431   w = (StgDeadWeak *)R1.p;
432
433   /* already dead? */
434   if (w->header.info == &stg_DEAD_WEAK_info) {
435       RET_NP(0,&stg_NO_FINALIZER_closure);
436   }
437
438   /* kill it */
439 #ifdef PROFILING
440   // @LDV profiling
441   // A weak pointer is inherently used, so we do not need to call
442   // LDV_recordDead_FILL_SLOP_DYNAMIC():
443   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
444   // or, LDV_recordDead():
445   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
446   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
447   // large as weak pointers, so there is no need to fill the slop, either.
448   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
449 #endif
450   //
451   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
452   //
453   w->header.info = &stg_DEAD_WEAK_info;
454 #ifdef PROFILING
455   // @LDV profiling
456   LDV_recordCreate((StgClosure *)w);
457 #endif
458   f = ((StgWeak *)w)->finalizer;
459   w->link = ((StgWeak *)w)->link;
460
461   /* return the finalizer */
462   if (f == &stg_NO_FINALIZER_closure) {
463       RET_NP(0,&stg_NO_FINALIZER_closure);
464   } else {
465       RET_NP(1,f);
466   }
467   FE_
468 }
469
470 FN_(deRefWeakzh_fast)
471 {
472   /* R1.p = weak ptr */
473   StgWeak* w;
474   I_       code;
475   P_       val;
476   FB_
477   w = (StgWeak*)R1.p;
478   if (w->header.info == &stg_WEAK_info) {
479     code = 1;
480     val = (P_)((StgWeak *)w)->value;
481   } else {
482     code = 0;
483     val = (P_)w;
484   }
485   RET_NP(code,val);
486   FE_
487 }
488
489 /* -----------------------------------------------------------------------------
490    Arbitrary-precision Integer operations.
491    -------------------------------------------------------------------------- */
492
493 FN_(int2Integerzh_fast)
494 {
495    /* arguments: R1 = Int# */
496
497    I_ val, s;           /* to avoid aliasing */
498    StgArrWords* p;      /* address of array result */
499    FB_
500
501    val = R1.i;
502    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, int2Integerzh_fast,);
503    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
504    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
505
506    p = (StgArrWords *)Hp - 1;
507    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
508
509    /* mpz_set_si is inlined here, makes things simpler */
510    if (val < 0) { 
511         s  = -1;
512         *Hp = -val;
513    } else if (val > 0) {
514         s = 1;
515         *Hp = val;
516    } else {
517         s = 0;
518    }
519
520    /* returns (# size  :: Int#, 
521                  data  :: ByteArray# 
522                #)
523    */
524    TICK_RET_UNBOXED_TUP(2);
525    RET_NP(s,p);
526    FE_
527 }
528
529 FN_(word2Integerzh_fast)
530 {
531    /* arguments: R1 = Word# */
532
533    W_ val;              /* to avoid aliasing */
534    I_  s;
535    StgArrWords* p;      /* address of array result */
536    FB_
537
538    val = R1.w;
539    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, word2Integerzh_fast,)
540    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
541    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
542
543    p = (StgArrWords *)Hp - 1;
544    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
545
546    if (val != 0) {
547         s = 1;
548         *Hp = val;
549    } else {
550         s = 0;
551    }
552
553    /* returns (# size  :: Int#, 
554                  data  :: ByteArray# 
555                #)
556    */
557    TICK_RET_UNBOXED_TUP(2);
558    RET_NP(s,p);
559    FE_
560 }
561
562
563 /*
564  * 'long long' primops for converting to/from Integers.
565  */
566
567 #ifdef SUPPORT_LONG_LONGS
568
569 FN_(int64ToIntegerzh_fast)
570 {
571    /* arguments: L1 = Int64# */
572
573    StgInt64  val; /* to avoid aliasing */
574    W_ hi;
575    I_  s, neg, words_needed;
576    StgArrWords* p;      /* address of array result */
577    FB_
578
579    val = (LI_)L1;
580    neg = 0;
581
582    if ( val >= 0x100000000LL || val <= -0x100000000LL )  { 
583        words_needed = 2;
584    } else { 
585        /* minimum is one word */
586        words_needed = 1;
587    }
588    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, int64ToIntegerzh_fast,)
589    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
590    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
591
592    p = (StgArrWords *)(Hp-words_needed+1) - 1;
593    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
594
595    if ( val < 0LL ) {
596      neg = 1;
597      val = -val;
598    }
599
600    hi = (W_)((LW_)val / 0x100000000ULL);
601
602    if ( words_needed == 2 )  { 
603       s = 2;
604       Hp[-1] = (W_)val;
605       Hp[0] = hi;
606    } else if ( val != 0 ) {
607       s = 1;
608       Hp[0] = (W_)val;
609    }  else /* val==0 */   {
610       s = 0;
611    }
612    s = ( neg ? -s : s );
613
614    /* returns (# size  :: Int#, 
615                  data  :: ByteArray# 
616                #)
617    */
618    TICK_RET_UNBOXED_TUP(2);
619    RET_NP(s,p);
620    FE_
621 }
622
623 FN_(word64ToIntegerzh_fast)
624 {
625    /* arguments: L1 = Word64# */
626
627    StgWord64 val; /* to avoid aliasing */
628    StgWord hi;
629    I_  s, words_needed;
630    StgArrWords* p;      /* address of array result */
631    FB_
632
633    val = (LW_)L1;
634    if ( val >= 0x100000000ULL ) {
635       words_needed = 2;
636    } else {
637       words_needed = 1;
638    }
639    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, word64ToIntegerzh_fast,)
640    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
641    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
642
643    p = (StgArrWords *)(Hp-words_needed+1) - 1;
644    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
645
646    hi = (W_)((LW_)val / 0x100000000ULL);
647    if ( val >= 0x100000000ULL ) { 
648      s = 2;
649      Hp[-1] = ((W_)val);
650      Hp[0]  = (hi);
651    } else if ( val != 0 )      {
652       s = 1;
653       Hp[0] = ((W_)val);
654    } else /* val==0 */         {
655       s = 0;
656    }
657
658    /* returns (# size  :: Int#, 
659                  data  :: ByteArray# 
660                #)
661    */
662    TICK_RET_UNBOXED_TUP(2);
663    RET_NP(s,p);
664    FE_
665 }
666
667
668 #endif /* SUPPORT_LONG_LONGS */
669
670 /* ToDo: this is shockingly inefficient */
671
672 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
673 FN_(name)                                                               \
674 {                                                                       \
675   MP_INT arg1, arg2, result;                                            \
676   I_ s1, s2;                                                            \
677   StgArrWords* d1;                                                      \
678   StgArrWords* d2;                                                      \
679   FB_                                                                   \
680                                                                         \
681   /* call doYouWantToGC() */                                            \
682   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
683                                                                         \
684   d1 = (StgArrWords *)R2.p;                                             \
685   s1 = R1.i;                                                            \
686   d2 = (StgArrWords *)R4.p;                                             \
687   s2 = R3.i;                                                            \
688                                                                         \
689   arg1._mp_alloc        = d1->words;                                    \
690   arg1._mp_size         = (s1);                                         \
691   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
692   arg2._mp_alloc        = d2->words;                                    \
693   arg2._mp_size         = (s2);                                         \
694   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
695                                                                         \
696   STGCALL1(mpz_init,&result);                                           \
697                                                                         \
698   /* Perform the operation */                                           \
699   STGCALL3(mp_fun,&result,&arg1,&arg2);                                 \
700                                                                         \
701   TICK_RET_UNBOXED_TUP(2);                                              \
702   RET_NP(result._mp_size,                                               \
703          result._mp_d-sizeofW(StgArrWords));                            \
704   FE_                                                                   \
705 }
706
707 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
708 FN_(name)                                                               \
709 {                                                                       \
710   MP_INT arg1, result;                                                  \
711   I_ s1;                                                                \
712   StgArrWords* d1;                                                      \
713   FB_                                                                   \
714                                                                         \
715   /* call doYouWantToGC() */                                            \
716   MAYBE_GC(R2_PTR, name);                                               \
717                                                                         \
718   d1 = (StgArrWords *)R2.p;                                             \
719   s1 = R1.i;                                                            \
720                                                                         \
721   arg1._mp_alloc        = d1->words;                                    \
722   arg1._mp_size         = (s1);                                         \
723   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
724                                                                         \
725   STGCALL1(mpz_init,&result);                                           \
726                                                                         \
727   /* Perform the operation */                                           \
728   STGCALL2(mp_fun,&result,&arg1);                                       \
729                                                                         \
730   TICK_RET_UNBOXED_TUP(2);                                              \
731   RET_NP(result._mp_size,                                               \
732          result._mp_d-sizeofW(StgArrWords));                            \
733   FE_                                                                   \
734 }
735
736 #define GMP_TAKE2_RET2(name,mp_fun)                                     \
737 FN_(name)                                                               \
738 {                                                                       \
739   MP_INT arg1, arg2, result1, result2;                                  \
740   I_ s1, s2;                                                            \
741   StgArrWords* d1;                                                      \
742   StgArrWords* d2;                                                      \
743   FB_                                                                   \
744                                                                         \
745   /* call doYouWantToGC() */                                            \
746   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
747                                                                         \
748   d1 = (StgArrWords *)R2.p;                                             \
749   s1 = R1.i;                                                            \
750   d2 = (StgArrWords *)R4.p;                                             \
751   s2 = R3.i;                                                            \
752                                                                         \
753   arg1._mp_alloc        = d1->words;                                    \
754   arg1._mp_size         = (s1);                                         \
755   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
756   arg2._mp_alloc        = d2->words;                                    \
757   arg2._mp_size         = (s2);                                         \
758   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
759                                                                         \
760   STGCALL1(mpz_init,&result1);                                          \
761   STGCALL1(mpz_init,&result2);                                          \
762                                                                         \
763   /* Perform the operation */                                           \
764   STGCALL4(mp_fun,&result1,&result2,&arg1,&arg2);                       \
765                                                                         \
766   TICK_RET_UNBOXED_TUP(4);                                              \
767   RET_NPNP(result1._mp_size,                                            \
768            result1._mp_d-sizeofW(StgArrWords),                          \
769            result2._mp_size,                                            \
770            result2._mp_d-sizeofW(StgArrWords));                         \
771   FE_                                                                   \
772 }
773
774 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add);
775 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub);
776 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul);
777 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd);
778 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q);
779 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r);
780 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact);
781 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and);
782 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior);
783 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor);
784 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com);
785
786 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr);
787 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr);
788
789
790 FN_(gcdIntzh_fast)
791 {
792   /* R1 = the first Int#; R2 = the second Int# */
793   mp_limb_t aa;
794   I_ r;
795   FB_
796   aa = (mp_limb_t)(R1.i);
797   r = RET_STGCALL3(StgInt, mpn_gcd_1, (mp_limb_t *)(&aa), 1, (mp_limb_t)(R2.i));
798
799   R1.i = r;
800   /* Result parked in R1, return via info-pointer at TOS */
801   JMP_(ENTRY_CODE(Sp[0]));
802   FE_
803 }
804
805 FN_(gcdIntegerIntzh_fast)
806 {
807   /* R1 = s1; R2 = d1; R3 = the int */
808   I_ r;
809   FB_
810   r = RET_STGCALL3(StgInt,mpn_gcd_1,(mp_limb_t *)(BYTE_ARR_CTS(R2.p)), R1.i, R3.i);
811
812   R1.i = r;
813   /* Result parked in R1, return via info-pointer at TOS */
814   JMP_(ENTRY_CODE(Sp[0]));
815   FE_
816 }
817
818 FN_(cmpIntegerIntzh_fast)
819 {
820   /* R1 = s1; R2 = d1; R3 = the int */
821   I_ usize;
822   I_ vsize;
823   I_ v_digit;
824   mp_limb_t u_digit;
825   FB_
826
827   usize = R1.i;
828   vsize = 0;
829   v_digit = R3.i;
830
831   // paraphrased from mpz_cmp_si() in the GMP sources
832   if (v_digit > 0) {
833       vsize = 1;
834   } else if (v_digit < 0) {
835       vsize = -1;
836       v_digit = -v_digit;
837   }
838
839   if (usize != vsize) {
840     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
841   }
842
843   if (usize == 0) {
844     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
845   }
846
847   u_digit = *(mp_limb_t *)(BYTE_ARR_CTS(R2.p));
848
849   if (u_digit == (mp_limb_t) (unsigned long) v_digit) {
850     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
851   }
852
853   if (u_digit > (mp_limb_t) (unsigned long) v_digit) {
854     R1.i = usize; 
855   } else {
856     R1.i = -usize; 
857   }
858
859   JMP_(ENTRY_CODE(Sp[0]));
860   FE_
861 }
862
863 FN_(cmpIntegerzh_fast)
864 {
865   /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
866   I_ usize;
867   I_ vsize;
868   I_ size;
869   StgPtr up, vp;
870   int cmp;
871   FB_
872
873   // paraphrased from mpz_cmp() in the GMP sources
874   usize = R1.i;
875   vsize = R3.i;
876
877   if (usize != vsize) {
878     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
879   }
880
881   if (usize == 0) {
882     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
883   }
884
885   size = abs(usize);
886
887   up = BYTE_ARR_CTS(R2.p);
888   vp = BYTE_ARR_CTS(R4.p);
889
890   cmp = RET_STGCALL3(I_, mpn_cmp, (mp_limb_t *)up, (mp_limb_t *)vp, size);
891
892   if (cmp == 0) {
893     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
894   }
895
896   if ((cmp < 0) == (usize < 0)) {
897     R1.i = 1;
898   } else {
899     R1.i = (-1); 
900   }
901   /* Result parked in R1, return via info-pointer at TOS */
902   JMP_(ENTRY_CODE(Sp[0]));
903   FE_
904 }
905
906 FN_(integer2Intzh_fast)
907 {
908   /* R1 = s; R2 = d */
909   I_ r, s;
910   FB_
911   s = R1.i;
912   if (s == 0)
913     r = 0;
914   else {
915     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
916     if (s < 0) r = -r;
917   }
918   /* Result parked in R1, return via info-pointer at TOS */
919   R1.i = r;
920   JMP_(ENTRY_CODE(Sp[0]));
921   FE_
922 }
923
924 FN_(integer2Wordzh_fast)
925 {
926   /* R1 = s; R2 = d */
927   I_ s;
928   W_ r;
929   FB_
930   s = R1.i;
931   if (s == 0)
932     r = 0;
933   else {
934     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
935     if (s < 0) r = -r;
936   }
937   /* Result parked in R1, return via info-pointer at TOS */
938   R1.w = r;
939   JMP_(ENTRY_CODE(Sp[0]));
940   FE_
941 }
942
943
944 FN_(decodeFloatzh_fast)
945
946   MP_INT mantissa;
947   I_ exponent;
948   StgArrWords* p;
949   StgFloat arg;
950   FB_
951
952   /* arguments: F1 = Float# */
953   arg = F1;
954
955   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, decodeFloatzh_fast,);
956   TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
957   CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
958
959   /* Be prepared to tell Lennart-coded __decodeFloat    */
960   /* where mantissa._mp_d can be put (it does not care about the rest) */
961   p = (StgArrWords *)Hp - 1;
962   SET_ARR_HDR(p,&stg_ARR_WORDS_info,CCCS,1)
963   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
964
965   /* Perform the operation */
966   STGCALL3(__decodeFloat,&mantissa,&exponent,arg);
967
968   /* returns: (Int# (expn), Int#, ByteArray#) */
969   TICK_RET_UNBOXED_TUP(3);
970   RET_NNP(exponent,mantissa._mp_size,p);
971   FE_
972 }
973
974 #define DOUBLE_MANTISSA_SIZE (sizeofW(StgDouble))
975 #define ARR_SIZE (sizeofW(StgArrWords) + DOUBLE_MANTISSA_SIZE)
976
977 FN_(decodeDoublezh_fast)
978 { MP_INT mantissa;
979   I_ exponent;
980   StgDouble arg;
981   StgArrWords* p;
982   FB_
983
984   /* arguments: D1 = Double# */
985   arg = D1;
986
987   HP_CHK_GEN_TICKY(ARR_SIZE, NO_PTRS, decodeDoublezh_fast,);
988   TICK_ALLOC_PRIM(sizeofW(StgArrWords),DOUBLE_MANTISSA_SIZE,0);
989   CCS_ALLOC(CCCS,ARR_SIZE); /* ccs prof */
990
991   /* Be prepared to tell Lennart-coded __decodeDouble   */
992   /* where mantissa.d can be put (it does not care about the rest) */
993   p = (StgArrWords *)(Hp-ARR_SIZE+1);
994   SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, DOUBLE_MANTISSA_SIZE);
995   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
996
997   /* Perform the operation */
998   STGCALL3(__decodeDouble,&mantissa,&exponent,arg);
999
1000   /* returns: (Int# (expn), Int#, ByteArray#) */
1001   TICK_RET_UNBOXED_TUP(3);
1002   RET_NNP(exponent,mantissa._mp_size,p);
1003   FE_
1004 }
1005
1006 /* -----------------------------------------------------------------------------
1007  * Concurrency primitives
1008  * -------------------------------------------------------------------------- */
1009
1010 FN_(forkzh_fast)
1011 {
1012   FB_
1013   /* args: R1 = closure to spark */
1014   
1015   MAYBE_GC(R1_PTR, forkzh_fast);
1016
1017   /* create it right now, return ThreadID in R1 */
1018   R1.t = RET_STGCALL2(StgTSO *, createIOThread, 
1019                      RtsFlags.GcFlags.initialStkSize, R1.cl);
1020   STGCALL1(scheduleThread, R1.t);
1021       
1022   /* switch at the earliest opportunity */ 
1023   context_switch = 1;
1024   
1025   RET_P(R1.t);
1026   FE_
1027 }
1028
1029 FN_(forkProcesszh_fast)
1030 {
1031   pid_t pid;
1032
1033   FB_
1034   /* args: none */
1035   /* result: Pid */
1036
1037   R1.i = RET_STGCALL1(StgInt, forkProcess, CurrentTSO);
1038
1039   JMP_(ENTRY_CODE(Sp[0]));
1040
1041   FE_
1042 }
1043
1044 FN_(yieldzh_fast)
1045 {
1046   FB_
1047   JMP_(stg_yield_noregs);
1048   FE_
1049 }
1050
1051 FN_(myThreadIdzh_fast)
1052 {
1053   /* no args. */
1054   FB_
1055   RET_P((P_)CurrentTSO);
1056   FE_
1057 }
1058
1059 FN_(labelThreadzh_fast)
1060 {
1061   FB_
1062   /* args: R1.p = Addr# */
1063 #ifdef DEBUG
1064   STGCALL2(labelThread,CurrentTSO,(char *)R1.p);
1065 #endif
1066   JMP_(ENTRY_CODE(Sp[0]));
1067   FE_
1068 }
1069
1070
1071 /* -----------------------------------------------------------------------------
1072  * MVar primitives
1073  *
1074  * take & putMVar work as follows.  Firstly, an important invariant:
1075  *
1076  *    If the MVar is full, then the blocking queue contains only
1077  *    threads blocked on putMVar, and if the MVar is empty then the
1078  *    blocking queue contains only threads blocked on takeMVar.
1079  *
1080  * takeMvar:
1081  *    MVar empty : then add ourselves to the blocking queue
1082  *    MVar full  : remove the value from the MVar, and
1083  *                 blocking queue empty     : return
1084  *                 blocking queue non-empty : perform the first blocked putMVar
1085  *                                            from the queue, and wake up the
1086  *                                            thread (MVar is now full again)
1087  *
1088  * putMVar is just the dual of the above algorithm.
1089  *
1090  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1091  * the stack of the thread waiting to do the putMVar.  See
1092  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1093  * the stack layout, and the PerformPut and PerformTake macros below.
1094  *
1095  * It is important that a blocked take or put is woken up with the
1096  * take/put already performed, because otherwise there would be a
1097  * small window of vulnerability where the thread could receive an
1098  * exception and never perform its take or put, and we'd end up with a
1099  * deadlock.
1100  *
1101  * -------------------------------------------------------------------------- */
1102
1103 FN_(isEmptyMVarzh_fast)
1104 {
1105   /* args: R1 = MVar closure */
1106   I_ r;
1107   FB_
1108   r = (I_)((GET_INFO((StgMVar*)(R1.p))) == &stg_EMPTY_MVAR_info);
1109   RET_N(r);
1110   FE_
1111 }
1112
1113
1114 FN_(newMVarzh_fast)
1115 {
1116   StgMVar *mvar;
1117
1118   FB_
1119   /* args: none */
1120
1121   HP_CHK_GEN_TICKY(sizeofW(StgMVar), NO_PTRS, newMVarzh_fast,);
1122   TICK_ALLOC_PRIM(sizeofW(StgMutVar)-1, // consider head,tail,link as admin wds
1123                   1, 0);
1124   CCS_ALLOC(CCCS,sizeofW(StgMVar)); /* ccs prof */
1125   
1126   mvar = (StgMVar *) (Hp - sizeofW(StgMVar) + 1);
1127   SET_HDR(mvar,&stg_EMPTY_MVAR_info,CCCS);
1128   mvar->head = mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1129   mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1130
1131   TICK_RET_UNBOXED_TUP(1);
1132   RET_P(mvar);
1133   FE_
1134 }
1135
1136 /* If R1 isn't available, pass it on the stack */
1137 #ifdef REG_R1
1138 #define PerformTake(tso, value) ({              \
1139     (tso)->sp[1] = (W_)value;                   \
1140     (tso)->sp[0] = (W_)&stg_gc_unpt_r1_info;    \
1141   })
1142 #else
1143 #define PerformTake(tso, value) ({              \
1144     (tso)->sp[1] = (W_)value;                   \
1145     (tso)->sp[0] = (W_)&stg_ut_1_0_unreg_info;  \
1146   })
1147 #endif
1148
1149
1150 #define PerformPut(tso) ({                              \
1151     StgClosure *val = (StgClosure *)(tso)->sp[2];       \
1152     (tso)->sp[2] = (W_)&stg_gc_noregs_info;             \
1153     (tso)->sp += 2;                                     \
1154     val;                                                \
1155   })
1156
1157 FN_(takeMVarzh_fast)
1158 {
1159   StgMVar *mvar;
1160   StgClosure *val;
1161   const StgInfoTable *info;
1162
1163   FB_
1164   /* args: R1 = MVar closure */
1165
1166   mvar = (StgMVar *)R1.p;
1167
1168 #ifdef SMP
1169   info = LOCK_CLOSURE(mvar);
1170 #else
1171   info = GET_INFO(mvar);
1172 #endif
1173
1174   /* If the MVar is empty, put ourselves on its blocking queue,
1175    * and wait until we're woken up.
1176    */
1177   if (info == &stg_EMPTY_MVAR_info) {
1178     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1179       mvar->head = CurrentTSO;
1180     } else {
1181       mvar->tail->link = CurrentTSO;
1182     }
1183     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1184     CurrentTSO->why_blocked = BlockedOnMVar;
1185     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1186     mvar->tail = CurrentTSO;
1187
1188 #ifdef SMP
1189     /* unlock the MVar */
1190     mvar->header.info = &stg_EMPTY_MVAR_info;
1191 #endif
1192     JMP_(stg_block_takemvar);
1193   }
1194
1195   /* we got the value... */
1196   val = mvar->value;
1197
1198   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1199       /* There are putMVar(s) waiting... 
1200        * wake up the first thread on the queue
1201        */
1202       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1203
1204       /* actually perform the putMVar for the thread that we just woke up */
1205       mvar->value = PerformPut(mvar->head);
1206
1207 #if defined(GRAN) || defined(PAR)
1208       /* ToDo: check 2nd arg (mvar) is right */
1209       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1210 #else
1211       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1212 #endif
1213       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1214           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1215       }
1216 #ifdef SMP
1217       /* unlock in the SMP case */
1218       SET_INFO(mvar,&stg_FULL_MVAR_info);
1219 #endif
1220       TICK_RET_UNBOXED_TUP(1);
1221       RET_P(val);
1222   } else {
1223       /* No further putMVars, MVar is now empty */
1224
1225       /* do this last... we might have locked the MVar in the SMP case,
1226        * and writing the info pointer will unlock it.
1227        */
1228       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1229       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1230       TICK_RET_UNBOXED_TUP(1);
1231       RET_P(val);
1232   }
1233   FE_
1234 }
1235
1236 FN_(tryTakeMVarzh_fast)
1237 {
1238   StgMVar *mvar;
1239   StgClosure *val;
1240   const StgInfoTable *info;
1241
1242   FB_
1243   /* args: R1 = MVar closure */
1244
1245   mvar = (StgMVar *)R1.p;
1246
1247 #ifdef SMP
1248   info = LOCK_CLOSURE(mvar);
1249 #else
1250   info = GET_INFO(mvar);
1251 #endif
1252
1253   if (info == &stg_EMPTY_MVAR_info) {
1254
1255 #ifdef SMP
1256       /* unlock the MVar */
1257       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1258 #endif
1259
1260       /* HACK: we need a pointer to pass back, 
1261        * so we abuse NO_FINALIZER_closure
1262        */
1263       RET_NP(0, &stg_NO_FINALIZER_closure);
1264   }
1265
1266   /* we got the value... */
1267   val = mvar->value;
1268
1269   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1270       /* There are putMVar(s) waiting... 
1271        * wake up the first thread on the queue
1272        */
1273       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1274
1275       /* actually perform the putMVar for the thread that we just woke up */
1276       mvar->value = PerformPut(mvar->head);
1277
1278 #if defined(GRAN) || defined(PAR)
1279       /* ToDo: check 2nd arg (mvar) is right */
1280       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1281 #else
1282       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1283 #endif
1284       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1285           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1286       }
1287 #ifdef SMP
1288       /* unlock in the SMP case */
1289       SET_INFO(mvar,&stg_FULL_MVAR_info);
1290 #endif
1291   } else {
1292       /* No further putMVars, MVar is now empty */
1293       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1294
1295       /* do this last... we might have locked the MVar in the SMP case,
1296        * and writing the info pointer will unlock it.
1297        */
1298       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1299   }
1300
1301   TICK_RET_UNBOXED_TUP(1);
1302   RET_NP((I_)1, val);
1303   FE_
1304 }
1305
1306 FN_(putMVarzh_fast)
1307 {
1308   StgMVar *mvar;
1309   const StgInfoTable *info;
1310
1311   FB_
1312   /* args: R1 = MVar, R2 = value */
1313
1314   mvar = (StgMVar *)R1.p;
1315
1316 #ifdef SMP
1317   info = LOCK_CLOSURE(mvar);
1318 #else
1319   info = GET_INFO(mvar);
1320 #endif
1321
1322   if (info == &stg_FULL_MVAR_info) {
1323     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1324       mvar->head = CurrentTSO;
1325     } else {
1326       mvar->tail->link = CurrentTSO;
1327     }
1328     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1329     CurrentTSO->why_blocked = BlockedOnMVar;
1330     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1331     mvar->tail = CurrentTSO;
1332
1333 #ifdef SMP
1334     /* unlock the MVar */
1335     SET_INFO(mvar,&stg_FULL_MVAR_info);
1336 #endif
1337     JMP_(stg_block_putmvar);
1338   }
1339   
1340   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1341       /* There are takeMVar(s) waiting: wake up the first one
1342        */
1343       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1344
1345       /* actually perform the takeMVar */
1346       PerformTake(mvar->head, R2.cl);
1347       
1348 #if defined(GRAN) || defined(PAR)
1349       /* ToDo: check 2nd arg (mvar) is right */
1350       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1351 #else
1352       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1353 #endif
1354       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1355           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1356       }
1357 #ifdef SMP
1358       /* unlocks the MVar in the SMP case */
1359       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1360 #endif
1361       JMP_(ENTRY_CODE(Sp[0]));
1362   } else {
1363       /* No further takes, the MVar is now full. */
1364       mvar->value = R2.cl;
1365       /* unlocks the MVar in the SMP case */
1366       SET_INFO(mvar,&stg_FULL_MVAR_info);
1367       JMP_(ENTRY_CODE(Sp[0]));
1368   }
1369
1370   /* ToDo: yield afterward for better communication performance? */
1371   FE_
1372 }
1373
1374 FN_(tryPutMVarzh_fast)
1375 {
1376   StgMVar *mvar;
1377   const StgInfoTable *info;
1378
1379   FB_
1380   /* args: R1 = MVar, R2 = value */
1381
1382   mvar = (StgMVar *)R1.p;
1383
1384 #ifdef SMP
1385   info = LOCK_CLOSURE(mvar);
1386 #else
1387   info = GET_INFO(mvar);
1388 #endif
1389
1390   if (info == &stg_FULL_MVAR_info) {
1391
1392 #ifdef SMP
1393     /* unlock the MVar */
1394     mvar->header.info = &stg_FULL_MVAR_info;
1395 #endif
1396
1397     RET_N(0);
1398   }
1399   
1400   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1401       /* There are takeMVar(s) waiting: wake up the first one
1402        */
1403       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1404
1405       /* actually perform the takeMVar */
1406       PerformTake(mvar->head, R2.cl);
1407       
1408 #if defined(GRAN) || defined(PAR)
1409       /* ToDo: check 2nd arg (mvar) is right */
1410       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1411 #else
1412       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1413 #endif
1414       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1415           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1416       }
1417 #ifdef SMP
1418       /* unlocks the MVar in the SMP case */
1419       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1420 #endif
1421       JMP_(ENTRY_CODE(Sp[0]));
1422   } else {
1423       /* No further takes, the MVar is now full. */
1424       mvar->value = R2.cl;
1425       /* unlocks the MVar in the SMP case */
1426       SET_INFO(mvar,&stg_FULL_MVAR_info);
1427       JMP_(ENTRY_CODE(Sp[0]));
1428   }
1429
1430   /* ToDo: yield afterward for better communication performance? */
1431   FE_
1432 }
1433
1434 /* -----------------------------------------------------------------------------
1435    Stable pointer primitives
1436    -------------------------------------------------------------------------  */
1437
1438 FN_(makeStableNamezh_fast)
1439 {
1440   StgWord index;
1441   StgStableName *sn_obj;
1442   FB_
1443
1444   HP_CHK_GEN_TICKY(sizeofW(StgStableName), R1_PTR, makeStableNamezh_fast,);
1445   TICK_ALLOC_PRIM(sizeofW(StgHeader), 
1446                   sizeofW(StgStableName)-sizeofW(StgHeader), 0);
1447   CCS_ALLOC(CCCS,sizeofW(StgStableName)); /* ccs prof */
1448   
1449   index = RET_STGCALL1(StgWord,lookupStableName,R1.p);
1450
1451   /* Is there already a StableName for this heap object? */
1452   if (stable_ptr_table[index].sn_obj == NULL) {
1453     sn_obj = (StgStableName *) (Hp - sizeofW(StgStableName) + 1);
1454     SET_HDR(sn_obj,&stg_STABLE_NAME_info,CCCS);
1455     sn_obj->sn = index;
1456     stable_ptr_table[index].sn_obj = (StgClosure *)sn_obj;
1457   } else {
1458     (StgClosure *)sn_obj = stable_ptr_table[index].sn_obj;
1459   }
1460
1461   TICK_RET_UNBOXED_TUP(1);
1462   RET_P(sn_obj);
1463 }
1464
1465
1466 FN_(makeStablePtrzh_fast)
1467 {
1468   /* Args: R1 = a */
1469   StgStablePtr sp;
1470   FB_
1471   MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1472   sp = RET_STGCALL1(StgStablePtr,getStablePtr,R1.p);
1473   RET_N(sp);
1474   FE_
1475 }
1476
1477 FN_(deRefStablePtrzh_fast)
1478 {
1479   /* Args: R1 = the stable ptr */
1480   P_ r;
1481   StgStablePtr sp;
1482   FB_
1483   sp = (StgStablePtr)R1.w;
1484   r = stable_ptr_table[(StgWord)sp].addr;
1485   RET_P(r);
1486   FE_
1487 }
1488
1489 /* -----------------------------------------------------------------------------
1490    Bytecode object primitives
1491    -------------------------------------------------------------------------  */
1492
1493 FN_(newBCOzh_fast)
1494 {
1495   /* R1.p = instrs
1496      R2.p = literals
1497      R3.p = ptrs
1498      R4.p = itbls
1499   */
1500   StgBCO *bco;
1501   FB_
1502
1503   HP_CHK_GEN_TICKY(sizeofW(StgBCO),R1_PTR|R2_PTR|R3_PTR|R4_PTR, newBCOzh_fast,);
1504   TICK_ALLOC_PRIM(sizeofW(StgHeader), sizeofW(StgBCO)-sizeofW(StgHeader), 0);
1505   CCS_ALLOC(CCCS,sizeofW(StgBCO)); /* ccs prof */
1506   bco = (StgBCO *) (Hp + 1 - sizeofW(StgBCO));
1507   SET_HDR(bco, &stg_BCO_info, CCCS);
1508
1509   bco->instrs     = (StgArrWords*)R1.cl;
1510   bco->literals   = (StgArrWords*)R2.cl;
1511   bco->ptrs       = (StgMutArrPtrs*)R3.cl;
1512   bco->itbls      = (StgArrWords*)R4.cl;
1513
1514   TICK_RET_UNBOXED_TUP(1);
1515   RET_P(bco);
1516   FE_
1517 }
1518
1519 FN_(mkApUpd0zh_fast)
1520 {
1521   /* R1.p = the fn for the AP_UPD
1522   */
1523   StgAP_UPD* ap;
1524   FB_
1525   HP_CHK_GEN_TICKY(AP_sizeW(0), R1_PTR, mkApUpd0zh_fast,);
1526   TICK_ALLOC_PRIM(sizeofW(StgHeader), AP_sizeW(0)-sizeofW(StgHeader), 0);
1527   CCS_ALLOC(CCCS,AP_sizeW(0)); /* ccs prof */
1528   ap = (StgAP_UPD *) (Hp + 1 - AP_sizeW(0));
1529   SET_HDR(ap, &stg_AP_UPD_info, CCCS);
1530
1531   ap->n_args = 0;
1532   ap->fun = R1.cl;
1533
1534   TICK_RET_UNBOXED_TUP(1);
1535   RET_P(ap);
1536   FE_
1537 }
1538
1539 /* -----------------------------------------------------------------------------
1540    Thread I/O blocking primitives
1541    -------------------------------------------------------------------------- */
1542
1543 FN_(waitReadzh_fast)
1544 {
1545   FB_
1546     /* args: R1.i */
1547     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1548     CurrentTSO->why_blocked = BlockedOnRead;
1549     CurrentTSO->block_info.fd = R1.i;
1550     ACQUIRE_LOCK(&sched_mutex);
1551     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1552     RELEASE_LOCK(&sched_mutex);
1553     JMP_(stg_block_noregs);
1554   FE_
1555 }
1556
1557 FN_(waitWritezh_fast)
1558 {
1559   FB_
1560     /* args: R1.i */
1561     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1562     CurrentTSO->why_blocked = BlockedOnWrite;
1563     CurrentTSO->block_info.fd = R1.i;
1564     ACQUIRE_LOCK(&sched_mutex);
1565     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1566     RELEASE_LOCK(&sched_mutex);
1567     JMP_(stg_block_noregs);
1568   FE_
1569 }
1570
1571 FN_(delayzh_fast)
1572 {
1573   StgTSO *t, *prev;
1574   nat target;
1575   FB_
1576     /* args: R1.i */
1577     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1578     CurrentTSO->why_blocked = BlockedOnDelay;
1579
1580     ACQUIRE_LOCK(&sched_mutex);
1581
1582     target = (R1.i / (TICK_MILLISECS*1000)) + getourtimeofday();
1583     CurrentTSO->block_info.target = target;
1584
1585     /* Insert the new thread in the sleeping queue. */
1586     prev = NULL;
1587     t = sleeping_queue;
1588     while (t != END_TSO_QUEUE && t->block_info.target < target) {
1589         prev = t;
1590         t = t->link;
1591     }
1592
1593     CurrentTSO->link = t;
1594     if (prev == NULL) {
1595         sleeping_queue = CurrentTSO;
1596     } else {
1597         prev->link = CurrentTSO;
1598     }
1599
1600     RELEASE_LOCK(&sched_mutex);
1601     JMP_(stg_block_noregs);
1602   FE_
1603 }
1604