[project @ 2002-06-26 08:18:38 by stolz]
[ghc-hetmet.git] / ghc / rts / PrimOps.hc
1 /* -----------------------------------------------------------------------------
2  * $Id: PrimOps.hc,v 1.99 2002/06/26 08:18:41 stolz Exp $
3  *
4  * (c) The GHC Team, 1998-2000
5  *
6  * Primitive functions / data
7  *
8  * ---------------------------------------------------------------------------*/
9
10 #include "Stg.h"
11 #include "Rts.h"
12
13 #include "RtsFlags.h"
14 #include "StgStartup.h"
15 #include "SchedAPI.h"
16 #include "Schedule.h"
17 #include "RtsUtils.h"
18 #include "Storage.h"
19 #include "BlockAlloc.h" /* tmp */
20 #include "StablePriv.h"
21 #include "StgRun.h"
22 #include "Itimer.h"
23 #include "Prelude.h"
24
25 /* ** temporary **
26
27    classes CCallable and CReturnable don't really exist, but the
28    compiler insists on generating dictionaries containing references
29    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
30    for these.  Some C compilers can't cope with zero-length static arrays,
31    so we have to make these one element long.
32 */
33
34 StgWord GHC_ZCCCallable_static_info[1];
35 StgWord GHC_ZCCReturnable_static_info[1];
36   
37 /* -----------------------------------------------------------------------------
38    Macros for Hand-written primitives.
39    -------------------------------------------------------------------------- */
40
41 /*
42  * Horrible macros for returning unboxed tuples.
43  *
44  * How an unboxed tuple is returned depends on two factors:
45  *    - the number of real registers we have available
46  *    - the boxedness of the returned fields.
47  *
48  * To return an unboxed tuple from a primitive operation, we have macros
49  * RET_<layout> where <layout> describes the boxedness of each field of the
50  * unboxed tuple:  N indicates a non-pointer field, and P indicates a pointer.
51  *
52  * We only define the cases actually used, to avoid having too much
53  * garbage in this section.  Warning: any bugs in here will be hard to
54  * track down.
55  */
56
57 /*------ All Regs available */
58 #if defined(REG_R8)
59 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
60 # define RET_N(a)     RET_P(a)
61
62 # define RET_PP(a,b)  R1.w = (W_)(a); R2.w = (W_)(b); JMP_(ENTRY_CODE(Sp[0]));
63 # define RET_NN(a,b)  RET_PP(a,b)
64 # define RET_NP(a,b)  RET_PP(a,b)
65
66 # define RET_PPP(a,b,c) \
67         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); JMP_(ENTRY_CODE(Sp[0]));
68 # define RET_NNP(a,b,c) RET_PPP(a,b,c)
69
70 # define RET_NNNP(a,b,c,d) \
71         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)d; \
72         JMP_(ENTRY_CODE(Sp[0]));
73
74 # define RET_NPNP(a,b,c,d) \
75         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)(d); \
76         JMP_(ENTRY_CODE(Sp[0]));
77
78 # define RET_NNPNNP(a,b,c,d,e,f) \
79         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); \
80         R4.w = (W_)(d); R5.w = (W_)(e); R6.w = (W_)(f); \
81         JMP_(ENTRY_CODE(Sp[0]));
82
83 #elif defined(REG_R7) || defined(REG_R6) || defined(REG_R5) || \
84       defined(REG_R4) || defined(REG_R3)
85 # error RET_n macros not defined for this setup.
86
87 /*------ 2 Registers available */
88 #elif defined(REG_R2)
89
90 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
91 # define RET_N(a)     RET_P(a)
92
93 # define RET_PP(a,b)   R1.w = (W_)(a); R2.w = (W_)(b); \
94                        JMP_(ENTRY_CODE(Sp[0]));
95 # define RET_NN(a,b)   RET_PP(a,b)
96 # define RET_NP(a,b)   RET_PP(a,b)
97
98 # define RET_PPP(a,b,c) \
99         R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \
100         JMP_(ENTRY_CODE(Sp[1]));
101 # define RET_NNP(a,b,c) \
102         R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \
103         JMP_(ENTRY_CODE(Sp[1]));
104
105 # define RET_NNNP(a,b,c,d)                      \
106         R1.w = (W_)(a);                         \
107         R2.w = (W_)(b);                         \
108     /*  Sp[-3] = ARGTAG(1); */                  \
109         Sp[-2] = (W_)(c);                       \
110         Sp[-1] = (W_)(d);                       \
111         Sp -= 3;                                \
112         JMP_(ENTRY_CODE(Sp[3]));
113
114 # define RET_NPNP(a,b,c,d)                      \
115         R1.w = (W_)(a);                         \
116         R2.w = (W_)(b);                         \
117     /*  Sp[-3] = ARGTAG(1); */                  \
118         Sp[-2] = (W_)(c);                       \
119         Sp[-1] = (W_)(d);                       \
120         Sp -= 3;                                \
121         JMP_(ENTRY_CODE(Sp[3]));
122
123 # define RET_NNPNNP(a,b,c,d,e,f)                \
124         R1.w = (W_)(a);                         \
125         R2.w = (W_)(b);                         \
126         Sp[-6] = (W_)(c);                       \
127         /* Sp[-5] = ARGTAG(1); */               \
128         Sp[-4] = (W_)(d);                       \
129         /* Sp[-3] = ARGTAG(1); */               \
130         Sp[-2] = (W_)(e);                       \
131         Sp[-1] = (W_)(f);                       \
132         Sp -= 6;                                \
133         JMP_(ENTRY_CODE(Sp[6]));
134
135 /*------ 1 Register available */
136 #elif defined(REG_R1)
137 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
138 # define RET_N(a)     RET_P(a)
139
140 # define RET_PP(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 1; \
141                        JMP_(ENTRY_CODE(Sp[1]));
142 # define RET_NN(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 2; \
143                        JMP_(ENTRY_CODE(Sp[2]));
144 # define RET_NP(a,b)   RET_PP(a,b)
145
146 # define RET_PPP(a,b,c) \
147         R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 2; \
148         JMP_(ENTRY_CODE(Sp[2]));
149 # define RET_NNP(a,b,c) \
150         R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 3; \
151         JMP_(ENTRY_CODE(Sp[3]));
152
153 # define RET_NNNP(a,b,c,d)                      \
154         R1.w = (W_)(a);                         \
155     /*  Sp[-5] = ARGTAG(1); */                  \
156         Sp[-4] = (W_)(b);                       \
157     /*  Sp[-3] = ARGTAG(1); */                  \
158         Sp[-2] = (W_)(c);                       \
159         Sp[-1] = (W_)(d);                       \
160         Sp -= 5;                                \
161         JMP_(ENTRY_CODE(Sp[5]));
162
163 # define RET_NPNP(a,b,c,d)                      \
164         R1.w = (W_)(a);                         \
165         Sp[-4] = (W_)(b);                       \
166     /*  Sp[-3] = ARGTAG(1); */                  \
167         Sp[-2] = (W_)(c);                       \
168         Sp[-1] = (W_)(d);                       \
169         Sp -= 4;                                \
170         JMP_(ENTRY_CODE(Sp[4]));
171
172 # define RET_NNPNNP(a,b,c,d,e,f)                \
173         R1.w = (W_)(a);                         \
174         Sp[-1] = (W_)(f);                       \
175         Sp[-2] = (W_)(e);                       \
176         /* Sp[-3] = ARGTAG(1); */               \
177         Sp[-4] = (W_)(d);                       \
178         /* Sp[-5] = ARGTAG(1); */               \
179         Sp[-6] = (W_)(c);                       \
180         Sp[-7] = (W_)(b);                       \
181         /* Sp[-8] = ARGTAG(1); */               \
182         Sp -= 8;                                \
183         JMP_(ENTRY_CODE(Sp[8]));
184
185 #else /* 0 Regs available */
186
187 #define PUSH_P(o,x) Sp[-o] = (W_)(x)
188
189 #ifdef DEBUG
190 #define PUSH_N(o,x) Sp[1-o] = (W_)(x);  Sp[-o] = ARG_TAG(1);
191 #else
192 #define PUSH_N(o,x) Sp[1-o] = (W_)(x);
193 #endif
194
195 #define PUSHED(m)   Sp -= (m); JMP_(ENTRY_CODE(Sp[m]));
196
197 /* Here's how to construct these macros:
198  *
199  *   N = number of N's in the name;
200  *   P = number of P's in the name;
201  *   s = N * 2 + P;
202  *   while (nonNull(name)) {
203  *     if (nextChar == 'P') {
204  *       PUSH_P(s,_);
205  *       s -= 1;
206  *     } else {
207  *       PUSH_N(s,_);
208  *       s -= 2
209  *     }
210  *   }
211  *   PUSHED(N * 2 + P);
212  */
213
214 # define RET_P(a)     PUSH_P(1,a); PUSHED(1)
215 # define RET_N(a)     PUSH_N(2,a); PUSHED(2)
216
217 # define RET_PP(a,b)   PUSH_P(2,a); PUSH_P(1,b); PUSHED(2)
218 # define RET_NN(a,b)   PUSH_N(4,a); PUSH_N(2,b); PUSHED(4)
219 # define RET_NP(a,b)   PUSH_N(3,a); PUSH_P(1,b); PUSHED(3)
220
221 # define RET_PPP(a,b,c) PUSH_P(3,a); PUSH_P(2,b); PUSH_P(1,c); PUSHED(3)
222 # define RET_NNP(a,b,c) PUSH_N(5,a); PUSH_N(3,b); PUSH_P(1,c); PUSHED(5)
223
224 # define RET_NNNP(a,b,c,d) PUSH_N(7,a); PUSH_N(5,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(7)        
225 # define RET_NPNP(a,b,c,d) PUSH_N(6,a); PUSH_P(4,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(6)        
226 # define RET_NNPNNP(a,b,c,d,e,f) PUSH_N(10,a); PUSH_N(8,b); PUSH_P(6,c); PUSH_N(5,d); PUSH_N(3,e); PUSH_P(1,f); PUSHED(10)
227
228 #endif
229
230 /*-----------------------------------------------------------------------------
231   Array Primitives
232
233   Basically just new*Array - the others are all inline macros.
234
235   The size arg is always passed in R1, and the result returned in R1.
236
237   The slow entry point is for returning from a heap check, the saved
238   size argument must be re-loaded from the stack.
239   -------------------------------------------------------------------------- */
240
241 /* for objects that are *less* than the size of a word, make sure we
242  * round up to the nearest word for the size of the array.
243  */
244
245 #define BYTES_TO_STGWORDS(n) ((n) + sizeof(W_) - 1)/sizeof(W_)
246
247 FN_(newByteArrayzh_fast)
248  {
249    W_ size, stuff_size, n;
250    StgArrWords* p;
251    FB_
252      MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
253      n = R1.w;
254      stuff_size = BYTES_TO_STGWORDS(n);
255      size = sizeofW(StgArrWords)+ stuff_size;
256      p = (StgArrWords *)RET_STGCALL1(P_,allocate,size);
257      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
258      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
259      p->words = stuff_size;
260      TICK_RET_UNBOXED_TUP(1)
261      RET_P(p);
262    FE_
263  }
264
265 FN_(newPinnedByteArrayzh_fast)
266  {
267    W_ size, stuff_size, n;
268    StgArrWords* p;
269    FB_
270      MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
271      n = R1.w;
272      stuff_size = BYTES_TO_STGWORDS(n);
273
274      // We want an 8-byte aligned array.  allocatePinned() gives us
275      // 8-byte aligned memory by default, but we want to align the
276      // *goods* inside the ArrWords object, so we have to check the
277      // size of the ArrWords header and adjust our size accordingly.
278      size = sizeofW(StgArrWords)+ stuff_size;
279      if ((sizeof(StgArrWords) & 7) != 0) {
280          size++;
281      }
282
283      p = (StgArrWords *)RET_STGCALL1(P_,allocatePinned,size);
284      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
285
286      // Again, if the ArrWords header isn't a multiple of 8 bytes, we
287      // have to push the object forward one word so that the goods
288      // fall on an 8-byte boundary.
289      if ((sizeof(StgArrWords) & 7) != 0) {
290          ((StgPtr)p)++;
291      }
292
293      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
294      p->words = stuff_size;
295      TICK_RET_UNBOXED_TUP(1)
296      RET_P(p);
297    FE_
298  }
299
300 FN_(newArrayzh_fast)
301 {
302   W_ size, n, init;
303   StgMutArrPtrs* arr;
304   StgPtr p;
305   FB_
306     n = R1.w;
307
308     MAYBE_GC(R2_PTR,newArrayzh_fast);
309
310     size = sizeofW(StgMutArrPtrs) + n;
311     arr = (StgMutArrPtrs *)RET_STGCALL1(P_, allocate, size);
312     TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
313
314     SET_HDR(arr,&stg_MUT_ARR_PTRS_info,CCCS);
315     arr->ptrs = n;
316
317     init = R2.w;
318     for (p = (P_)arr + sizeofW(StgMutArrPtrs); 
319          p < (P_)arr + size; p++) {
320         *p = (W_)init;
321     }
322
323     TICK_RET_UNBOXED_TUP(1);
324     RET_P(arr);
325   FE_
326 }
327
328 FN_(newMutVarzh_fast)
329 {
330   StgMutVar* mv;
331   /* Args: R1.p = initialisation value */
332   FB_
333
334   HP_CHK_GEN_TICKY(sizeofW(StgMutVar), R1_PTR, newMutVarzh_fast,);
335   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,1, 0); /* hack, dependent on rep. */
336   CCS_ALLOC(CCCS,sizeofW(StgMutVar));
337
338   mv = (StgMutVar *)(Hp-sizeofW(StgMutVar)+1);
339   SET_HDR(mv,&stg_MUT_VAR_info,CCCS);
340   mv->var = R1.cl;
341
342   TICK_RET_UNBOXED_TUP(1);
343   RET_P(mv);
344   FE_
345 }
346
347 /* -----------------------------------------------------------------------------
348    Foreign Object Primitives
349    -------------------------------------------------------------------------- */
350
351 FN_(mkForeignObjzh_fast)
352 {
353   /* R1.p = ptr to foreign object,
354   */
355   StgForeignObj *result;
356   FB_
357
358   HP_CHK_GEN_TICKY(sizeofW(StgForeignObj), NO_PTRS, mkForeignObjzh_fast,);
359   TICK_ALLOC_PRIM(sizeofW(StgHeader),
360                   sizeofW(StgForeignObj)-sizeofW(StgHeader), 0);
361   CCS_ALLOC(CCCS,sizeofW(StgForeignObj)); /* ccs prof */
362
363   result = (StgForeignObj *) (Hp + 1 - sizeofW(StgForeignObj));
364   SET_HDR(result,&stg_FOREIGN_info,CCCS);
365   result->data = R1.p;
366
367   /* returns (# s#, ForeignObj# #) */
368   TICK_RET_UNBOXED_TUP(1);
369   RET_P(result);
370   FE_
371 }
372
373 /* These two are out-of-line for the benefit of the NCG */
374 FN_(unsafeThawArrayzh_fast)
375 {
376   FB_
377   SET_INFO((StgClosure *)R1.cl,&stg_MUT_ARR_PTRS_info);
378   recordMutable((StgMutClosure*)R1.cl);
379
380   TICK_RET_UNBOXED_TUP(1);
381   RET_P(R1.p);
382   FE_
383 }
384
385 /* -----------------------------------------------------------------------------
386    Weak Pointer Primitives
387    -------------------------------------------------------------------------- */
388
389 FN_(mkWeakzh_fast)
390 {
391   /* R1.p = key
392      R2.p = value
393      R3.p = finalizer (or NULL)
394   */
395   StgWeak *w;
396   FB_
397
398   if (R3.cl == NULL) {
399     R3.cl = &stg_NO_FINALIZER_closure;
400   }
401
402   HP_CHK_GEN_TICKY(sizeofW(StgWeak),R1_PTR|R2_PTR|R3_PTR, mkWeakzh_fast,);
403   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,  // +1 is for the link field
404                   sizeofW(StgWeak)-sizeofW(StgHeader)-1, 0);
405   CCS_ALLOC(CCCS,sizeofW(StgWeak)); /* ccs prof */
406
407   w = (StgWeak *) (Hp + 1 - sizeofW(StgWeak));
408   SET_HDR(w, &stg_WEAK_info, CCCS);
409
410   w->key        = R1.cl;
411   w->value      = R2.cl;
412   w->finalizer  = R3.cl;
413
414   w->link       = weak_ptr_list;
415   weak_ptr_list = w;
416   IF_DEBUG(weak, fprintf(stderr,"New weak pointer at %p\n",w));
417
418   TICK_RET_UNBOXED_TUP(1);
419   RET_P(w);
420   FE_
421 }
422
423 FN_(finalizzeWeakzh_fast)
424 {
425   /* R1.p = weak ptr
426    */
427   StgDeadWeak *w;
428   StgClosure *f;
429   FB_
430   TICK_RET_UNBOXED_TUP(0);
431   w = (StgDeadWeak *)R1.p;
432
433   /* already dead? */
434   if (w->header.info == &stg_DEAD_WEAK_info) {
435       RET_NP(0,&stg_NO_FINALIZER_closure);
436   }
437
438   /* kill it */
439 #ifdef PROFILING
440   // @LDV profiling
441   // A weak pointer is inherently used, so we do not need to call
442   // LDV_recordDead_FILL_SLOP_DYNAMIC():
443   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
444   // or, LDV_recordDead():
445   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
446   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
447   // large as weak pointers, so there is no need to fill the slop, either.
448   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
449 #endif
450   //
451   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
452   //
453   w->header.info = &stg_DEAD_WEAK_info;
454 #ifdef PROFILING
455   // @LDV profiling
456   LDV_recordCreate((StgClosure *)w);
457 #endif
458   f = ((StgWeak *)w)->finalizer;
459   w->link = ((StgWeak *)w)->link;
460
461   /* return the finalizer */
462   if (f == &stg_NO_FINALIZER_closure) {
463       RET_NP(0,&stg_NO_FINALIZER_closure);
464   } else {
465       RET_NP(1,f);
466   }
467   FE_
468 }
469
470 FN_(deRefWeakzh_fast)
471 {
472   /* R1.p = weak ptr */
473   StgWeak* w;
474   I_       code;
475   P_       val;
476   FB_
477   w = (StgWeak*)R1.p;
478   if (w->header.info == &stg_WEAK_info) {
479     code = 1;
480     val = (P_)((StgWeak *)w)->value;
481   } else {
482     code = 0;
483     val = (P_)w;
484   }
485   RET_NP(code,val);
486   FE_
487 }
488
489 /* -----------------------------------------------------------------------------
490    Arbitrary-precision Integer operations.
491    -------------------------------------------------------------------------- */
492
493 FN_(int2Integerzh_fast)
494 {
495    /* arguments: R1 = Int# */
496
497    I_ val, s;           /* to avoid aliasing */
498    StgArrWords* p;      /* address of array result */
499    FB_
500
501    val = R1.i;
502    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, int2Integerzh_fast,);
503    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
504    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
505
506    p = (StgArrWords *)Hp - 1;
507    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
508
509    /* mpz_set_si is inlined here, makes things simpler */
510    if (val < 0) { 
511         s  = -1;
512         *Hp = -val;
513    } else if (val > 0) {
514         s = 1;
515         *Hp = val;
516    } else {
517         s = 0;
518    }
519
520    /* returns (# size  :: Int#, 
521                  data  :: ByteArray# 
522                #)
523    */
524    TICK_RET_UNBOXED_TUP(2);
525    RET_NP(s,p);
526    FE_
527 }
528
529 FN_(word2Integerzh_fast)
530 {
531    /* arguments: R1 = Word# */
532
533    W_ val;              /* to avoid aliasing */
534    I_  s;
535    StgArrWords* p;      /* address of array result */
536    FB_
537
538    val = R1.w;
539    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, word2Integerzh_fast,)
540    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
541    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
542
543    p = (StgArrWords *)Hp - 1;
544    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
545
546    if (val != 0) {
547         s = 1;
548         *Hp = val;
549    } else {
550         s = 0;
551    }
552
553    /* returns (# size  :: Int#, 
554                  data  :: ByteArray# 
555                #)
556    */
557    TICK_RET_UNBOXED_TUP(2);
558    RET_NP(s,p);
559    FE_
560 }
561
562
563 /*
564  * 'long long' primops for converting to/from Integers.
565  */
566
567 #ifdef SUPPORT_LONG_LONGS
568
569 FN_(int64ToIntegerzh_fast)
570 {
571    /* arguments: L1 = Int64# */
572
573    StgInt64  val; /* to avoid aliasing */
574    W_ hi;
575    I_  s, neg, words_needed;
576    StgArrWords* p;      /* address of array result */
577    FB_
578
579    val = (LI_)L1;
580    neg = 0;
581
582    if ( val >= 0x100000000LL || val <= -0x100000000LL )  { 
583        words_needed = 2;
584    } else { 
585        /* minimum is one word */
586        words_needed = 1;
587    }
588    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, int64ToIntegerzh_fast,)
589    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
590    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
591
592    p = (StgArrWords *)(Hp-words_needed+1) - 1;
593    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
594
595    if ( val < 0LL ) {
596      neg = 1;
597      val = -val;
598    }
599
600    hi = (W_)((LW_)val / 0x100000000ULL);
601
602    if ( words_needed == 2 )  { 
603       s = 2;
604       Hp[-1] = (W_)val;
605       Hp[0] = hi;
606    } else if ( val != 0 ) {
607       s = 1;
608       Hp[0] = (W_)val;
609    }  else /* val==0 */   {
610       s = 0;
611    }
612    s = ( neg ? -s : s );
613
614    /* returns (# size  :: Int#, 
615                  data  :: ByteArray# 
616                #)
617    */
618    TICK_RET_UNBOXED_TUP(2);
619    RET_NP(s,p);
620    FE_
621 }
622
623 FN_(word64ToIntegerzh_fast)
624 {
625    /* arguments: L1 = Word64# */
626
627    StgWord64 val; /* to avoid aliasing */
628    StgWord hi;
629    I_  s, words_needed;
630    StgArrWords* p;      /* address of array result */
631    FB_
632
633    val = (LW_)L1;
634    if ( val >= 0x100000000ULL ) {
635       words_needed = 2;
636    } else {
637       words_needed = 1;
638    }
639    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, word64ToIntegerzh_fast,)
640    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
641    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
642
643    p = (StgArrWords *)(Hp-words_needed+1) - 1;
644    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
645
646    hi = (W_)((LW_)val / 0x100000000ULL);
647    if ( val >= 0x100000000ULL ) { 
648      s = 2;
649      Hp[-1] = ((W_)val);
650      Hp[0]  = (hi);
651    } else if ( val != 0 )      {
652       s = 1;
653       Hp[0] = ((W_)val);
654    } else /* val==0 */         {
655       s = 0;
656    }
657
658    /* returns (# size  :: Int#, 
659                  data  :: ByteArray# 
660                #)
661    */
662    TICK_RET_UNBOXED_TUP(2);
663    RET_NP(s,p);
664    FE_
665 }
666
667
668 #endif /* SUPPORT_LONG_LONGS */
669
670 /* ToDo: this is shockingly inefficient */
671
672 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
673 FN_(name)                                                               \
674 {                                                                       \
675   MP_INT arg1, arg2, result;                                            \
676   I_ s1, s2;                                                            \
677   StgArrWords* d1;                                                      \
678   StgArrWords* d2;                                                      \
679   FB_                                                                   \
680                                                                         \
681   /* call doYouWantToGC() */                                            \
682   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
683                                                                         \
684   d1 = (StgArrWords *)R2.p;                                             \
685   s1 = R1.i;                                                            \
686   d2 = (StgArrWords *)R4.p;                                             \
687   s2 = R3.i;                                                            \
688                                                                         \
689   arg1._mp_alloc        = d1->words;                                    \
690   arg1._mp_size         = (s1);                                         \
691   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
692   arg2._mp_alloc        = d2->words;                                    \
693   arg2._mp_size         = (s2);                                         \
694   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
695                                                                         \
696   STGCALL1(mpz_init,&result);                                           \
697                                                                         \
698   /* Perform the operation */                                           \
699   STGCALL3(mp_fun,&result,&arg1,&arg2);                                 \
700                                                                         \
701   TICK_RET_UNBOXED_TUP(2);                                              \
702   RET_NP(result._mp_size,                                               \
703          result._mp_d-sizeofW(StgArrWords));                            \
704   FE_                                                                   \
705 }
706
707 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
708 FN_(name)                                                               \
709 {                                                                       \
710   MP_INT arg1, result;                                                  \
711   I_ s1;                                                                \
712   StgArrWords* d1;                                                      \
713   FB_                                                                   \
714                                                                         \
715   /* call doYouWantToGC() */                                            \
716   MAYBE_GC(R2_PTR, name);                                               \
717                                                                         \
718   d1 = (StgArrWords *)R2.p;                                             \
719   s1 = R1.i;                                                            \
720                                                                         \
721   arg1._mp_alloc        = d1->words;                                    \
722   arg1._mp_size         = (s1);                                         \
723   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
724                                                                         \
725   STGCALL1(mpz_init,&result);                                           \
726                                                                         \
727   /* Perform the operation */                                           \
728   STGCALL2(mp_fun,&result,&arg1);                                       \
729                                                                         \
730   TICK_RET_UNBOXED_TUP(2);                                              \
731   RET_NP(result._mp_size,                                               \
732          result._mp_d-sizeofW(StgArrWords));                            \
733   FE_                                                                   \
734 }
735
736 #define GMP_TAKE2_RET2(name,mp_fun)                                     \
737 FN_(name)                                                               \
738 {                                                                       \
739   MP_INT arg1, arg2, result1, result2;                                  \
740   I_ s1, s2;                                                            \
741   StgArrWords* d1;                                                      \
742   StgArrWords* d2;                                                      \
743   FB_                                                                   \
744                                                                         \
745   /* call doYouWantToGC() */                                            \
746   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
747                                                                         \
748   d1 = (StgArrWords *)R2.p;                                             \
749   s1 = R1.i;                                                            \
750   d2 = (StgArrWords *)R4.p;                                             \
751   s2 = R3.i;                                                            \
752                                                                         \
753   arg1._mp_alloc        = d1->words;                                    \
754   arg1._mp_size         = (s1);                                         \
755   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
756   arg2._mp_alloc        = d2->words;                                    \
757   arg2._mp_size         = (s2);                                         \
758   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
759                                                                         \
760   STGCALL1(mpz_init,&result1);                                          \
761   STGCALL1(mpz_init,&result2);                                          \
762                                                                         \
763   /* Perform the operation */                                           \
764   STGCALL4(mp_fun,&result1,&result2,&arg1,&arg2);                       \
765                                                                         \
766   TICK_RET_UNBOXED_TUP(4);                                              \
767   RET_NPNP(result1._mp_size,                                            \
768            result1._mp_d-sizeofW(StgArrWords),                          \
769            result2._mp_size,                                            \
770            result2._mp_d-sizeofW(StgArrWords));                         \
771   FE_                                                                   \
772 }
773
774 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add);
775 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub);
776 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul);
777 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd);
778 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q);
779 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r);
780 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact);
781 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and);
782 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior);
783 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor);
784 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com);
785
786 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr);
787 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr);
788
789
790 FN_(gcdIntzh_fast)
791 {
792   /* R1 = the first Int#; R2 = the second Int# */
793   mp_limb_t aa;
794   I_ r;
795   FB_
796   aa = (mp_limb_t)(R1.i);
797   r = RET_STGCALL3(StgInt, mpn_gcd_1, (mp_limb_t *)(&aa), 1, (mp_limb_t)(R2.i));
798
799   R1.i = r;
800   /* Result parked in R1, return via info-pointer at TOS */
801   JMP_(ENTRY_CODE(Sp[0]));
802   FE_
803 }
804
805 FN_(gcdIntegerIntzh_fast)
806 {
807   /* R1 = s1; R2 = d1; R3 = the int */
808   I_ r;
809   FB_
810   r = RET_STGCALL3(StgInt,mpn_gcd_1,(mp_limb_t *)(BYTE_ARR_CTS(R2.p)), R1.i, R3.i);
811
812   R1.i = r;
813   /* Result parked in R1, return via info-pointer at TOS */
814   JMP_(ENTRY_CODE(Sp[0]));
815   FE_
816 }
817
818 FN_(cmpIntegerIntzh_fast)
819 {
820   /* R1 = s1; R2 = d1; R3 = the int */
821   I_ usize;
822   I_ vsize;
823   I_ v_digit;
824   mp_limb_t u_digit;
825   FB_
826
827   usize = R1.i;
828   vsize = 0;
829   v_digit = R3.i;
830
831   // paraphrased from mpz_cmp_si() in the GMP sources
832   if (v_digit > 0) {
833       vsize = 1;
834   } else if (v_digit < 0) {
835       vsize = -1;
836       v_digit = -v_digit;
837   }
838
839   if (usize != vsize) {
840     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
841   }
842
843   if (usize == 0) {
844     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
845   }
846
847   u_digit = *(mp_limb_t *)(BYTE_ARR_CTS(R2.p));
848
849   if (u_digit == (mp_limb_t) (unsigned long) v_digit) {
850     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
851   }
852
853   if (u_digit > (mp_limb_t) (unsigned long) v_digit) {
854     R1.i = usize; 
855   } else {
856     R1.i = -usize; 
857   }
858
859   JMP_(ENTRY_CODE(Sp[0]));
860   FE_
861 }
862
863 FN_(cmpIntegerzh_fast)
864 {
865   /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
866   I_ usize;
867   I_ vsize;
868   I_ size;
869   StgPtr up, vp;
870   int cmp;
871   FB_
872
873   // paraphrased from mpz_cmp() in the GMP sources
874   usize = R1.i;
875   vsize = R3.i;
876
877   if (usize != vsize) {
878     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
879   }
880
881   if (usize == 0) {
882     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
883   }
884
885   size = abs(usize);
886
887   up = BYTE_ARR_CTS(R2.p);
888   vp = BYTE_ARR_CTS(R4.p);
889
890   cmp = RET_STGCALL3(I_, mpn_cmp, (mp_limb_t *)up, (mp_limb_t *)vp, size);
891
892   if (cmp == 0) {
893     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
894   }
895
896   if ((cmp < 0) == (usize < 0)) {
897     R1.i = 1;
898   } else {
899     R1.i = (-1); 
900   }
901   /* Result parked in R1, return via info-pointer at TOS */
902   JMP_(ENTRY_CODE(Sp[0]));
903   FE_
904 }
905
906 FN_(integer2Intzh_fast)
907 {
908   /* R1 = s; R2 = d */
909   I_ r, s;
910   FB_
911   s = R1.i;
912   if (s == 0)
913     r = 0;
914   else {
915     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
916     if (s < 0) r = -r;
917   }
918   /* Result parked in R1, return via info-pointer at TOS */
919   R1.i = r;
920   JMP_(ENTRY_CODE(Sp[0]));
921   FE_
922 }
923
924 FN_(integer2Wordzh_fast)
925 {
926   /* R1 = s; R2 = d */
927   I_ s;
928   W_ r;
929   FB_
930   s = R1.i;
931   if (s == 0)
932     r = 0;
933   else {
934     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
935     if (s < 0) r = -r;
936   }
937   /* Result parked in R1, return via info-pointer at TOS */
938   R1.w = r;
939   JMP_(ENTRY_CODE(Sp[0]));
940   FE_
941 }
942
943
944 FN_(decodeFloatzh_fast)
945
946   MP_INT mantissa;
947   I_ exponent;
948   StgArrWords* p;
949   StgFloat arg;
950   FB_
951
952   /* arguments: F1 = Float# */
953   arg = F1;
954
955   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, decodeFloatzh_fast,);
956   TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
957   CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
958
959   /* Be prepared to tell Lennart-coded __decodeFloat    */
960   /* where mantissa._mp_d can be put (it does not care about the rest) */
961   p = (StgArrWords *)Hp - 1;
962   SET_ARR_HDR(p,&stg_ARR_WORDS_info,CCCS,1)
963   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
964
965   /* Perform the operation */
966   STGCALL3(__decodeFloat,&mantissa,&exponent,arg);
967
968   /* returns: (Int# (expn), Int#, ByteArray#) */
969   TICK_RET_UNBOXED_TUP(3);
970   RET_NNP(exponent,mantissa._mp_size,p);
971   FE_
972 }
973
974 #define DOUBLE_MANTISSA_SIZE (sizeofW(StgDouble))
975 #define ARR_SIZE (sizeofW(StgArrWords) + DOUBLE_MANTISSA_SIZE)
976
977 FN_(decodeDoublezh_fast)
978 { MP_INT mantissa;
979   I_ exponent;
980   StgDouble arg;
981   StgArrWords* p;
982   FB_
983
984   /* arguments: D1 = Double# */
985   arg = D1;
986
987   HP_CHK_GEN_TICKY(ARR_SIZE, NO_PTRS, decodeDoublezh_fast,);
988   TICK_ALLOC_PRIM(sizeofW(StgArrWords),DOUBLE_MANTISSA_SIZE,0);
989   CCS_ALLOC(CCCS,ARR_SIZE); /* ccs prof */
990
991   /* Be prepared to tell Lennart-coded __decodeDouble   */
992   /* where mantissa.d can be put (it does not care about the rest) */
993   p = (StgArrWords *)(Hp-ARR_SIZE+1);
994   SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, DOUBLE_MANTISSA_SIZE);
995   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
996
997   /* Perform the operation */
998   STGCALL3(__decodeDouble,&mantissa,&exponent,arg);
999
1000   /* returns: (Int# (expn), Int#, ByteArray#) */
1001   TICK_RET_UNBOXED_TUP(3);
1002   RET_NNP(exponent,mantissa._mp_size,p);
1003   FE_
1004 }
1005
1006 /* -----------------------------------------------------------------------------
1007  * Concurrency primitives
1008  * -------------------------------------------------------------------------- */
1009
1010 FN_(forkzh_fast)
1011 {
1012   FB_
1013   /* args: R1 = closure to spark */
1014   
1015   MAYBE_GC(R1_PTR, forkzh_fast);
1016
1017   /* create it right now, return ThreadID in R1 */
1018   R1.t = RET_STGCALL2(StgTSO *, createIOThread, 
1019                      RtsFlags.GcFlags.initialStkSize, R1.cl);
1020   STGCALL1(scheduleThread, R1.t);
1021       
1022   /* switch at the earliest opportunity */ 
1023   context_switch = 1;
1024   
1025   RET_P(R1.t);
1026   FE_
1027 }
1028
1029 FN_(forkProcesszh_fast)
1030 {
1031   pid_t pid;
1032
1033   FB_
1034   /* args: none */
1035   /* result: Pid */
1036
1037   R1.i = RET_STGCALL1(StgInt, forkProcess, CurrentTSO);
1038
1039   JMP_(ENTRY_CODE(Sp[0]));
1040
1041   FE_
1042 }
1043
1044 FN_(yieldzh_fast)
1045 {
1046   FB_
1047   JMP_(stg_yield_noregs);
1048   FE_
1049 }
1050
1051 FN_(myThreadIdzh_fast)
1052 {
1053   /* no args. */
1054   FB_
1055   RET_P((P_)CurrentTSO);
1056   FE_
1057 }
1058
1059 FN_(labelThreadzh_fast)
1060 {
1061   FB_
1062   /* args: 
1063         R1.p = ThreadId#
1064         R2.p = Addr# */
1065 #ifdef DEBUG
1066   STGCALL2(labelThread,(StgTSO *)R1.p,(char *)R2.p);
1067 #endif
1068   JMP_(ENTRY_CODE(Sp[0]));
1069   FE_
1070 }
1071
1072
1073 /* -----------------------------------------------------------------------------
1074  * MVar primitives
1075  *
1076  * take & putMVar work as follows.  Firstly, an important invariant:
1077  *
1078  *    If the MVar is full, then the blocking queue contains only
1079  *    threads blocked on putMVar, and if the MVar is empty then the
1080  *    blocking queue contains only threads blocked on takeMVar.
1081  *
1082  * takeMvar:
1083  *    MVar empty : then add ourselves to the blocking queue
1084  *    MVar full  : remove the value from the MVar, and
1085  *                 blocking queue empty     : return
1086  *                 blocking queue non-empty : perform the first blocked putMVar
1087  *                                            from the queue, and wake up the
1088  *                                            thread (MVar is now full again)
1089  *
1090  * putMVar is just the dual of the above algorithm.
1091  *
1092  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1093  * the stack of the thread waiting to do the putMVar.  See
1094  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1095  * the stack layout, and the PerformPut and PerformTake macros below.
1096  *
1097  * It is important that a blocked take or put is woken up with the
1098  * take/put already performed, because otherwise there would be a
1099  * small window of vulnerability where the thread could receive an
1100  * exception and never perform its take or put, and we'd end up with a
1101  * deadlock.
1102  *
1103  * -------------------------------------------------------------------------- */
1104
1105 FN_(isEmptyMVarzh_fast)
1106 {
1107   /* args: R1 = MVar closure */
1108   I_ r;
1109   FB_
1110   r = (I_)((GET_INFO((StgMVar*)(R1.p))) == &stg_EMPTY_MVAR_info);
1111   RET_N(r);
1112   FE_
1113 }
1114
1115
1116 FN_(newMVarzh_fast)
1117 {
1118   StgMVar *mvar;
1119
1120   FB_
1121   /* args: none */
1122
1123   HP_CHK_GEN_TICKY(sizeofW(StgMVar), NO_PTRS, newMVarzh_fast,);
1124   TICK_ALLOC_PRIM(sizeofW(StgMutVar)-1, // consider head,tail,link as admin wds
1125                   1, 0);
1126   CCS_ALLOC(CCCS,sizeofW(StgMVar)); /* ccs prof */
1127   
1128   mvar = (StgMVar *) (Hp - sizeofW(StgMVar) + 1);
1129   SET_HDR(mvar,&stg_EMPTY_MVAR_info,CCCS);
1130   mvar->head = mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1131   mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1132
1133   TICK_RET_UNBOXED_TUP(1);
1134   RET_P(mvar);
1135   FE_
1136 }
1137
1138 /* If R1 isn't available, pass it on the stack */
1139 #ifdef REG_R1
1140 #define PerformTake(tso, value) ({              \
1141     (tso)->sp[1] = (W_)value;                   \
1142     (tso)->sp[0] = (W_)&stg_gc_unpt_r1_info;    \
1143   })
1144 #else
1145 #define PerformTake(tso, value) ({              \
1146     (tso)->sp[1] = (W_)value;                   \
1147     (tso)->sp[0] = (W_)&stg_ut_1_0_unreg_info;  \
1148   })
1149 #endif
1150
1151
1152 #define PerformPut(tso) ({                              \
1153     StgClosure *val = (StgClosure *)(tso)->sp[2];       \
1154     (tso)->sp[2] = (W_)&stg_gc_noregs_info;             \
1155     (tso)->sp += 2;                                     \
1156     val;                                                \
1157   })
1158
1159 FN_(takeMVarzh_fast)
1160 {
1161   StgMVar *mvar;
1162   StgClosure *val;
1163   const StgInfoTable *info;
1164
1165   FB_
1166   /* args: R1 = MVar closure */
1167
1168   mvar = (StgMVar *)R1.p;
1169
1170 #ifdef SMP
1171   info = LOCK_CLOSURE(mvar);
1172 #else
1173   info = GET_INFO(mvar);
1174 #endif
1175
1176   /* If the MVar is empty, put ourselves on its blocking queue,
1177    * and wait until we're woken up.
1178    */
1179   if (info == &stg_EMPTY_MVAR_info) {
1180     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1181       mvar->head = CurrentTSO;
1182     } else {
1183       mvar->tail->link = CurrentTSO;
1184     }
1185     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1186     CurrentTSO->why_blocked = BlockedOnMVar;
1187     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1188     mvar->tail = CurrentTSO;
1189
1190 #ifdef SMP
1191     /* unlock the MVar */
1192     mvar->header.info = &stg_EMPTY_MVAR_info;
1193 #endif
1194     JMP_(stg_block_takemvar);
1195   }
1196
1197   /* we got the value... */
1198   val = mvar->value;
1199
1200   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1201       /* There are putMVar(s) waiting... 
1202        * wake up the first thread on the queue
1203        */
1204       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1205
1206       /* actually perform the putMVar for the thread that we just woke up */
1207       mvar->value = PerformPut(mvar->head);
1208
1209 #if defined(GRAN) || defined(PAR)
1210       /* ToDo: check 2nd arg (mvar) is right */
1211       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1212 #else
1213       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1214 #endif
1215       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1216           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1217       }
1218 #ifdef SMP
1219       /* unlock in the SMP case */
1220       SET_INFO(mvar,&stg_FULL_MVAR_info);
1221 #endif
1222       TICK_RET_UNBOXED_TUP(1);
1223       RET_P(val);
1224   } else {
1225       /* No further putMVars, MVar is now empty */
1226
1227       /* do this last... we might have locked the MVar in the SMP case,
1228        * and writing the info pointer will unlock it.
1229        */
1230       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1231       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1232       TICK_RET_UNBOXED_TUP(1);
1233       RET_P(val);
1234   }
1235   FE_
1236 }
1237
1238 FN_(tryTakeMVarzh_fast)
1239 {
1240   StgMVar *mvar;
1241   StgClosure *val;
1242   const StgInfoTable *info;
1243
1244   FB_
1245   /* args: R1 = MVar closure */
1246
1247   mvar = (StgMVar *)R1.p;
1248
1249 #ifdef SMP
1250   info = LOCK_CLOSURE(mvar);
1251 #else
1252   info = GET_INFO(mvar);
1253 #endif
1254
1255   if (info == &stg_EMPTY_MVAR_info) {
1256
1257 #ifdef SMP
1258       /* unlock the MVar */
1259       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1260 #endif
1261
1262       /* HACK: we need a pointer to pass back, 
1263        * so we abuse NO_FINALIZER_closure
1264        */
1265       RET_NP(0, &stg_NO_FINALIZER_closure);
1266   }
1267
1268   /* we got the value... */
1269   val = mvar->value;
1270
1271   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1272       /* There are putMVar(s) waiting... 
1273        * wake up the first thread on the queue
1274        */
1275       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1276
1277       /* actually perform the putMVar for the thread that we just woke up */
1278       mvar->value = PerformPut(mvar->head);
1279
1280 #if defined(GRAN) || defined(PAR)
1281       /* ToDo: check 2nd arg (mvar) is right */
1282       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1283 #else
1284       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1285 #endif
1286       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1287           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1288       }
1289 #ifdef SMP
1290       /* unlock in the SMP case */
1291       SET_INFO(mvar,&stg_FULL_MVAR_info);
1292 #endif
1293   } else {
1294       /* No further putMVars, MVar is now empty */
1295       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1296
1297       /* do this last... we might have locked the MVar in the SMP case,
1298        * and writing the info pointer will unlock it.
1299        */
1300       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1301   }
1302
1303   TICK_RET_UNBOXED_TUP(1);
1304   RET_NP((I_)1, val);
1305   FE_
1306 }
1307
1308 FN_(putMVarzh_fast)
1309 {
1310   StgMVar *mvar;
1311   const StgInfoTable *info;
1312
1313   FB_
1314   /* args: R1 = MVar, R2 = value */
1315
1316   mvar = (StgMVar *)R1.p;
1317
1318 #ifdef SMP
1319   info = LOCK_CLOSURE(mvar);
1320 #else
1321   info = GET_INFO(mvar);
1322 #endif
1323
1324   if (info == &stg_FULL_MVAR_info) {
1325     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1326       mvar->head = CurrentTSO;
1327     } else {
1328       mvar->tail->link = CurrentTSO;
1329     }
1330     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1331     CurrentTSO->why_blocked = BlockedOnMVar;
1332     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1333     mvar->tail = CurrentTSO;
1334
1335 #ifdef SMP
1336     /* unlock the MVar */
1337     SET_INFO(mvar,&stg_FULL_MVAR_info);
1338 #endif
1339     JMP_(stg_block_putmvar);
1340   }
1341   
1342   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1343       /* There are takeMVar(s) waiting: wake up the first one
1344        */
1345       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1346
1347       /* actually perform the takeMVar */
1348       PerformTake(mvar->head, R2.cl);
1349       
1350 #if defined(GRAN) || defined(PAR)
1351       /* ToDo: check 2nd arg (mvar) is right */
1352       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1353 #else
1354       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1355 #endif
1356       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1357           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1358       }
1359 #ifdef SMP
1360       /* unlocks the MVar in the SMP case */
1361       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1362 #endif
1363       JMP_(ENTRY_CODE(Sp[0]));
1364   } else {
1365       /* No further takes, the MVar is now full. */
1366       mvar->value = R2.cl;
1367       /* unlocks the MVar in the SMP case */
1368       SET_INFO(mvar,&stg_FULL_MVAR_info);
1369       JMP_(ENTRY_CODE(Sp[0]));
1370   }
1371
1372   /* ToDo: yield afterward for better communication performance? */
1373   FE_
1374 }
1375
1376 FN_(tryPutMVarzh_fast)
1377 {
1378   StgMVar *mvar;
1379   const StgInfoTable *info;
1380
1381   FB_
1382   /* args: R1 = MVar, R2 = value */
1383
1384   mvar = (StgMVar *)R1.p;
1385
1386 #ifdef SMP
1387   info = LOCK_CLOSURE(mvar);
1388 #else
1389   info = GET_INFO(mvar);
1390 #endif
1391
1392   if (info == &stg_FULL_MVAR_info) {
1393
1394 #ifdef SMP
1395     /* unlock the MVar */
1396     mvar->header.info = &stg_FULL_MVAR_info;
1397 #endif
1398
1399     RET_N(0);
1400   }
1401   
1402   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1403       /* There are takeMVar(s) waiting: wake up the first one
1404        */
1405       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1406
1407       /* actually perform the takeMVar */
1408       PerformTake(mvar->head, R2.cl);
1409       
1410 #if defined(GRAN) || defined(PAR)
1411       /* ToDo: check 2nd arg (mvar) is right */
1412       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1413 #else
1414       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1415 #endif
1416       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1417           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1418       }
1419 #ifdef SMP
1420       /* unlocks the MVar in the SMP case */
1421       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1422 #endif
1423       JMP_(ENTRY_CODE(Sp[0]));
1424   } else {
1425       /* No further takes, the MVar is now full. */
1426       mvar->value = R2.cl;
1427       /* unlocks the MVar in the SMP case */
1428       SET_INFO(mvar,&stg_FULL_MVAR_info);
1429       JMP_(ENTRY_CODE(Sp[0]));
1430   }
1431
1432   /* ToDo: yield afterward for better communication performance? */
1433   FE_
1434 }
1435
1436 /* -----------------------------------------------------------------------------
1437    Stable pointer primitives
1438    -------------------------------------------------------------------------  */
1439
1440 FN_(makeStableNamezh_fast)
1441 {
1442   StgWord index;
1443   StgStableName *sn_obj;
1444   FB_
1445
1446   HP_CHK_GEN_TICKY(sizeofW(StgStableName), R1_PTR, makeStableNamezh_fast,);
1447   TICK_ALLOC_PRIM(sizeofW(StgHeader), 
1448                   sizeofW(StgStableName)-sizeofW(StgHeader), 0);
1449   CCS_ALLOC(CCCS,sizeofW(StgStableName)); /* ccs prof */
1450   
1451   index = RET_STGCALL1(StgWord,lookupStableName,R1.p);
1452
1453   /* Is there already a StableName for this heap object? */
1454   if (stable_ptr_table[index].sn_obj == NULL) {
1455     sn_obj = (StgStableName *) (Hp - sizeofW(StgStableName) + 1);
1456     SET_HDR(sn_obj,&stg_STABLE_NAME_info,CCCS);
1457     sn_obj->sn = index;
1458     stable_ptr_table[index].sn_obj = (StgClosure *)sn_obj;
1459   } else {
1460     (StgClosure *)sn_obj = stable_ptr_table[index].sn_obj;
1461   }
1462
1463   TICK_RET_UNBOXED_TUP(1);
1464   RET_P(sn_obj);
1465 }
1466
1467
1468 FN_(makeStablePtrzh_fast)
1469 {
1470   /* Args: R1 = a */
1471   StgStablePtr sp;
1472   FB_
1473   MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1474   sp = RET_STGCALL1(StgStablePtr,getStablePtr,R1.p);
1475   RET_N(sp);
1476   FE_
1477 }
1478
1479 FN_(deRefStablePtrzh_fast)
1480 {
1481   /* Args: R1 = the stable ptr */
1482   P_ r;
1483   StgStablePtr sp;
1484   FB_
1485   sp = (StgStablePtr)R1.w;
1486   r = stable_ptr_table[(StgWord)sp].addr;
1487   RET_P(r);
1488   FE_
1489 }
1490
1491 /* -----------------------------------------------------------------------------
1492    Bytecode object primitives
1493    -------------------------------------------------------------------------  */
1494
1495 FN_(newBCOzh_fast)
1496 {
1497   /* R1.p = instrs
1498      R2.p = literals
1499      R3.p = ptrs
1500      R4.p = itbls
1501   */
1502   StgBCO *bco;
1503   FB_
1504
1505   HP_CHK_GEN_TICKY(sizeofW(StgBCO),R1_PTR|R2_PTR|R3_PTR|R4_PTR, newBCOzh_fast,);
1506   TICK_ALLOC_PRIM(sizeofW(StgHeader), sizeofW(StgBCO)-sizeofW(StgHeader), 0);
1507   CCS_ALLOC(CCCS,sizeofW(StgBCO)); /* ccs prof */
1508   bco = (StgBCO *) (Hp + 1 - sizeofW(StgBCO));
1509   SET_HDR(bco, &stg_BCO_info, CCCS);
1510
1511   bco->instrs     = (StgArrWords*)R1.cl;
1512   bco->literals   = (StgArrWords*)R2.cl;
1513   bco->ptrs       = (StgMutArrPtrs*)R3.cl;
1514   bco->itbls      = (StgArrWords*)R4.cl;
1515
1516   TICK_RET_UNBOXED_TUP(1);
1517   RET_P(bco);
1518   FE_
1519 }
1520
1521 FN_(mkApUpd0zh_fast)
1522 {
1523   /* R1.p = the fn for the AP_UPD
1524   */
1525   StgAP_UPD* ap;
1526   FB_
1527   HP_CHK_GEN_TICKY(AP_sizeW(0), R1_PTR, mkApUpd0zh_fast,);
1528   TICK_ALLOC_PRIM(sizeofW(StgHeader), AP_sizeW(0)-sizeofW(StgHeader), 0);
1529   CCS_ALLOC(CCCS,AP_sizeW(0)); /* ccs prof */
1530   ap = (StgAP_UPD *) (Hp + 1 - AP_sizeW(0));
1531   SET_HDR(ap, &stg_AP_UPD_info, CCCS);
1532
1533   ap->n_args = 0;
1534   ap->fun = R1.cl;
1535
1536   TICK_RET_UNBOXED_TUP(1);
1537   RET_P(ap);
1538   FE_
1539 }
1540
1541 /* -----------------------------------------------------------------------------
1542    Thread I/O blocking primitives
1543    -------------------------------------------------------------------------- */
1544
1545 FN_(waitReadzh_fast)
1546 {
1547   FB_
1548     /* args: R1.i */
1549     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1550     CurrentTSO->why_blocked = BlockedOnRead;
1551     CurrentTSO->block_info.fd = R1.i;
1552     ACQUIRE_LOCK(&sched_mutex);
1553     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1554     RELEASE_LOCK(&sched_mutex);
1555     JMP_(stg_block_noregs);
1556   FE_
1557 }
1558
1559 FN_(waitWritezh_fast)
1560 {
1561   FB_
1562     /* args: R1.i */
1563     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1564     CurrentTSO->why_blocked = BlockedOnWrite;
1565     CurrentTSO->block_info.fd = R1.i;
1566     ACQUIRE_LOCK(&sched_mutex);
1567     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1568     RELEASE_LOCK(&sched_mutex);
1569     JMP_(stg_block_noregs);
1570   FE_
1571 }
1572
1573 FN_(delayzh_fast)
1574 {
1575   StgTSO *t, *prev;
1576   nat target;
1577   FB_
1578     /* args: R1.i */
1579     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1580     CurrentTSO->why_blocked = BlockedOnDelay;
1581
1582     ACQUIRE_LOCK(&sched_mutex);
1583
1584     target = (R1.i / (TICK_MILLISECS*1000)) + getourtimeofday();
1585     CurrentTSO->block_info.target = target;
1586
1587     /* Insert the new thread in the sleeping queue. */
1588     prev = NULL;
1589     t = sleeping_queue;
1590     while (t != END_TSO_QUEUE && t->block_info.target < target) {
1591         prev = t;
1592         t = t->link;
1593     }
1594
1595     CurrentTSO->link = t;
1596     if (prev == NULL) {
1597         sleeping_queue = CurrentTSO;
1598     } else {
1599         prev->link = CurrentTSO;
1600     }
1601
1602     RELEASE_LOCK(&sched_mutex);
1603     JMP_(stg_block_noregs);
1604   FE_
1605 }
1606