[project @ 2002-12-05 23:49:43 by mthomas]
[ghc-hetmet.git] / ghc / rts / PrimOps.hc
1 /* -----------------------------------------------------------------------------
2  * $Id: PrimOps.hc,v 1.102 2002/10/22 11:01:19 simonmar Exp $
3  *
4  * (c) The GHC Team, 1998-2000
5  *
6  * Primitive functions / data
7  *
8  * ---------------------------------------------------------------------------*/
9
10 #include "Stg.h"
11 #include "Rts.h"
12
13 #include "RtsFlags.h"
14 #include "StgStartup.h"
15 #include "SchedAPI.h"
16 #include "Schedule.h"
17 #include "RtsUtils.h"
18 #include "Storage.h"
19 #include "BlockAlloc.h" /* tmp */
20 #include "StablePriv.h"
21 #include "StgRun.h"
22 #include "Itimer.h"
23 #include "Prelude.h"
24
25 #ifdef HAVE_SYS_TYPES_H
26 # include <sys/types.h>
27 #endif
28
29 #include <stdlib.h>
30
31 /* ** temporary **
32
33    classes CCallable and CReturnable don't really exist, but the
34    compiler insists on generating dictionaries containing references
35    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
36    for these.  Some C compilers can't cope with zero-length static arrays,
37    so we have to make these one element long.
38 */
39
40 StgWord GHC_ZCCCallable_static_info[1];
41 StgWord GHC_ZCCReturnable_static_info[1];
42   
43 /* -----------------------------------------------------------------------------
44    Macros for Hand-written primitives.
45    -------------------------------------------------------------------------- */
46
47 /*
48  * Horrible macros for returning unboxed tuples.
49  *
50  * How an unboxed tuple is returned depends on two factors:
51  *    - the number of real registers we have available
52  *    - the boxedness of the returned fields.
53  *
54  * To return an unboxed tuple from a primitive operation, we have macros
55  * RET_<layout> where <layout> describes the boxedness of each field of the
56  * unboxed tuple:  N indicates a non-pointer field, and P indicates a pointer.
57  *
58  * We only define the cases actually used, to avoid having too much
59  * garbage in this section.  Warning: any bugs in here will be hard to
60  * track down.
61  */
62
63 /*------ All Regs available */
64 #if defined(REG_R8)
65 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
66 # define RET_N(a)     RET_P(a)
67
68 # define RET_PP(a,b)  R1.w = (W_)(a); R2.w = (W_)(b); JMP_(ENTRY_CODE(Sp[0]));
69 # define RET_NN(a,b)  RET_PP(a,b)
70 # define RET_NP(a,b)  RET_PP(a,b)
71
72 # define RET_PPP(a,b,c) \
73         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); JMP_(ENTRY_CODE(Sp[0]));
74 # define RET_NNP(a,b,c) RET_PPP(a,b,c)
75
76 # define RET_NNNP(a,b,c,d) \
77         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)d; \
78         JMP_(ENTRY_CODE(Sp[0]));
79
80 # define RET_NPNP(a,b,c,d) \
81         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)(d); \
82         JMP_(ENTRY_CODE(Sp[0]));
83
84 # define RET_NNPNNP(a,b,c,d,e,f) \
85         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); \
86         R4.w = (W_)(d); R5.w = (W_)(e); R6.w = (W_)(f); \
87         JMP_(ENTRY_CODE(Sp[0]));
88
89 #elif defined(REG_R7) || defined(REG_R6) || defined(REG_R5) || \
90       defined(REG_R4) || defined(REG_R3)
91 # error RET_n macros not defined for this setup.
92
93 /*------ 2 Registers available */
94 #elif defined(REG_R2)
95
96 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
97 # define RET_N(a)     RET_P(a)
98
99 # define RET_PP(a,b)   R1.w = (W_)(a); R2.w = (W_)(b); \
100                        JMP_(ENTRY_CODE(Sp[0]));
101 # define RET_NN(a,b)   RET_PP(a,b)
102 # define RET_NP(a,b)   RET_PP(a,b)
103
104 # define RET_PPP(a,b,c) \
105         R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \
106         JMP_(ENTRY_CODE(Sp[1]));
107 # define RET_NNP(a,b,c) \
108         R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \
109         JMP_(ENTRY_CODE(Sp[1]));
110
111 # define RET_NNNP(a,b,c,d)                      \
112         R1.w = (W_)(a);                         \
113         R2.w = (W_)(b);                         \
114     /*  Sp[-3] = ARGTAG(1); */                  \
115         Sp[-2] = (W_)(c);                       \
116         Sp[-1] = (W_)(d);                       \
117         Sp -= 3;                                \
118         JMP_(ENTRY_CODE(Sp[3]));
119
120 # define RET_NPNP(a,b,c,d)                      \
121         R1.w = (W_)(a);                         \
122         R2.w = (W_)(b);                         \
123     /*  Sp[-3] = ARGTAG(1); */                  \
124         Sp[-2] = (W_)(c);                       \
125         Sp[-1] = (W_)(d);                       \
126         Sp -= 3;                                \
127         JMP_(ENTRY_CODE(Sp[3]));
128
129 # define RET_NNPNNP(a,b,c,d,e,f)                \
130         R1.w = (W_)(a);                         \
131         R2.w = (W_)(b);                         \
132         Sp[-6] = (W_)(c);                       \
133         /* Sp[-5] = ARGTAG(1); */               \
134         Sp[-4] = (W_)(d);                       \
135         /* Sp[-3] = ARGTAG(1); */               \
136         Sp[-2] = (W_)(e);                       \
137         Sp[-1] = (W_)(f);                       \
138         Sp -= 6;                                \
139         JMP_(ENTRY_CODE(Sp[6]));
140
141 /*------ 1 Register available */
142 #elif defined(REG_R1)
143 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
144 # define RET_N(a)     RET_P(a)
145
146 # define RET_PP(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 1; \
147                        JMP_(ENTRY_CODE(Sp[1]));
148 # define RET_NN(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 2; \
149                        JMP_(ENTRY_CODE(Sp[2]));
150 # define RET_NP(a,b)   RET_PP(a,b)
151
152 # define RET_PPP(a,b,c) \
153         R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 2; \
154         JMP_(ENTRY_CODE(Sp[2]));
155 # define RET_NNP(a,b,c) \
156         R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 3; \
157         JMP_(ENTRY_CODE(Sp[3]));
158
159 # define RET_NNNP(a,b,c,d)                      \
160         R1.w = (W_)(a);                         \
161     /*  Sp[-5] = ARGTAG(1); */                  \
162         Sp[-4] = (W_)(b);                       \
163     /*  Sp[-3] = ARGTAG(1); */                  \
164         Sp[-2] = (W_)(c);                       \
165         Sp[-1] = (W_)(d);                       \
166         Sp -= 5;                                \
167         JMP_(ENTRY_CODE(Sp[5]));
168
169 # define RET_NPNP(a,b,c,d)                      \
170         R1.w = (W_)(a);                         \
171         Sp[-4] = (W_)(b);                       \
172     /*  Sp[-3] = ARGTAG(1); */                  \
173         Sp[-2] = (W_)(c);                       \
174         Sp[-1] = (W_)(d);                       \
175         Sp -= 4;                                \
176         JMP_(ENTRY_CODE(Sp[4]));
177
178 # define RET_NNPNNP(a,b,c,d,e,f)                \
179         R1.w = (W_)(a);                         \
180         Sp[-1] = (W_)(f);                       \
181         Sp[-2] = (W_)(e);                       \
182         /* Sp[-3] = ARGTAG(1); */               \
183         Sp[-4] = (W_)(d);                       \
184         /* Sp[-5] = ARGTAG(1); */               \
185         Sp[-6] = (W_)(c);                       \
186         Sp[-7] = (W_)(b);                       \
187         /* Sp[-8] = ARGTAG(1); */               \
188         Sp -= 8;                                \
189         JMP_(ENTRY_CODE(Sp[8]));
190
191 #else /* 0 Regs available */
192
193 #define PUSH_P(o,x) Sp[-o] = (W_)(x)
194
195 #ifdef DEBUG
196 #define PUSH_N(o,x) Sp[1-o] = (W_)(x);  Sp[-o] = ARG_TAG(1);
197 #else
198 #define PUSH_N(o,x) Sp[1-o] = (W_)(x);
199 #endif
200
201 #define PUSHED(m)   Sp -= (m); JMP_(ENTRY_CODE(Sp[m]));
202
203 /* Here's how to construct these macros:
204  *
205  *   N = number of N's in the name;
206  *   P = number of P's in the name;
207  *   s = N * 2 + P;
208  *   while (nonNull(name)) {
209  *     if (nextChar == 'P') {
210  *       PUSH_P(s,_);
211  *       s -= 1;
212  *     } else {
213  *       PUSH_N(s,_);
214  *       s -= 2
215  *     }
216  *   }
217  *   PUSHED(N * 2 + P);
218  */
219
220 # define RET_P(a)     PUSH_P(1,a); PUSHED(1)
221 # define RET_N(a)     PUSH_N(2,a); PUSHED(2)
222
223 # define RET_PP(a,b)   PUSH_P(2,a); PUSH_P(1,b); PUSHED(2)
224 # define RET_NN(a,b)   PUSH_N(4,a); PUSH_N(2,b); PUSHED(4)
225 # define RET_NP(a,b)   PUSH_N(3,a); PUSH_P(1,b); PUSHED(3)
226
227 # define RET_PPP(a,b,c) PUSH_P(3,a); PUSH_P(2,b); PUSH_P(1,c); PUSHED(3)
228 # define RET_NNP(a,b,c) PUSH_N(5,a); PUSH_N(3,b); PUSH_P(1,c); PUSHED(5)
229
230 # define RET_NNNP(a,b,c,d) PUSH_N(7,a); PUSH_N(5,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(7)        
231 # define RET_NPNP(a,b,c,d) PUSH_N(6,a); PUSH_P(4,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(6)        
232 # define RET_NNPNNP(a,b,c,d,e,f) PUSH_N(10,a); PUSH_N(8,b); PUSH_P(6,c); PUSH_N(5,d); PUSH_N(3,e); PUSH_P(1,f); PUSHED(10)
233
234 #endif
235
236 /*-----------------------------------------------------------------------------
237   Array Primitives
238
239   Basically just new*Array - the others are all inline macros.
240
241   The size arg is always passed in R1, and the result returned in R1.
242
243   The slow entry point is for returning from a heap check, the saved
244   size argument must be re-loaded from the stack.
245   -------------------------------------------------------------------------- */
246
247 /* for objects that are *less* than the size of a word, make sure we
248  * round up to the nearest word for the size of the array.
249  */
250
251 #define BYTES_TO_STGWORDS(n) ((n) + sizeof(W_) - 1)/sizeof(W_)
252
253 FN_(newByteArrayzh_fast)
254  {
255    W_ size, stuff_size, n;
256    StgArrWords* p;
257    FB_
258      MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
259      n = R1.w;
260      stuff_size = BYTES_TO_STGWORDS(n);
261      size = sizeofW(StgArrWords)+ stuff_size;
262      p = (StgArrWords *)RET_STGCALL1(P_,allocate,size);
263      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
264      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
265      p->words = stuff_size;
266      TICK_RET_UNBOXED_TUP(1)
267      RET_P(p);
268    FE_
269  }
270
271 FN_(newPinnedByteArrayzh_fast)
272  {
273    W_ size, stuff_size, n;
274    StgArrWords* p;
275    FB_
276      MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
277      n = R1.w;
278      stuff_size = BYTES_TO_STGWORDS(n);
279
280      // We want an 8-byte aligned array.  allocatePinned() gives us
281      // 8-byte aligned memory by default, but we want to align the
282      // *goods* inside the ArrWords object, so we have to check the
283      // size of the ArrWords header and adjust our size accordingly.
284      size = sizeofW(StgArrWords)+ stuff_size;
285      if ((sizeof(StgArrWords) & 7) != 0) {
286          size++;
287      }
288
289      p = (StgArrWords *)RET_STGCALL1(P_,allocatePinned,size);
290      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
291
292      // Again, if the ArrWords header isn't a multiple of 8 bytes, we
293      // have to push the object forward one word so that the goods
294      // fall on an 8-byte boundary.
295      if ((sizeof(StgArrWords) & 7) != 0) {
296          ((StgPtr)p)++;
297      }
298
299      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
300      p->words = stuff_size;
301      TICK_RET_UNBOXED_TUP(1)
302      RET_P(p);
303    FE_
304  }
305
306 FN_(newArrayzh_fast)
307 {
308   W_ size, n, init;
309   StgMutArrPtrs* arr;
310   StgPtr p;
311   FB_
312     n = R1.w;
313
314     MAYBE_GC(R2_PTR,newArrayzh_fast);
315
316     size = sizeofW(StgMutArrPtrs) + n;
317     arr = (StgMutArrPtrs *)RET_STGCALL1(P_, allocate, size);
318     TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
319
320     SET_HDR(arr,&stg_MUT_ARR_PTRS_info,CCCS);
321     arr->ptrs = n;
322
323     init = R2.w;
324     for (p = (P_)arr + sizeofW(StgMutArrPtrs); 
325          p < (P_)arr + size; p++) {
326         *p = (W_)init;
327     }
328
329     TICK_RET_UNBOXED_TUP(1);
330     RET_P(arr);
331   FE_
332 }
333
334 FN_(newMutVarzh_fast)
335 {
336   StgMutVar* mv;
337   /* Args: R1.p = initialisation value */
338   FB_
339
340   HP_CHK_GEN_TICKY(sizeofW(StgMutVar), R1_PTR, newMutVarzh_fast,);
341   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,1, 0); /* hack, dependent on rep. */
342   CCS_ALLOC(CCCS,sizeofW(StgMutVar));
343
344   mv = (StgMutVar *)(Hp-sizeofW(StgMutVar)+1);
345   SET_HDR(mv,&stg_MUT_VAR_info,CCCS);
346   mv->var = R1.cl;
347
348   TICK_RET_UNBOXED_TUP(1);
349   RET_P(mv);
350   FE_
351 }
352
353 FN_(atomicModifyMutVarzh_fast)
354 {
355    StgMutVar* mv;
356    StgClosure *z, *x, *y, *r;
357    FB_
358    /* Args: R1.p :: MutVar#,  R2.p :: a -> (a,b) */
359
360    /* If x is the current contents of the MutVar#, then 
361       We want to make the new contents point to
362
363          (sel_0 (f x))
364  
365       and the return value is
366
367          (sel_1 (f x))
368
369       obviously we can share (f x).
370
371          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
372          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
373          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
374    */
375
376 #define THUNK_SIZE(n) (sizeofW(StgHeader) + stg_max((n), MIN_UPD_SIZE))
377 #define SIZE (THUNK_SIZE(2) + THUNK_SIZE(1) + THUNK_SIZE(1))
378
379    HP_CHK_GEN_TICKY(SIZE, R1_PTR|R2_PTR, atomicModifyMutVarzh_fast,);
380    CCS_ALLOC(CCCS,SIZE);
381
382    x = ((StgMutVar *)R1.cl)->var;
383
384    TICK_ALLOC_UP_THK(2,0); // XXX
385    z = (StgClosure *) Hp - THUNK_SIZE(2) + 1;
386    SET_HDR(z, &stg_ap_2_upd_info, CCCS);
387    z->payload[0] = R2.cl;
388    z->payload[1] = x;
389
390    TICK_ALLOC_UP_THK(1,1); // XXX
391    y = (StgClosure *) (StgPtr)z - THUNK_SIZE(1);
392    SET_HDR(y, &stg_sel_0_upd_info, CCCS);
393    y->payload[0] = z;
394
395    ((StgMutVar *)R1.cl)->var = y;
396
397    TICK_ALLOC_UP_THK(1,1); // XXX
398    r = (StgClosure *) (StgPtr)y - THUNK_SIZE(1);
399    SET_HDR(r, &stg_sel_1_upd_info, CCCS);
400    r->payload[0] = z;
401
402    RET_P(r);
403    JMP_(ENTRY_CODE(Sp[0]));
404    FE_
405 }
406
407 /* -----------------------------------------------------------------------------
408    Foreign Object Primitives
409    -------------------------------------------------------------------------- */
410
411 FN_(mkForeignObjzh_fast)
412 {
413   /* R1.p = ptr to foreign object,
414   */
415   StgForeignObj *result;
416   FB_
417
418   HP_CHK_GEN_TICKY(sizeofW(StgForeignObj), NO_PTRS, mkForeignObjzh_fast,);
419   TICK_ALLOC_PRIM(sizeofW(StgHeader),
420                   sizeofW(StgForeignObj)-sizeofW(StgHeader), 0);
421   CCS_ALLOC(CCCS,sizeofW(StgForeignObj)); /* ccs prof */
422
423   result = (StgForeignObj *) (Hp + 1 - sizeofW(StgForeignObj));
424   SET_HDR(result,&stg_FOREIGN_info,CCCS);
425   result->data = R1.p;
426
427   /* returns (# s#, ForeignObj# #) */
428   TICK_RET_UNBOXED_TUP(1);
429   RET_P(result);
430   FE_
431 }
432
433 /* These two are out-of-line for the benefit of the NCG */
434 FN_(unsafeThawArrayzh_fast)
435 {
436   FB_
437   SET_INFO((StgClosure *)R1.cl,&stg_MUT_ARR_PTRS_info);
438   recordMutable((StgMutClosure*)R1.cl);
439
440   TICK_RET_UNBOXED_TUP(1);
441   RET_P(R1.p);
442   FE_
443 }
444
445 /* -----------------------------------------------------------------------------
446    Weak Pointer Primitives
447    -------------------------------------------------------------------------- */
448
449 FN_(mkWeakzh_fast)
450 {
451   /* R1.p = key
452      R2.p = value
453      R3.p = finalizer (or NULL)
454   */
455   StgWeak *w;
456   FB_
457
458   if (R3.cl == NULL) {
459     R3.cl = &stg_NO_FINALIZER_closure;
460   }
461
462   HP_CHK_GEN_TICKY(sizeofW(StgWeak),R1_PTR|R2_PTR|R3_PTR, mkWeakzh_fast,);
463   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,  // +1 is for the link field
464                   sizeofW(StgWeak)-sizeofW(StgHeader)-1, 0);
465   CCS_ALLOC(CCCS,sizeofW(StgWeak)); /* ccs prof */
466
467   w = (StgWeak *) (Hp + 1 - sizeofW(StgWeak));
468   SET_HDR(w, &stg_WEAK_info, CCCS);
469
470   w->key        = R1.cl;
471   w->value      = R2.cl;
472   w->finalizer  = R3.cl;
473
474   w->link       = weak_ptr_list;
475   weak_ptr_list = w;
476   IF_DEBUG(weak, fprintf(stderr,"New weak pointer at %p\n",w));
477
478   TICK_RET_UNBOXED_TUP(1);
479   RET_P(w);
480   FE_
481 }
482
483 FN_(finalizzeWeakzh_fast)
484 {
485   /* R1.p = weak ptr
486    */
487   StgDeadWeak *w;
488   StgClosure *f;
489   FB_
490   TICK_RET_UNBOXED_TUP(0);
491   w = (StgDeadWeak *)R1.p;
492
493   /* already dead? */
494   if (w->header.info == &stg_DEAD_WEAK_info) {
495       RET_NP(0,&stg_NO_FINALIZER_closure);
496   }
497
498   /* kill it */
499 #ifdef PROFILING
500   // @LDV profiling
501   // A weak pointer is inherently used, so we do not need to call
502   // LDV_recordDead_FILL_SLOP_DYNAMIC():
503   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
504   // or, LDV_recordDead():
505   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
506   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
507   // large as weak pointers, so there is no need to fill the slop, either.
508   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
509 #endif
510   //
511   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
512   //
513   w->header.info = &stg_DEAD_WEAK_info;
514 #ifdef PROFILING
515   // @LDV profiling
516   LDV_recordCreate((StgClosure *)w);
517 #endif
518   f = ((StgWeak *)w)->finalizer;
519   w->link = ((StgWeak *)w)->link;
520
521   /* return the finalizer */
522   if (f == &stg_NO_FINALIZER_closure) {
523       RET_NP(0,&stg_NO_FINALIZER_closure);
524   } else {
525       RET_NP(1,f);
526   }
527   FE_
528 }
529
530 FN_(deRefWeakzh_fast)
531 {
532   /* R1.p = weak ptr */
533   StgWeak* w;
534   I_       code;
535   P_       val;
536   FB_
537   w = (StgWeak*)R1.p;
538   if (w->header.info == &stg_WEAK_info) {
539     code = 1;
540     val = (P_)((StgWeak *)w)->value;
541   } else {
542     code = 0;
543     val = (P_)w;
544   }
545   RET_NP(code,val);
546   FE_
547 }
548
549 /* -----------------------------------------------------------------------------
550    Arbitrary-precision Integer operations.
551    -------------------------------------------------------------------------- */
552
553 FN_(int2Integerzh_fast)
554 {
555    /* arguments: R1 = Int# */
556
557    I_ val, s;           /* to avoid aliasing */
558    StgArrWords* p;      /* address of array result */
559    FB_
560
561    val = R1.i;
562    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, int2Integerzh_fast,);
563    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
564    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
565
566    p = (StgArrWords *)Hp - 1;
567    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
568
569    /* mpz_set_si is inlined here, makes things simpler */
570    if (val < 0) { 
571         s  = -1;
572         *Hp = -val;
573    } else if (val > 0) {
574         s = 1;
575         *Hp = val;
576    } else {
577         s = 0;
578    }
579
580    /* returns (# size  :: Int#, 
581                  data  :: ByteArray# 
582                #)
583    */
584    TICK_RET_UNBOXED_TUP(2);
585    RET_NP(s,p);
586    FE_
587 }
588
589 FN_(word2Integerzh_fast)
590 {
591    /* arguments: R1 = Word# */
592
593    W_ val;              /* to avoid aliasing */
594    I_  s;
595    StgArrWords* p;      /* address of array result */
596    FB_
597
598    val = R1.w;
599    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, word2Integerzh_fast,)
600    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
601    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
602
603    p = (StgArrWords *)Hp - 1;
604    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
605
606    if (val != 0) {
607         s = 1;
608         *Hp = val;
609    } else {
610         s = 0;
611    }
612
613    /* returns (# size  :: Int#, 
614                  data  :: ByteArray# 
615                #)
616    */
617    TICK_RET_UNBOXED_TUP(2);
618    RET_NP(s,p);
619    FE_
620 }
621
622
623 /*
624  * 'long long' primops for converting to/from Integers.
625  */
626
627 #ifdef SUPPORT_LONG_LONGS
628
629 FN_(int64ToIntegerzh_fast)
630 {
631    /* arguments: L1 = Int64# */
632
633    StgInt64  val; /* to avoid aliasing */
634    W_ hi;
635    I_  s, neg, words_needed;
636    StgArrWords* p;      /* address of array result */
637    FB_
638
639    val = (LI_)L1;
640    neg = 0;
641
642    if ( val >= 0x100000000LL || val <= -0x100000000LL )  { 
643        words_needed = 2;
644    } else { 
645        /* minimum is one word */
646        words_needed = 1;
647    }
648    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, int64ToIntegerzh_fast,)
649    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
650    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
651
652    p = (StgArrWords *)(Hp-words_needed+1) - 1;
653    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
654
655    if ( val < 0LL ) {
656      neg = 1;
657      val = -val;
658    }
659
660    hi = (W_)((LW_)val / 0x100000000ULL);
661
662    if ( words_needed == 2 )  { 
663       s = 2;
664       Hp[-1] = (W_)val;
665       Hp[0] = hi;
666    } else if ( val != 0 ) {
667       s = 1;
668       Hp[0] = (W_)val;
669    }  else /* val==0 */   {
670       s = 0;
671    }
672    s = ( neg ? -s : s );
673
674    /* returns (# size  :: Int#, 
675                  data  :: ByteArray# 
676                #)
677    */
678    TICK_RET_UNBOXED_TUP(2);
679    RET_NP(s,p);
680    FE_
681 }
682
683 FN_(word64ToIntegerzh_fast)
684 {
685    /* arguments: L1 = Word64# */
686
687    StgWord64 val; /* to avoid aliasing */
688    StgWord hi;
689    I_  s, words_needed;
690    StgArrWords* p;      /* address of array result */
691    FB_
692
693    val = (LW_)L1;
694    if ( val >= 0x100000000ULL ) {
695       words_needed = 2;
696    } else {
697       words_needed = 1;
698    }
699    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, word64ToIntegerzh_fast,)
700    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
701    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
702
703    p = (StgArrWords *)(Hp-words_needed+1) - 1;
704    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
705
706    hi = (W_)((LW_)val / 0x100000000ULL);
707    if ( val >= 0x100000000ULL ) { 
708      s = 2;
709      Hp[-1] = ((W_)val);
710      Hp[0]  = (hi);
711    } else if ( val != 0 )      {
712       s = 1;
713       Hp[0] = ((W_)val);
714    } else /* val==0 */         {
715       s = 0;
716    }
717
718    /* returns (# size  :: Int#, 
719                  data  :: ByteArray# 
720                #)
721    */
722    TICK_RET_UNBOXED_TUP(2);
723    RET_NP(s,p);
724    FE_
725 }
726
727
728 #endif /* SUPPORT_LONG_LONGS */
729
730 /* ToDo: this is shockingly inefficient */
731
732 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
733 FN_(name)                                                               \
734 {                                                                       \
735   MP_INT arg1, arg2, result;                                            \
736   I_ s1, s2;                                                            \
737   StgArrWords* d1;                                                      \
738   StgArrWords* d2;                                                      \
739   FB_                                                                   \
740                                                                         \
741   /* call doYouWantToGC() */                                            \
742   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
743                                                                         \
744   d1 = (StgArrWords *)R2.p;                                             \
745   s1 = R1.i;                                                            \
746   d2 = (StgArrWords *)R4.p;                                             \
747   s2 = R3.i;                                                            \
748                                                                         \
749   arg1._mp_alloc        = d1->words;                                    \
750   arg1._mp_size         = (s1);                                         \
751   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
752   arg2._mp_alloc        = d2->words;                                    \
753   arg2._mp_size         = (s2);                                         \
754   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
755                                                                         \
756   STGCALL1(mpz_init,&result);                                           \
757                                                                         \
758   /* Perform the operation */                                           \
759   STGCALL3(mp_fun,&result,&arg1,&arg2);                                 \
760                                                                         \
761   TICK_RET_UNBOXED_TUP(2);                                              \
762   RET_NP(result._mp_size,                                               \
763          result._mp_d-sizeofW(StgArrWords));                            \
764   FE_                                                                   \
765 }
766
767 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
768 FN_(name)                                                               \
769 {                                                                       \
770   MP_INT arg1, result;                                                  \
771   I_ s1;                                                                \
772   StgArrWords* d1;                                                      \
773   FB_                                                                   \
774                                                                         \
775   /* call doYouWantToGC() */                                            \
776   MAYBE_GC(R2_PTR, name);                                               \
777                                                                         \
778   d1 = (StgArrWords *)R2.p;                                             \
779   s1 = R1.i;                                                            \
780                                                                         \
781   arg1._mp_alloc        = d1->words;                                    \
782   arg1._mp_size         = (s1);                                         \
783   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
784                                                                         \
785   STGCALL1(mpz_init,&result);                                           \
786                                                                         \
787   /* Perform the operation */                                           \
788   STGCALL2(mp_fun,&result,&arg1);                                       \
789                                                                         \
790   TICK_RET_UNBOXED_TUP(2);                                              \
791   RET_NP(result._mp_size,                                               \
792          result._mp_d-sizeofW(StgArrWords));                            \
793   FE_                                                                   \
794 }
795
796 #define GMP_TAKE2_RET2(name,mp_fun)                                     \
797 FN_(name)                                                               \
798 {                                                                       \
799   MP_INT arg1, arg2, result1, result2;                                  \
800   I_ s1, s2;                                                            \
801   StgArrWords* d1;                                                      \
802   StgArrWords* d2;                                                      \
803   FB_                                                                   \
804                                                                         \
805   /* call doYouWantToGC() */                                            \
806   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
807                                                                         \
808   d1 = (StgArrWords *)R2.p;                                             \
809   s1 = R1.i;                                                            \
810   d2 = (StgArrWords *)R4.p;                                             \
811   s2 = R3.i;                                                            \
812                                                                         \
813   arg1._mp_alloc        = d1->words;                                    \
814   arg1._mp_size         = (s1);                                         \
815   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
816   arg2._mp_alloc        = d2->words;                                    \
817   arg2._mp_size         = (s2);                                         \
818   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
819                                                                         \
820   STGCALL1(mpz_init,&result1);                                          \
821   STGCALL1(mpz_init,&result2);                                          \
822                                                                         \
823   /* Perform the operation */                                           \
824   STGCALL4(mp_fun,&result1,&result2,&arg1,&arg2);                       \
825                                                                         \
826   TICK_RET_UNBOXED_TUP(4);                                              \
827   RET_NPNP(result1._mp_size,                                            \
828            result1._mp_d-sizeofW(StgArrWords),                          \
829            result2._mp_size,                                            \
830            result2._mp_d-sizeofW(StgArrWords));                         \
831   FE_                                                                   \
832 }
833
834 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add);
835 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub);
836 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul);
837 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd);
838 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q);
839 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r);
840 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact);
841 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and);
842 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior);
843 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor);
844 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com);
845
846 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr);
847 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr);
848
849
850 FN_(gcdIntzh_fast)
851 {
852   /* R1 = the first Int#; R2 = the second Int# */
853   mp_limb_t aa;
854   I_ r;
855   FB_
856   aa = (mp_limb_t)(R1.i);
857   r = RET_STGCALL3(StgInt, mpn_gcd_1, (mp_limb_t *)(&aa), 1, (mp_limb_t)(R2.i));
858
859   R1.i = r;
860   /* Result parked in R1, return via info-pointer at TOS */
861   JMP_(ENTRY_CODE(Sp[0]));
862   FE_
863 }
864
865 FN_(gcdIntegerIntzh_fast)
866 {
867   /* R1 = s1; R2 = d1; R3 = the int */
868   I_ r;
869   FB_
870   r = RET_STGCALL3(StgInt,mpn_gcd_1,(mp_limb_t *)(BYTE_ARR_CTS(R2.p)), R1.i, R3.i);
871
872   R1.i = r;
873   /* Result parked in R1, return via info-pointer at TOS */
874   JMP_(ENTRY_CODE(Sp[0]));
875   FE_
876 }
877
878 FN_(cmpIntegerIntzh_fast)
879 {
880   /* R1 = s1; R2 = d1; R3 = the int */
881   I_ usize;
882   I_ vsize;
883   I_ v_digit;
884   mp_limb_t u_digit;
885   FB_
886
887   usize = R1.i;
888   vsize = 0;
889   v_digit = R3.i;
890
891   // paraphrased from mpz_cmp_si() in the GMP sources
892   if (v_digit > 0) {
893       vsize = 1;
894   } else if (v_digit < 0) {
895       vsize = -1;
896       v_digit = -v_digit;
897   }
898
899   if (usize != vsize) {
900     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
901   }
902
903   if (usize == 0) {
904     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
905   }
906
907   u_digit = *(mp_limb_t *)(BYTE_ARR_CTS(R2.p));
908
909   if (u_digit == (mp_limb_t) (unsigned long) v_digit) {
910     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
911   }
912
913   if (u_digit > (mp_limb_t) (unsigned long) v_digit) {
914     R1.i = usize; 
915   } else {
916     R1.i = -usize; 
917   }
918
919   JMP_(ENTRY_CODE(Sp[0]));
920   FE_
921 }
922
923 FN_(cmpIntegerzh_fast)
924 {
925   /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
926   I_ usize;
927   I_ vsize;
928   I_ size;
929   StgPtr up, vp;
930   int cmp;
931   FB_
932
933   // paraphrased from mpz_cmp() in the GMP sources
934   usize = R1.i;
935   vsize = R3.i;
936
937   if (usize != vsize) {
938     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
939   }
940
941   if (usize == 0) {
942     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
943   }
944
945   size = abs(usize);
946
947   up = BYTE_ARR_CTS(R2.p);
948   vp = BYTE_ARR_CTS(R4.p);
949
950   cmp = RET_STGCALL3(I_, mpn_cmp, (mp_limb_t *)up, (mp_limb_t *)vp, size);
951
952   if (cmp == 0) {
953     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
954   }
955
956   if ((cmp < 0) == (usize < 0)) {
957     R1.i = 1;
958   } else {
959     R1.i = (-1); 
960   }
961   /* Result parked in R1, return via info-pointer at TOS */
962   JMP_(ENTRY_CODE(Sp[0]));
963   FE_
964 }
965
966 FN_(integer2Intzh_fast)
967 {
968   /* R1 = s; R2 = d */
969   I_ r, s;
970   FB_
971   s = R1.i;
972   if (s == 0)
973     r = 0;
974   else {
975     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
976     if (s < 0) r = -r;
977   }
978   /* Result parked in R1, return via info-pointer at TOS */
979   R1.i = r;
980   JMP_(ENTRY_CODE(Sp[0]));
981   FE_
982 }
983
984 FN_(integer2Wordzh_fast)
985 {
986   /* R1 = s; R2 = d */
987   I_ s;
988   W_ r;
989   FB_
990   s = R1.i;
991   if (s == 0)
992     r = 0;
993   else {
994     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
995     if (s < 0) r = -r;
996   }
997   /* Result parked in R1, return via info-pointer at TOS */
998   R1.w = r;
999   JMP_(ENTRY_CODE(Sp[0]));
1000   FE_
1001 }
1002
1003
1004 FN_(decodeFloatzh_fast)
1005
1006   MP_INT mantissa;
1007   I_ exponent;
1008   StgArrWords* p;
1009   StgFloat arg;
1010   FB_
1011
1012   /* arguments: F1 = Float# */
1013   arg = F1;
1014
1015   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, decodeFloatzh_fast,);
1016   TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
1017   CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
1018
1019   /* Be prepared to tell Lennart-coded __decodeFloat    */
1020   /* where mantissa._mp_d can be put (it does not care about the rest) */
1021   p = (StgArrWords *)Hp - 1;
1022   SET_ARR_HDR(p,&stg_ARR_WORDS_info,CCCS,1)
1023   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
1024
1025   /* Perform the operation */
1026   STGCALL3(__decodeFloat,&mantissa,&exponent,arg);
1027
1028   /* returns: (Int# (expn), Int#, ByteArray#) */
1029   TICK_RET_UNBOXED_TUP(3);
1030   RET_NNP(exponent,mantissa._mp_size,p);
1031   FE_
1032 }
1033
1034 #define DOUBLE_MANTISSA_SIZE (sizeofW(StgDouble))
1035 #define ARR_SIZE (sizeofW(StgArrWords) + DOUBLE_MANTISSA_SIZE)
1036
1037 FN_(decodeDoublezh_fast)
1038 { MP_INT mantissa;
1039   I_ exponent;
1040   StgDouble arg;
1041   StgArrWords* p;
1042   FB_
1043
1044   /* arguments: D1 = Double# */
1045   arg = D1;
1046
1047   HP_CHK_GEN_TICKY(ARR_SIZE, NO_PTRS, decodeDoublezh_fast,);
1048   TICK_ALLOC_PRIM(sizeofW(StgArrWords),DOUBLE_MANTISSA_SIZE,0);
1049   CCS_ALLOC(CCCS,ARR_SIZE); /* ccs prof */
1050
1051   /* Be prepared to tell Lennart-coded __decodeDouble   */
1052   /* where mantissa.d can be put (it does not care about the rest) */
1053   p = (StgArrWords *)(Hp-ARR_SIZE+1);
1054   SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, DOUBLE_MANTISSA_SIZE);
1055   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
1056
1057   /* Perform the operation */
1058   STGCALL3(__decodeDouble,&mantissa,&exponent,arg);
1059
1060   /* returns: (Int# (expn), Int#, ByteArray#) */
1061   TICK_RET_UNBOXED_TUP(3);
1062   RET_NNP(exponent,mantissa._mp_size,p);
1063   FE_
1064 }
1065
1066 /* -----------------------------------------------------------------------------
1067  * Concurrency primitives
1068  * -------------------------------------------------------------------------- */
1069
1070 FN_(forkzh_fast)
1071 {
1072   FB_
1073   /* args: R1 = closure to spark */
1074   
1075   MAYBE_GC(R1_PTR, forkzh_fast);
1076
1077   /* create it right now, return ThreadID in R1 */
1078   R1.t = RET_STGCALL2(StgTSO *, createIOThread, 
1079                      RtsFlags.GcFlags.initialStkSize, R1.cl);
1080   STGCALL1(scheduleThread, R1.t);
1081       
1082   /* switch at the earliest opportunity */ 
1083   context_switch = 1;
1084   
1085   RET_P(R1.t);
1086   FE_
1087 }
1088
1089 FN_(forkProcesszh_fast)
1090 {
1091   pid_t pid;
1092
1093   FB_
1094   /* args: none */
1095   /* result: Pid */
1096
1097   R1.i = RET_STGCALL1(StgInt, forkProcess, CurrentTSO);
1098
1099   JMP_(ENTRY_CODE(Sp[0]));
1100
1101   FE_
1102 }
1103
1104 FN_(yieldzh_fast)
1105 {
1106   FB_
1107   JMP_(stg_yield_noregs);
1108   FE_
1109 }
1110
1111 FN_(myThreadIdzh_fast)
1112 {
1113   /* no args. */
1114   FB_
1115   RET_P((P_)CurrentTSO);
1116   FE_
1117 }
1118
1119 FN_(labelThreadzh_fast)
1120 {
1121   FB_
1122   /* args: 
1123         R1.p = ThreadId#
1124         R2.p = Addr# */
1125 #ifdef DEBUG
1126   STGCALL2(labelThread,R1.p,(char *)R2.p);
1127 #endif
1128   JMP_(ENTRY_CODE(Sp[0]));
1129   FE_
1130 }
1131
1132
1133 /* -----------------------------------------------------------------------------
1134  * MVar primitives
1135  *
1136  * take & putMVar work as follows.  Firstly, an important invariant:
1137  *
1138  *    If the MVar is full, then the blocking queue contains only
1139  *    threads blocked on putMVar, and if the MVar is empty then the
1140  *    blocking queue contains only threads blocked on takeMVar.
1141  *
1142  * takeMvar:
1143  *    MVar empty : then add ourselves to the blocking queue
1144  *    MVar full  : remove the value from the MVar, and
1145  *                 blocking queue empty     : return
1146  *                 blocking queue non-empty : perform the first blocked putMVar
1147  *                                            from the queue, and wake up the
1148  *                                            thread (MVar is now full again)
1149  *
1150  * putMVar is just the dual of the above algorithm.
1151  *
1152  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1153  * the stack of the thread waiting to do the putMVar.  See
1154  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1155  * the stack layout, and the PerformPut and PerformTake macros below.
1156  *
1157  * It is important that a blocked take or put is woken up with the
1158  * take/put already performed, because otherwise there would be a
1159  * small window of vulnerability where the thread could receive an
1160  * exception and never perform its take or put, and we'd end up with a
1161  * deadlock.
1162  *
1163  * -------------------------------------------------------------------------- */
1164
1165 FN_(isEmptyMVarzh_fast)
1166 {
1167   /* args: R1 = MVar closure */
1168   I_ r;
1169   FB_
1170   r = (I_)((GET_INFO((StgMVar*)(R1.p))) == &stg_EMPTY_MVAR_info);
1171   RET_N(r);
1172   FE_
1173 }
1174
1175
1176 FN_(newMVarzh_fast)
1177 {
1178   StgMVar *mvar;
1179
1180   FB_
1181   /* args: none */
1182
1183   HP_CHK_GEN_TICKY(sizeofW(StgMVar), NO_PTRS, newMVarzh_fast,);
1184   TICK_ALLOC_PRIM(sizeofW(StgMutVar)-1, // consider head,tail,link as admin wds
1185                   1, 0);
1186   CCS_ALLOC(CCCS,sizeofW(StgMVar)); /* ccs prof */
1187   
1188   mvar = (StgMVar *) (Hp - sizeofW(StgMVar) + 1);
1189   SET_HDR(mvar,&stg_EMPTY_MVAR_info,CCCS);
1190   mvar->head = mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1191   mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1192
1193   TICK_RET_UNBOXED_TUP(1);
1194   RET_P(mvar);
1195   FE_
1196 }
1197
1198 /* If R1 isn't available, pass it on the stack */
1199 #ifdef REG_R1
1200 #define PerformTake(tso, value) ({              \
1201     (tso)->sp[1] = (W_)value;                   \
1202     (tso)->sp[0] = (W_)&stg_gc_unpt_r1_info;    \
1203   })
1204 #else
1205 #define PerformTake(tso, value) ({              \
1206     (tso)->sp[1] = (W_)value;                   \
1207     (tso)->sp[0] = (W_)&stg_ut_1_0_unreg_info;  \
1208   })
1209 #endif
1210
1211
1212 #define PerformPut(tso) ({                              \
1213     StgClosure *val = (StgClosure *)(tso)->sp[2];       \
1214     (tso)->sp[2] = (W_)&stg_gc_noregs_info;             \
1215     (tso)->sp += 2;                                     \
1216     val;                                                \
1217   })
1218
1219 FN_(takeMVarzh_fast)
1220 {
1221   StgMVar *mvar;
1222   StgClosure *val;
1223   const StgInfoTable *info;
1224
1225   FB_
1226   /* args: R1 = MVar closure */
1227
1228   mvar = (StgMVar *)R1.p;
1229
1230 #ifdef SMP
1231   info = LOCK_CLOSURE(mvar);
1232 #else
1233   info = GET_INFO(mvar);
1234 #endif
1235
1236   /* If the MVar is empty, put ourselves on its blocking queue,
1237    * and wait until we're woken up.
1238    */
1239   if (info == &stg_EMPTY_MVAR_info) {
1240     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1241       mvar->head = CurrentTSO;
1242     } else {
1243       mvar->tail->link = CurrentTSO;
1244     }
1245     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1246     CurrentTSO->why_blocked = BlockedOnMVar;
1247     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1248     mvar->tail = CurrentTSO;
1249
1250 #ifdef SMP
1251     /* unlock the MVar */
1252     mvar->header.info = &stg_EMPTY_MVAR_info;
1253 #endif
1254     JMP_(stg_block_takemvar);
1255   }
1256
1257   /* we got the value... */
1258   val = mvar->value;
1259
1260   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1261       /* There are putMVar(s) waiting... 
1262        * wake up the first thread on the queue
1263        */
1264       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1265
1266       /* actually perform the putMVar for the thread that we just woke up */
1267       mvar->value = PerformPut(mvar->head);
1268
1269 #if defined(GRAN) || defined(PAR)
1270       /* ToDo: check 2nd arg (mvar) is right */
1271       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1272 #else
1273       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1274 #endif
1275       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1276           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1277       }
1278 #ifdef SMP
1279       /* unlock in the SMP case */
1280       SET_INFO(mvar,&stg_FULL_MVAR_info);
1281 #endif
1282       TICK_RET_UNBOXED_TUP(1);
1283       RET_P(val);
1284   } else {
1285       /* No further putMVars, MVar is now empty */
1286
1287       /* do this last... we might have locked the MVar in the SMP case,
1288        * and writing the info pointer will unlock it.
1289        */
1290       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1291       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1292       TICK_RET_UNBOXED_TUP(1);
1293       RET_P(val);
1294   }
1295   FE_
1296 }
1297
1298 FN_(tryTakeMVarzh_fast)
1299 {
1300   StgMVar *mvar;
1301   StgClosure *val;
1302   const StgInfoTable *info;
1303
1304   FB_
1305   /* args: R1 = MVar closure */
1306
1307   mvar = (StgMVar *)R1.p;
1308
1309 #ifdef SMP
1310   info = LOCK_CLOSURE(mvar);
1311 #else
1312   info = GET_INFO(mvar);
1313 #endif
1314
1315   if (info == &stg_EMPTY_MVAR_info) {
1316
1317 #ifdef SMP
1318       /* unlock the MVar */
1319       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1320 #endif
1321
1322       /* HACK: we need a pointer to pass back, 
1323        * so we abuse NO_FINALIZER_closure
1324        */
1325       RET_NP(0, &stg_NO_FINALIZER_closure);
1326   }
1327
1328   /* we got the value... */
1329   val = mvar->value;
1330
1331   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1332       /* There are putMVar(s) waiting... 
1333        * wake up the first thread on the queue
1334        */
1335       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1336
1337       /* actually perform the putMVar for the thread that we just woke up */
1338       mvar->value = PerformPut(mvar->head);
1339
1340 #if defined(GRAN) || defined(PAR)
1341       /* ToDo: check 2nd arg (mvar) is right */
1342       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1343 #else
1344       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1345 #endif
1346       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1347           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1348       }
1349 #ifdef SMP
1350       /* unlock in the SMP case */
1351       SET_INFO(mvar,&stg_FULL_MVAR_info);
1352 #endif
1353   } else {
1354       /* No further putMVars, MVar is now empty */
1355       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1356
1357       /* do this last... we might have locked the MVar in the SMP case,
1358        * and writing the info pointer will unlock it.
1359        */
1360       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1361   }
1362
1363   TICK_RET_UNBOXED_TUP(1);
1364   RET_NP((I_)1, val);
1365   FE_
1366 }
1367
1368 FN_(putMVarzh_fast)
1369 {
1370   StgMVar *mvar;
1371   const StgInfoTable *info;
1372
1373   FB_
1374   /* args: R1 = MVar, R2 = value */
1375
1376   mvar = (StgMVar *)R1.p;
1377
1378 #ifdef SMP
1379   info = LOCK_CLOSURE(mvar);
1380 #else
1381   info = GET_INFO(mvar);
1382 #endif
1383
1384   if (info == &stg_FULL_MVAR_info) {
1385     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1386       mvar->head = CurrentTSO;
1387     } else {
1388       mvar->tail->link = CurrentTSO;
1389     }
1390     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1391     CurrentTSO->why_blocked = BlockedOnMVar;
1392     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1393     mvar->tail = CurrentTSO;
1394
1395 #ifdef SMP
1396     /* unlock the MVar */
1397     SET_INFO(mvar,&stg_FULL_MVAR_info);
1398 #endif
1399     JMP_(stg_block_putmvar);
1400   }
1401   
1402   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1403       /* There are takeMVar(s) waiting: wake up the first one
1404        */
1405       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1406
1407       /* actually perform the takeMVar */
1408       PerformTake(mvar->head, R2.cl);
1409       
1410 #if defined(GRAN) || defined(PAR)
1411       /* ToDo: check 2nd arg (mvar) is right */
1412       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1413 #else
1414       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1415 #endif
1416       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1417           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1418       }
1419 #ifdef SMP
1420       /* unlocks the MVar in the SMP case */
1421       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1422 #endif
1423       JMP_(ENTRY_CODE(Sp[0]));
1424   } else {
1425       /* No further takes, the MVar is now full. */
1426       mvar->value = R2.cl;
1427       /* unlocks the MVar in the SMP case */
1428       SET_INFO(mvar,&stg_FULL_MVAR_info);
1429       JMP_(ENTRY_CODE(Sp[0]));
1430   }
1431
1432   /* ToDo: yield afterward for better communication performance? */
1433   FE_
1434 }
1435
1436 FN_(tryPutMVarzh_fast)
1437 {
1438   StgMVar *mvar;
1439   const StgInfoTable *info;
1440
1441   FB_
1442   /* args: R1 = MVar, R2 = value */
1443
1444   mvar = (StgMVar *)R1.p;
1445
1446 #ifdef SMP
1447   info = LOCK_CLOSURE(mvar);
1448 #else
1449   info = GET_INFO(mvar);
1450 #endif
1451
1452   if (info == &stg_FULL_MVAR_info) {
1453
1454 #ifdef SMP
1455     /* unlock the MVar */
1456     mvar->header.info = &stg_FULL_MVAR_info;
1457 #endif
1458
1459     RET_N(0);
1460   }
1461   
1462   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1463       /* There are takeMVar(s) waiting: wake up the first one
1464        */
1465       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1466
1467       /* actually perform the takeMVar */
1468       PerformTake(mvar->head, R2.cl);
1469       
1470 #if defined(GRAN) || defined(PAR)
1471       /* ToDo: check 2nd arg (mvar) is right */
1472       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1473 #else
1474       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1475 #endif
1476       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1477           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1478       }
1479 #ifdef SMP
1480       /* unlocks the MVar in the SMP case */
1481       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1482 #endif
1483       JMP_(ENTRY_CODE(Sp[0]));
1484   } else {
1485       /* No further takes, the MVar is now full. */
1486       mvar->value = R2.cl;
1487       /* unlocks the MVar in the SMP case */
1488       SET_INFO(mvar,&stg_FULL_MVAR_info);
1489       JMP_(ENTRY_CODE(Sp[0]));
1490   }
1491
1492   /* ToDo: yield afterward for better communication performance? */
1493   FE_
1494 }
1495
1496 /* -----------------------------------------------------------------------------
1497    Stable pointer primitives
1498    -------------------------------------------------------------------------  */
1499
1500 FN_(makeStableNamezh_fast)
1501 {
1502   StgWord index;
1503   StgStableName *sn_obj;
1504   FB_
1505
1506   HP_CHK_GEN_TICKY(sizeofW(StgStableName), R1_PTR, makeStableNamezh_fast,);
1507   TICK_ALLOC_PRIM(sizeofW(StgHeader), 
1508                   sizeofW(StgStableName)-sizeofW(StgHeader), 0);
1509   CCS_ALLOC(CCCS,sizeofW(StgStableName)); /* ccs prof */
1510   
1511   index = RET_STGCALL1(StgWord,lookupStableName,R1.p);
1512
1513   /* Is there already a StableName for this heap object? */
1514   if (stable_ptr_table[index].sn_obj == NULL) {
1515     sn_obj = (StgStableName *) (Hp - sizeofW(StgStableName) + 1);
1516     SET_HDR(sn_obj,&stg_STABLE_NAME_info,CCCS);
1517     sn_obj->sn = index;
1518     stable_ptr_table[index].sn_obj = (StgClosure *)sn_obj;
1519   } else {
1520     (StgClosure *)sn_obj = stable_ptr_table[index].sn_obj;
1521   }
1522
1523   TICK_RET_UNBOXED_TUP(1);
1524   RET_P(sn_obj);
1525 }
1526
1527
1528 FN_(makeStablePtrzh_fast)
1529 {
1530   /* Args: R1 = a */
1531   StgStablePtr sp;
1532   FB_
1533   MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1534   sp = RET_STGCALL1(StgStablePtr,getStablePtr,R1.p);
1535   RET_N(sp);
1536   FE_
1537 }
1538
1539 FN_(deRefStablePtrzh_fast)
1540 {
1541   /* Args: R1 = the stable ptr */
1542   P_ r;
1543   StgStablePtr sp;
1544   FB_
1545   sp = (StgStablePtr)R1.w;
1546   r = stable_ptr_table[(StgWord)sp].addr;
1547   RET_P(r);
1548   FE_
1549 }
1550
1551 /* -----------------------------------------------------------------------------
1552    Bytecode object primitives
1553    -------------------------------------------------------------------------  */
1554
1555 FN_(newBCOzh_fast)
1556 {
1557   /* R1.p = instrs
1558      R2.p = literals
1559      R3.p = ptrs
1560      R4.p = itbls
1561   */
1562   StgBCO *bco;
1563   FB_
1564
1565   HP_CHK_GEN_TICKY(sizeofW(StgBCO),R1_PTR|R2_PTR|R3_PTR|R4_PTR, newBCOzh_fast,);
1566   TICK_ALLOC_PRIM(sizeofW(StgHeader), sizeofW(StgBCO)-sizeofW(StgHeader), 0);
1567   CCS_ALLOC(CCCS,sizeofW(StgBCO)); /* ccs prof */
1568   bco = (StgBCO *) (Hp + 1 - sizeofW(StgBCO));
1569   SET_HDR(bco, &stg_BCO_info, CCCS);
1570
1571   bco->instrs     = (StgArrWords*)R1.cl;
1572   bco->literals   = (StgArrWords*)R2.cl;
1573   bco->ptrs       = (StgMutArrPtrs*)R3.cl;
1574   bco->itbls      = (StgArrWords*)R4.cl;
1575
1576   TICK_RET_UNBOXED_TUP(1);
1577   RET_P(bco);
1578   FE_
1579 }
1580
1581 FN_(mkApUpd0zh_fast)
1582 {
1583   /* R1.p = the fn for the AP_UPD
1584   */
1585   StgAP_UPD* ap;
1586   FB_
1587   HP_CHK_GEN_TICKY(AP_sizeW(0), R1_PTR, mkApUpd0zh_fast,);
1588   TICK_ALLOC_PRIM(sizeofW(StgHeader), AP_sizeW(0)-sizeofW(StgHeader), 0);
1589   CCS_ALLOC(CCCS,AP_sizeW(0)); /* ccs prof */
1590   ap = (StgAP_UPD *) (Hp + 1 - AP_sizeW(0));
1591   SET_HDR(ap, &stg_AP_UPD_info, CCCS);
1592
1593   ap->n_args = 0;
1594   ap->fun = R1.cl;
1595
1596   TICK_RET_UNBOXED_TUP(1);
1597   RET_P(ap);
1598   FE_
1599 }
1600
1601 /* -----------------------------------------------------------------------------
1602    Thread I/O blocking primitives
1603    -------------------------------------------------------------------------- */
1604
1605 FN_(waitReadzh_fast)
1606 {
1607   FB_
1608     /* args: R1.i */
1609     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1610     CurrentTSO->why_blocked = BlockedOnRead;
1611     CurrentTSO->block_info.fd = R1.i;
1612     ACQUIRE_LOCK(&sched_mutex);
1613     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1614     RELEASE_LOCK(&sched_mutex);
1615     JMP_(stg_block_noregs);
1616   FE_
1617 }
1618
1619 FN_(waitWritezh_fast)
1620 {
1621   FB_
1622     /* args: R1.i */
1623     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1624     CurrentTSO->why_blocked = BlockedOnWrite;
1625     CurrentTSO->block_info.fd = R1.i;
1626     ACQUIRE_LOCK(&sched_mutex);
1627     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1628     RELEASE_LOCK(&sched_mutex);
1629     JMP_(stg_block_noregs);
1630   FE_
1631 }
1632
1633 FN_(delayzh_fast)
1634 {
1635   StgTSO *t, *prev;
1636   nat target;
1637   FB_
1638     /* args: R1.i */
1639     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1640     CurrentTSO->why_blocked = BlockedOnDelay;
1641
1642     ACQUIRE_LOCK(&sched_mutex);
1643
1644     target = (R1.i / (TICK_MILLISECS*1000)) + getourtimeofday();
1645     CurrentTSO->block_info.target = target;
1646
1647     /* Insert the new thread in the sleeping queue. */
1648     prev = NULL;
1649     t = sleeping_queue;
1650     while (t != END_TSO_QUEUE && t->block_info.target < target) {
1651         prev = t;
1652         t = t->link;
1653     }
1654
1655     CurrentTSO->link = t;
1656     if (prev == NULL) {
1657         sleeping_queue = CurrentTSO;
1658     } else {
1659         prev->link = CurrentTSO;
1660     }
1661
1662     RELEASE_LOCK(&sched_mutex);
1663     JMP_(stg_block_noregs);
1664   FE_
1665 }
1666