[project @ 2002-07-24 03:38:58 by sof]
[ghc-hetmet.git] / ghc / rts / PrimOps.hc
1 /* -----------------------------------------------------------------------------
2  * $Id: PrimOps.hc,v 1.100 2002/07/17 09:21:50 simonmar Exp $
3  *
4  * (c) The GHC Team, 1998-2000
5  *
6  * Primitive functions / data
7  *
8  * ---------------------------------------------------------------------------*/
9
10 #include "Stg.h"
11 #include "Rts.h"
12
13 #include "RtsFlags.h"
14 #include "StgStartup.h"
15 #include "SchedAPI.h"
16 #include "Schedule.h"
17 #include "RtsUtils.h"
18 #include "Storage.h"
19 #include "BlockAlloc.h" /* tmp */
20 #include "StablePriv.h"
21 #include "StgRun.h"
22 #include "Itimer.h"
23 #include "Prelude.h"
24
25 #ifdef HAVE_SYS_TYPES_H
26 # include <sys/types.h>
27 #endif
28
29 #include <stdlib.h>
30
31 /* ** temporary **
32
33    classes CCallable and CReturnable don't really exist, but the
34    compiler insists on generating dictionaries containing references
35    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
36    for these.  Some C compilers can't cope with zero-length static arrays,
37    so we have to make these one element long.
38 */
39
40 StgWord GHC_ZCCCallable_static_info[1];
41 StgWord GHC_ZCCReturnable_static_info[1];
42   
43 /* -----------------------------------------------------------------------------
44    Macros for Hand-written primitives.
45    -------------------------------------------------------------------------- */
46
47 /*
48  * Horrible macros for returning unboxed tuples.
49  *
50  * How an unboxed tuple is returned depends on two factors:
51  *    - the number of real registers we have available
52  *    - the boxedness of the returned fields.
53  *
54  * To return an unboxed tuple from a primitive operation, we have macros
55  * RET_<layout> where <layout> describes the boxedness of each field of the
56  * unboxed tuple:  N indicates a non-pointer field, and P indicates a pointer.
57  *
58  * We only define the cases actually used, to avoid having too much
59  * garbage in this section.  Warning: any bugs in here will be hard to
60  * track down.
61  */
62
63 /*------ All Regs available */
64 #if defined(REG_R8)
65 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
66 # define RET_N(a)     RET_P(a)
67
68 # define RET_PP(a,b)  R1.w = (W_)(a); R2.w = (W_)(b); JMP_(ENTRY_CODE(Sp[0]));
69 # define RET_NN(a,b)  RET_PP(a,b)
70 # define RET_NP(a,b)  RET_PP(a,b)
71
72 # define RET_PPP(a,b,c) \
73         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); JMP_(ENTRY_CODE(Sp[0]));
74 # define RET_NNP(a,b,c) RET_PPP(a,b,c)
75
76 # define RET_NNNP(a,b,c,d) \
77         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)d; \
78         JMP_(ENTRY_CODE(Sp[0]));
79
80 # define RET_NPNP(a,b,c,d) \
81         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)(d); \
82         JMP_(ENTRY_CODE(Sp[0]));
83
84 # define RET_NNPNNP(a,b,c,d,e,f) \
85         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); \
86         R4.w = (W_)(d); R5.w = (W_)(e); R6.w = (W_)(f); \
87         JMP_(ENTRY_CODE(Sp[0]));
88
89 #elif defined(REG_R7) || defined(REG_R6) || defined(REG_R5) || \
90       defined(REG_R4) || defined(REG_R3)
91 # error RET_n macros not defined for this setup.
92
93 /*------ 2 Registers available */
94 #elif defined(REG_R2)
95
96 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
97 # define RET_N(a)     RET_P(a)
98
99 # define RET_PP(a,b)   R1.w = (W_)(a); R2.w = (W_)(b); \
100                        JMP_(ENTRY_CODE(Sp[0]));
101 # define RET_NN(a,b)   RET_PP(a,b)
102 # define RET_NP(a,b)   RET_PP(a,b)
103
104 # define RET_PPP(a,b,c) \
105         R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \
106         JMP_(ENTRY_CODE(Sp[1]));
107 # define RET_NNP(a,b,c) \
108         R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \
109         JMP_(ENTRY_CODE(Sp[1]));
110
111 # define RET_NNNP(a,b,c,d)                      \
112         R1.w = (W_)(a);                         \
113         R2.w = (W_)(b);                         \
114     /*  Sp[-3] = ARGTAG(1); */                  \
115         Sp[-2] = (W_)(c);                       \
116         Sp[-1] = (W_)(d);                       \
117         Sp -= 3;                                \
118         JMP_(ENTRY_CODE(Sp[3]));
119
120 # define RET_NPNP(a,b,c,d)                      \
121         R1.w = (W_)(a);                         \
122         R2.w = (W_)(b);                         \
123     /*  Sp[-3] = ARGTAG(1); */                  \
124         Sp[-2] = (W_)(c);                       \
125         Sp[-1] = (W_)(d);                       \
126         Sp -= 3;                                \
127         JMP_(ENTRY_CODE(Sp[3]));
128
129 # define RET_NNPNNP(a,b,c,d,e,f)                \
130         R1.w = (W_)(a);                         \
131         R2.w = (W_)(b);                         \
132         Sp[-6] = (W_)(c);                       \
133         /* Sp[-5] = ARGTAG(1); */               \
134         Sp[-4] = (W_)(d);                       \
135         /* Sp[-3] = ARGTAG(1); */               \
136         Sp[-2] = (W_)(e);                       \
137         Sp[-1] = (W_)(f);                       \
138         Sp -= 6;                                \
139         JMP_(ENTRY_CODE(Sp[6]));
140
141 /*------ 1 Register available */
142 #elif defined(REG_R1)
143 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
144 # define RET_N(a)     RET_P(a)
145
146 # define RET_PP(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 1; \
147                        JMP_(ENTRY_CODE(Sp[1]));
148 # define RET_NN(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 2; \
149                        JMP_(ENTRY_CODE(Sp[2]));
150 # define RET_NP(a,b)   RET_PP(a,b)
151
152 # define RET_PPP(a,b,c) \
153         R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 2; \
154         JMP_(ENTRY_CODE(Sp[2]));
155 # define RET_NNP(a,b,c) \
156         R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 3; \
157         JMP_(ENTRY_CODE(Sp[3]));
158
159 # define RET_NNNP(a,b,c,d)                      \
160         R1.w = (W_)(a);                         \
161     /*  Sp[-5] = ARGTAG(1); */                  \
162         Sp[-4] = (W_)(b);                       \
163     /*  Sp[-3] = ARGTAG(1); */                  \
164         Sp[-2] = (W_)(c);                       \
165         Sp[-1] = (W_)(d);                       \
166         Sp -= 5;                                \
167         JMP_(ENTRY_CODE(Sp[5]));
168
169 # define RET_NPNP(a,b,c,d)                      \
170         R1.w = (W_)(a);                         \
171         Sp[-4] = (W_)(b);                       \
172     /*  Sp[-3] = ARGTAG(1); */                  \
173         Sp[-2] = (W_)(c);                       \
174         Sp[-1] = (W_)(d);                       \
175         Sp -= 4;                                \
176         JMP_(ENTRY_CODE(Sp[4]));
177
178 # define RET_NNPNNP(a,b,c,d,e,f)                \
179         R1.w = (W_)(a);                         \
180         Sp[-1] = (W_)(f);                       \
181         Sp[-2] = (W_)(e);                       \
182         /* Sp[-3] = ARGTAG(1); */               \
183         Sp[-4] = (W_)(d);                       \
184         /* Sp[-5] = ARGTAG(1); */               \
185         Sp[-6] = (W_)(c);                       \
186         Sp[-7] = (W_)(b);                       \
187         /* Sp[-8] = ARGTAG(1); */               \
188         Sp -= 8;                                \
189         JMP_(ENTRY_CODE(Sp[8]));
190
191 #else /* 0 Regs available */
192
193 #define PUSH_P(o,x) Sp[-o] = (W_)(x)
194
195 #ifdef DEBUG
196 #define PUSH_N(o,x) Sp[1-o] = (W_)(x);  Sp[-o] = ARG_TAG(1);
197 #else
198 #define PUSH_N(o,x) Sp[1-o] = (W_)(x);
199 #endif
200
201 #define PUSHED(m)   Sp -= (m); JMP_(ENTRY_CODE(Sp[m]));
202
203 /* Here's how to construct these macros:
204  *
205  *   N = number of N's in the name;
206  *   P = number of P's in the name;
207  *   s = N * 2 + P;
208  *   while (nonNull(name)) {
209  *     if (nextChar == 'P') {
210  *       PUSH_P(s,_);
211  *       s -= 1;
212  *     } else {
213  *       PUSH_N(s,_);
214  *       s -= 2
215  *     }
216  *   }
217  *   PUSHED(N * 2 + P);
218  */
219
220 # define RET_P(a)     PUSH_P(1,a); PUSHED(1)
221 # define RET_N(a)     PUSH_N(2,a); PUSHED(2)
222
223 # define RET_PP(a,b)   PUSH_P(2,a); PUSH_P(1,b); PUSHED(2)
224 # define RET_NN(a,b)   PUSH_N(4,a); PUSH_N(2,b); PUSHED(4)
225 # define RET_NP(a,b)   PUSH_N(3,a); PUSH_P(1,b); PUSHED(3)
226
227 # define RET_PPP(a,b,c) PUSH_P(3,a); PUSH_P(2,b); PUSH_P(1,c); PUSHED(3)
228 # define RET_NNP(a,b,c) PUSH_N(5,a); PUSH_N(3,b); PUSH_P(1,c); PUSHED(5)
229
230 # define RET_NNNP(a,b,c,d) PUSH_N(7,a); PUSH_N(5,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(7)        
231 # define RET_NPNP(a,b,c,d) PUSH_N(6,a); PUSH_P(4,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(6)        
232 # define RET_NNPNNP(a,b,c,d,e,f) PUSH_N(10,a); PUSH_N(8,b); PUSH_P(6,c); PUSH_N(5,d); PUSH_N(3,e); PUSH_P(1,f); PUSHED(10)
233
234 #endif
235
236 /*-----------------------------------------------------------------------------
237   Array Primitives
238
239   Basically just new*Array - the others are all inline macros.
240
241   The size arg is always passed in R1, and the result returned in R1.
242
243   The slow entry point is for returning from a heap check, the saved
244   size argument must be re-loaded from the stack.
245   -------------------------------------------------------------------------- */
246
247 /* for objects that are *less* than the size of a word, make sure we
248  * round up to the nearest word for the size of the array.
249  */
250
251 #define BYTES_TO_STGWORDS(n) ((n) + sizeof(W_) - 1)/sizeof(W_)
252
253 FN_(newByteArrayzh_fast)
254  {
255    W_ size, stuff_size, n;
256    StgArrWords* p;
257    FB_
258      MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
259      n = R1.w;
260      stuff_size = BYTES_TO_STGWORDS(n);
261      size = sizeofW(StgArrWords)+ stuff_size;
262      p = (StgArrWords *)RET_STGCALL1(P_,allocate,size);
263      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
264      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
265      p->words = stuff_size;
266      TICK_RET_UNBOXED_TUP(1)
267      RET_P(p);
268    FE_
269  }
270
271 FN_(newPinnedByteArrayzh_fast)
272  {
273    W_ size, stuff_size, n;
274    StgArrWords* p;
275    FB_
276      MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
277      n = R1.w;
278      stuff_size = BYTES_TO_STGWORDS(n);
279
280      // We want an 8-byte aligned array.  allocatePinned() gives us
281      // 8-byte aligned memory by default, but we want to align the
282      // *goods* inside the ArrWords object, so we have to check the
283      // size of the ArrWords header and adjust our size accordingly.
284      size = sizeofW(StgArrWords)+ stuff_size;
285      if ((sizeof(StgArrWords) & 7) != 0) {
286          size++;
287      }
288
289      p = (StgArrWords *)RET_STGCALL1(P_,allocatePinned,size);
290      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
291
292      // Again, if the ArrWords header isn't a multiple of 8 bytes, we
293      // have to push the object forward one word so that the goods
294      // fall on an 8-byte boundary.
295      if ((sizeof(StgArrWords) & 7) != 0) {
296          ((StgPtr)p)++;
297      }
298
299      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
300      p->words = stuff_size;
301      TICK_RET_UNBOXED_TUP(1)
302      RET_P(p);
303    FE_
304  }
305
306 FN_(newArrayzh_fast)
307 {
308   W_ size, n, init;
309   StgMutArrPtrs* arr;
310   StgPtr p;
311   FB_
312     n = R1.w;
313
314     MAYBE_GC(R2_PTR,newArrayzh_fast);
315
316     size = sizeofW(StgMutArrPtrs) + n;
317     arr = (StgMutArrPtrs *)RET_STGCALL1(P_, allocate, size);
318     TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
319
320     SET_HDR(arr,&stg_MUT_ARR_PTRS_info,CCCS);
321     arr->ptrs = n;
322
323     init = R2.w;
324     for (p = (P_)arr + sizeofW(StgMutArrPtrs); 
325          p < (P_)arr + size; p++) {
326         *p = (W_)init;
327     }
328
329     TICK_RET_UNBOXED_TUP(1);
330     RET_P(arr);
331   FE_
332 }
333
334 FN_(newMutVarzh_fast)
335 {
336   StgMutVar* mv;
337   /* Args: R1.p = initialisation value */
338   FB_
339
340   HP_CHK_GEN_TICKY(sizeofW(StgMutVar), R1_PTR, newMutVarzh_fast,);
341   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,1, 0); /* hack, dependent on rep. */
342   CCS_ALLOC(CCCS,sizeofW(StgMutVar));
343
344   mv = (StgMutVar *)(Hp-sizeofW(StgMutVar)+1);
345   SET_HDR(mv,&stg_MUT_VAR_info,CCCS);
346   mv->var = R1.cl;
347
348   TICK_RET_UNBOXED_TUP(1);
349   RET_P(mv);
350   FE_
351 }
352
353 /* -----------------------------------------------------------------------------
354    Foreign Object Primitives
355    -------------------------------------------------------------------------- */
356
357 FN_(mkForeignObjzh_fast)
358 {
359   /* R1.p = ptr to foreign object,
360   */
361   StgForeignObj *result;
362   FB_
363
364   HP_CHK_GEN_TICKY(sizeofW(StgForeignObj), NO_PTRS, mkForeignObjzh_fast,);
365   TICK_ALLOC_PRIM(sizeofW(StgHeader),
366                   sizeofW(StgForeignObj)-sizeofW(StgHeader), 0);
367   CCS_ALLOC(CCCS,sizeofW(StgForeignObj)); /* ccs prof */
368
369   result = (StgForeignObj *) (Hp + 1 - sizeofW(StgForeignObj));
370   SET_HDR(result,&stg_FOREIGN_info,CCCS);
371   result->data = R1.p;
372
373   /* returns (# s#, ForeignObj# #) */
374   TICK_RET_UNBOXED_TUP(1);
375   RET_P(result);
376   FE_
377 }
378
379 /* These two are out-of-line for the benefit of the NCG */
380 FN_(unsafeThawArrayzh_fast)
381 {
382   FB_
383   SET_INFO((StgClosure *)R1.cl,&stg_MUT_ARR_PTRS_info);
384   recordMutable((StgMutClosure*)R1.cl);
385
386   TICK_RET_UNBOXED_TUP(1);
387   RET_P(R1.p);
388   FE_
389 }
390
391 /* -----------------------------------------------------------------------------
392    Weak Pointer Primitives
393    -------------------------------------------------------------------------- */
394
395 FN_(mkWeakzh_fast)
396 {
397   /* R1.p = key
398      R2.p = value
399      R3.p = finalizer (or NULL)
400   */
401   StgWeak *w;
402   FB_
403
404   if (R3.cl == NULL) {
405     R3.cl = &stg_NO_FINALIZER_closure;
406   }
407
408   HP_CHK_GEN_TICKY(sizeofW(StgWeak),R1_PTR|R2_PTR|R3_PTR, mkWeakzh_fast,);
409   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,  // +1 is for the link field
410                   sizeofW(StgWeak)-sizeofW(StgHeader)-1, 0);
411   CCS_ALLOC(CCCS,sizeofW(StgWeak)); /* ccs prof */
412
413   w = (StgWeak *) (Hp + 1 - sizeofW(StgWeak));
414   SET_HDR(w, &stg_WEAK_info, CCCS);
415
416   w->key        = R1.cl;
417   w->value      = R2.cl;
418   w->finalizer  = R3.cl;
419
420   w->link       = weak_ptr_list;
421   weak_ptr_list = w;
422   IF_DEBUG(weak, fprintf(stderr,"New weak pointer at %p\n",w));
423
424   TICK_RET_UNBOXED_TUP(1);
425   RET_P(w);
426   FE_
427 }
428
429 FN_(finalizzeWeakzh_fast)
430 {
431   /* R1.p = weak ptr
432    */
433   StgDeadWeak *w;
434   StgClosure *f;
435   FB_
436   TICK_RET_UNBOXED_TUP(0);
437   w = (StgDeadWeak *)R1.p;
438
439   /* already dead? */
440   if (w->header.info == &stg_DEAD_WEAK_info) {
441       RET_NP(0,&stg_NO_FINALIZER_closure);
442   }
443
444   /* kill it */
445 #ifdef PROFILING
446   // @LDV profiling
447   // A weak pointer is inherently used, so we do not need to call
448   // LDV_recordDead_FILL_SLOP_DYNAMIC():
449   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
450   // or, LDV_recordDead():
451   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
452   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
453   // large as weak pointers, so there is no need to fill the slop, either.
454   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
455 #endif
456   //
457   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
458   //
459   w->header.info = &stg_DEAD_WEAK_info;
460 #ifdef PROFILING
461   // @LDV profiling
462   LDV_recordCreate((StgClosure *)w);
463 #endif
464   f = ((StgWeak *)w)->finalizer;
465   w->link = ((StgWeak *)w)->link;
466
467   /* return the finalizer */
468   if (f == &stg_NO_FINALIZER_closure) {
469       RET_NP(0,&stg_NO_FINALIZER_closure);
470   } else {
471       RET_NP(1,f);
472   }
473   FE_
474 }
475
476 FN_(deRefWeakzh_fast)
477 {
478   /* R1.p = weak ptr */
479   StgWeak* w;
480   I_       code;
481   P_       val;
482   FB_
483   w = (StgWeak*)R1.p;
484   if (w->header.info == &stg_WEAK_info) {
485     code = 1;
486     val = (P_)((StgWeak *)w)->value;
487   } else {
488     code = 0;
489     val = (P_)w;
490   }
491   RET_NP(code,val);
492   FE_
493 }
494
495 /* -----------------------------------------------------------------------------
496    Arbitrary-precision Integer operations.
497    -------------------------------------------------------------------------- */
498
499 FN_(int2Integerzh_fast)
500 {
501    /* arguments: R1 = Int# */
502
503    I_ val, s;           /* to avoid aliasing */
504    StgArrWords* p;      /* address of array result */
505    FB_
506
507    val = R1.i;
508    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, int2Integerzh_fast,);
509    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
510    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
511
512    p = (StgArrWords *)Hp - 1;
513    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
514
515    /* mpz_set_si is inlined here, makes things simpler */
516    if (val < 0) { 
517         s  = -1;
518         *Hp = -val;
519    } else if (val > 0) {
520         s = 1;
521         *Hp = val;
522    } else {
523         s = 0;
524    }
525
526    /* returns (# size  :: Int#, 
527                  data  :: ByteArray# 
528                #)
529    */
530    TICK_RET_UNBOXED_TUP(2);
531    RET_NP(s,p);
532    FE_
533 }
534
535 FN_(word2Integerzh_fast)
536 {
537    /* arguments: R1 = Word# */
538
539    W_ val;              /* to avoid aliasing */
540    I_  s;
541    StgArrWords* p;      /* address of array result */
542    FB_
543
544    val = R1.w;
545    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, word2Integerzh_fast,)
546    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
547    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
548
549    p = (StgArrWords *)Hp - 1;
550    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
551
552    if (val != 0) {
553         s = 1;
554         *Hp = val;
555    } else {
556         s = 0;
557    }
558
559    /* returns (# size  :: Int#, 
560                  data  :: ByteArray# 
561                #)
562    */
563    TICK_RET_UNBOXED_TUP(2);
564    RET_NP(s,p);
565    FE_
566 }
567
568
569 /*
570  * 'long long' primops for converting to/from Integers.
571  */
572
573 #ifdef SUPPORT_LONG_LONGS
574
575 FN_(int64ToIntegerzh_fast)
576 {
577    /* arguments: L1 = Int64# */
578
579    StgInt64  val; /* to avoid aliasing */
580    W_ hi;
581    I_  s, neg, words_needed;
582    StgArrWords* p;      /* address of array result */
583    FB_
584
585    val = (LI_)L1;
586    neg = 0;
587
588    if ( val >= 0x100000000LL || val <= -0x100000000LL )  { 
589        words_needed = 2;
590    } else { 
591        /* minimum is one word */
592        words_needed = 1;
593    }
594    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, int64ToIntegerzh_fast,)
595    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
596    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
597
598    p = (StgArrWords *)(Hp-words_needed+1) - 1;
599    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
600
601    if ( val < 0LL ) {
602      neg = 1;
603      val = -val;
604    }
605
606    hi = (W_)((LW_)val / 0x100000000ULL);
607
608    if ( words_needed == 2 )  { 
609       s = 2;
610       Hp[-1] = (W_)val;
611       Hp[0] = hi;
612    } else if ( val != 0 ) {
613       s = 1;
614       Hp[0] = (W_)val;
615    }  else /* val==0 */   {
616       s = 0;
617    }
618    s = ( neg ? -s : s );
619
620    /* returns (# size  :: Int#, 
621                  data  :: ByteArray# 
622                #)
623    */
624    TICK_RET_UNBOXED_TUP(2);
625    RET_NP(s,p);
626    FE_
627 }
628
629 FN_(word64ToIntegerzh_fast)
630 {
631    /* arguments: L1 = Word64# */
632
633    StgWord64 val; /* to avoid aliasing */
634    StgWord hi;
635    I_  s, words_needed;
636    StgArrWords* p;      /* address of array result */
637    FB_
638
639    val = (LW_)L1;
640    if ( val >= 0x100000000ULL ) {
641       words_needed = 2;
642    } else {
643       words_needed = 1;
644    }
645    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, word64ToIntegerzh_fast,)
646    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
647    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
648
649    p = (StgArrWords *)(Hp-words_needed+1) - 1;
650    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
651
652    hi = (W_)((LW_)val / 0x100000000ULL);
653    if ( val >= 0x100000000ULL ) { 
654      s = 2;
655      Hp[-1] = ((W_)val);
656      Hp[0]  = (hi);
657    } else if ( val != 0 )      {
658       s = 1;
659       Hp[0] = ((W_)val);
660    } else /* val==0 */         {
661       s = 0;
662    }
663
664    /* returns (# size  :: Int#, 
665                  data  :: ByteArray# 
666                #)
667    */
668    TICK_RET_UNBOXED_TUP(2);
669    RET_NP(s,p);
670    FE_
671 }
672
673
674 #endif /* SUPPORT_LONG_LONGS */
675
676 /* ToDo: this is shockingly inefficient */
677
678 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
679 FN_(name)                                                               \
680 {                                                                       \
681   MP_INT arg1, arg2, result;                                            \
682   I_ s1, s2;                                                            \
683   StgArrWords* d1;                                                      \
684   StgArrWords* d2;                                                      \
685   FB_                                                                   \
686                                                                         \
687   /* call doYouWantToGC() */                                            \
688   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
689                                                                         \
690   d1 = (StgArrWords *)R2.p;                                             \
691   s1 = R1.i;                                                            \
692   d2 = (StgArrWords *)R4.p;                                             \
693   s2 = R3.i;                                                            \
694                                                                         \
695   arg1._mp_alloc        = d1->words;                                    \
696   arg1._mp_size         = (s1);                                         \
697   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
698   arg2._mp_alloc        = d2->words;                                    \
699   arg2._mp_size         = (s2);                                         \
700   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
701                                                                         \
702   STGCALL1(mpz_init,&result);                                           \
703                                                                         \
704   /* Perform the operation */                                           \
705   STGCALL3(mp_fun,&result,&arg1,&arg2);                                 \
706                                                                         \
707   TICK_RET_UNBOXED_TUP(2);                                              \
708   RET_NP(result._mp_size,                                               \
709          result._mp_d-sizeofW(StgArrWords));                            \
710   FE_                                                                   \
711 }
712
713 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
714 FN_(name)                                                               \
715 {                                                                       \
716   MP_INT arg1, result;                                                  \
717   I_ s1;                                                                \
718   StgArrWords* d1;                                                      \
719   FB_                                                                   \
720                                                                         \
721   /* call doYouWantToGC() */                                            \
722   MAYBE_GC(R2_PTR, name);                                               \
723                                                                         \
724   d1 = (StgArrWords *)R2.p;                                             \
725   s1 = R1.i;                                                            \
726                                                                         \
727   arg1._mp_alloc        = d1->words;                                    \
728   arg1._mp_size         = (s1);                                         \
729   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
730                                                                         \
731   STGCALL1(mpz_init,&result);                                           \
732                                                                         \
733   /* Perform the operation */                                           \
734   STGCALL2(mp_fun,&result,&arg1);                                       \
735                                                                         \
736   TICK_RET_UNBOXED_TUP(2);                                              \
737   RET_NP(result._mp_size,                                               \
738          result._mp_d-sizeofW(StgArrWords));                            \
739   FE_                                                                   \
740 }
741
742 #define GMP_TAKE2_RET2(name,mp_fun)                                     \
743 FN_(name)                                                               \
744 {                                                                       \
745   MP_INT arg1, arg2, result1, result2;                                  \
746   I_ s1, s2;                                                            \
747   StgArrWords* d1;                                                      \
748   StgArrWords* d2;                                                      \
749   FB_                                                                   \
750                                                                         \
751   /* call doYouWantToGC() */                                            \
752   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
753                                                                         \
754   d1 = (StgArrWords *)R2.p;                                             \
755   s1 = R1.i;                                                            \
756   d2 = (StgArrWords *)R4.p;                                             \
757   s2 = R3.i;                                                            \
758                                                                         \
759   arg1._mp_alloc        = d1->words;                                    \
760   arg1._mp_size         = (s1);                                         \
761   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
762   arg2._mp_alloc        = d2->words;                                    \
763   arg2._mp_size         = (s2);                                         \
764   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
765                                                                         \
766   STGCALL1(mpz_init,&result1);                                          \
767   STGCALL1(mpz_init,&result2);                                          \
768                                                                         \
769   /* Perform the operation */                                           \
770   STGCALL4(mp_fun,&result1,&result2,&arg1,&arg2);                       \
771                                                                         \
772   TICK_RET_UNBOXED_TUP(4);                                              \
773   RET_NPNP(result1._mp_size,                                            \
774            result1._mp_d-sizeofW(StgArrWords),                          \
775            result2._mp_size,                                            \
776            result2._mp_d-sizeofW(StgArrWords));                         \
777   FE_                                                                   \
778 }
779
780 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add);
781 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub);
782 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul);
783 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd);
784 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q);
785 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r);
786 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact);
787 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and);
788 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior);
789 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor);
790 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com);
791
792 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr);
793 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr);
794
795
796 FN_(gcdIntzh_fast)
797 {
798   /* R1 = the first Int#; R2 = the second Int# */
799   mp_limb_t aa;
800   I_ r;
801   FB_
802   aa = (mp_limb_t)(R1.i);
803   r = RET_STGCALL3(StgInt, mpn_gcd_1, (mp_limb_t *)(&aa), 1, (mp_limb_t)(R2.i));
804
805   R1.i = r;
806   /* Result parked in R1, return via info-pointer at TOS */
807   JMP_(ENTRY_CODE(Sp[0]));
808   FE_
809 }
810
811 FN_(gcdIntegerIntzh_fast)
812 {
813   /* R1 = s1; R2 = d1; R3 = the int */
814   I_ r;
815   FB_
816   r = RET_STGCALL3(StgInt,mpn_gcd_1,(mp_limb_t *)(BYTE_ARR_CTS(R2.p)), R1.i, R3.i);
817
818   R1.i = r;
819   /* Result parked in R1, return via info-pointer at TOS */
820   JMP_(ENTRY_CODE(Sp[0]));
821   FE_
822 }
823
824 FN_(cmpIntegerIntzh_fast)
825 {
826   /* R1 = s1; R2 = d1; R3 = the int */
827   I_ usize;
828   I_ vsize;
829   I_ v_digit;
830   mp_limb_t u_digit;
831   FB_
832
833   usize = R1.i;
834   vsize = 0;
835   v_digit = R3.i;
836
837   // paraphrased from mpz_cmp_si() in the GMP sources
838   if (v_digit > 0) {
839       vsize = 1;
840   } else if (v_digit < 0) {
841       vsize = -1;
842       v_digit = -v_digit;
843   }
844
845   if (usize != vsize) {
846     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
847   }
848
849   if (usize == 0) {
850     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
851   }
852
853   u_digit = *(mp_limb_t *)(BYTE_ARR_CTS(R2.p));
854
855   if (u_digit == (mp_limb_t) (unsigned long) v_digit) {
856     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
857   }
858
859   if (u_digit > (mp_limb_t) (unsigned long) v_digit) {
860     R1.i = usize; 
861   } else {
862     R1.i = -usize; 
863   }
864
865   JMP_(ENTRY_CODE(Sp[0]));
866   FE_
867 }
868
869 FN_(cmpIntegerzh_fast)
870 {
871   /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
872   I_ usize;
873   I_ vsize;
874   I_ size;
875   StgPtr up, vp;
876   int cmp;
877   FB_
878
879   // paraphrased from mpz_cmp() in the GMP sources
880   usize = R1.i;
881   vsize = R3.i;
882
883   if (usize != vsize) {
884     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
885   }
886
887   if (usize == 0) {
888     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
889   }
890
891   size = abs(usize);
892
893   up = BYTE_ARR_CTS(R2.p);
894   vp = BYTE_ARR_CTS(R4.p);
895
896   cmp = RET_STGCALL3(I_, mpn_cmp, (mp_limb_t *)up, (mp_limb_t *)vp, size);
897
898   if (cmp == 0) {
899     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
900   }
901
902   if ((cmp < 0) == (usize < 0)) {
903     R1.i = 1;
904   } else {
905     R1.i = (-1); 
906   }
907   /* Result parked in R1, return via info-pointer at TOS */
908   JMP_(ENTRY_CODE(Sp[0]));
909   FE_
910 }
911
912 FN_(integer2Intzh_fast)
913 {
914   /* R1 = s; R2 = d */
915   I_ r, s;
916   FB_
917   s = R1.i;
918   if (s == 0)
919     r = 0;
920   else {
921     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
922     if (s < 0) r = -r;
923   }
924   /* Result parked in R1, return via info-pointer at TOS */
925   R1.i = r;
926   JMP_(ENTRY_CODE(Sp[0]));
927   FE_
928 }
929
930 FN_(integer2Wordzh_fast)
931 {
932   /* R1 = s; R2 = d */
933   I_ s;
934   W_ r;
935   FB_
936   s = R1.i;
937   if (s == 0)
938     r = 0;
939   else {
940     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
941     if (s < 0) r = -r;
942   }
943   /* Result parked in R1, return via info-pointer at TOS */
944   R1.w = r;
945   JMP_(ENTRY_CODE(Sp[0]));
946   FE_
947 }
948
949
950 FN_(decodeFloatzh_fast)
951
952   MP_INT mantissa;
953   I_ exponent;
954   StgArrWords* p;
955   StgFloat arg;
956   FB_
957
958   /* arguments: F1 = Float# */
959   arg = F1;
960
961   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, decodeFloatzh_fast,);
962   TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
963   CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
964
965   /* Be prepared to tell Lennart-coded __decodeFloat    */
966   /* where mantissa._mp_d can be put (it does not care about the rest) */
967   p = (StgArrWords *)Hp - 1;
968   SET_ARR_HDR(p,&stg_ARR_WORDS_info,CCCS,1)
969   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
970
971   /* Perform the operation */
972   STGCALL3(__decodeFloat,&mantissa,&exponent,arg);
973
974   /* returns: (Int# (expn), Int#, ByteArray#) */
975   TICK_RET_UNBOXED_TUP(3);
976   RET_NNP(exponent,mantissa._mp_size,p);
977   FE_
978 }
979
980 #define DOUBLE_MANTISSA_SIZE (sizeofW(StgDouble))
981 #define ARR_SIZE (sizeofW(StgArrWords) + DOUBLE_MANTISSA_SIZE)
982
983 FN_(decodeDoublezh_fast)
984 { MP_INT mantissa;
985   I_ exponent;
986   StgDouble arg;
987   StgArrWords* p;
988   FB_
989
990   /* arguments: D1 = Double# */
991   arg = D1;
992
993   HP_CHK_GEN_TICKY(ARR_SIZE, NO_PTRS, decodeDoublezh_fast,);
994   TICK_ALLOC_PRIM(sizeofW(StgArrWords),DOUBLE_MANTISSA_SIZE,0);
995   CCS_ALLOC(CCCS,ARR_SIZE); /* ccs prof */
996
997   /* Be prepared to tell Lennart-coded __decodeDouble   */
998   /* where mantissa.d can be put (it does not care about the rest) */
999   p = (StgArrWords *)(Hp-ARR_SIZE+1);
1000   SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, DOUBLE_MANTISSA_SIZE);
1001   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
1002
1003   /* Perform the operation */
1004   STGCALL3(__decodeDouble,&mantissa,&exponent,arg);
1005
1006   /* returns: (Int# (expn), Int#, ByteArray#) */
1007   TICK_RET_UNBOXED_TUP(3);
1008   RET_NNP(exponent,mantissa._mp_size,p);
1009   FE_
1010 }
1011
1012 /* -----------------------------------------------------------------------------
1013  * Concurrency primitives
1014  * -------------------------------------------------------------------------- */
1015
1016 FN_(forkzh_fast)
1017 {
1018   FB_
1019   /* args: R1 = closure to spark */
1020   
1021   MAYBE_GC(R1_PTR, forkzh_fast);
1022
1023   /* create it right now, return ThreadID in R1 */
1024   R1.t = RET_STGCALL2(StgTSO *, createIOThread, 
1025                      RtsFlags.GcFlags.initialStkSize, R1.cl);
1026   STGCALL1(scheduleThread, R1.t);
1027       
1028   /* switch at the earliest opportunity */ 
1029   context_switch = 1;
1030   
1031   RET_P(R1.t);
1032   FE_
1033 }
1034
1035 FN_(forkProcesszh_fast)
1036 {
1037   pid_t pid;
1038
1039   FB_
1040   /* args: none */
1041   /* result: Pid */
1042
1043   R1.i = RET_STGCALL1(StgInt, forkProcess, CurrentTSO);
1044
1045   JMP_(ENTRY_CODE(Sp[0]));
1046
1047   FE_
1048 }
1049
1050 FN_(yieldzh_fast)
1051 {
1052   FB_
1053   JMP_(stg_yield_noregs);
1054   FE_
1055 }
1056
1057 FN_(myThreadIdzh_fast)
1058 {
1059   /* no args. */
1060   FB_
1061   RET_P((P_)CurrentTSO);
1062   FE_
1063 }
1064
1065 FN_(labelThreadzh_fast)
1066 {
1067   FB_
1068   /* args: 
1069         R1.p = ThreadId#
1070         R2.p = Addr# */
1071 #ifdef DEBUG
1072   STGCALL2(labelThread,(StgTSO *)R1.p,(char *)R2.p);
1073 #endif
1074   JMP_(ENTRY_CODE(Sp[0]));
1075   FE_
1076 }
1077
1078
1079 /* -----------------------------------------------------------------------------
1080  * MVar primitives
1081  *
1082  * take & putMVar work as follows.  Firstly, an important invariant:
1083  *
1084  *    If the MVar is full, then the blocking queue contains only
1085  *    threads blocked on putMVar, and if the MVar is empty then the
1086  *    blocking queue contains only threads blocked on takeMVar.
1087  *
1088  * takeMvar:
1089  *    MVar empty : then add ourselves to the blocking queue
1090  *    MVar full  : remove the value from the MVar, and
1091  *                 blocking queue empty     : return
1092  *                 blocking queue non-empty : perform the first blocked putMVar
1093  *                                            from the queue, and wake up the
1094  *                                            thread (MVar is now full again)
1095  *
1096  * putMVar is just the dual of the above algorithm.
1097  *
1098  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1099  * the stack of the thread waiting to do the putMVar.  See
1100  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1101  * the stack layout, and the PerformPut and PerformTake macros below.
1102  *
1103  * It is important that a blocked take or put is woken up with the
1104  * take/put already performed, because otherwise there would be a
1105  * small window of vulnerability where the thread could receive an
1106  * exception and never perform its take or put, and we'd end up with a
1107  * deadlock.
1108  *
1109  * -------------------------------------------------------------------------- */
1110
1111 FN_(isEmptyMVarzh_fast)
1112 {
1113   /* args: R1 = MVar closure */
1114   I_ r;
1115   FB_
1116   r = (I_)((GET_INFO((StgMVar*)(R1.p))) == &stg_EMPTY_MVAR_info);
1117   RET_N(r);
1118   FE_
1119 }
1120
1121
1122 FN_(newMVarzh_fast)
1123 {
1124   StgMVar *mvar;
1125
1126   FB_
1127   /* args: none */
1128
1129   HP_CHK_GEN_TICKY(sizeofW(StgMVar), NO_PTRS, newMVarzh_fast,);
1130   TICK_ALLOC_PRIM(sizeofW(StgMutVar)-1, // consider head,tail,link as admin wds
1131                   1, 0);
1132   CCS_ALLOC(CCCS,sizeofW(StgMVar)); /* ccs prof */
1133   
1134   mvar = (StgMVar *) (Hp - sizeofW(StgMVar) + 1);
1135   SET_HDR(mvar,&stg_EMPTY_MVAR_info,CCCS);
1136   mvar->head = mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1137   mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1138
1139   TICK_RET_UNBOXED_TUP(1);
1140   RET_P(mvar);
1141   FE_
1142 }
1143
1144 /* If R1 isn't available, pass it on the stack */
1145 #ifdef REG_R1
1146 #define PerformTake(tso, value) ({              \
1147     (tso)->sp[1] = (W_)value;                   \
1148     (tso)->sp[0] = (W_)&stg_gc_unpt_r1_info;    \
1149   })
1150 #else
1151 #define PerformTake(tso, value) ({              \
1152     (tso)->sp[1] = (W_)value;                   \
1153     (tso)->sp[0] = (W_)&stg_ut_1_0_unreg_info;  \
1154   })
1155 #endif
1156
1157
1158 #define PerformPut(tso) ({                              \
1159     StgClosure *val = (StgClosure *)(tso)->sp[2];       \
1160     (tso)->sp[2] = (W_)&stg_gc_noregs_info;             \
1161     (tso)->sp += 2;                                     \
1162     val;                                                \
1163   })
1164
1165 FN_(takeMVarzh_fast)
1166 {
1167   StgMVar *mvar;
1168   StgClosure *val;
1169   const StgInfoTable *info;
1170
1171   FB_
1172   /* args: R1 = MVar closure */
1173
1174   mvar = (StgMVar *)R1.p;
1175
1176 #ifdef SMP
1177   info = LOCK_CLOSURE(mvar);
1178 #else
1179   info = GET_INFO(mvar);
1180 #endif
1181
1182   /* If the MVar is empty, put ourselves on its blocking queue,
1183    * and wait until we're woken up.
1184    */
1185   if (info == &stg_EMPTY_MVAR_info) {
1186     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1187       mvar->head = CurrentTSO;
1188     } else {
1189       mvar->tail->link = CurrentTSO;
1190     }
1191     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1192     CurrentTSO->why_blocked = BlockedOnMVar;
1193     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1194     mvar->tail = CurrentTSO;
1195
1196 #ifdef SMP
1197     /* unlock the MVar */
1198     mvar->header.info = &stg_EMPTY_MVAR_info;
1199 #endif
1200     JMP_(stg_block_takemvar);
1201   }
1202
1203   /* we got the value... */
1204   val = mvar->value;
1205
1206   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1207       /* There are putMVar(s) waiting... 
1208        * wake up the first thread on the queue
1209        */
1210       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1211
1212       /* actually perform the putMVar for the thread that we just woke up */
1213       mvar->value = PerformPut(mvar->head);
1214
1215 #if defined(GRAN) || defined(PAR)
1216       /* ToDo: check 2nd arg (mvar) is right */
1217       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1218 #else
1219       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1220 #endif
1221       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1222           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1223       }
1224 #ifdef SMP
1225       /* unlock in the SMP case */
1226       SET_INFO(mvar,&stg_FULL_MVAR_info);
1227 #endif
1228       TICK_RET_UNBOXED_TUP(1);
1229       RET_P(val);
1230   } else {
1231       /* No further putMVars, MVar is now empty */
1232
1233       /* do this last... we might have locked the MVar in the SMP case,
1234        * and writing the info pointer will unlock it.
1235        */
1236       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1237       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1238       TICK_RET_UNBOXED_TUP(1);
1239       RET_P(val);
1240   }
1241   FE_
1242 }
1243
1244 FN_(tryTakeMVarzh_fast)
1245 {
1246   StgMVar *mvar;
1247   StgClosure *val;
1248   const StgInfoTable *info;
1249
1250   FB_
1251   /* args: R1 = MVar closure */
1252
1253   mvar = (StgMVar *)R1.p;
1254
1255 #ifdef SMP
1256   info = LOCK_CLOSURE(mvar);
1257 #else
1258   info = GET_INFO(mvar);
1259 #endif
1260
1261   if (info == &stg_EMPTY_MVAR_info) {
1262
1263 #ifdef SMP
1264       /* unlock the MVar */
1265       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1266 #endif
1267
1268       /* HACK: we need a pointer to pass back, 
1269        * so we abuse NO_FINALIZER_closure
1270        */
1271       RET_NP(0, &stg_NO_FINALIZER_closure);
1272   }
1273
1274   /* we got the value... */
1275   val = mvar->value;
1276
1277   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1278       /* There are putMVar(s) waiting... 
1279        * wake up the first thread on the queue
1280        */
1281       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1282
1283       /* actually perform the putMVar for the thread that we just woke up */
1284       mvar->value = PerformPut(mvar->head);
1285
1286 #if defined(GRAN) || defined(PAR)
1287       /* ToDo: check 2nd arg (mvar) is right */
1288       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1289 #else
1290       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1291 #endif
1292       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1293           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1294       }
1295 #ifdef SMP
1296       /* unlock in the SMP case */
1297       SET_INFO(mvar,&stg_FULL_MVAR_info);
1298 #endif
1299   } else {
1300       /* No further putMVars, MVar is now empty */
1301       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1302
1303       /* do this last... we might have locked the MVar in the SMP case,
1304        * and writing the info pointer will unlock it.
1305        */
1306       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1307   }
1308
1309   TICK_RET_UNBOXED_TUP(1);
1310   RET_NP((I_)1, val);
1311   FE_
1312 }
1313
1314 FN_(putMVarzh_fast)
1315 {
1316   StgMVar *mvar;
1317   const StgInfoTable *info;
1318
1319   FB_
1320   /* args: R1 = MVar, R2 = value */
1321
1322   mvar = (StgMVar *)R1.p;
1323
1324 #ifdef SMP
1325   info = LOCK_CLOSURE(mvar);
1326 #else
1327   info = GET_INFO(mvar);
1328 #endif
1329
1330   if (info == &stg_FULL_MVAR_info) {
1331     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1332       mvar->head = CurrentTSO;
1333     } else {
1334       mvar->tail->link = CurrentTSO;
1335     }
1336     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1337     CurrentTSO->why_blocked = BlockedOnMVar;
1338     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1339     mvar->tail = CurrentTSO;
1340
1341 #ifdef SMP
1342     /* unlock the MVar */
1343     SET_INFO(mvar,&stg_FULL_MVAR_info);
1344 #endif
1345     JMP_(stg_block_putmvar);
1346   }
1347   
1348   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1349       /* There are takeMVar(s) waiting: wake up the first one
1350        */
1351       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1352
1353       /* actually perform the takeMVar */
1354       PerformTake(mvar->head, R2.cl);
1355       
1356 #if defined(GRAN) || defined(PAR)
1357       /* ToDo: check 2nd arg (mvar) is right */
1358       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1359 #else
1360       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1361 #endif
1362       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1363           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1364       }
1365 #ifdef SMP
1366       /* unlocks the MVar in the SMP case */
1367       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1368 #endif
1369       JMP_(ENTRY_CODE(Sp[0]));
1370   } else {
1371       /* No further takes, the MVar is now full. */
1372       mvar->value = R2.cl;
1373       /* unlocks the MVar in the SMP case */
1374       SET_INFO(mvar,&stg_FULL_MVAR_info);
1375       JMP_(ENTRY_CODE(Sp[0]));
1376   }
1377
1378   /* ToDo: yield afterward for better communication performance? */
1379   FE_
1380 }
1381
1382 FN_(tryPutMVarzh_fast)
1383 {
1384   StgMVar *mvar;
1385   const StgInfoTable *info;
1386
1387   FB_
1388   /* args: R1 = MVar, R2 = value */
1389
1390   mvar = (StgMVar *)R1.p;
1391
1392 #ifdef SMP
1393   info = LOCK_CLOSURE(mvar);
1394 #else
1395   info = GET_INFO(mvar);
1396 #endif
1397
1398   if (info == &stg_FULL_MVAR_info) {
1399
1400 #ifdef SMP
1401     /* unlock the MVar */
1402     mvar->header.info = &stg_FULL_MVAR_info;
1403 #endif
1404
1405     RET_N(0);
1406   }
1407   
1408   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1409       /* There are takeMVar(s) waiting: wake up the first one
1410        */
1411       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1412
1413       /* actually perform the takeMVar */
1414       PerformTake(mvar->head, R2.cl);
1415       
1416 #if defined(GRAN) || defined(PAR)
1417       /* ToDo: check 2nd arg (mvar) is right */
1418       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1419 #else
1420       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1421 #endif
1422       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1423           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1424       }
1425 #ifdef SMP
1426       /* unlocks the MVar in the SMP case */
1427       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1428 #endif
1429       JMP_(ENTRY_CODE(Sp[0]));
1430   } else {
1431       /* No further takes, the MVar is now full. */
1432       mvar->value = R2.cl;
1433       /* unlocks the MVar in the SMP case */
1434       SET_INFO(mvar,&stg_FULL_MVAR_info);
1435       JMP_(ENTRY_CODE(Sp[0]));
1436   }
1437
1438   /* ToDo: yield afterward for better communication performance? */
1439   FE_
1440 }
1441
1442 /* -----------------------------------------------------------------------------
1443    Stable pointer primitives
1444    -------------------------------------------------------------------------  */
1445
1446 FN_(makeStableNamezh_fast)
1447 {
1448   StgWord index;
1449   StgStableName *sn_obj;
1450   FB_
1451
1452   HP_CHK_GEN_TICKY(sizeofW(StgStableName), R1_PTR, makeStableNamezh_fast,);
1453   TICK_ALLOC_PRIM(sizeofW(StgHeader), 
1454                   sizeofW(StgStableName)-sizeofW(StgHeader), 0);
1455   CCS_ALLOC(CCCS,sizeofW(StgStableName)); /* ccs prof */
1456   
1457   index = RET_STGCALL1(StgWord,lookupStableName,R1.p);
1458
1459   /* Is there already a StableName for this heap object? */
1460   if (stable_ptr_table[index].sn_obj == NULL) {
1461     sn_obj = (StgStableName *) (Hp - sizeofW(StgStableName) + 1);
1462     SET_HDR(sn_obj,&stg_STABLE_NAME_info,CCCS);
1463     sn_obj->sn = index;
1464     stable_ptr_table[index].sn_obj = (StgClosure *)sn_obj;
1465   } else {
1466     (StgClosure *)sn_obj = stable_ptr_table[index].sn_obj;
1467   }
1468
1469   TICK_RET_UNBOXED_TUP(1);
1470   RET_P(sn_obj);
1471 }
1472
1473
1474 FN_(makeStablePtrzh_fast)
1475 {
1476   /* Args: R1 = a */
1477   StgStablePtr sp;
1478   FB_
1479   MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1480   sp = RET_STGCALL1(StgStablePtr,getStablePtr,R1.p);
1481   RET_N(sp);
1482   FE_
1483 }
1484
1485 FN_(deRefStablePtrzh_fast)
1486 {
1487   /* Args: R1 = the stable ptr */
1488   P_ r;
1489   StgStablePtr sp;
1490   FB_
1491   sp = (StgStablePtr)R1.w;
1492   r = stable_ptr_table[(StgWord)sp].addr;
1493   RET_P(r);
1494   FE_
1495 }
1496
1497 /* -----------------------------------------------------------------------------
1498    Bytecode object primitives
1499    -------------------------------------------------------------------------  */
1500
1501 FN_(newBCOzh_fast)
1502 {
1503   /* R1.p = instrs
1504      R2.p = literals
1505      R3.p = ptrs
1506      R4.p = itbls
1507   */
1508   StgBCO *bco;
1509   FB_
1510
1511   HP_CHK_GEN_TICKY(sizeofW(StgBCO),R1_PTR|R2_PTR|R3_PTR|R4_PTR, newBCOzh_fast,);
1512   TICK_ALLOC_PRIM(sizeofW(StgHeader), sizeofW(StgBCO)-sizeofW(StgHeader), 0);
1513   CCS_ALLOC(CCCS,sizeofW(StgBCO)); /* ccs prof */
1514   bco = (StgBCO *) (Hp + 1 - sizeofW(StgBCO));
1515   SET_HDR(bco, &stg_BCO_info, CCCS);
1516
1517   bco->instrs     = (StgArrWords*)R1.cl;
1518   bco->literals   = (StgArrWords*)R2.cl;
1519   bco->ptrs       = (StgMutArrPtrs*)R3.cl;
1520   bco->itbls      = (StgArrWords*)R4.cl;
1521
1522   TICK_RET_UNBOXED_TUP(1);
1523   RET_P(bco);
1524   FE_
1525 }
1526
1527 FN_(mkApUpd0zh_fast)
1528 {
1529   /* R1.p = the fn for the AP_UPD
1530   */
1531   StgAP_UPD* ap;
1532   FB_
1533   HP_CHK_GEN_TICKY(AP_sizeW(0), R1_PTR, mkApUpd0zh_fast,);
1534   TICK_ALLOC_PRIM(sizeofW(StgHeader), AP_sizeW(0)-sizeofW(StgHeader), 0);
1535   CCS_ALLOC(CCCS,AP_sizeW(0)); /* ccs prof */
1536   ap = (StgAP_UPD *) (Hp + 1 - AP_sizeW(0));
1537   SET_HDR(ap, &stg_AP_UPD_info, CCCS);
1538
1539   ap->n_args = 0;
1540   ap->fun = R1.cl;
1541
1542   TICK_RET_UNBOXED_TUP(1);
1543   RET_P(ap);
1544   FE_
1545 }
1546
1547 /* -----------------------------------------------------------------------------
1548    Thread I/O blocking primitives
1549    -------------------------------------------------------------------------- */
1550
1551 FN_(waitReadzh_fast)
1552 {
1553   FB_
1554     /* args: R1.i */
1555     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1556     CurrentTSO->why_blocked = BlockedOnRead;
1557     CurrentTSO->block_info.fd = R1.i;
1558     ACQUIRE_LOCK(&sched_mutex);
1559     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1560     RELEASE_LOCK(&sched_mutex);
1561     JMP_(stg_block_noregs);
1562   FE_
1563 }
1564
1565 FN_(waitWritezh_fast)
1566 {
1567   FB_
1568     /* args: R1.i */
1569     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1570     CurrentTSO->why_blocked = BlockedOnWrite;
1571     CurrentTSO->block_info.fd = R1.i;
1572     ACQUIRE_LOCK(&sched_mutex);
1573     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1574     RELEASE_LOCK(&sched_mutex);
1575     JMP_(stg_block_noregs);
1576   FE_
1577 }
1578
1579 FN_(delayzh_fast)
1580 {
1581   StgTSO *t, *prev;
1582   nat target;
1583   FB_
1584     /* args: R1.i */
1585     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1586     CurrentTSO->why_blocked = BlockedOnDelay;
1587
1588     ACQUIRE_LOCK(&sched_mutex);
1589
1590     target = (R1.i / (TICK_MILLISECS*1000)) + getourtimeofday();
1591     CurrentTSO->block_info.target = target;
1592
1593     /* Insert the new thread in the sleeping queue. */
1594     prev = NULL;
1595     t = sleeping_queue;
1596     while (t != END_TSO_QUEUE && t->block_info.target < target) {
1597         prev = t;
1598         t = t->link;
1599     }
1600
1601     CurrentTSO->link = t;
1602     if (prev == NULL) {
1603         sleeping_queue = CurrentTSO;
1604     } else {
1605         prev->link = CurrentTSO;
1606     }
1607
1608     RELEASE_LOCK(&sched_mutex);
1609     JMP_(stg_block_noregs);
1610   FE_
1611 }
1612