[project @ 2000-02-28 13:59:43 by simonmar]
[ghc-hetmet.git] / ghc / rts / PrimOps.hc
1 /* -----------------------------------------------------------------------------
2  * $Id: PrimOps.hc,v 1.43 2000/02/28 13:59:43 simonmar Exp $
3  *
4  * (c) The GHC Team, 1998-1999
5  *
6  * Primitive functions / data
7  *
8  * ---------------------------------------------------------------------------*/
9
10 #include "Rts.h"
11
12 #include "RtsFlags.h"
13 #include "StgStartup.h"
14 #include "SchedAPI.h"
15 #include "Schedule.h"
16 #include "RtsUtils.h"
17 #include "Storage.h"
18 #include "BlockAlloc.h" /* tmp */
19 #include "StablePriv.h"
20 #include "HeapStackCheck.h"
21 #include "StgRun.h"
22
23 /* ** temporary **
24
25    classes CCallable and CReturnable don't really exist, but the
26    compiler insists on generating dictionaries containing references
27    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
28    for these.
29 */
30
31 W_ GHC_ZCCCallable_static_info[0];
32 W_ GHC_ZCCReturnable_static_info[0];
33
34
35 /* -----------------------------------------------------------------------------
36    Macros for Hand-written primitives.
37    -------------------------------------------------------------------------- */
38
39 /*
40  * Horrible macros for returning unboxed tuples.
41  *
42  * How an unboxed tuple is returned depends on two factors:
43  *    - the number of real registers we have available
44  *    - the boxedness of the returned fields.
45  *
46  * To return an unboxed tuple from a primitive operation, we have macros
47  * RET_<layout> where <layout> describes the boxedness of each field of the
48  * unboxed tuple:  N indicates a non-pointer field, and P indicates a pointer.
49  *
50  * We only define the cases actually used, to avoid having too much
51  * garbage in this section.  Warning: any bugs in here will be hard to
52  * track down.
53  */
54
55 /*------ All Regs available */
56 #if defined(REG_R8)
57 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
58 # define RET_N(a)     RET_P(a)
59
60 # define RET_PP(a,b)  R1.w = (W_)(a); R2.w = (W_)(b); JMP_(ENTRY_CODE(Sp[0]));
61 # define RET_NN(a,b)  RET_PP(a,b)
62 # define RET_NP(a,b)  RET_PP(a,b)
63
64 # define RET_PPP(a,b,c) \
65         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); JMP_(ENTRY_CODE(Sp[0]));
66 # define RET_NNP(a,b,c) RET_PPP(a,b,c)
67
68 # define RET_NNNP(a,b,c,d) \
69         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)d; \
70         JMP_(ENTRY_CODE(Sp[0]));
71
72 # define RET_NPNP(a,b,c,d) \
73         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)(d); \
74         JMP_(ENTRY_CODE(Sp[0]));
75
76 # define RET_NNPNNP(a,b,c,d,e,f) \
77         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); \
78         R4.w = (W_)(d); R5.w = (W_)(e); R6.w = (W_)(f); \
79         JMP_(ENTRY_CODE(Sp[0]));
80
81 #elif defined(REG_R7) || defined(REG_R6) || defined(REG_R5) || \
82       defined(REG_R4) || defined(REG_R3)
83 # error RET_n macros not defined for this setup.
84
85 /*------ 2 Registers available */
86 #elif defined(REG_R2)
87
88 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
89 # define RET_N(a)     RET_P(a)
90
91 # define RET_PP(a,b)   R1.w = (W_)(a); R2.w = (W_)(b); \
92                        JMP_(ENTRY_CODE(Sp[0]));
93 # define RET_NN(a,b)   RET_PP(a,b)
94 # define RET_NP(a,b)   RET_PP(a,b)
95
96 # define RET_PPP(a,b,c) \
97         R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \
98         JMP_(ENTRY_CODE(Sp[1]));
99 # define RET_NNP(a,b,c) \
100         R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \
101         JMP_(ENTRY_CODE(Sp[1]));
102
103 # define RET_NNNP(a,b,c,d)                      \
104         R1.w = (W_)(a);                         \
105         R2.w = (W_)(b);                         \
106     /*  Sp[-3] = ARGTAG(1); */                  \
107         Sp[-2] = (W_)(c);                       \
108         Sp[-1] = (W_)(d);                       \
109         Sp -= 3;                                \
110         JMP_(ENTRY_CODE(Sp[3]));
111
112 # define RET_NPNP(a,b,c,d)                      \
113         R1.w = (W_)(a);                         \
114         R2.w = (W_)(b);                         \
115     /*  Sp[-3] = ARGTAG(1); */                  \
116         Sp[-2] = (W_)(c);                       \
117         Sp[-1] = (W_)(d);                       \
118         Sp -= 3;                                \
119         JMP_(ENTRY_CODE(Sp[3]));
120
121 # define RET_NNPNNP(a,b,c,d,e,f)                \
122         R1.w = (W_)(a);                         \
123         R2.w = (W_)(b);                         \
124         Sp[-6] = (W_)(c);                       \
125         /* Sp[-5] = ARGTAG(1); */               \
126         Sp[-4] = (W_)(d);                       \
127         /* Sp[-3] = ARGTAG(1); */               \
128         Sp[-2] = (W_)(e);                       \
129         Sp[-1] = (W_)(f);                       \
130         Sp -= 6;                                \
131         JMP_(ENTRY_CODE(Sp[6]));
132
133 /*------ 1 Register available */
134 #elif defined(REG_R1)
135 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
136 # define RET_N(a)     RET_P(a)
137
138 # define RET_PP(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 1; \
139                        JMP_(ENTRY_CODE(Sp[1]));
140 # define RET_NN(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 2; \
141                        JMP_(ENTRY_CODE(Sp[2]));
142 # define RET_NP(a,b)   RET_PP(a,b)
143
144 # define RET_PPP(a,b,c) \
145         R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 2; \
146         JMP_(ENTRY_CODE(Sp[2]));
147 # define RET_NNP(a,b,c) \
148         R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 3; \
149         JMP_(ENTRY_CODE(Sp[3]));
150
151 # define RET_NNNP(a,b,c,d)                      \
152         R1.w = (W_)(a);                         \
153     /*  Sp[-5] = ARGTAG(1); */                  \
154         Sp[-4] = (W_)(b);                       \
155     /*  Sp[-3] = ARGTAG(1); */                  \
156         Sp[-2] = (W_)(c);                       \
157         Sp[-1] = (W_)(d);                       \
158         Sp -= 5;                                \
159         JMP_(ENTRY_CODE(Sp[5]));
160
161 # define RET_NPNP(a,b,c,d)                      \
162         R1.w = (W_)(a);                         \
163         Sp[-4] = (W_)(b);                       \
164     /*  Sp[-3] = ARGTAG(1); */                  \
165         Sp[-2] = (W_)(c);                       \
166         Sp[-1] = (W_)(d);                       \
167         Sp -= 4;                                \
168         JMP_(ENTRY_CODE(Sp[4]));
169
170 # define RET_NNPNNP(a,b,c,d,e,f)                \
171         R1.w = (W_)(a);                         \
172         Sp[-1] = (W_)(f);                       \
173         Sp[-2] = (W_)(e);                       \
174         /* Sp[-3] = ARGTAG(1); */               \
175         Sp[-4] = (W_)(d);                       \
176         /* Sp[-5] = ARGTAG(1); */               \
177         Sp[-6] = (W_)(c);                       \
178         Sp[-7] = (W_)(b);                       \
179         /* Sp[-8] = ARGTAG(1); */               \
180         Sp -= 8;                                \
181         JMP_(ENTRY_CODE(Sp[8]));
182
183 #else /* 0 Regs available */
184
185 #define PUSH_P(o,x) Sp[-o] = (W_)(x)
186
187 #ifdef DEBUG
188 #define PUSH_N(o,x) Sp[1-o] = (W_)(x);  Sp[-o] = ARG_TAG(1);
189 #else
190 #define PUSH_N(o,x) Sp[1-o] = (W_)(x);
191 #endif
192
193 #define PUSHED(m)   Sp -= (m); JMP_(ENTRY_CODE(Sp[m]));
194
195 /* Here's how to construct these macros:
196  *
197  *   N = number of N's in the name;
198  *   P = number of P's in the name;
199  *   s = N * 2 + P;
200  *   while (nonNull(name)) {
201  *     if (nextChar == 'P') {
202  *       PUSH_P(s,_);
203  *       s -= 1;
204  *     } else {
205  *       PUSH_N(s,_);
206  *       s -= 2
207  *     }
208  *   }
209  *   PUSHED(N * 2 + P);
210  */
211
212 # define RET_P(a)     PUSH_P(1,a); PUSHED(1)
213 # define RET_N(a)     PUSH_N(2,a); PUSHED(2)
214
215 # define RET_PP(a,b)   PUSH_P(2,a); PUSH_P(1,b); PUSHED(2)
216 # define RET_NN(a,b)   PUSH_N(4,a); PUSH_N(2,b); PUSHED(4)
217 # define RET_NP(a,b)   PUSH_N(3,a); PUSH_P(1,b); PUSHED(3)
218
219 # define RET_PPP(a,b,c) PUSH_P(3,a); PUSH_P(2,b); PUSH_P(1,c); PUSHED(3)
220 # define RET_NNP(a,b,c) PUSH_N(5,a); PUSH_N(3,b); PUSH_P(1,c); PUSHED(5)
221
222 # define RET_NNNP(a,b,c,d) PUSH_N(7,a); PUSH_N(5,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(7)        
223 # define RET_NPNP(a,b,c,d) PUSH_N(6,a); PUSH_P(4,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(6)        
224 # define RET_NNPNNP(a,b,c,d,e,f) PUSH_N(10,a); PUSH_N(8,b); PUSH_P(6,c); PUSH_N(5,d); PUSH_N(3,e); PUSH_P(1,f); PUSHED(10)
225
226 #endif
227
228 /*-----------------------------------------------------------------------------
229   Array Primitives
230
231   Basically just new*Array - the others are all inline macros.
232
233   The size arg is always passed in R1, and the result returned in R1.
234
235   The slow entry point is for returning from a heap check, the saved
236   size argument must be re-loaded from the stack.
237   -------------------------------------------------------------------------- */
238
239 /* for objects that are *less* than the size of a word, make sure we
240  * round up to the nearest word for the size of the array.
241  */
242
243 #define BYTES_TO_STGWORDS(n) ((n) + sizeof(W_) - 1)/sizeof(W_)
244
245 #define newByteArray(ty,scale)                          \
246  FN_(new##ty##Arrayzh_fast)                             \
247  {                                                      \
248    W_ stuff_size, size, n;                              \
249    StgArrWords* p;                                      \
250    FB_                                                  \
251      MAYBE_GC(NO_PTRS,new##ty##Arrayzh_fast);           \
252      n = R1.w;                                          \
253      stuff_size = BYTES_TO_STGWORDS(n*scale);           \
254      size = sizeofW(StgArrWords)+ stuff_size;           \
255      p = (StgArrWords *)RET_STGCALL1(P_,allocate,size); \
256      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0); \
257      SET_HDR(p, &ARR_WORDS_info, CCCS);         \
258      p->words = stuff_size;                             \
259      TICK_RET_UNBOXED_TUP(1)                            \
260      RET_P(p);                                          \
261    FE_                                                  \
262  }
263
264 newByteArray(Char,   sizeof(C_))
265 newByteArray(Int,    sizeof(I_));
266 newByteArray(Word,   sizeof(W_));
267 newByteArray(Addr,   sizeof(P_));
268 newByteArray(Float,  sizeof(StgFloat));
269 newByteArray(Double, sizeof(StgDouble));
270 newByteArray(StablePtr, sizeof(StgStablePtr));
271
272 FN_(newArrayzh_fast)
273 {
274   W_ size, n, init;
275   StgMutArrPtrs* arr;
276   StgPtr p;
277   FB_
278     n = R1.w;
279
280     MAYBE_GC(R2_PTR,newArrayzh_fast);
281
282     size = sizeofW(StgMutArrPtrs) + n;
283     arr = (StgMutArrPtrs *)RET_STGCALL1(P_, allocate, size);
284     TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
285
286     SET_HDR(arr,&MUT_ARR_PTRS_info,CCCS);
287     arr->ptrs = n;
288
289     init = R2.w;
290     for (p = (P_)arr + sizeofW(StgMutArrPtrs); 
291          p < (P_)arr + size; p++) {
292         *p = (W_)init;
293     }
294
295     TICK_RET_UNBOXED_TUP(1);
296     RET_P(arr);
297   FE_
298 }
299
300 FN_(newMutVarzh_fast)
301 {
302   StgMutVar* mv;
303   /* Args: R1.p = initialisation value */
304   FB_
305
306   HP_CHK_GEN_TICKY(sizeofW(StgMutVar), R1_PTR, newMutVarzh_fast,);
307   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,1, 0); /* hack, dependent on rep. */
308   CCS_ALLOC(CCCS,sizeofW(StgMutVar));
309
310   mv = (StgMutVar *)(Hp-sizeofW(StgMutVar)+1);
311   SET_HDR(mv,&MUT_VAR_info,CCCS);
312   mv->var = R1.cl;
313
314   TICK_RET_UNBOXED_TUP(1);
315   RET_P(mv);
316   FE_
317 }
318
319 /* -----------------------------------------------------------------------------
320    Foreign Object Primitives
321
322    -------------------------------------------------------------------------- */
323
324 #ifndef PAR
325 FN_(makeForeignObjzh_fast)
326 {
327   /* R1.p = ptr to foreign object,
328   */
329   StgForeignObj *result;
330   FB_
331
332   HP_CHK_GEN_TICKY(sizeofW(StgForeignObj), NO_PTRS, makeForeignObjzh_fast,);
333   TICK_ALLOC_PRIM(sizeofW(StgHeader),
334                   sizeofW(StgForeignObj)-sizeofW(StgHeader), 0);
335   CCS_ALLOC(CCCS,sizeofW(StgForeignObj)); /* ccs prof */
336
337   result = (StgForeignObj *) (Hp + 1 - sizeofW(StgForeignObj));
338   SET_HDR(result,&FOREIGN_info,CCCS);
339   result->data = R1.p;
340
341   /* returns (# s#, ForeignObj# #) */
342   TICK_RET_UNBOXED_TUP(1);
343   RET_P(result);
344   FE_
345 }
346 #endif
347
348 /* These two are out-of-line for the benefit of the NCG */
349 FN_(unsafeThawArrayzh_fast)
350 {
351   FB_
352   SET_INFO((StgClosure *)R1.cl,&MUT_ARR_PTRS_info);
353   recordMutable((StgMutClosure*)R1.cl);
354
355   TICK_RET_UNBOXED_TUP(1);
356   RET_P(R1.p);
357   FE_
358 }
359
360 /* -----------------------------------------------------------------------------
361    Weak Pointer Primitives
362    -------------------------------------------------------------------------- */
363
364 #ifndef PAR
365
366 FN_(mkWeakzh_fast)
367 {
368   /* R1.p = key
369      R2.p = value
370      R3.p = finalizer (or NULL)
371   */
372   StgWeak *w;
373   FB_
374
375   if (R3.cl == NULL) {
376     R3.cl = &NO_FINALIZER_closure;
377   }
378
379   HP_CHK_GEN_TICKY(sizeofW(StgWeak),R1_PTR|R2_PTR|R3_PTR, mkWeakzh_fast,);
380   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,  // +1 is for the link field
381                   sizeofW(StgWeak)-sizeofW(StgHeader)-1, 0);
382   CCS_ALLOC(CCCS,sizeofW(StgWeak)); /* ccs prof */
383
384   w = (StgWeak *) (Hp + 1 - sizeofW(StgWeak));
385   SET_HDR(w, &WEAK_info, CCCS);
386
387   w->key        = R1.cl;
388   w->value      = R2.cl;
389   w->finalizer  = R3.cl;
390
391   w->link       = weak_ptr_list;
392   weak_ptr_list = w;
393   IF_DEBUG(weak, fprintf(stderr,"New weak pointer at %p\n",w));
394
395   TICK_RET_UNBOXED_TUP(1);
396   RET_P(w);
397   FE_
398 }
399
400 FN_(finalizzeWeakzh_fast)
401 {
402   /* R1.p = weak ptr
403    */
404   StgDeadWeak *w;
405   StgClosure *f;
406   FB_
407   TICK_RET_UNBOXED_TUP(0);
408   w = (StgDeadWeak *)R1.p;
409
410   /* already dead? */
411   if (w->header.info == &DEAD_WEAK_info) {
412       RET_NP(0,&NO_FINALIZER_closure);
413   }
414
415   /* kill it */
416   w->header.info = &DEAD_WEAK_info;
417   f = ((StgWeak *)w)->finalizer;
418   w->link = ((StgWeak *)w)->link;
419
420   /* return the finalizer */
421   if (f == &NO_FINALIZER_closure) {
422       RET_NP(0,&NO_FINALIZER_closure);
423   } else {
424       RET_NP(1,f);
425   }
426   FE_
427 }
428
429 #endif /* !PAR */
430
431 /* -----------------------------------------------------------------------------
432    Arbitrary-precision Integer operations.
433    -------------------------------------------------------------------------- */
434
435 FN_(int2Integerzh_fast)
436 {
437    /* arguments: R1 = Int# */
438
439    I_ val, s;           /* to avoid aliasing */
440    StgArrWords* p;      /* address of array result */
441    FB_
442
443    val = R1.i;
444    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, int2Integerzh_fast,);
445    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
446    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
447
448    p = (StgArrWords *)Hp - 1;
449    SET_ARR_HDR(p, &ARR_WORDS_info, CCCS, 1);
450
451    /* mpz_set_si is inlined here, makes things simpler */
452    if (val < 0) { 
453         s  = -1;
454         *Hp = -val;
455    } else if (val > 0) {
456         s = 1;
457         *Hp = val;
458    } else {
459         s = 0;
460    }
461
462    /* returns (# size  :: Int#, 
463                  data  :: ByteArray# 
464                #)
465    */
466    TICK_RET_UNBOXED_TUP(2);
467    RET_NP(s,p);
468    FE_
469 }
470
471 FN_(word2Integerzh_fast)
472 {
473    /* arguments: R1 = Word# */
474
475    W_ val;              /* to avoid aliasing */
476    I_  s;
477    StgArrWords* p;      /* address of array result */
478    FB_
479
480    val = R1.w;
481    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, word2Integerzh_fast,)
482    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
483    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
484
485    p = (StgArrWords *)Hp - 1;
486    SET_ARR_HDR(p, &ARR_WORDS_info, CCCS, 1);
487
488    if (val != 0) {
489         s = 1;
490         *Hp = val;
491    } else {
492         s = 0;
493    }
494
495    /* returns (# size  :: Int#, 
496                  data  :: ByteArray# 
497                #)
498    */
499    TICK_RET_UNBOXED_TUP(2);
500    RET_NP(s,p);
501    FE_
502 }
503
504 FN_(addr2Integerzh_fast)
505 {
506   MP_INT result;
507   char *str;
508   FB_
509
510   MAYBE_GC(NO_PTRS,addr2Integerzh_fast);
511
512   /* args:   R1 :: Addr# */
513   str = R1.a;
514
515   /* Perform the operation */
516   if (RET_STGCALL3(int, mpz_init_set_str,&result,(str),/*base*/10))
517       abort();
518
519    /* returns (# size  :: Int#, 
520                  data  :: ByteArray# 
521                #)
522    */
523   TICK_RET_UNBOXED_TUP(2);
524   RET_NP(result._mp_size, 
525           result._mp_d - sizeofW(StgArrWords));
526   FE_
527 }
528
529 /*
530  * 'long long' primops for converting to/from Integers.
531  */
532
533 #ifdef SUPPORT_LONG_LONGS
534
535 FN_(int64ToIntegerzh_fast)
536 {
537    /* arguments: L1 = Int64# */
538
539    StgInt64  val; /* to avoid aliasing */
540    W_ hi;
541    I_  s, neg, words_needed;
542    StgArrWords* p;      /* address of array result */
543    FB_
544
545    val = (LI_)L1;
546    neg = 0;
547
548    if ( val >= 0x100000000LL || val <= -0x100000000LL )  { 
549        words_needed = 2;
550    } else { 
551        /* minimum is one word */
552        words_needed = 1;
553    }
554    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, int64ToIntegerzh_fast,)
555    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
556    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
557
558    p = (StgArrWords *)(Hp-words_needed+1) - 1;
559    SET_ARR_HDR(p, &ARR_WORDS_info, CCCS, words_needed);
560
561    if ( val < 0LL ) {
562      neg = 1;
563      val = -val;
564    } 
565
566    hi = (W_)((LW_)val / 0x100000000ULL);
567
568    if ( words_needed == 2 )  { 
569       s = 2; 
570       Hp[-1] = (W_)val;
571       Hp[0] = hi;
572    } else if ( val != 0 ) {
573       s = 1;
574       Hp[0] = (W_)val;
575    }  else /* val==0 */   {
576       s = 0;
577    }
578    s = ( neg ? -s : s );
579
580    /* returns (# size  :: Int#, 
581                  data  :: ByteArray# 
582                #)
583    */
584    TICK_RET_UNBOXED_TUP(2);
585    RET_NP(s,p);
586    FE_
587 }
588
589 FN_(word64ToIntegerzh_fast)
590 {
591    /* arguments: L1 = Word64# */
592
593    StgWord64 val; /* to avoid aliasing */
594    StgWord hi;
595    I_  s, words_needed;
596    StgArrWords* p;      /* address of array result */
597    FB_
598
599    val = (LW_)L1;
600    if ( val >= 0x100000000ULL ) {
601       words_needed = 2;
602    } else {
603       words_needed = 1;
604    }
605    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, word64ToIntegerzh_fast,)
606    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
607    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
608
609    p = (StgArrWords *)(Hp-words_needed+1) - 1;
610    SET_ARR_HDR(p, &ARR_WORDS_info, CCCS, words_needed);
611
612    hi = (W_)((LW_)val / 0x100000000ULL);
613    if ( val >= 0x100000000ULL ) { 
614      s = 2;
615      Hp[-1] = ((W_)val);
616      Hp[0]  = (hi);
617    } else if ( val != 0 )      {
618       s = 1;
619       Hp[0] = ((W_)val);
620    } else /* val==0 */         {
621       s = 0;
622    }
623
624    /* returns (# size  :: Int#, 
625                  data  :: ByteArray# 
626                #)
627    */
628    TICK_RET_UNBOXED_TUP(2);
629    RET_NP(s,p);
630    FE_
631 }
632
633
634 #endif /* HAVE_LONG_LONG */
635
636 /* ToDo: this is shockingly inefficient */
637
638 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
639 FN_(name)                                                               \
640 {                                                                       \
641   MP_INT arg1, arg2, result;                                            \
642   I_ s1, s2;                                                            \
643   StgArrWords* d1;                                                      \
644   StgArrWords* d2;                                                      \
645   FB_                                                                   \
646                                                                         \
647   /* call doYouWantToGC() */                                            \
648   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
649                                                                         \
650   d1 = (StgArrWords *)R2.p;                                             \
651   s1 = R1.i;                                                            \
652   d2 = (StgArrWords *)R4.p;                                             \
653   s2 = R3.i;                                                            \
654                                                                         \
655   arg1._mp_alloc        = d1->words;                                    \
656   arg1._mp_size         = (s1);                                         \
657   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
658   arg2._mp_alloc        = d2->words;                                    \
659   arg2._mp_size         = (s2);                                         \
660   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
661                                                                         \
662   STGCALL1(mpz_init,&result);                                           \
663                                                                         \
664   /* Perform the operation */                                           \
665   STGCALL3(mp_fun,&result,&arg1,&arg2);                                 \
666                                                                         \
667   TICK_RET_UNBOXED_TUP(2);                                              \
668   RET_NP(result._mp_size,                                               \
669          result._mp_d-sizeofW(StgArrWords));                            \
670   FE_                                                                   \
671 }
672
673 #define GMP_TAKE2_RET2(name,mp_fun)                                     \
674 FN_(name)                                                               \
675 {                                                                       \
676   MP_INT arg1, arg2, result1, result2;                                  \
677   I_ s1, s2;                                                            \
678   StgArrWords* d1;                                                      \
679   StgArrWords* d2;                                                      \
680   FB_                                                                   \
681                                                                         \
682   /* call doYouWantToGC() */                                            \
683   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
684                                                                         \
685   d1 = (StgArrWords *)R2.p;                                             \
686   s1 = R1.i;                                                            \
687   d2 = (StgArrWords *)R4.p;                                             \
688   s2 = R3.i;                                                            \
689                                                                         \
690   arg1._mp_alloc        = d1->words;                                    \
691   arg1._mp_size         = (s1);                                         \
692   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
693   arg2._mp_alloc        = d2->words;                                    \
694   arg2._mp_size         = (s2);                                         \
695   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
696                                                                         \
697   STGCALL1(mpz_init,&result1);                                          \
698   STGCALL1(mpz_init,&result2);                                          \
699                                                                         \
700   /* Perform the operation */                                           \
701   STGCALL4(mp_fun,&result1,&result2,&arg1,&arg2);                       \
702                                                                         \
703   TICK_RET_UNBOXED_TUP(4);                                              \
704   RET_NPNP(result1._mp_size,                                            \
705            result1._mp_d-sizeofW(StgArrWords),                          \
706            result2._mp_size,                                            \
707            result2._mp_d-sizeofW(StgArrWords));                         \
708   FE_                                                                   \
709 }
710
711 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add);
712 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub);
713 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul);
714 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd);
715 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q);
716 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r);
717 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact);
718
719 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr);
720 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr);
721
722 #ifndef FLOATS_AS_DOUBLES
723 FN_(decodeFloatzh_fast)
724
725   MP_INT mantissa;
726   I_ exponent;
727   StgArrWords* p;
728   StgFloat arg;
729   FB_
730
731   /* arguments: F1 = Float# */
732   arg = F1;
733
734   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, decodeFloatzh_fast,);
735   TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
736   CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
737
738   /* Be prepared to tell Lennart-coded __decodeFloat    */
739   /* where mantissa._mp_d can be put (it does not care about the rest) */
740   p = (StgArrWords *)Hp - 1;
741   SET_ARR_HDR(p,&ARR_WORDS_info,CCCS,1)
742   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
743
744   /* Perform the operation */
745   STGCALL3(__decodeFloat,&mantissa,&exponent,arg);
746
747   /* returns: (Int# (expn), Int#, ByteArray#) */
748   TICK_RET_UNBOXED_TUP(3);
749   RET_NNP(exponent,mantissa._mp_size,p);
750   FE_
751 }
752 #endif /* !FLOATS_AS_DOUBLES */
753
754 #define DOUBLE_MANTISSA_SIZE (sizeofW(StgDouble))
755 #define ARR_SIZE (sizeofW(StgArrWords) + DOUBLE_MANTISSA_SIZE)
756
757 FN_(decodeDoublezh_fast)
758 { MP_INT mantissa;
759   I_ exponent;
760   StgDouble arg;
761   StgArrWords* p;
762   FB_
763
764   /* arguments: D1 = Double# */
765   arg = D1;
766
767   HP_CHK_GEN_TICKY(ARR_SIZE, NO_PTRS, decodeDoublezh_fast,);
768   TICK_ALLOC_PRIM(sizeofW(StgArrWords),DOUBLE_MANTISSA_SIZE,0);
769   CCS_ALLOC(CCCS,ARR_SIZE); /* ccs prof */
770
771   /* Be prepared to tell Lennart-coded __decodeDouble   */
772   /* where mantissa.d can be put (it does not care about the rest) */
773   p = (StgArrWords *)(Hp-ARR_SIZE+1);
774   SET_ARR_HDR(p, &ARR_WORDS_info, CCCS, DOUBLE_MANTISSA_SIZE);
775   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
776
777   /* Perform the operation */
778   STGCALL3(__decodeDouble,&mantissa,&exponent,arg);
779
780   /* returns: (Int# (expn), Int#, ByteArray#) */
781   TICK_RET_UNBOXED_TUP(3);
782   RET_NNP(exponent,mantissa._mp_size,p);
783   FE_
784 }
785
786 /* -----------------------------------------------------------------------------
787  * Concurrency primitives
788  * -------------------------------------------------------------------------- */
789
790 FN_(forkzh_fast)
791 {
792   FB_
793   /* args: R1 = closure to spark */
794   
795   MAYBE_GC(R1_PTR, forkzh_fast);
796
797   /* create it right now, return ThreadID in R1 */
798   R1.t = RET_STGCALL2(StgTSO *, createIOThread, 
799                       RtsFlags.GcFlags.initialStkSize, R1.cl);
800   STGCALL1(scheduleThread, R1.t);
801       
802   /* switch at the earliest opportunity */ 
803   context_switch = 1;
804   
805   JMP_(ENTRY_CODE(Sp[0]));
806   FE_
807 }
808
809 FN_(yieldzh_fast)
810 {
811   FB_
812   JMP_(stg_yield_noregs);
813   FE_
814 }
815
816 FN_(newMVarzh_fast)
817 {
818   StgMVar *mvar;
819
820   FB_
821   /* args: none */
822
823   HP_CHK_GEN_TICKY(sizeofW(StgMVar), NO_PTRS, newMVarzh_fast,);
824   TICK_ALLOC_PRIM(sizeofW(StgMutVar)-1, // consider head,tail,link as admin wds
825                   1, 0);
826   CCS_ALLOC(CCCS,sizeofW(StgMVar)); /* ccs prof */
827   
828   mvar = (StgMVar *) (Hp - sizeofW(StgMVar) + 1);
829   SET_HDR(mvar,&EMPTY_MVAR_info,CCCS);
830   mvar->head = mvar->tail = (StgTSO *)&END_TSO_QUEUE_closure;
831   mvar->value = (StgClosure *)&END_TSO_QUEUE_closure;
832
833   TICK_RET_UNBOXED_TUP(1);
834   RET_P(mvar);
835   FE_
836 }
837
838 FN_(takeMVarzh_fast)
839 {
840   StgMVar *mvar;
841   StgClosure *val;
842   const StgInfoTable *info;
843
844   FB_
845   /* args: R1 = MVar closure */
846
847   mvar = (StgMVar *)R1.p;
848
849 #ifdef SMP
850   info = LOCK_CLOSURE(mvar);
851 #else
852   info = GET_INFO(mvar);
853 #endif
854
855   /* If the MVar is empty, put ourselves on its blocking queue,
856    * and wait until we're woken up.
857    */
858   if (info == &EMPTY_MVAR_info) {
859     if (mvar->head == (StgTSO *)&END_TSO_QUEUE_closure) {
860       mvar->head = CurrentTSO;
861     } else {
862       mvar->tail->link = CurrentTSO;
863     }
864     CurrentTSO->link = (StgTSO *)&END_TSO_QUEUE_closure;
865     CurrentTSO->why_blocked = BlockedOnMVar;
866     CurrentTSO->block_info.closure = (StgClosure *)mvar;
867     mvar->tail = CurrentTSO;
868
869 #ifdef SMP
870     /* unlock the MVar */
871     mvar->header.info = &EMPTY_MVAR_info;
872 #endif
873     BLOCK(R1_PTR, takeMVarzh_fast);
874   }
875
876   val = mvar->value;
877   mvar->value = (StgClosure *)&END_TSO_QUEUE_closure;
878
879   /* do this last... we might have locked the MVar in the SMP case,
880    * and writing the info pointer will unlock it.
881    */
882   SET_INFO(mvar,&EMPTY_MVAR_info);
883
884   TICK_RET_UNBOXED_TUP(1);
885   RET_P(val);
886   FE_
887 }
888
889 FN_(putMVarzh_fast)
890 {
891   StgMVar *mvar;
892   const StgInfoTable *info;
893
894   FB_
895   /* args: R1 = MVar, R2 = value */
896
897   mvar = (StgMVar *)R1.p;
898
899 #ifdef SMP
900   info = LOCK_CLOSURE(mvar);
901 #else
902   info = GET_INFO(mvar);
903 #endif
904
905   if (info == &FULL_MVAR_info) {
906     barf("putMVar#: MVar already full");
907   }
908   
909   mvar->value = R2.cl;
910
911   /* wake up the first thread on the queue, it will continue with the
912    * takeMVar operation and mark the MVar empty again.
913    */
914   if (mvar->head != (StgTSO *)&END_TSO_QUEUE_closure) {
915     ASSERT(mvar->head->why_blocked == BlockedOnMVar);
916 #if defined(GRAN)
917     mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
918 #elif defined(PAR)
919     // ToDo: check 2nd arg (mvar) is right
920     mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
921 #else
922     mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
923 #endif
924     if (mvar->head == (StgTSO *)&END_TSO_QUEUE_closure) {
925       mvar->tail = (StgTSO *)&END_TSO_QUEUE_closure;
926     }
927   }
928
929   /* unlocks the MVar in the SMP case */
930   SET_INFO(mvar,&FULL_MVAR_info);
931
932   /* ToDo: yield here for better communication performance? */
933   JMP_(ENTRY_CODE(Sp[0]));
934   FE_
935 }
936
937 /* -----------------------------------------------------------------------------
938    Stable pointer primitives
939    -------------------------------------------------------------------------  */
940
941 FN_(makeStableNamezh_fast)
942 {
943   StgWord index;
944   StgStableName *sn_obj;
945   FB_
946
947   HP_CHK_GEN_TICKY(sizeofW(StgStableName), R1_PTR, makeStableNamezh_fast,);
948   TICK_ALLOC_PRIM(sizeofW(StgHeader), 
949                   sizeofW(StgStableName)-sizeofW(StgHeader), 0);
950   CCS_ALLOC(CCCS,sizeofW(StgStableName)); /* ccs prof */
951   
952   index = RET_STGCALL1(StgWord,lookupStableName,R1.p);
953
954   /* Is there already a StableName for this heap object? */
955   if (stable_ptr_table[index].sn_obj == NULL) {
956     sn_obj = (StgStableName *) (Hp - sizeofW(StgStableName) + 1);
957     sn_obj->header.info = &STABLE_NAME_info;
958     sn_obj->sn = index;
959     stable_ptr_table[index].sn_obj = (StgClosure *)sn_obj;
960   } else {
961     (StgClosure *)sn_obj = stable_ptr_table[index].sn_obj;
962   }
963
964   TICK_RET_UNBOXED_TUP(1);
965   RET_P(sn_obj);
966 }
967
968 /* -----------------------------------------------------------------------------
969    Thread I/O blocking primitives
970    -------------------------------------------------------------------------- */
971
972 FN_(waitReadzh_fast)
973 {
974   FB_
975     /* args: R1.i */
976     ASSERT(CurrentTSO->why_blocked == NotBlocked);
977     CurrentTSO->why_blocked = BlockedOnRead;
978     CurrentTSO->block_info.fd = R1.i;
979     ACQUIRE_LOCK(&sched_mutex);
980     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
981     RELEASE_LOCK(&sched_mutex);
982     JMP_(stg_block_noregs);
983   FE_
984 }
985
986 FN_(waitWritezh_fast)
987 {
988   FB_
989     /* args: R1.i */
990     ASSERT(CurrentTSO->why_blocked == NotBlocked);
991     CurrentTSO->why_blocked = BlockedOnWrite;
992     CurrentTSO->block_info.fd = R1.i;
993     ACQUIRE_LOCK(&sched_mutex);
994     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
995     RELEASE_LOCK(&sched_mutex);
996     JMP_(stg_block_noregs);
997   FE_
998 }
999
1000 FN_(delayzh_fast)
1001 {
1002   FB_
1003     /* args: R1.i */
1004     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1005     CurrentTSO->why_blocked = BlockedOnDelay;
1006
1007     ACQUIRE_LOCK(&sched_mutex);
1008
1009     /* Add on ticks_since_select, since these will be subtracted at
1010      * the next awaitEvent call.
1011      */
1012     CurrentTSO->block_info.delay = R1.i + ticks_since_select;
1013
1014     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1015
1016     RELEASE_LOCK(&sched_mutex);
1017     JMP_(stg_block_noregs);
1018   FE_
1019 }
1020
1021