[project @ 2002-12-11 15:36:20 by simonmar]
[ghc-hetmet.git] / ghc / rts / PrimOps.hc
1 /* -----------------------------------------------------------------------------
2  * $Id: PrimOps.hc,v 1.103 2002/12/11 15:36:45 simonmar Exp $
3  *
4  * (c) The GHC Team, 1998-2002
5  *
6  * Primitive functions / data
7  *
8  * ---------------------------------------------------------------------------*/
9
10 #include "Stg.h"
11 #include "Rts.h"
12
13 #include "RtsFlags.h"
14 #include "StgStartup.h"
15 #include "SchedAPI.h"
16 #include "Schedule.h"
17 #include "RtsUtils.h"
18 #include "Storage.h"
19 #include "BlockAlloc.h" /* tmp */
20 #include "StablePriv.h"
21 #include "StgRun.h"
22 #include "Itimer.h"
23 #include "Prelude.h"
24
25 #ifdef HAVE_SYS_TYPES_H
26 # include <sys/types.h>
27 #endif
28
29 #include <stdlib.h>
30
31 /* ** temporary **
32
33    classes CCallable and CReturnable don't really exist, but the
34    compiler insists on generating dictionaries containing references
35    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
36    for these.  Some C compilers can't cope with zero-length static arrays,
37    so we have to make these one element long.
38 */
39
40 StgWord GHC_ZCCCallable_static_info[1];
41 StgWord GHC_ZCCReturnable_static_info[1];
42   
43 /* -----------------------------------------------------------------------------
44    Macros for Hand-written primitives.
45    -------------------------------------------------------------------------- */
46
47 /*
48  * Horrible macros for returning unboxed tuples.
49  *
50  * How an unboxed tuple is returned depends on two factors:
51  *    - the number of real registers we have available
52  *    - the boxedness of the returned fields.
53  *
54  * To return an unboxed tuple from a primitive operation, we have macros
55  * RET_<layout> where <layout> describes the boxedness of each field of the
56  * unboxed tuple:  N indicates a non-pointer field, and P indicates a pointer.
57  *
58  * We only define the cases actually used, to avoid having too much
59  * garbage in this section.  Warning: any bugs in here will be hard to
60  * track down.
61  *
62  * The return convention for an unboxed tuple is as follows:
63  *   - fit as many fields as possible in registers (as per the
64  *     function fast-entry point calling convention).
65  *   - sort the rest of the fields into pointers and non-pointers.
66  *     push the pointers on the stack, followed by the non-pointers.
67  *     (so the pointers have higher addresses).
68  */
69
70 /*------ All Regs available */
71 #if MAX_REAL_VANILLA_REG == 8
72 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
73 # define RET_N(a)     RET_P(a)
74
75 # define RET_PP(a,b)  R1.w = (W_)(a); R2.w = (W_)(b); JMP_(ENTRY_CODE(Sp[0]));
76 # define RET_NN(a,b)  RET_PP(a,b)
77 # define RET_NP(a,b)  RET_PP(a,b)
78
79 # define RET_PPP(a,b,c) \
80         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); JMP_(ENTRY_CODE(Sp[0]));
81 # define RET_NNP(a,b,c) RET_PPP(a,b,c)
82
83 # define RET_NNNP(a,b,c,d) \
84         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)d; \
85         JMP_(ENTRY_CODE(Sp[0]));
86
87 # define RET_NPNP(a,b,c,d) \
88         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)(d); \
89         JMP_(ENTRY_CODE(Sp[0]));
90
91 #elif MAX_REAL_VANILLA_REG > 2 && MAX_REAL_VANILLA_REG < 8
92 # error RET_n macros not defined for this setup.
93
94 /*------ 2 Registers available */
95 #elif MAX_REAL_VANILLA_REG == 2
96
97 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
98 # define RET_N(a)     RET_P(a)
99
100 # define RET_PP(a,b)   R1.w = (W_)(a); R2.w = (W_)(b); \
101                        JMP_(ENTRY_CODE(Sp[0]));
102 # define RET_NN(a,b)   RET_PP(a,b)
103 # define RET_NP(a,b)   RET_PP(a,b)
104
105 # define RET_PPP(a,b,c)                         \
106         R1.w = (W_)(a);                         \
107         R2.w = (W_)(b);                         \
108         Sp[-1] = (W_)(c);                       \
109         Sp -= 1;                                \
110         JMP_(ENTRY_CODE(Sp[1]));
111
112 # define RET_NNP(a,b,c)                         \
113         R1.w = (W_)(a);                         \
114         R2.w = (W_)(b);                         \
115         Sp[-1] = (W_)(c);                       \
116         Sp -= 1;                                \
117         JMP_(ENTRY_CODE(Sp[1]));
118
119 # define RET_NNNP(a,b,c,d)                      \
120         R1.w = (W_)(a);                         \
121         R2.w = (W_)(b);                         \
122         Sp[-2] = (W_)(c);                       \
123         Sp[-1] = (W_)(d);                       \
124         Sp -= 2;                                \
125         JMP_(ENTRY_CODE(Sp[2]));
126
127 # define RET_NPNP(a,b,c,d)                      \
128         R1.w = (W_)(a);                         \
129         R2.w = (W_)(b);                         \
130         Sp[-2] = (W_)(c);                       \
131         Sp[-1] = (W_)(d);                       \
132         Sp -= 2;                                \
133         JMP_(ENTRY_CODE(Sp[2]));
134
135 /*------ 1 Register available */
136 #elif MAX_REAL_VANILLA_REG == 1
137 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
138 # define RET_N(a)     RET_P(a)
139
140 # define RET_PP(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 1; \
141                        JMP_(ENTRY_CODE(Sp[1]));
142 # define RET_NN(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 2; \
143                        JMP_(ENTRY_CODE(Sp[2]));
144 # define RET_NP(a,b)   RET_PP(a,b)
145
146 # define RET_PPP(a,b,c)                         \
147         R1.w = (W_)(a);                         \
148         Sp[-2] = (W_)(b);                       \
149         Sp[-1] = (W_)(c);                       \
150         Sp -= 2;                                \
151         JMP_(ENTRY_CODE(Sp[2]));
152
153 # define RET_NNP(a,b,c)                         \
154         R1.w = (W_)(a);                         \
155         Sp[-2] = (W_)(b);                       \
156         Sp[-1] = (W_)(c);                       \
157         Sp -= 2;                                \
158         JMP_(ENTRY_CODE(Sp[2]));
159
160 # define RET_NNNP(a,b,c,d)                      \
161         R1.w = (W_)(a);                         \
162         Sp[-3] = (W_)(b);                       \
163         Sp[-2] = (W_)(c);                       \
164         Sp[-1] = (W_)(d);                       \
165         Sp -= 3;                                \
166         JMP_(ENTRY_CODE(Sp[3]));
167
168 # define RET_NPNP(a,b,c,d)                      \
169         R1.w = (W_)(a);                         \
170         Sp[-3] = (W_)(c);                       \
171         Sp[-2] = (W_)(b);                       \
172         Sp[-1] = (W_)(d);                       \
173         Sp -= 3;                                \
174         JMP_(ENTRY_CODE(Sp[3]));
175
176 #else /* 0 Regs available */
177
178 #define PUSH(o,x) Sp[-o] = (W_)(x)
179
180 #define PUSHED(m)   Sp -= (m); JMP_(ENTRY_CODE(Sp[m]));
181
182 # define RET_P(a)     PUSH(1,a); PUSHED(1)
183 # define RET_N(a)     PUSH(1,a); PUSHED(2)
184
185 # define RET_PP(a,b)   PUSH(2,a); PUSH(1,b); PUSHED(2)
186 # define RET_NN(a,b)   PUSH(2,a); PUSH(1,b); PUSHED(2)
187 # define RET_NP(a,b)   PUSH(2,a); PUSH(1,b); PUSHED(2)
188
189 # define RET_PPP(a,b,c) PUSH(3,a); PUSH(2,b); PUSH(1,c); PUSHED(3)
190 # define RET_NNP(a,b,c) PUSH(3,a); PUSH(2,b); PUSH(1,c); PUSHED(3)
191
192 # define RET_NNNP(a,b,c,d) PUSH(4,a); PUSH(3,b); PUSH(2,c); PUSH(1,d); PUSHED(4)        
193 # define RET_NPNP(a,b,c,d) PUSH(4,a); PUSH(3,c); PUSH(2,b); PUSH(1,d); PUSHED(4)        
194 #endif
195
196 /*-----------------------------------------------------------------------------
197   Array Primitives
198
199   Basically just new*Array - the others are all inline macros.
200
201   The size arg is always passed in R1, and the result returned in R1.
202
203   The slow entry point is for returning from a heap check, the saved
204   size argument must be re-loaded from the stack.
205   -------------------------------------------------------------------------- */
206
207 /* for objects that are *less* than the size of a word, make sure we
208  * round up to the nearest word for the size of the array.
209  */
210
211 #define BYTES_TO_STGWORDS(n) ((n) + sizeof(W_) - 1)/sizeof(W_)
212
213 FN_(newByteArrayzh_fast)
214  {
215    W_ size, stuff_size, n;
216    StgArrWords* p;
217    FB_
218      MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
219      n = R1.w;
220      stuff_size = BYTES_TO_STGWORDS(n);
221      size = sizeofW(StgArrWords)+ stuff_size;
222      p = (StgArrWords *)RET_STGCALL1(P_,allocate,size);
223      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
224      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
225      p->words = stuff_size;
226      TICK_RET_UNBOXED_TUP(1)
227      RET_P(p);
228    FE_
229  }
230
231 FN_(newPinnedByteArrayzh_fast)
232  {
233    W_ size, stuff_size, n;
234    StgArrWords* p;
235    FB_
236      MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
237      n = R1.w;
238      stuff_size = BYTES_TO_STGWORDS(n);
239
240      // We want an 8-byte aligned array.  allocatePinned() gives us
241      // 8-byte aligned memory by default, but we want to align the
242      // *goods* inside the ArrWords object, so we have to check the
243      // size of the ArrWords header and adjust our size accordingly.
244      size = sizeofW(StgArrWords)+ stuff_size;
245      if ((sizeof(StgArrWords) & 7) != 0) {
246          size++;
247      }
248
249      p = (StgArrWords *)RET_STGCALL1(P_,allocatePinned,size);
250      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
251
252      // Again, if the ArrWords header isn't a multiple of 8 bytes, we
253      // have to push the object forward one word so that the goods
254      // fall on an 8-byte boundary.
255      if ((sizeof(StgArrWords) & 7) != 0) {
256          ((StgPtr)p)++;
257      }
258
259      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
260      p->words = stuff_size;
261      TICK_RET_UNBOXED_TUP(1)
262      RET_P(p);
263    FE_
264  }
265
266 FN_(newArrayzh_fast)
267 {
268   W_ size, n, init;
269   StgMutArrPtrs* arr;
270   StgPtr p;
271   FB_
272     n = R1.w;
273
274     MAYBE_GC(R2_PTR,newArrayzh_fast);
275
276     size = sizeofW(StgMutArrPtrs) + n;
277     arr = (StgMutArrPtrs *)RET_STGCALL1(P_, allocate, size);
278     TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
279
280     SET_HDR(arr,&stg_MUT_ARR_PTRS_info,CCCS);
281     arr->ptrs = n;
282
283     init = R2.w;
284     for (p = (P_)arr + sizeofW(StgMutArrPtrs); 
285          p < (P_)arr + size; p++) {
286         *p = (W_)init;
287     }
288
289     TICK_RET_UNBOXED_TUP(1);
290     RET_P(arr);
291   FE_
292 }
293
294 FN_(newMutVarzh_fast)
295 {
296   StgMutVar* mv;
297   /* Args: R1.p = initialisation value */
298   FB_
299
300   HP_CHK_GEN_TICKY(sizeofW(StgMutVar), R1_PTR, newMutVarzh_fast);
301   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,1, 0); /* hack, dependent on rep. */
302   CCS_ALLOC(CCCS,sizeofW(StgMutVar));
303
304   mv = (StgMutVar *)(Hp-sizeofW(StgMutVar)+1);
305   SET_HDR(mv,&stg_MUT_VAR_info,CCCS);
306   mv->var = R1.cl;
307
308   TICK_RET_UNBOXED_TUP(1);
309   RET_P(mv);
310   FE_
311 }
312
313 FN_(atomicModifyMutVarzh_fast)
314 {
315    StgMutVar* mv;
316    StgClosure *z, *x, *y, *r;
317    FB_
318    /* Args: R1.p :: MutVar#,  R2.p :: a -> (a,b) */
319
320    /* If x is the current contents of the MutVar#, then 
321       We want to make the new contents point to
322
323          (sel_0 (f x))
324  
325       and the return value is
326
327          (sel_1 (f x))
328
329       obviously we can share (f x).
330
331          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
332          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
333          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
334    */
335
336 #define THUNK_SIZE(n) (sizeofW(StgHeader) + stg_max((n), MIN_UPD_SIZE))
337 #define SIZE (THUNK_SIZE(2) + THUNK_SIZE(1) + THUNK_SIZE(1))
338
339    HP_CHK_GEN_TICKY(SIZE, R1_PTR|R2_PTR, atomicModifyMutVarzh_fast);
340    CCS_ALLOC(CCCS,SIZE);
341
342    x = ((StgMutVar *)R1.cl)->var;
343
344    TICK_ALLOC_UP_THK(2,0); // XXX
345    z = (StgClosure *) Hp - THUNK_SIZE(2) + 1;
346    SET_HDR(z, (StgInfoTable *)&stg_ap_2_upd_info, CCCS);
347    z->payload[0] = R2.cl;
348    z->payload[1] = x;
349
350    TICK_ALLOC_UP_THK(1,1); // XXX
351    y = (StgClosure *) (StgPtr)z - THUNK_SIZE(1);
352    SET_HDR(y, &stg_sel_0_upd_info, CCCS);
353    y->payload[0] = z;
354
355    ((StgMutVar *)R1.cl)->var = y;
356
357    TICK_ALLOC_UP_THK(1,1); // XXX
358    r = (StgClosure *) (StgPtr)y - THUNK_SIZE(1);
359    SET_HDR(r, &stg_sel_1_upd_info, CCCS);
360    r->payload[0] = z;
361
362    RET_P(r);
363    JMP_(ENTRY_CODE(Sp[0]));
364    FE_
365 }
366
367 /* -----------------------------------------------------------------------------
368    Foreign Object Primitives
369    -------------------------------------------------------------------------- */
370
371 FN_(mkForeignObjzh_fast)
372 {
373   /* R1.p = ptr to foreign object,
374   */
375   StgForeignObj *result;
376   FB_
377
378   HP_CHK_GEN_TICKY(sizeofW(StgForeignObj), NO_PTRS, mkForeignObjzh_fast);
379   TICK_ALLOC_PRIM(sizeofW(StgHeader),
380                   sizeofW(StgForeignObj)-sizeofW(StgHeader), 0);
381   CCS_ALLOC(CCCS,sizeofW(StgForeignObj)); /* ccs prof */
382
383   result = (StgForeignObj *) (Hp + 1 - sizeofW(StgForeignObj));
384   SET_HDR(result,&stg_FOREIGN_info,CCCS);
385   result->data = R1.p;
386
387   /* returns (# s#, ForeignObj# #) */
388   TICK_RET_UNBOXED_TUP(1);
389   RET_P(result);
390   FE_
391 }
392
393 /* These two are out-of-line for the benefit of the NCG */
394 FN_(unsafeThawArrayzh_fast)
395 {
396   FB_
397   SET_INFO((StgClosure *)R1.cl,&stg_MUT_ARR_PTRS_info);
398   recordMutable((StgMutClosure*)R1.cl);
399
400   TICK_RET_UNBOXED_TUP(1);
401   RET_P(R1.p);
402   FE_
403 }
404
405 /* -----------------------------------------------------------------------------
406    Weak Pointer Primitives
407    -------------------------------------------------------------------------- */
408
409 FN_(mkWeakzh_fast)
410 {
411   /* R1.p = key
412      R2.p = value
413      R3.p = finalizer (or NULL)
414   */
415   StgWeak *w;
416   FB_
417
418   if (R3.cl == NULL) {
419     R3.cl = &stg_NO_FINALIZER_closure;
420   }
421
422   HP_CHK_GEN_TICKY(sizeofW(StgWeak),R1_PTR|R2_PTR|R3_PTR, mkWeakzh_fast);
423   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,  // +1 is for the link field
424                   sizeofW(StgWeak)-sizeofW(StgHeader)-1, 0);
425   CCS_ALLOC(CCCS,sizeofW(StgWeak)); /* ccs prof */
426
427   w = (StgWeak *) (Hp + 1 - sizeofW(StgWeak));
428   SET_HDR(w, &stg_WEAK_info, CCCS);
429
430   w->key        = R1.cl;
431   w->value      = R2.cl;
432   w->finalizer  = R3.cl;
433
434   w->link       = weak_ptr_list;
435   weak_ptr_list = w;
436   IF_DEBUG(weak, fprintf(stderr,"New weak pointer at %p\n",w));
437
438   TICK_RET_UNBOXED_TUP(1);
439   RET_P(w);
440   FE_
441 }
442
443 FN_(finalizzeWeakzh_fast)
444 {
445   /* R1.p = weak ptr
446    */
447   StgDeadWeak *w;
448   StgClosure *f;
449   FB_
450   TICK_RET_UNBOXED_TUP(0);
451   w = (StgDeadWeak *)R1.p;
452
453   /* already dead? */
454   if (w->header.info == &stg_DEAD_WEAK_info) {
455       RET_NP(0,&stg_NO_FINALIZER_closure);
456   }
457
458   /* kill it */
459 #ifdef PROFILING
460   // @LDV profiling
461   // A weak pointer is inherently used, so we do not need to call
462   // LDV_recordDead_FILL_SLOP_DYNAMIC():
463   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
464   // or, LDV_recordDead():
465   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
466   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
467   // large as weak pointers, so there is no need to fill the slop, either.
468   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
469 #endif
470   //
471   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
472   //
473   w->header.info = &stg_DEAD_WEAK_info;
474 #ifdef PROFILING
475   // @LDV profiling
476   LDV_recordCreate((StgClosure *)w);
477 #endif
478   f = ((StgWeak *)w)->finalizer;
479   w->link = ((StgWeak *)w)->link;
480
481   /* return the finalizer */
482   if (f == &stg_NO_FINALIZER_closure) {
483       RET_NP(0,&stg_NO_FINALIZER_closure);
484   } else {
485       RET_NP(1,f);
486   }
487   FE_
488 }
489
490 FN_(deRefWeakzh_fast)
491 {
492   /* R1.p = weak ptr */
493   StgWeak* w;
494   I_       code;
495   P_       val;
496   FB_
497   w = (StgWeak*)R1.p;
498   if (w->header.info == &stg_WEAK_info) {
499     code = 1;
500     val = (P_)((StgWeak *)w)->value;
501   } else {
502     code = 0;
503     val = (P_)w;
504   }
505   RET_NP(code,val);
506   FE_
507 }
508
509 /* -----------------------------------------------------------------------------
510    Arbitrary-precision Integer operations.
511    -------------------------------------------------------------------------- */
512
513 FN_(int2Integerzh_fast)
514 {
515    /* arguments: R1 = Int# */
516
517    I_ val, s;           /* to avoid aliasing */
518    StgArrWords* p;      /* address of array result */
519    FB_
520
521    val = R1.i;
522    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, int2Integerzh_fast);
523    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
524    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
525
526    p = (StgArrWords *)Hp - 1;
527    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
528
529    /* mpz_set_si is inlined here, makes things simpler */
530    if (val < 0) { 
531         s  = -1;
532         *Hp = -val;
533    } else if (val > 0) {
534         s = 1;
535         *Hp = val;
536    } else {
537         s = 0;
538    }
539
540    /* returns (# size  :: Int#, 
541                  data  :: ByteArray# 
542                #)
543    */
544    TICK_RET_UNBOXED_TUP(2);
545    RET_NP(s,p);
546    FE_
547 }
548
549 FN_(word2Integerzh_fast)
550 {
551    /* arguments: R1 = Word# */
552
553    W_ val;              /* to avoid aliasing */
554    I_  s;
555    StgArrWords* p;      /* address of array result */
556    FB_
557
558    val = R1.w;
559    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, word2Integerzh_fast)
560    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
561    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
562
563    p = (StgArrWords *)Hp - 1;
564    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
565
566    if (val != 0) {
567         s = 1;
568         *Hp = val;
569    } else {
570         s = 0;
571    }
572
573    /* returns (# size  :: Int#, 
574                  data  :: ByteArray# 
575                #)
576    */
577    TICK_RET_UNBOXED_TUP(2);
578    RET_NP(s,p);
579    FE_
580 }
581
582
583 /*
584  * 'long long' primops for converting to/from Integers.
585  */
586
587 #ifdef SUPPORT_LONG_LONGS
588
589 FN_(int64ToIntegerzh_fast)
590 {
591    /* arguments: L1 = Int64# */
592
593    StgInt64  val; /* to avoid aliasing */
594    W_ hi;
595    I_  s, neg, words_needed;
596    StgArrWords* p;      /* address of array result */
597    FB_
598
599    val = (LI_)L1;
600    neg = 0;
601
602    if ( val >= 0x100000000LL || val <= -0x100000000LL )  { 
603        words_needed = 2;
604    } else { 
605        /* minimum is one word */
606        words_needed = 1;
607    }
608    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, int64ToIntegerzh_fast)
609    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
610    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
611
612    p = (StgArrWords *)(Hp-words_needed+1) - 1;
613    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
614
615    if ( val < 0LL ) {
616      neg = 1;
617      val = -val;
618    }
619
620    hi = (W_)((LW_)val / 0x100000000ULL);
621
622    if ( words_needed == 2 )  { 
623       s = 2;
624       Hp[-1] = (W_)val;
625       Hp[0] = hi;
626    } else if ( val != 0 ) {
627       s = 1;
628       Hp[0] = (W_)val;
629    }  else /* val==0 */   {
630       s = 0;
631    }
632    s = ( neg ? -s : s );
633
634    /* returns (# size  :: Int#, 
635                  data  :: ByteArray# 
636                #)
637    */
638    TICK_RET_UNBOXED_TUP(2);
639    RET_NP(s,p);
640    FE_
641 }
642
643 FN_(word64ToIntegerzh_fast)
644 {
645    /* arguments: L1 = Word64# */
646
647    StgWord64 val; /* to avoid aliasing */
648    StgWord hi;
649    I_  s, words_needed;
650    StgArrWords* p;      /* address of array result */
651    FB_
652
653    val = (LW_)L1;
654    if ( val >= 0x100000000ULL ) {
655       words_needed = 2;
656    } else {
657       words_needed = 1;
658    }
659    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, word64ToIntegerzh_fast)
660    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
661    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
662
663    p = (StgArrWords *)(Hp-words_needed+1) - 1;
664    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
665
666    hi = (W_)((LW_)val / 0x100000000ULL);
667    if ( val >= 0x100000000ULL ) { 
668      s = 2;
669      Hp[-1] = ((W_)val);
670      Hp[0]  = (hi);
671    } else if ( val != 0 )      {
672       s = 1;
673       Hp[0] = ((W_)val);
674    } else /* val==0 */         {
675       s = 0;
676    }
677
678    /* returns (# size  :: Int#, 
679                  data  :: ByteArray# 
680                #)
681    */
682    TICK_RET_UNBOXED_TUP(2);
683    RET_NP(s,p);
684    FE_
685 }
686
687
688 #endif /* SUPPORT_LONG_LONGS */
689
690 /* ToDo: this is shockingly inefficient */
691
692 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
693 FN_(name)                                                               \
694 {                                                                       \
695   MP_INT arg1, arg2, result;                                            \
696   I_ s1, s2;                                                            \
697   StgArrWords* d1;                                                      \
698   StgArrWords* d2;                                                      \
699   FB_                                                                   \
700                                                                         \
701   /* call doYouWantToGC() */                                            \
702   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
703                                                                         \
704   d1 = (StgArrWords *)R2.p;                                             \
705   s1 = R1.i;                                                            \
706   d2 = (StgArrWords *)R4.p;                                             \
707   s2 = R3.i;                                                            \
708                                                                         \
709   arg1._mp_alloc        = d1->words;                                    \
710   arg1._mp_size         = (s1);                                         \
711   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
712   arg2._mp_alloc        = d2->words;                                    \
713   arg2._mp_size         = (s2);                                         \
714   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
715                                                                         \
716   STGCALL1(mpz_init,&result);                                           \
717                                                                         \
718   /* Perform the operation */                                           \
719   STGCALL3(mp_fun,&result,&arg1,&arg2);                                 \
720                                                                         \
721   TICK_RET_UNBOXED_TUP(2);                                              \
722   RET_NP(result._mp_size,                                               \
723          result._mp_d-sizeofW(StgArrWords));                            \
724   FE_                                                                   \
725 }
726
727 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
728 FN_(name)                                                               \
729 {                                                                       \
730   MP_INT arg1, result;                                                  \
731   I_ s1;                                                                \
732   StgArrWords* d1;                                                      \
733   FB_                                                                   \
734                                                                         \
735   /* call doYouWantToGC() */                                            \
736   MAYBE_GC(R2_PTR, name);                                               \
737                                                                         \
738   d1 = (StgArrWords *)R2.p;                                             \
739   s1 = R1.i;                                                            \
740                                                                         \
741   arg1._mp_alloc        = d1->words;                                    \
742   arg1._mp_size         = (s1);                                         \
743   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
744                                                                         \
745   STGCALL1(mpz_init,&result);                                           \
746                                                                         \
747   /* Perform the operation */                                           \
748   STGCALL2(mp_fun,&result,&arg1);                                       \
749                                                                         \
750   TICK_RET_UNBOXED_TUP(2);                                              \
751   RET_NP(result._mp_size,                                               \
752          result._mp_d-sizeofW(StgArrWords));                            \
753   FE_                                                                   \
754 }
755
756 #define GMP_TAKE2_RET2(name,mp_fun)                                     \
757 FN_(name)                                                               \
758 {                                                                       \
759   MP_INT arg1, arg2, result1, result2;                                  \
760   I_ s1, s2;                                                            \
761   StgArrWords* d1;                                                      \
762   StgArrWords* d2;                                                      \
763   FB_                                                                   \
764                                                                         \
765   /* call doYouWantToGC() */                                            \
766   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
767                                                                         \
768   d1 = (StgArrWords *)R2.p;                                             \
769   s1 = R1.i;                                                            \
770   d2 = (StgArrWords *)R4.p;                                             \
771   s2 = R3.i;                                                            \
772                                                                         \
773   arg1._mp_alloc        = d1->words;                                    \
774   arg1._mp_size         = (s1);                                         \
775   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
776   arg2._mp_alloc        = d2->words;                                    \
777   arg2._mp_size         = (s2);                                         \
778   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
779                                                                         \
780   STGCALL1(mpz_init,&result1);                                          \
781   STGCALL1(mpz_init,&result2);                                          \
782                                                                         \
783   /* Perform the operation */                                           \
784   STGCALL4(mp_fun,&result1,&result2,&arg1,&arg2);                       \
785                                                                         \
786   TICK_RET_UNBOXED_TUP(4);                                              \
787   RET_NPNP(result1._mp_size,                                            \
788            result1._mp_d-sizeofW(StgArrWords),                          \
789            result2._mp_size,                                            \
790            result2._mp_d-sizeofW(StgArrWords));                         \
791   FE_                                                                   \
792 }
793
794 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add);
795 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub);
796 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul);
797 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd);
798 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q);
799 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r);
800 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact);
801 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and);
802 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior);
803 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor);
804 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com);
805
806 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr);
807 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr);
808
809
810 FN_(gcdIntzh_fast)
811 {
812   /* R1 = the first Int#; R2 = the second Int# */
813   mp_limb_t aa;
814   I_ r;
815   FB_
816   aa = (mp_limb_t)(R1.i);
817   r = RET_STGCALL3(StgInt, mpn_gcd_1, (mp_limb_t *)(&aa), 1, (mp_limb_t)(R2.i));
818
819   R1.i = r;
820   /* Result parked in R1, return via info-pointer at TOS */
821   JMP_(ENTRY_CODE(Sp[0]));
822   FE_
823 }
824
825 FN_(gcdIntegerIntzh_fast)
826 {
827   /* R1 = s1; R2 = d1; R3 = the int */
828   I_ r;
829   FB_
830   r = RET_STGCALL3(StgInt,mpn_gcd_1,(mp_limb_t *)(BYTE_ARR_CTS(R2.p)), R1.i, R3.i);
831
832   R1.i = r;
833   /* Result parked in R1, return via info-pointer at TOS */
834   JMP_(ENTRY_CODE(Sp[0]));
835   FE_
836 }
837
838 FN_(cmpIntegerIntzh_fast)
839 {
840   /* R1 = s1; R2 = d1; R3 = the int */
841   I_ usize;
842   I_ vsize;
843   I_ v_digit;
844   mp_limb_t u_digit;
845   FB_
846
847   usize = R1.i;
848   vsize = 0;
849   v_digit = R3.i;
850
851   // paraphrased from mpz_cmp_si() in the GMP sources
852   if (v_digit > 0) {
853       vsize = 1;
854   } else if (v_digit < 0) {
855       vsize = -1;
856       v_digit = -v_digit;
857   }
858
859   if (usize != vsize) {
860     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
861   }
862
863   if (usize == 0) {
864     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
865   }
866
867   u_digit = *(mp_limb_t *)(BYTE_ARR_CTS(R2.p));
868
869   if (u_digit == (mp_limb_t) (unsigned long) v_digit) {
870     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
871   }
872
873   if (u_digit > (mp_limb_t) (unsigned long) v_digit) {
874     R1.i = usize; 
875   } else {
876     R1.i = -usize; 
877   }
878
879   JMP_(ENTRY_CODE(Sp[0]));
880   FE_
881 }
882
883 FN_(cmpIntegerzh_fast)
884 {
885   /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
886   I_ usize;
887   I_ vsize;
888   I_ size;
889   StgPtr up, vp;
890   int cmp;
891   FB_
892
893   // paraphrased from mpz_cmp() in the GMP sources
894   usize = R1.i;
895   vsize = R3.i;
896
897   if (usize != vsize) {
898     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
899   }
900
901   if (usize == 0) {
902     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
903   }
904
905   size = abs(usize);
906
907   up = BYTE_ARR_CTS(R2.p);
908   vp = BYTE_ARR_CTS(R4.p);
909
910   cmp = RET_STGCALL3(I_, mpn_cmp, (mp_limb_t *)up, (mp_limb_t *)vp, size);
911
912   if (cmp == 0) {
913     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
914   }
915
916   if ((cmp < 0) == (usize < 0)) {
917     R1.i = 1;
918   } else {
919     R1.i = (-1); 
920   }
921   /* Result parked in R1, return via info-pointer at TOS */
922   JMP_(ENTRY_CODE(Sp[0]));
923   FE_
924 }
925
926 FN_(integer2Intzh_fast)
927 {
928   /* R1 = s; R2 = d */
929   I_ r, s;
930   FB_
931   s = R1.i;
932   if (s == 0)
933     r = 0;
934   else {
935     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
936     if (s < 0) r = -r;
937   }
938   /* Result parked in R1, return via info-pointer at TOS */
939   R1.i = r;
940   JMP_(ENTRY_CODE(Sp[0]));
941   FE_
942 }
943
944 FN_(integer2Wordzh_fast)
945 {
946   /* R1 = s; R2 = d */
947   I_ s;
948   W_ r;
949   FB_
950   s = R1.i;
951   if (s == 0)
952     r = 0;
953   else {
954     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
955     if (s < 0) r = -r;
956   }
957   /* Result parked in R1, return via info-pointer at TOS */
958   R1.w = r;
959   JMP_(ENTRY_CODE(Sp[0]));
960   FE_
961 }
962
963
964 FN_(decodeFloatzh_fast)
965
966   MP_INT mantissa;
967   I_ exponent;
968   StgArrWords* p;
969   StgFloat arg;
970   FB_
971
972   /* arguments: F1 = Float# */
973   arg = F1;
974
975   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, decodeFloatzh_fast);
976   TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
977   CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
978
979   /* Be prepared to tell Lennart-coded __decodeFloat    */
980   /* where mantissa._mp_d can be put (it does not care about the rest) */
981   p = (StgArrWords *)Hp - 1;
982   SET_ARR_HDR(p,&stg_ARR_WORDS_info,CCCS,1)
983   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
984
985   /* Perform the operation */
986   STGCALL3(__decodeFloat,&mantissa,&exponent,arg);
987
988   /* returns: (Int# (expn), Int#, ByteArray#) */
989   TICK_RET_UNBOXED_TUP(3);
990   RET_NNP(exponent,mantissa._mp_size,p);
991   FE_
992 }
993
994 #define DOUBLE_MANTISSA_SIZE (sizeofW(StgDouble))
995 #define ARR_SIZE (sizeofW(StgArrWords) + DOUBLE_MANTISSA_SIZE)
996
997 FN_(decodeDoublezh_fast)
998 { MP_INT mantissa;
999   I_ exponent;
1000   StgDouble arg;
1001   StgArrWords* p;
1002   FB_
1003
1004   /* arguments: D1 = Double# */
1005   arg = D1;
1006
1007   HP_CHK_GEN_TICKY(ARR_SIZE, NO_PTRS, decodeDoublezh_fast);
1008   TICK_ALLOC_PRIM(sizeofW(StgArrWords),DOUBLE_MANTISSA_SIZE,0);
1009   CCS_ALLOC(CCCS,ARR_SIZE); /* ccs prof */
1010
1011   /* Be prepared to tell Lennart-coded __decodeDouble   */
1012   /* where mantissa.d can be put (it does not care about the rest) */
1013   p = (StgArrWords *)(Hp-ARR_SIZE+1);
1014   SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, DOUBLE_MANTISSA_SIZE);
1015   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
1016
1017   /* Perform the operation */
1018   STGCALL3(__decodeDouble,&mantissa,&exponent,arg);
1019
1020   /* returns: (Int# (expn), Int#, ByteArray#) */
1021   TICK_RET_UNBOXED_TUP(3);
1022   RET_NNP(exponent,mantissa._mp_size,p);
1023   FE_
1024 }
1025
1026 /* -----------------------------------------------------------------------------
1027  * Concurrency primitives
1028  * -------------------------------------------------------------------------- */
1029
1030 FN_(forkzh_fast)
1031 {
1032   FB_
1033   /* args: R1 = closure to spark */
1034   
1035   MAYBE_GC(R1_PTR, forkzh_fast);
1036
1037   /* create it right now, return ThreadID in R1 */
1038   R1.t = RET_STGCALL2(StgTSO *, createIOThread, 
1039                      RtsFlags.GcFlags.initialStkSize, R1.cl);
1040   STGCALL1(scheduleThread, R1.t);
1041       
1042   /* switch at the earliest opportunity */ 
1043   context_switch = 1;
1044   
1045   RET_P(R1.t);
1046   FE_
1047 }
1048
1049 FN_(forkProcesszh_fast)
1050 {
1051   pid_t pid;
1052
1053   FB_
1054   /* args: none */
1055   /* result: Pid */
1056
1057   R1.i = RET_STGCALL1(StgInt, forkProcess, CurrentTSO);
1058
1059   JMP_(ENTRY_CODE(Sp[0]));
1060
1061   FE_
1062 }
1063
1064 FN_(yieldzh_fast)
1065 {
1066   FB_
1067   JMP_(stg_yield_noregs);
1068   FE_
1069 }
1070
1071 FN_(myThreadIdzh_fast)
1072 {
1073   /* no args. */
1074   FB_
1075   RET_P((P_)CurrentTSO);
1076   FE_
1077 }
1078
1079 FN_(labelThreadzh_fast)
1080 {
1081   FB_
1082   /* args: 
1083         R1.p = ThreadId#
1084         R2.p = Addr# */
1085 #ifdef DEBUG
1086   STGCALL2(labelThread,R1.p,(char *)R2.p);
1087 #endif
1088   JMP_(ENTRY_CODE(Sp[0]));
1089   FE_
1090 }
1091
1092
1093 /* -----------------------------------------------------------------------------
1094  * MVar primitives
1095  *
1096  * take & putMVar work as follows.  Firstly, an important invariant:
1097  *
1098  *    If the MVar is full, then the blocking queue contains only
1099  *    threads blocked on putMVar, and if the MVar is empty then the
1100  *    blocking queue contains only threads blocked on takeMVar.
1101  *
1102  * takeMvar:
1103  *    MVar empty : then add ourselves to the blocking queue
1104  *    MVar full  : remove the value from the MVar, and
1105  *                 blocking queue empty     : return
1106  *                 blocking queue non-empty : perform the first blocked putMVar
1107  *                                            from the queue, and wake up the
1108  *                                            thread (MVar is now full again)
1109  *
1110  * putMVar is just the dual of the above algorithm.
1111  *
1112  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1113  * the stack of the thread waiting to do the putMVar.  See
1114  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1115  * the stack layout, and the PerformPut and PerformTake macros below.
1116  *
1117  * It is important that a blocked take or put is woken up with the
1118  * take/put already performed, because otherwise there would be a
1119  * small window of vulnerability where the thread could receive an
1120  * exception and never perform its take or put, and we'd end up with a
1121  * deadlock.
1122  *
1123  * -------------------------------------------------------------------------- */
1124
1125 FN_(isEmptyMVarzh_fast)
1126 {
1127   /* args: R1 = MVar closure */
1128   I_ r;
1129   FB_
1130   r = (I_)((GET_INFO((StgMVar*)(R1.p))) == &stg_EMPTY_MVAR_info);
1131   RET_N(r);
1132   FE_
1133 }
1134
1135
1136 FN_(newMVarzh_fast)
1137 {
1138   StgMVar *mvar;
1139
1140   FB_
1141   /* args: none */
1142
1143   HP_CHK_GEN_TICKY(sizeofW(StgMVar), NO_PTRS, newMVarzh_fast);
1144   TICK_ALLOC_PRIM(sizeofW(StgMutVar)-1, // consider head,tail,link as admin wds
1145                   1, 0);
1146   CCS_ALLOC(CCCS,sizeofW(StgMVar)); /* ccs prof */
1147   
1148   mvar = (StgMVar *) (Hp - sizeofW(StgMVar) + 1);
1149   SET_HDR(mvar,&stg_EMPTY_MVAR_info,CCCS);
1150   mvar->head = mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1151   mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1152
1153   TICK_RET_UNBOXED_TUP(1);
1154   RET_P(mvar);
1155   FE_
1156 }
1157
1158 /* If R1 isn't available, pass it on the stack */
1159 #ifdef REG_R1
1160 #define PerformTake(tso, value) ({              \
1161     (tso)->sp[1] = (W_)value;                   \
1162     (tso)->sp[0] = (W_)&stg_gc_unpt_r1_info;    \
1163   })
1164 #else
1165 #define PerformTake(tso, value) ({              \
1166     (tso)->sp[1] = (W_)value;                   \
1167     (tso)->sp[0] = (W_)&stg_ut_1_0_unreg_info;  \
1168   })
1169 #endif
1170
1171
1172 #define PerformPut(tso) ({                              \
1173     StgClosure *val = (StgClosure *)(tso)->sp[2];       \
1174     (tso)->sp += 3;                                     \
1175     val;                                                \
1176   })
1177
1178 FN_(takeMVarzh_fast)
1179 {
1180   StgMVar *mvar;
1181   StgClosure *val;
1182   const StgInfoTable *info;
1183
1184   FB_
1185   /* args: R1 = MVar closure */
1186
1187   mvar = (StgMVar *)R1.p;
1188
1189 #ifdef SMP
1190   info = LOCK_CLOSURE(mvar);
1191 #else
1192   info = GET_INFO(mvar);
1193 #endif
1194
1195   /* If the MVar is empty, put ourselves on its blocking queue,
1196    * and wait until we're woken up.
1197    */
1198   if (info == &stg_EMPTY_MVAR_info) {
1199     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1200       mvar->head = CurrentTSO;
1201     } else {
1202       mvar->tail->link = CurrentTSO;
1203     }
1204     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1205     CurrentTSO->why_blocked = BlockedOnMVar;
1206     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1207     mvar->tail = CurrentTSO;
1208
1209 #ifdef SMP
1210     /* unlock the MVar */
1211     mvar->header.info = &stg_EMPTY_MVAR_info;
1212 #endif
1213     JMP_(stg_block_takemvar);
1214   }
1215
1216   /* we got the value... */
1217   val = mvar->value;
1218
1219   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1220       /* There are putMVar(s) waiting... 
1221        * wake up the first thread on the queue
1222        */
1223       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1224
1225       /* actually perform the putMVar for the thread that we just woke up */
1226       mvar->value = PerformPut(mvar->head);
1227
1228 #if defined(GRAN) || defined(PAR)
1229       /* ToDo: check 2nd arg (mvar) is right */
1230       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1231 #else
1232       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1233 #endif
1234       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1235           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1236       }
1237 #ifdef SMP
1238       /* unlock in the SMP case */
1239       SET_INFO(mvar,&stg_FULL_MVAR_info);
1240 #endif
1241       TICK_RET_UNBOXED_TUP(1);
1242       RET_P(val);
1243   } else {
1244       /* No further putMVars, MVar is now empty */
1245
1246       /* do this last... we might have locked the MVar in the SMP case,
1247        * and writing the info pointer will unlock it.
1248        */
1249       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1250       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1251       TICK_RET_UNBOXED_TUP(1);
1252       RET_P(val);
1253   }
1254   FE_
1255 }
1256
1257 FN_(tryTakeMVarzh_fast)
1258 {
1259   StgMVar *mvar;
1260   StgClosure *val;
1261   const StgInfoTable *info;
1262
1263   FB_
1264   /* args: R1 = MVar closure */
1265
1266   mvar = (StgMVar *)R1.p;
1267
1268 #ifdef SMP
1269   info = LOCK_CLOSURE(mvar);
1270 #else
1271   info = GET_INFO(mvar);
1272 #endif
1273
1274   if (info == &stg_EMPTY_MVAR_info) {
1275
1276 #ifdef SMP
1277       /* unlock the MVar */
1278       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1279 #endif
1280
1281       /* HACK: we need a pointer to pass back, 
1282        * so we abuse NO_FINALIZER_closure
1283        */
1284       RET_NP(0, &stg_NO_FINALIZER_closure);
1285   }
1286
1287   /* we got the value... */
1288   val = mvar->value;
1289
1290   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1291       /* There are putMVar(s) waiting... 
1292        * wake up the first thread on the queue
1293        */
1294       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1295
1296       /* actually perform the putMVar for the thread that we just woke up */
1297       mvar->value = PerformPut(mvar->head);
1298
1299 #if defined(GRAN) || defined(PAR)
1300       /* ToDo: check 2nd arg (mvar) is right */
1301       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1302 #else
1303       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1304 #endif
1305       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1306           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1307       }
1308 #ifdef SMP
1309       /* unlock in the SMP case */
1310       SET_INFO(mvar,&stg_FULL_MVAR_info);
1311 #endif
1312   } else {
1313       /* No further putMVars, MVar is now empty */
1314       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1315
1316       /* do this last... we might have locked the MVar in the SMP case,
1317        * and writing the info pointer will unlock it.
1318        */
1319       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1320   }
1321
1322   TICK_RET_UNBOXED_TUP(1);
1323   RET_NP((I_)1, val);
1324   FE_
1325 }
1326
1327 FN_(putMVarzh_fast)
1328 {
1329   StgMVar *mvar;
1330   const StgInfoTable *info;
1331
1332   FB_
1333   /* args: R1 = MVar, R2 = value */
1334
1335   mvar = (StgMVar *)R1.p;
1336
1337 #ifdef SMP
1338   info = LOCK_CLOSURE(mvar);
1339 #else
1340   info = GET_INFO(mvar);
1341 #endif
1342
1343   if (info == &stg_FULL_MVAR_info) {
1344     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1345       mvar->head = CurrentTSO;
1346     } else {
1347       mvar->tail->link = CurrentTSO;
1348     }
1349     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1350     CurrentTSO->why_blocked = BlockedOnMVar;
1351     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1352     mvar->tail = CurrentTSO;
1353
1354 #ifdef SMP
1355     /* unlock the MVar */
1356     SET_INFO(mvar,&stg_FULL_MVAR_info);
1357 #endif
1358     JMP_(stg_block_putmvar);
1359   }
1360   
1361   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1362       /* There are takeMVar(s) waiting: wake up the first one
1363        */
1364       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1365
1366       /* actually perform the takeMVar */
1367       PerformTake(mvar->head, R2.cl);
1368       
1369 #if defined(GRAN) || defined(PAR)
1370       /* ToDo: check 2nd arg (mvar) is right */
1371       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1372 #else
1373       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1374 #endif
1375       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1376           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1377       }
1378 #ifdef SMP
1379       /* unlocks the MVar in the SMP case */
1380       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1381 #endif
1382       JMP_(ENTRY_CODE(Sp[0]));
1383   } else {
1384       /* No further takes, the MVar is now full. */
1385       mvar->value = R2.cl;
1386       /* unlocks the MVar in the SMP case */
1387       SET_INFO(mvar,&stg_FULL_MVAR_info);
1388       JMP_(ENTRY_CODE(Sp[0]));
1389   }
1390
1391   /* ToDo: yield afterward for better communication performance? */
1392   FE_
1393 }
1394
1395 FN_(tryPutMVarzh_fast)
1396 {
1397   StgMVar *mvar;
1398   const StgInfoTable *info;
1399
1400   FB_
1401   /* args: R1 = MVar, R2 = value */
1402
1403   mvar = (StgMVar *)R1.p;
1404
1405 #ifdef SMP
1406   info = LOCK_CLOSURE(mvar);
1407 #else
1408   info = GET_INFO(mvar);
1409 #endif
1410
1411   if (info == &stg_FULL_MVAR_info) {
1412
1413 #ifdef SMP
1414     /* unlock the MVar */
1415     mvar->header.info = &stg_FULL_MVAR_info;
1416 #endif
1417
1418     RET_N(0);
1419   }
1420   
1421   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1422       /* There are takeMVar(s) waiting: wake up the first one
1423        */
1424       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1425
1426       /* actually perform the takeMVar */
1427       PerformTake(mvar->head, R2.cl);
1428       
1429 #if defined(GRAN) || defined(PAR)
1430       /* ToDo: check 2nd arg (mvar) is right */
1431       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1432 #else
1433       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1434 #endif
1435       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1436           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1437       }
1438 #ifdef SMP
1439       /* unlocks the MVar in the SMP case */
1440       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1441 #endif
1442       JMP_(ENTRY_CODE(Sp[0]));
1443   } else {
1444       /* No further takes, the MVar is now full. */
1445       mvar->value = R2.cl;
1446       /* unlocks the MVar in the SMP case */
1447       SET_INFO(mvar,&stg_FULL_MVAR_info);
1448       JMP_(ENTRY_CODE(Sp[0]));
1449   }
1450
1451   /* ToDo: yield afterward for better communication performance? */
1452   FE_
1453 }
1454
1455 /* -----------------------------------------------------------------------------
1456    Stable pointer primitives
1457    -------------------------------------------------------------------------  */
1458
1459 FN_(makeStableNamezh_fast)
1460 {
1461   StgWord index;
1462   StgStableName *sn_obj;
1463   FB_
1464
1465   HP_CHK_GEN_TICKY(sizeofW(StgStableName), R1_PTR, makeStableNamezh_fast);
1466   TICK_ALLOC_PRIM(sizeofW(StgHeader), 
1467                   sizeofW(StgStableName)-sizeofW(StgHeader), 0);
1468   CCS_ALLOC(CCCS,sizeofW(StgStableName)); /* ccs prof */
1469   
1470   index = RET_STGCALL1(StgWord,lookupStableName,R1.p);
1471
1472   /* Is there already a StableName for this heap object? */
1473   if (stable_ptr_table[index].sn_obj == NULL) {
1474     sn_obj = (StgStableName *) (Hp - sizeofW(StgStableName) + 1);
1475     SET_HDR(sn_obj,&stg_STABLE_NAME_info,CCCS);
1476     sn_obj->sn = index;
1477     stable_ptr_table[index].sn_obj = (StgClosure *)sn_obj;
1478   } else {
1479     (StgClosure *)sn_obj = stable_ptr_table[index].sn_obj;
1480   }
1481
1482   TICK_RET_UNBOXED_TUP(1);
1483   RET_P(sn_obj);
1484 }
1485
1486
1487 FN_(makeStablePtrzh_fast)
1488 {
1489   /* Args: R1 = a */
1490   StgStablePtr sp;
1491   FB_
1492   MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1493   sp = RET_STGCALL1(StgStablePtr,getStablePtr,R1.p);
1494   RET_N(sp);
1495   FE_
1496 }
1497
1498 FN_(deRefStablePtrzh_fast)
1499 {
1500   /* Args: R1 = the stable ptr */
1501   P_ r;
1502   StgStablePtr sp;
1503   FB_
1504   sp = (StgStablePtr)R1.w;
1505   r = stable_ptr_table[(StgWord)sp].addr;
1506   RET_P(r);
1507   FE_
1508 }
1509
1510 /* -----------------------------------------------------------------------------
1511    Bytecode object primitives
1512    -------------------------------------------------------------------------  */
1513
1514 FN_(newBCOzh_fast)
1515 {
1516   /* R1.p = instrs
1517      R2.p = literals
1518      R3.p = ptrs
1519      R4.p = itbls
1520   */
1521   StgBCO *bco;
1522   FB_
1523
1524   HP_CHK_GEN_TICKY(sizeofW(StgBCO),R1_PTR|R2_PTR|R3_PTR|R4_PTR, newBCOzh_fast);
1525   TICK_ALLOC_PRIM(sizeofW(StgHeader), sizeofW(StgBCO)-sizeofW(StgHeader), 0);
1526   CCS_ALLOC(CCCS,sizeofW(StgBCO)); /* ccs prof */
1527   bco = (StgBCO *) (Hp + 1 - sizeofW(StgBCO));
1528   SET_HDR(bco, (const StgInfoTable *)&stg_BCO_info, CCCS);
1529
1530   bco->instrs     = (StgArrWords*)R1.cl;
1531   bco->literals   = (StgArrWords*)R2.cl;
1532   bco->ptrs       = (StgMutArrPtrs*)R3.cl;
1533   bco->itbls      = (StgArrWords*)R4.cl;
1534
1535   TICK_RET_UNBOXED_TUP(1);
1536   RET_P(bco);
1537   FE_
1538 }
1539
1540 FN_(mkApUpd0zh_fast)
1541 {
1542   // R1.p = the BCO# for the AP
1543   //
1544   StgPAP* ap;
1545   FB_
1546
1547   // This function is *only* used to wrap zero-arity BCOs in an
1548   // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1549   // saturated and always points directly to a FUN or BCO.
1550   ASSERT(get_itbl(R1.cl)->type == BCO && BCO_ARITY(R1.p) == 0);
1551
1552   HP_CHK_GEN_TICKY(PAP_sizeW(0), R1_PTR, mkApUpd0zh_fast);
1553   TICK_ALLOC_PRIM(sizeofW(StgHeader), PAP_sizeW(0)-sizeofW(StgHeader), 0);
1554   CCS_ALLOC(CCCS,PAP_sizeW(0)); /* ccs prof */
1555   ap = (StgPAP *) (Hp + 1 - PAP_sizeW(0));
1556   SET_HDR(ap, &stg_AP_info, CCCS);
1557
1558   ap->n_args = 0;
1559   ap->fun = R1.cl;
1560
1561   TICK_RET_UNBOXED_TUP(1);
1562   RET_P(ap);
1563   FE_
1564 }
1565
1566 /* -----------------------------------------------------------------------------
1567    Thread I/O blocking primitives
1568    -------------------------------------------------------------------------- */
1569
1570 FN_(waitReadzh_fast)
1571 {
1572   FB_
1573     /* args: R1.i */
1574     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1575     CurrentTSO->why_blocked = BlockedOnRead;
1576     CurrentTSO->block_info.fd = R1.i;
1577     ACQUIRE_LOCK(&sched_mutex);
1578     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1579     RELEASE_LOCK(&sched_mutex);
1580     JMP_(stg_block_noregs);
1581   FE_
1582 }
1583
1584 FN_(waitWritezh_fast)
1585 {
1586   FB_
1587     /* args: R1.i */
1588     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1589     CurrentTSO->why_blocked = BlockedOnWrite;
1590     CurrentTSO->block_info.fd = R1.i;
1591     ACQUIRE_LOCK(&sched_mutex);
1592     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1593     RELEASE_LOCK(&sched_mutex);
1594     JMP_(stg_block_noregs);
1595   FE_
1596 }
1597
1598 FN_(delayzh_fast)
1599 {
1600   StgTSO *t, *prev;
1601   nat target;
1602   FB_
1603     /* args: R1.i */
1604     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1605     CurrentTSO->why_blocked = BlockedOnDelay;
1606
1607     ACQUIRE_LOCK(&sched_mutex);
1608
1609     target = (R1.i / (TICK_MILLISECS*1000)) + getourtimeofday();
1610     CurrentTSO->block_info.target = target;
1611
1612     /* Insert the new thread in the sleeping queue. */
1613     prev = NULL;
1614     t = sleeping_queue;
1615     while (t != END_TSO_QUEUE && t->block_info.target < target) {
1616         prev = t;
1617         t = t->link;
1618     }
1619
1620     CurrentTSO->link = t;
1621     if (prev == NULL) {
1622         sleeping_queue = CurrentTSO;
1623     } else {
1624         prev->link = CurrentTSO;
1625     }
1626
1627     RELEASE_LOCK(&sched_mutex);
1628     JMP_(stg_block_noregs);
1629   FE_
1630 }
1631