[project @ 2003-07-23 13:39:11 by simonmar]
[ghc-hetmet.git] / ghc / rts / PrimOps.hc
1 /* -----------------------------------------------------------------------------
2  * $Id: PrimOps.hc,v 1.110 2003/07/23 13:39:11 simonmar Exp $
3  *
4  * (c) The GHC Team, 1998-2002
5  *
6  * Primitive functions / data
7  *
8  * ---------------------------------------------------------------------------*/
9
10 #include "Stg.h"
11 #include "Rts.h"
12
13 #include "RtsFlags.h"
14 #include "StgStartup.h"
15 #include "SchedAPI.h"
16 #include "Schedule.h"
17 #include "RtsUtils.h"
18 #include "Storage.h"
19 #include "BlockAlloc.h" /* tmp */
20 #include "StablePriv.h"
21 #include "StgRun.h"
22 #include "Timer.h"      /* TICK_MILLISECS */
23 #include "Prelude.h"
24 #ifndef mingw32_TARGET_OS
25 #include "Itimer.h"    /* getourtimeofday() */
26 #endif
27
28 #ifdef HAVE_SYS_TYPES_H
29 # include <sys/types.h>
30 #endif
31
32 #include <stdlib.h>
33
34 #ifdef mingw32_TARGET_OS
35 #include <windows.h>
36 #include "win32/AsyncIO.h"
37 #endif
38
39 /* ** temporary **
40
41    classes CCallable and CReturnable don't really exist, but the
42    compiler insists on generating dictionaries containing references
43    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
44    for these.  Some C compilers can't cope with zero-length static arrays,
45    so we have to make these one element long.
46 */
47
48 StgWord GHC_ZCCCallable_static_info[1];
49 StgWord GHC_ZCCReturnable_static_info[1];
50   
51 /* -----------------------------------------------------------------------------
52    Macros for Hand-written primitives.
53    -------------------------------------------------------------------------- */
54
55 /*
56  * Horrible macros for returning unboxed tuples.
57  *
58  * How an unboxed tuple is returned depends on two factors:
59  *    - the number of real registers we have available
60  *    - the boxedness of the returned fields.
61  *
62  * To return an unboxed tuple from a primitive operation, we have macros
63  * RET_<layout> where <layout> describes the boxedness of each field of the
64  * unboxed tuple:  N indicates a non-pointer field, and P indicates a pointer.
65  *
66  * We only define the cases actually used, to avoid having too much
67  * garbage in this section.  Warning: any bugs in here will be hard to
68  * track down.
69  *
70  * The return convention for an unboxed tuple is as follows:
71  *   - fit as many fields as possible in registers (as per the
72  *     function fast-entry point calling convention).
73  *   - sort the rest of the fields into pointers and non-pointers.
74  *     push the pointers on the stack, followed by the non-pointers.
75  *     (so the pointers have higher addresses).
76  */
77
78 /*------ All Regs available */
79 #if MAX_REAL_VANILLA_REG == 8
80 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
81 # define RET_N(a)     RET_P(a)
82
83 # define RET_PP(a,b)  R1.w = (W_)(a); R2.w = (W_)(b); JMP_(ENTRY_CODE(Sp[0]));
84 # define RET_NN(a,b)  RET_PP(a,b)
85 # define RET_NP(a,b)  RET_PP(a,b)
86
87 # define RET_PPP(a,b,c) \
88         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); JMP_(ENTRY_CODE(Sp[0]));
89 # define RET_NNP(a,b,c) RET_PPP(a,b,c)
90
91 # define RET_NNNP(a,b,c,d) \
92         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)d; \
93         JMP_(ENTRY_CODE(Sp[0]));
94
95 # define RET_NPNP(a,b,c,d) \
96         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)(d); \
97         JMP_(ENTRY_CODE(Sp[0]));
98
99 #elif MAX_REAL_VANILLA_REG > 2 && MAX_REAL_VANILLA_REG < 8
100 # error RET_n macros not defined for this setup.
101
102 /*------ 2 Registers available */
103 #elif MAX_REAL_VANILLA_REG == 2
104
105 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
106 # define RET_N(a)     RET_P(a)
107
108 # define RET_PP(a,b)   R1.w = (W_)(a); R2.w = (W_)(b); \
109                        JMP_(ENTRY_CODE(Sp[0]));
110 # define RET_NN(a,b)   RET_PP(a,b)
111 # define RET_NP(a,b)   RET_PP(a,b)
112
113 # define RET_PPP(a,b,c)                         \
114         R1.w = (W_)(a);                         \
115         R2.w = (W_)(b);                         \
116         Sp[-1] = (W_)(c);                       \
117         Sp -= 1;                                \
118         JMP_(ENTRY_CODE(Sp[1]));
119
120 # define RET_NNP(a,b,c)                         \
121         R1.w = (W_)(a);                         \
122         R2.w = (W_)(b);                         \
123         Sp[-1] = (W_)(c);                       \
124         Sp -= 1;                                \
125         JMP_(ENTRY_CODE(Sp[1]));
126
127 # define RET_NNNP(a,b,c,d)                      \
128         R1.w = (W_)(a);                         \
129         R2.w = (W_)(b);                         \
130         Sp[-2] = (W_)(c);                       \
131         Sp[-1] = (W_)(d);                       \
132         Sp -= 2;                                \
133         JMP_(ENTRY_CODE(Sp[2]));
134
135 # define RET_NPNP(a,b,c,d)                      \
136         R1.w = (W_)(a);                         \
137         R2.w = (W_)(b);                         \
138         Sp[-2] = (W_)(c);                       \
139         Sp[-1] = (W_)(d);                       \
140         Sp -= 2;                                \
141         JMP_(ENTRY_CODE(Sp[2]));
142
143 /*------ 1 Register available */
144 #elif MAX_REAL_VANILLA_REG == 1
145 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
146 # define RET_N(a)     RET_P(a)
147
148 # define RET_PP(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 1; \
149                        JMP_(ENTRY_CODE(Sp[1]));
150 # define RET_NN(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 2; \
151                        JMP_(ENTRY_CODE(Sp[2]));
152 # define RET_NP(a,b)   RET_PP(a,b)
153
154 # define RET_PPP(a,b,c)                         \
155         R1.w = (W_)(a);                         \
156         Sp[-2] = (W_)(b);                       \
157         Sp[-1] = (W_)(c);                       \
158         Sp -= 2;                                \
159         JMP_(ENTRY_CODE(Sp[2]));
160
161 # define RET_NNP(a,b,c)                         \
162         R1.w = (W_)(a);                         \
163         Sp[-2] = (W_)(b);                       \
164         Sp[-1] = (W_)(c);                       \
165         Sp -= 2;                                \
166         JMP_(ENTRY_CODE(Sp[2]));
167
168 # define RET_NNNP(a,b,c,d)                      \
169         R1.w = (W_)(a);                         \
170         Sp[-3] = (W_)(b);                       \
171         Sp[-2] = (W_)(c);                       \
172         Sp[-1] = (W_)(d);                       \
173         Sp -= 3;                                \
174         JMP_(ENTRY_CODE(Sp[3]));
175
176 # define RET_NPNP(a,b,c,d)                      \
177         R1.w = (W_)(a);                         \
178         Sp[-3] = (W_)(c);                       \
179         Sp[-2] = (W_)(b);                       \
180         Sp[-1] = (W_)(d);                       \
181         Sp -= 3;                                \
182         JMP_(ENTRY_CODE(Sp[3]));
183
184 #else /* 0 Regs available */
185
186 #define PUSH(o,x) Sp[-o] = (W_)(x)
187
188 #define PUSHED(m)   Sp -= (m); JMP_(ENTRY_CODE(Sp[m]));
189
190 # define RET_P(a)     PUSH(1,a); PUSHED(1)
191 # define RET_N(a)     PUSH(1,a); PUSHED(1)
192
193 # define RET_PP(a,b)   PUSH(2,a); PUSH(1,b); PUSHED(2)
194 # define RET_NN(a,b)   PUSH(2,a); PUSH(1,b); PUSHED(2)
195 # define RET_NP(a,b)   PUSH(2,a); PUSH(1,b); PUSHED(2)
196
197 # define RET_PPP(a,b,c) PUSH(3,a); PUSH(2,b); PUSH(1,c); PUSHED(3)
198 # define RET_NNP(a,b,c) PUSH(3,a); PUSH(2,b); PUSH(1,c); PUSHED(3)
199
200 # define RET_NNNP(a,b,c,d) PUSH(4,a); PUSH(3,b); PUSH(2,c); PUSH(1,d); PUSHED(4)        
201 # define RET_NPNP(a,b,c,d) PUSH(4,a); PUSH(3,c); PUSH(2,b); PUSH(1,d); PUSHED(4)        
202 #endif
203
204 /*-----------------------------------------------------------------------------
205   Array Primitives
206
207   Basically just new*Array - the others are all inline macros.
208
209   The size arg is always passed in R1, and the result returned in R1.
210
211   The slow entry point is for returning from a heap check, the saved
212   size argument must be re-loaded from the stack.
213   -------------------------------------------------------------------------- */
214
215 /* for objects that are *less* than the size of a word, make sure we
216  * round up to the nearest word for the size of the array.
217  */
218
219 #define BYTES_TO_STGWORDS(n) ((n) + sizeof(W_) - 1)/sizeof(W_)
220
221 FN_(newByteArrayzh_fast)
222  {
223    W_ size, stuff_size, n;
224    StgArrWords* p;
225    FB_
226      MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
227      n = R1.w;
228      stuff_size = BYTES_TO_STGWORDS(n);
229      size = sizeofW(StgArrWords)+ stuff_size;
230      p = (StgArrWords *)RET_STGCALL1(P_,allocate,size);
231      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
232      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
233      p->words = stuff_size;
234      TICK_RET_UNBOXED_TUP(1)
235      RET_P(p);
236    FE_
237  }
238
239 FN_(newPinnedByteArrayzh_fast)
240  {
241    W_ size, stuff_size, n;
242    StgArrWords* p;
243    FB_
244      MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
245      n = R1.w;
246      stuff_size = BYTES_TO_STGWORDS(n);
247
248      // We want an 8-byte aligned array.  allocatePinned() gives us
249      // 8-byte aligned memory by default, but we want to align the
250      // *goods* inside the ArrWords object, so we have to check the
251      // size of the ArrWords header and adjust our size accordingly.
252      size = sizeofW(StgArrWords)+ stuff_size;
253      if ((sizeof(StgArrWords) & 7) != 0) {
254          size++;
255      }
256
257      p = (StgArrWords *)RET_STGCALL1(P_,allocatePinned,size);
258      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
259
260      // Again, if the ArrWords header isn't a multiple of 8 bytes, we
261      // have to push the object forward one word so that the goods
262      // fall on an 8-byte boundary.
263      if ((sizeof(StgArrWords) & 7) != 0) {
264          ((StgPtr)p)++;
265      }
266
267      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
268      p->words = stuff_size;
269      TICK_RET_UNBOXED_TUP(1)
270      RET_P(p);
271    FE_
272  }
273
274 FN_(newArrayzh_fast)
275 {
276   W_ size, n, init;
277   StgMutArrPtrs* arr;
278   StgPtr p;
279   FB_
280     n = R1.w;
281
282     MAYBE_GC(R2_PTR,newArrayzh_fast);
283
284     size = sizeofW(StgMutArrPtrs) + n;
285     arr = (StgMutArrPtrs *)RET_STGCALL1(P_, allocate, size);
286     TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
287
288     SET_HDR(arr,&stg_MUT_ARR_PTRS_info,CCCS);
289     arr->ptrs = n;
290
291     init = R2.w;
292     for (p = (P_)arr + sizeofW(StgMutArrPtrs); 
293          p < (P_)arr + size; p++) {
294         *p = (W_)init;
295     }
296
297     TICK_RET_UNBOXED_TUP(1);
298     RET_P(arr);
299   FE_
300 }
301
302 FN_(newMutVarzh_fast)
303 {
304   StgMutVar* mv;
305   /* Args: R1.p = initialisation value */
306   FB_
307
308   HP_CHK_GEN_TICKY(sizeofW(StgMutVar), R1_PTR, newMutVarzh_fast);
309   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,1, 0); /* hack, dependent on rep. */
310   CCS_ALLOC(CCCS,sizeofW(StgMutVar));
311
312   mv = (StgMutVar *)(Hp-sizeofW(StgMutVar)+1);
313   SET_HDR(mv,&stg_MUT_VAR_info,CCCS);
314   mv->var = R1.cl;
315
316   TICK_RET_UNBOXED_TUP(1);
317   RET_P(mv);
318   FE_
319 }
320
321 FN_(atomicModifyMutVarzh_fast)
322 {
323    StgMutVar* mv;
324    StgClosure *z, *x, *y, *r;
325    FB_
326    /* Args: R1.p :: MutVar#,  R2.p :: a -> (a,b) */
327
328    /* If x is the current contents of the MutVar#, then 
329       We want to make the new contents point to
330
331          (sel_0 (f x))
332  
333       and the return value is
334
335          (sel_1 (f x))
336
337       obviously we can share (f x).
338
339          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
340          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
341          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
342    */
343
344 #define THUNK_SIZE(n) (sizeofW(StgHeader) + stg_max((n), MIN_UPD_SIZE))
345 #define SIZE (THUNK_SIZE(2) + THUNK_SIZE(1) + THUNK_SIZE(1))
346
347    HP_CHK_GEN_TICKY(SIZE, R1_PTR|R2_PTR, atomicModifyMutVarzh_fast);
348    CCS_ALLOC(CCCS,SIZE);
349
350    x = ((StgMutVar *)R1.cl)->var;
351
352    TICK_ALLOC_UP_THK(2,0); // XXX
353    z = (StgClosure *) Hp - THUNK_SIZE(2) + 1;
354    SET_HDR(z, (StgInfoTable *)&stg_ap_2_upd_info, CCCS);
355    z->payload[0] = R2.cl;
356    z->payload[1] = x;
357
358    TICK_ALLOC_UP_THK(1,1); // XXX
359    y = (StgClosure *) (StgPtr)z - THUNK_SIZE(1);
360    SET_HDR(y, &stg_sel_0_upd_info, CCCS);
361    y->payload[0] = z;
362
363    ((StgMutVar *)R1.cl)->var = y;
364
365    TICK_ALLOC_UP_THK(1,1); // XXX
366    r = (StgClosure *) (StgPtr)y - THUNK_SIZE(1);
367    SET_HDR(r, &stg_sel_1_upd_info, CCCS);
368    r->payload[0] = z;
369
370    RET_P(r);
371    FE_
372 }
373
374 /* -----------------------------------------------------------------------------
375    Foreign Object Primitives
376    -------------------------------------------------------------------------- */
377
378 FN_(mkForeignObjzh_fast)
379 {
380   /* R1.p = ptr to foreign object,
381   */
382   StgForeignObj *result;
383   FB_
384
385   HP_CHK_GEN_TICKY(sizeofW(StgForeignObj), NO_PTRS, mkForeignObjzh_fast);
386   TICK_ALLOC_PRIM(sizeofW(StgHeader),
387                   sizeofW(StgForeignObj)-sizeofW(StgHeader), 0);
388   CCS_ALLOC(CCCS,sizeofW(StgForeignObj)); /* ccs prof */
389
390   result = (StgForeignObj *) (Hp + 1 - sizeofW(StgForeignObj));
391   SET_HDR(result,&stg_FOREIGN_info,CCCS);
392   result->data = R1.p;
393
394   /* returns (# s#, ForeignObj# #) */
395   TICK_RET_UNBOXED_TUP(1);
396   RET_P(result);
397   FE_
398 }
399
400 /* These two are out-of-line for the benefit of the NCG */
401 FN_(unsafeThawArrayzh_fast)
402 {
403   FB_
404   SET_INFO((StgClosure *)R1.cl,&stg_MUT_ARR_PTRS_info);
405   recordMutable((StgMutClosure*)R1.cl);
406
407   TICK_RET_UNBOXED_TUP(1);
408   RET_P(R1.p);
409   FE_
410 }
411
412 /* -----------------------------------------------------------------------------
413    Weak Pointer Primitives
414    -------------------------------------------------------------------------- */
415
416 FN_(mkWeakzh_fast)
417 {
418   /* R1.p = key
419      R2.p = value
420      R3.p = finalizer (or NULL)
421   */
422   StgWeak *w;
423   FB_
424
425   if (R3.cl == NULL) {
426     R3.cl = &stg_NO_FINALIZER_closure;
427   }
428
429   HP_CHK_GEN_TICKY(sizeofW(StgWeak),R1_PTR|R2_PTR|R3_PTR, mkWeakzh_fast);
430   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,  // +1 is for the link field
431                   sizeofW(StgWeak)-sizeofW(StgHeader)-1, 0);
432   CCS_ALLOC(CCCS,sizeofW(StgWeak)); /* ccs prof */
433
434   w = (StgWeak *) (Hp + 1 - sizeofW(StgWeak));
435   SET_HDR(w, &stg_WEAK_info, CCCS);
436
437   w->key        = R1.cl;
438   w->value      = R2.cl;
439   w->finalizer  = R3.cl;
440
441   w->link       = weak_ptr_list;
442   weak_ptr_list = w;
443   IF_DEBUG(weak, fprintf(stderr,"New weak pointer at %p\n",w));
444
445   TICK_RET_UNBOXED_TUP(1);
446   RET_P(w);
447   FE_
448 }
449
450 FN_(finalizzeWeakzh_fast)
451 {
452   /* R1.p = weak ptr
453    */
454   StgDeadWeak *w;
455   StgClosure *f;
456   FB_
457   TICK_RET_UNBOXED_TUP(0);
458   w = (StgDeadWeak *)R1.p;
459
460   /* already dead? */
461   if (w->header.info == &stg_DEAD_WEAK_info) {
462       RET_NP(0,&stg_NO_FINALIZER_closure);
463   }
464
465   /* kill it */
466 #ifdef PROFILING
467   // @LDV profiling
468   // A weak pointer is inherently used, so we do not need to call
469   // LDV_recordDead_FILL_SLOP_DYNAMIC():
470   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
471   // or, LDV_recordDead():
472   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
473   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
474   // large as weak pointers, so there is no need to fill the slop, either.
475   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
476 #endif
477   //
478   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
479   //
480   w->header.info = &stg_DEAD_WEAK_info;
481 #ifdef PROFILING
482   // @LDV profiling
483   LDV_recordCreate((StgClosure *)w);
484 #endif
485   f = ((StgWeak *)w)->finalizer;
486   w->link = ((StgWeak *)w)->link;
487
488   /* return the finalizer */
489   if (f == &stg_NO_FINALIZER_closure) {
490       RET_NP(0,&stg_NO_FINALIZER_closure);
491   } else {
492       RET_NP(1,f);
493   }
494   FE_
495 }
496
497 FN_(deRefWeakzh_fast)
498 {
499   /* R1.p = weak ptr */
500   StgWeak* w;
501   I_       code;
502   P_       val;
503   FB_
504   w = (StgWeak*)R1.p;
505   if (w->header.info == &stg_WEAK_info) {
506     code = 1;
507     val = (P_)((StgWeak *)w)->value;
508   } else {
509     code = 0;
510     val = (P_)w;
511   }
512   RET_NP(code,val);
513   FE_
514 }
515
516 /* -----------------------------------------------------------------------------
517    Arbitrary-precision Integer operations.
518    -------------------------------------------------------------------------- */
519
520 FN_(int2Integerzh_fast)
521 {
522    /* arguments: R1 = Int# */
523
524    I_ val, s;           /* to avoid aliasing */
525    StgArrWords* p;      /* address of array result */
526    FB_
527
528    val = R1.i;
529    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, int2Integerzh_fast);
530    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
531    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
532
533    p = (StgArrWords *)Hp - 1;
534    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
535
536    /* mpz_set_si is inlined here, makes things simpler */
537    if (val < 0) { 
538         s  = -1;
539         *Hp = -val;
540    } else if (val > 0) {
541         s = 1;
542         *Hp = val;
543    } else {
544         s = 0;
545    }
546
547    /* returns (# size  :: Int#, 
548                  data  :: ByteArray# 
549                #)
550    */
551    TICK_RET_UNBOXED_TUP(2);
552    RET_NP(s,p);
553    FE_
554 }
555
556 FN_(word2Integerzh_fast)
557 {
558    /* arguments: R1 = Word# */
559
560    W_ val;              /* to avoid aliasing */
561    I_  s;
562    StgArrWords* p;      /* address of array result */
563    FB_
564
565    val = R1.w;
566    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, word2Integerzh_fast)
567    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
568    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
569
570    p = (StgArrWords *)Hp - 1;
571    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
572
573    if (val != 0) {
574         s = 1;
575         *Hp = val;
576    } else {
577         s = 0;
578    }
579
580    /* returns (# size  :: Int#, 
581                  data  :: ByteArray# 
582                #)
583    */
584    TICK_RET_UNBOXED_TUP(2);
585    RET_NP(s,p);
586    FE_
587 }
588
589
590 /*
591  * 'long long' primops for converting to/from Integers.
592  */
593
594 #ifdef SUPPORT_LONG_LONGS
595
596 FN_(int64ToIntegerzh_fast)
597 {
598    /* arguments: L1 = Int64# */
599
600    StgInt64  val; /* to avoid aliasing */
601    W_ hi;
602    I_  s, neg, words_needed;
603    StgArrWords* p;      /* address of array result */
604    FB_
605
606    val = (LI_)L1;
607    neg = 0;
608
609    if ( val >= 0x100000000LL || val <= -0x100000000LL )  { 
610        words_needed = 2;
611    } else { 
612        /* minimum is one word */
613        words_needed = 1;
614    }
615    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, int64ToIntegerzh_fast)
616    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
617    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
618
619    p = (StgArrWords *)(Hp-words_needed+1) - 1;
620    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
621
622    if ( val < 0LL ) {
623      neg = 1;
624      val = -val;
625    }
626
627    hi = (W_)((LW_)val / 0x100000000ULL);
628
629    if ( words_needed == 2 )  { 
630       s = 2;
631       Hp[-1] = (W_)val;
632       Hp[0] = hi;
633    } else if ( val != 0 ) {
634       s = 1;
635       Hp[0] = (W_)val;
636    }  else /* val==0 */   {
637       s = 0;
638    }
639    s = ( neg ? -s : s );
640
641    /* returns (# size  :: Int#, 
642                  data  :: ByteArray# 
643                #)
644    */
645    TICK_RET_UNBOXED_TUP(2);
646    RET_NP(s,p);
647    FE_
648 }
649
650 FN_(word64ToIntegerzh_fast)
651 {
652    /* arguments: L1 = Word64# */
653
654    StgWord64 val; /* to avoid aliasing */
655    StgWord hi;
656    I_  s, words_needed;
657    StgArrWords* p;      /* address of array result */
658    FB_
659
660    val = (LW_)L1;
661    if ( val >= 0x100000000ULL ) {
662       words_needed = 2;
663    } else {
664       words_needed = 1;
665    }
666    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, word64ToIntegerzh_fast)
667    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
668    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
669
670    p = (StgArrWords *)(Hp-words_needed+1) - 1;
671    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
672
673    hi = (W_)((LW_)val / 0x100000000ULL);
674    if ( val >= 0x100000000ULL ) { 
675      s = 2;
676      Hp[-1] = ((W_)val);
677      Hp[0]  = (hi);
678    } else if ( val != 0 )      {
679       s = 1;
680       Hp[0] = ((W_)val);
681    } else /* val==0 */         {
682       s = 0;
683    }
684
685    /* returns (# size  :: Int#, 
686                  data  :: ByteArray# 
687                #)
688    */
689    TICK_RET_UNBOXED_TUP(2);
690    RET_NP(s,p);
691    FE_
692 }
693
694
695 #endif /* SUPPORT_LONG_LONGS */
696
697 /* ToDo: this is shockingly inefficient */
698
699 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
700 FN_(name)                                                               \
701 {                                                                       \
702   MP_INT arg1, arg2, result;                                            \
703   I_ s1, s2;                                                            \
704   StgArrWords* d1;                                                      \
705   StgArrWords* d2;                                                      \
706   FB_                                                                   \
707                                                                         \
708   /* call doYouWantToGC() */                                            \
709   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
710                                                                         \
711   d1 = (StgArrWords *)R2.p;                                             \
712   s1 = R1.i;                                                            \
713   d2 = (StgArrWords *)R4.p;                                             \
714   s2 = R3.i;                                                            \
715                                                                         \
716   arg1._mp_alloc        = d1->words;                                    \
717   arg1._mp_size         = (s1);                                         \
718   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
719   arg2._mp_alloc        = d2->words;                                    \
720   arg2._mp_size         = (s2);                                         \
721   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
722                                                                         \
723   STGCALL1(mpz_init,&result);                                           \
724                                                                         \
725   /* Perform the operation */                                           \
726   STGCALL3(mp_fun,&result,&arg1,&arg2);                                 \
727                                                                         \
728   TICK_RET_UNBOXED_TUP(2);                                              \
729   RET_NP(result._mp_size,                                               \
730          result._mp_d-sizeofW(StgArrWords));                            \
731   FE_                                                                   \
732 }
733
734 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
735 FN_(name)                                                               \
736 {                                                                       \
737   MP_INT arg1, result;                                                  \
738   I_ s1;                                                                \
739   StgArrWords* d1;                                                      \
740   FB_                                                                   \
741                                                                         \
742   /* call doYouWantToGC() */                                            \
743   MAYBE_GC(R2_PTR, name);                                               \
744                                                                         \
745   d1 = (StgArrWords *)R2.p;                                             \
746   s1 = R1.i;                                                            \
747                                                                         \
748   arg1._mp_alloc        = d1->words;                                    \
749   arg1._mp_size         = (s1);                                         \
750   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
751                                                                         \
752   STGCALL1(mpz_init,&result);                                           \
753                                                                         \
754   /* Perform the operation */                                           \
755   STGCALL2(mp_fun,&result,&arg1);                                       \
756                                                                         \
757   TICK_RET_UNBOXED_TUP(2);                                              \
758   RET_NP(result._mp_size,                                               \
759          result._mp_d-sizeofW(StgArrWords));                            \
760   FE_                                                                   \
761 }
762
763 #define GMP_TAKE2_RET2(name,mp_fun)                                     \
764 FN_(name)                                                               \
765 {                                                                       \
766   MP_INT arg1, arg2, result1, result2;                                  \
767   I_ s1, s2;                                                            \
768   StgArrWords* d1;                                                      \
769   StgArrWords* d2;                                                      \
770   FB_                                                                   \
771                                                                         \
772   /* call doYouWantToGC() */                                            \
773   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
774                                                                         \
775   d1 = (StgArrWords *)R2.p;                                             \
776   s1 = R1.i;                                                            \
777   d2 = (StgArrWords *)R4.p;                                             \
778   s2 = R3.i;                                                            \
779                                                                         \
780   arg1._mp_alloc        = d1->words;                                    \
781   arg1._mp_size         = (s1);                                         \
782   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
783   arg2._mp_alloc        = d2->words;                                    \
784   arg2._mp_size         = (s2);                                         \
785   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
786                                                                         \
787   STGCALL1(mpz_init,&result1);                                          \
788   STGCALL1(mpz_init,&result2);                                          \
789                                                                         \
790   /* Perform the operation */                                           \
791   STGCALL4(mp_fun,&result1,&result2,&arg1,&arg2);                       \
792                                                                         \
793   TICK_RET_UNBOXED_TUP(4);                                              \
794   RET_NPNP(result1._mp_size,                                            \
795            result1._mp_d-sizeofW(StgArrWords),                          \
796            result2._mp_size,                                            \
797            result2._mp_d-sizeofW(StgArrWords));                         \
798   FE_                                                                   \
799 }
800
801 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add);
802 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub);
803 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul);
804 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd);
805 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q);
806 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r);
807 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact);
808 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and);
809 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior);
810 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor);
811 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com);
812
813 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr);
814 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr);
815
816
817 FN_(gcdIntzh_fast)
818 {
819   /* R1 = the first Int#; R2 = the second Int# */
820   mp_limb_t aa;
821   I_ r;
822   FB_
823   aa = (mp_limb_t)(R1.i);
824   r = RET_STGCALL3(StgInt, mpn_gcd_1, (mp_limb_t *)(&aa), 1, (mp_limb_t)(R2.i));
825
826   R1.i = r;
827   /* Result parked in R1, return via info-pointer at TOS */
828   JMP_(ENTRY_CODE(Sp[0]));
829   FE_
830 }
831
832 FN_(gcdIntegerIntzh_fast)
833 {
834   /* R1 = s1; R2 = d1; R3 = the int */
835   I_ r;
836   FB_
837   r = RET_STGCALL3(StgInt,mpn_gcd_1,(mp_limb_t *)(BYTE_ARR_CTS(R2.p)), R1.i, R3.i);
838
839   R1.i = r;
840   /* Result parked in R1, return via info-pointer at TOS */
841   JMP_(ENTRY_CODE(Sp[0]));
842   FE_
843 }
844
845 FN_(cmpIntegerIntzh_fast)
846 {
847   /* R1 = s1; R2 = d1; R3 = the int */
848   I_ usize;
849   I_ vsize;
850   I_ v_digit;
851   mp_limb_t u_digit;
852   FB_
853
854   usize = R1.i;
855   vsize = 0;
856   v_digit = R3.i;
857
858   // paraphrased from mpz_cmp_si() in the GMP sources
859   if (v_digit > 0) {
860       vsize = 1;
861   } else if (v_digit < 0) {
862       vsize = -1;
863       v_digit = -v_digit;
864   }
865
866   if (usize != vsize) {
867     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
868   }
869
870   if (usize == 0) {
871     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
872   }
873
874   u_digit = *(mp_limb_t *)(BYTE_ARR_CTS(R2.p));
875
876   if (u_digit == (mp_limb_t) (unsigned long) v_digit) {
877     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
878   }
879
880   if (u_digit > (mp_limb_t) (unsigned long) v_digit) {
881     R1.i = usize; 
882   } else {
883     R1.i = -usize; 
884   }
885
886   JMP_(ENTRY_CODE(Sp[0]));
887   FE_
888 }
889
890 FN_(cmpIntegerzh_fast)
891 {
892   /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
893   I_ usize;
894   I_ vsize;
895   I_ size;
896   StgPtr up, vp;
897   int cmp;
898   FB_
899
900   // paraphrased from mpz_cmp() in the GMP sources
901   usize = R1.i;
902   vsize = R3.i;
903
904   if (usize != vsize) {
905     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
906   }
907
908   if (usize == 0) {
909     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
910   }
911
912   size = abs(usize);
913
914   up = BYTE_ARR_CTS(R2.p);
915   vp = BYTE_ARR_CTS(R4.p);
916
917   cmp = RET_STGCALL3(I_, mpn_cmp, (mp_limb_t *)up, (mp_limb_t *)vp, size);
918
919   if (cmp == 0) {
920     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
921   }
922
923   if ((cmp < 0) == (usize < 0)) {
924     R1.i = 1;
925   } else {
926     R1.i = (-1); 
927   }
928   /* Result parked in R1, return via info-pointer at TOS */
929   JMP_(ENTRY_CODE(Sp[0]));
930   FE_
931 }
932
933 FN_(integer2Intzh_fast)
934 {
935   /* R1 = s; R2 = d */
936   I_ r, s;
937   FB_
938   s = R1.i;
939   if (s == 0)
940     r = 0;
941   else {
942     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
943     if (s < 0) r = -r;
944   }
945   /* Result parked in R1, return via info-pointer at TOS */
946   R1.i = r;
947   JMP_(ENTRY_CODE(Sp[0]));
948   FE_
949 }
950
951 FN_(integer2Wordzh_fast)
952 {
953   /* R1 = s; R2 = d */
954   I_ s;
955   W_ r;
956   FB_
957   s = R1.i;
958   if (s == 0)
959     r = 0;
960   else {
961     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
962     if (s < 0) r = -r;
963   }
964   /* Result parked in R1, return via info-pointer at TOS */
965   R1.w = r;
966   JMP_(ENTRY_CODE(Sp[0]));
967   FE_
968 }
969
970
971 FN_(decodeFloatzh_fast)
972
973   MP_INT mantissa;
974   I_ exponent;
975   StgArrWords* p;
976   StgFloat arg;
977   FB_
978
979   /* arguments: F1 = Float# */
980   arg = F1;
981
982   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, decodeFloatzh_fast);
983   TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
984   CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
985
986   /* Be prepared to tell Lennart-coded __decodeFloat    */
987   /* where mantissa._mp_d can be put (it does not care about the rest) */
988   p = (StgArrWords *)Hp - 1;
989   SET_ARR_HDR(p,&stg_ARR_WORDS_info,CCCS,1)
990   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
991
992   /* Perform the operation */
993   STGCALL3(__decodeFloat,&mantissa,&exponent,arg);
994
995   /* returns: (Int# (expn), Int#, ByteArray#) */
996   TICK_RET_UNBOXED_TUP(3);
997   RET_NNP(exponent,mantissa._mp_size,p);
998   FE_
999 }
1000
1001 #define DOUBLE_MANTISSA_SIZE (sizeofW(StgDouble))
1002 #define ARR_SIZE (sizeofW(StgArrWords) + DOUBLE_MANTISSA_SIZE)
1003
1004 FN_(decodeDoublezh_fast)
1005 { MP_INT mantissa;
1006   I_ exponent;
1007   StgDouble arg;
1008   StgArrWords* p;
1009   FB_
1010
1011   /* arguments: D1 = Double# */
1012   arg = D1;
1013
1014   HP_CHK_GEN_TICKY(ARR_SIZE, NO_PTRS, decodeDoublezh_fast);
1015   TICK_ALLOC_PRIM(sizeofW(StgArrWords),DOUBLE_MANTISSA_SIZE,0);
1016   CCS_ALLOC(CCCS,ARR_SIZE); /* ccs prof */
1017
1018   /* Be prepared to tell Lennart-coded __decodeDouble   */
1019   /* where mantissa.d can be put (it does not care about the rest) */
1020   p = (StgArrWords *)(Hp-ARR_SIZE+1);
1021   SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, DOUBLE_MANTISSA_SIZE);
1022   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
1023
1024   /* Perform the operation */
1025   STGCALL3(__decodeDouble,&mantissa,&exponent,arg);
1026
1027   /* returns: (Int# (expn), Int#, ByteArray#) */
1028   TICK_RET_UNBOXED_TUP(3);
1029   RET_NNP(exponent,mantissa._mp_size,p);
1030   FE_
1031 }
1032
1033 /* -----------------------------------------------------------------------------
1034  * Concurrency primitives
1035  * -------------------------------------------------------------------------- */
1036
1037 FN_(forkzh_fast)
1038 {
1039   FB_
1040   /* args: R1 = closure to spark */
1041   
1042   MAYBE_GC(R1_PTR, forkzh_fast);
1043
1044   /* create it right now, return ThreadID in R1 */
1045   R1.t = RET_STGCALL2(StgTSO *, createIOThread, 
1046                      RtsFlags.GcFlags.initialStkSize, R1.cl);
1047   STGCALL1(scheduleThread, R1.t);
1048       
1049   /* switch at the earliest opportunity */ 
1050   context_switch = 1;
1051   
1052   RET_P(R1.t);
1053   FE_
1054 }
1055
1056 FN_(forkProcesszh_fast)
1057 {
1058   pid_t pid;
1059
1060   FB_
1061   /* args: none */
1062   /* result: Pid */
1063
1064   R1.i = RET_STGCALL1(StgInt, forkProcess, CurrentTSO);
1065
1066   RET_N(R1.i);
1067   FE_
1068 }
1069
1070 FN_(yieldzh_fast)
1071 {
1072   FB_
1073   JMP_(stg_yield_noregs);
1074   FE_
1075 }
1076
1077 FN_(myThreadIdzh_fast)
1078 {
1079   /* no args. */
1080   FB_
1081   RET_P((P_)CurrentTSO);
1082   FE_
1083 }
1084
1085 FN_(labelThreadzh_fast)
1086 {
1087   FB_
1088   /* args: 
1089         R1.p = ThreadId#
1090         R2.p = Addr# */
1091 #ifdef DEBUG
1092   STGCALL2(labelThread,R1.p,(char *)R2.p);
1093 #endif
1094   JMP_(ENTRY_CODE(Sp[0]));
1095   FE_
1096 }
1097
1098
1099 /* -----------------------------------------------------------------------------
1100  * MVar primitives
1101  *
1102  * take & putMVar work as follows.  Firstly, an important invariant:
1103  *
1104  *    If the MVar is full, then the blocking queue contains only
1105  *    threads blocked on putMVar, and if the MVar is empty then the
1106  *    blocking queue contains only threads blocked on takeMVar.
1107  *
1108  * takeMvar:
1109  *    MVar empty : then add ourselves to the blocking queue
1110  *    MVar full  : remove the value from the MVar, and
1111  *                 blocking queue empty     : return
1112  *                 blocking queue non-empty : perform the first blocked putMVar
1113  *                                            from the queue, and wake up the
1114  *                                            thread (MVar is now full again)
1115  *
1116  * putMVar is just the dual of the above algorithm.
1117  *
1118  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1119  * the stack of the thread waiting to do the putMVar.  See
1120  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1121  * the stack layout, and the PerformPut and PerformTake macros below.
1122  *
1123  * It is important that a blocked take or put is woken up with the
1124  * take/put already performed, because otherwise there would be a
1125  * small window of vulnerability where the thread could receive an
1126  * exception and never perform its take or put, and we'd end up with a
1127  * deadlock.
1128  *
1129  * -------------------------------------------------------------------------- */
1130
1131 FN_(isEmptyMVarzh_fast)
1132 {
1133   /* args: R1 = MVar closure */
1134   I_ r;
1135   FB_
1136   r = (I_)((GET_INFO((StgMVar*)(R1.p))) == &stg_EMPTY_MVAR_info);
1137   RET_N(r);
1138   FE_
1139 }
1140
1141
1142 FN_(newMVarzh_fast)
1143 {
1144   StgMVar *mvar;
1145
1146   FB_
1147   /* args: none */
1148
1149   HP_CHK_GEN_TICKY(sizeofW(StgMVar), NO_PTRS, newMVarzh_fast);
1150   TICK_ALLOC_PRIM(sizeofW(StgMutVar)-1, // consider head,tail,link as admin wds
1151                   1, 0);
1152   CCS_ALLOC(CCCS,sizeofW(StgMVar)); /* ccs prof */
1153   
1154   mvar = (StgMVar *) (Hp - sizeofW(StgMVar) + 1);
1155   SET_HDR(mvar,&stg_EMPTY_MVAR_info,CCCS);
1156   mvar->head = mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1157   mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1158
1159   TICK_RET_UNBOXED_TUP(1);
1160   RET_P(mvar);
1161   FE_
1162 }
1163
1164 /* If R1 isn't available, pass it on the stack */
1165 #ifdef REG_R1
1166 #define PerformTake(tso, value) ({              \
1167     (tso)->sp[1] = (W_)value;                   \
1168     (tso)->sp[0] = (W_)&stg_gc_unpt_r1_info;    \
1169   })
1170 #else
1171 #define PerformTake(tso, value) ({              \
1172     (tso)->sp[1] = (W_)value;                   \
1173     (tso)->sp[0] = (W_)&stg_ut_1_0_unreg_info;  \
1174   })
1175 #endif
1176
1177
1178 #define PerformPut(tso) ({                              \
1179     StgClosure *val = (StgClosure *)(tso)->sp[2];       \
1180     (tso)->sp += 3;                                     \
1181     val;                                                \
1182   })
1183
1184 FN_(takeMVarzh_fast)
1185 {
1186   StgMVar *mvar;
1187   StgClosure *val;
1188   const StgInfoTable *info;
1189
1190   FB_
1191   /* args: R1 = MVar closure */
1192
1193   mvar = (StgMVar *)R1.p;
1194
1195 #ifdef SMP
1196   info = LOCK_CLOSURE(mvar);
1197 #else
1198   info = GET_INFO(mvar);
1199 #endif
1200
1201   /* If the MVar is empty, put ourselves on its blocking queue,
1202    * and wait until we're woken up.
1203    */
1204   if (info == &stg_EMPTY_MVAR_info) {
1205     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1206       mvar->head = CurrentTSO;
1207     } else {
1208       mvar->tail->link = CurrentTSO;
1209     }
1210     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1211     CurrentTSO->why_blocked = BlockedOnMVar;
1212     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1213     mvar->tail = CurrentTSO;
1214
1215 #ifdef SMP
1216     /* unlock the MVar */
1217     mvar->header.info = &stg_EMPTY_MVAR_info;
1218 #endif
1219     JMP_(stg_block_takemvar);
1220   }
1221
1222   /* we got the value... */
1223   val = mvar->value;
1224
1225   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1226       /* There are putMVar(s) waiting... 
1227        * wake up the first thread on the queue
1228        */
1229       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1230
1231       /* actually perform the putMVar for the thread that we just woke up */
1232       mvar->value = PerformPut(mvar->head);
1233
1234 #if defined(GRAN) || defined(PAR)
1235       /* ToDo: check 2nd arg (mvar) is right */
1236       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1237 #else
1238       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1239 #endif
1240       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1241           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1242       }
1243 #ifdef SMP
1244       /* unlock in the SMP case */
1245       SET_INFO(mvar,&stg_FULL_MVAR_info);
1246 #endif
1247       TICK_RET_UNBOXED_TUP(1);
1248       RET_P(val);
1249   } else {
1250       /* No further putMVars, MVar is now empty */
1251
1252       /* do this last... we might have locked the MVar in the SMP case,
1253        * and writing the info pointer will unlock it.
1254        */
1255       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1256       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1257       TICK_RET_UNBOXED_TUP(1);
1258       RET_P(val);
1259   }
1260   FE_
1261 }
1262
1263 FN_(tryTakeMVarzh_fast)
1264 {
1265   StgMVar *mvar;
1266   StgClosure *val;
1267   const StgInfoTable *info;
1268
1269   FB_
1270   /* args: R1 = MVar closure */
1271
1272   mvar = (StgMVar *)R1.p;
1273
1274 #ifdef SMP
1275   info = LOCK_CLOSURE(mvar);
1276 #else
1277   info = GET_INFO(mvar);
1278 #endif
1279
1280   if (info == &stg_EMPTY_MVAR_info) {
1281
1282 #ifdef SMP
1283       /* unlock the MVar */
1284       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1285 #endif
1286
1287       /* HACK: we need a pointer to pass back, 
1288        * so we abuse NO_FINALIZER_closure
1289        */
1290       RET_NP(0, &stg_NO_FINALIZER_closure);
1291   }
1292
1293   /* we got the value... */
1294   val = mvar->value;
1295
1296   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1297       /* There are putMVar(s) waiting... 
1298        * wake up the first thread on the queue
1299        */
1300       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1301
1302       /* actually perform the putMVar for the thread that we just woke up */
1303       mvar->value = PerformPut(mvar->head);
1304
1305 #if defined(GRAN) || defined(PAR)
1306       /* ToDo: check 2nd arg (mvar) is right */
1307       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1308 #else
1309       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1310 #endif
1311       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1312           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1313       }
1314 #ifdef SMP
1315       /* unlock in the SMP case */
1316       SET_INFO(mvar,&stg_FULL_MVAR_info);
1317 #endif
1318   } else {
1319       /* No further putMVars, MVar is now empty */
1320       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1321
1322       /* do this last... we might have locked the MVar in the SMP case,
1323        * and writing the info pointer will unlock it.
1324        */
1325       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1326   }
1327
1328   TICK_RET_UNBOXED_TUP(1);
1329   RET_NP((I_)1, val);
1330   FE_
1331 }
1332
1333 FN_(putMVarzh_fast)
1334 {
1335   StgMVar *mvar;
1336   const StgInfoTable *info;
1337
1338   FB_
1339   /* args: R1 = MVar, R2 = value */
1340
1341   mvar = (StgMVar *)R1.p;
1342
1343 #ifdef SMP
1344   info = LOCK_CLOSURE(mvar);
1345 #else
1346   info = GET_INFO(mvar);
1347 #endif
1348
1349   if (info == &stg_FULL_MVAR_info) {
1350     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1351       mvar->head = CurrentTSO;
1352     } else {
1353       mvar->tail->link = CurrentTSO;
1354     }
1355     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1356     CurrentTSO->why_blocked = BlockedOnMVar;
1357     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1358     mvar->tail = CurrentTSO;
1359
1360 #ifdef SMP
1361     /* unlock the MVar */
1362     SET_INFO(mvar,&stg_FULL_MVAR_info);
1363 #endif
1364     JMP_(stg_block_putmvar);
1365   }
1366   
1367   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1368       /* There are takeMVar(s) waiting: wake up the first one
1369        */
1370       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1371
1372       /* actually perform the takeMVar */
1373       PerformTake(mvar->head, R2.cl);
1374       
1375 #if defined(GRAN) || defined(PAR)
1376       /* ToDo: check 2nd arg (mvar) is right */
1377       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1378 #else
1379       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1380 #endif
1381       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1382           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1383       }
1384 #ifdef SMP
1385       /* unlocks the MVar in the SMP case */
1386       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1387 #endif
1388       JMP_(ENTRY_CODE(Sp[0]));
1389   } else {
1390       /* No further takes, the MVar is now full. */
1391       mvar->value = R2.cl;
1392       /* unlocks the MVar in the SMP case */
1393       SET_INFO(mvar,&stg_FULL_MVAR_info);
1394       JMP_(ENTRY_CODE(Sp[0]));
1395   }
1396
1397   /* ToDo: yield afterward for better communication performance? */
1398   FE_
1399 }
1400
1401 FN_(tryPutMVarzh_fast)
1402 {
1403   StgMVar *mvar;
1404   const StgInfoTable *info;
1405
1406   FB_
1407   /* args: R1 = MVar, R2 = value */
1408
1409   mvar = (StgMVar *)R1.p;
1410
1411 #ifdef SMP
1412   info = LOCK_CLOSURE(mvar);
1413 #else
1414   info = GET_INFO(mvar);
1415 #endif
1416
1417   if (info == &stg_FULL_MVAR_info) {
1418
1419 #ifdef SMP
1420     /* unlock the MVar */
1421     mvar->header.info = &stg_FULL_MVAR_info;
1422 #endif
1423
1424     RET_N(0);
1425   }
1426   
1427   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1428       /* There are takeMVar(s) waiting: wake up the first one
1429        */
1430       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1431
1432       /* actually perform the takeMVar */
1433       PerformTake(mvar->head, R2.cl);
1434       
1435 #if defined(GRAN) || defined(PAR)
1436       /* ToDo: check 2nd arg (mvar) is right */
1437       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1438 #else
1439       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1440 #endif
1441       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1442           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1443       }
1444 #ifdef SMP
1445       /* unlocks the MVar in the SMP case */
1446       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1447 #endif
1448       JMP_(ENTRY_CODE(Sp[0]));
1449   } else {
1450       /* No further takes, the MVar is now full. */
1451       mvar->value = R2.cl;
1452       /* unlocks the MVar in the SMP case */
1453       SET_INFO(mvar,&stg_FULL_MVAR_info);
1454       JMP_(ENTRY_CODE(Sp[0]));
1455   }
1456
1457   /* ToDo: yield afterward for better communication performance? */
1458   FE_
1459 }
1460
1461 /* -----------------------------------------------------------------------------
1462    Stable pointer primitives
1463    -------------------------------------------------------------------------  */
1464
1465 FN_(makeStableNamezh_fast)
1466 {
1467   StgWord index;
1468   StgStableName *sn_obj;
1469   FB_
1470
1471   HP_CHK_GEN_TICKY(sizeofW(StgStableName), R1_PTR, makeStableNamezh_fast);
1472   TICK_ALLOC_PRIM(sizeofW(StgHeader), 
1473                   sizeofW(StgStableName)-sizeofW(StgHeader), 0);
1474   CCS_ALLOC(CCCS,sizeofW(StgStableName)); /* ccs prof */
1475   
1476   index = RET_STGCALL1(StgWord,lookupStableName,R1.p);
1477
1478   /* Is there already a StableName for this heap object? */
1479   if (stable_ptr_table[index].sn_obj == NULL) {
1480     sn_obj = (StgStableName *) (Hp - sizeofW(StgStableName) + 1);
1481     SET_HDR(sn_obj,&stg_STABLE_NAME_info,CCCS);
1482     sn_obj->sn = index;
1483     stable_ptr_table[index].sn_obj = (StgClosure *)sn_obj;
1484   } else {
1485     (StgClosure *)sn_obj = stable_ptr_table[index].sn_obj;
1486   }
1487
1488   TICK_RET_UNBOXED_TUP(1);
1489   RET_P(sn_obj);
1490 }
1491
1492
1493 FN_(makeStablePtrzh_fast)
1494 {
1495   /* Args: R1 = a */
1496   StgStablePtr sp;
1497   FB_
1498   MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1499   sp = RET_STGCALL1(StgStablePtr,getStablePtr,R1.p);
1500   RET_N(sp);
1501   FE_
1502 }
1503
1504 FN_(deRefStablePtrzh_fast)
1505 {
1506   /* Args: R1 = the stable ptr */
1507   P_ r;
1508   StgStablePtr sp;
1509   FB_
1510   sp = (StgStablePtr)R1.w;
1511   r = stable_ptr_table[(StgWord)sp].addr;
1512   RET_P(r);
1513   FE_
1514 }
1515
1516 /* -----------------------------------------------------------------------------
1517    Bytecode object primitives
1518    -------------------------------------------------------------------------  */
1519
1520 FN_(newBCOzh_fast)
1521 {
1522   /* R1.p = instrs
1523      R2.p = literals
1524      R3.p = ptrs
1525      R4.p = itbls
1526      R5.i = arity
1527      R6.p = bitmap array
1528   */
1529   StgBCO *bco;
1530   nat size;
1531   StgArrWords *bitmap_arr;
1532   FB_
1533
1534   bitmap_arr = (StgArrWords *)R6.cl;
1535   size = sizeofW(StgBCO) + bitmap_arr->words;
1536   HP_CHK_GEN_TICKY(size,R1_PTR|R2_PTR|R3_PTR|R4_PTR|R6_PTR, newBCOzh_fast);
1537   TICK_ALLOC_PRIM(size, size-sizeofW(StgHeader), 0);
1538   CCS_ALLOC(CCCS,size); /* ccs prof */
1539   bco = (StgBCO *) (Hp + 1 - size);
1540   SET_HDR(bco, (const StgInfoTable *)&stg_BCO_info, CCCS);
1541
1542   bco->instrs     = (StgArrWords*)R1.cl;
1543   bco->literals   = (StgArrWords*)R2.cl;
1544   bco->ptrs       = (StgMutArrPtrs*)R3.cl;
1545   bco->itbls      = (StgArrWords*)R4.cl;
1546   bco->arity      = R5.w;
1547   bco->size       = size;
1548
1549   // Copy the arity/bitmap info into the BCO
1550   { 
1551     int i;
1552     for (i = 0; i < bitmap_arr->words; i++) {
1553         bco->bitmap[i] = bitmap_arr->payload[i];
1554     }
1555   }
1556
1557   TICK_RET_UNBOXED_TUP(1);
1558   RET_P(bco);
1559   FE_
1560 }
1561
1562 FN_(mkApUpd0zh_fast)
1563 {
1564   // R1.p = the BCO# for the AP
1565   //
1566   StgPAP* ap;
1567   FB_
1568
1569   // This function is *only* used to wrap zero-arity BCOs in an
1570   // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1571   // saturated and always points directly to a FUN or BCO.
1572   ASSERT(get_itbl(R1.cl)->type == BCO && ((StgBCO *)R1.p)->arity == 0);
1573
1574   HP_CHK_GEN_TICKY(PAP_sizeW(0), R1_PTR, mkApUpd0zh_fast);
1575   TICK_ALLOC_PRIM(sizeofW(StgHeader), PAP_sizeW(0)-sizeofW(StgHeader), 0);
1576   CCS_ALLOC(CCCS,PAP_sizeW(0)); /* ccs prof */
1577   ap = (StgPAP *) (Hp + 1 - PAP_sizeW(0));
1578   SET_HDR(ap, &stg_AP_info, CCCS);
1579
1580   ap->n_args = 0;
1581   ap->fun = R1.cl;
1582
1583   TICK_RET_UNBOXED_TUP(1);
1584   RET_P(ap);
1585   FE_
1586 }
1587
1588 /* -----------------------------------------------------------------------------
1589    Thread I/O blocking primitives
1590    -------------------------------------------------------------------------- */
1591
1592 FN_(waitReadzh_fast)
1593 {
1594   FB_
1595     /* args: R1.i */
1596     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1597     CurrentTSO->why_blocked = BlockedOnRead;
1598     CurrentTSO->block_info.fd = R1.i;
1599     ACQUIRE_LOCK(&sched_mutex);
1600     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1601     RELEASE_LOCK(&sched_mutex);
1602     JMP_(stg_block_noregs);
1603   FE_
1604 }
1605
1606 FN_(waitWritezh_fast)
1607 {
1608   FB_
1609     /* args: R1.i */
1610     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1611     CurrentTSO->why_blocked = BlockedOnWrite;
1612     CurrentTSO->block_info.fd = R1.i;
1613     ACQUIRE_LOCK(&sched_mutex);
1614     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1615     RELEASE_LOCK(&sched_mutex);
1616     JMP_(stg_block_noregs);
1617   FE_
1618 }
1619
1620 FN_(delayzh_fast)
1621 {
1622 #ifdef mingw32_TARGET_OS
1623   StgAsyncIOResult* ares;
1624   unsigned int reqID;
1625 #else
1626   StgTSO *t, *prev;
1627   nat target;
1628 #endif
1629   FB_
1630     /* args: R1.i */
1631     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1632     CurrentTSO->why_blocked = BlockedOnDelay;
1633
1634     ACQUIRE_LOCK(&sched_mutex);
1635 #ifdef mingw32_TARGET_OS
1636     /* could probably allocate this on the heap instead */
1637     ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncWritezh_fast");
1638     reqID = RET_STGCALL1(W_,addDelayRequest,R1.i);
1639     ares->reqID   = reqID;
1640     ares->len     = 0;
1641     ares->errCode = 0;
1642     CurrentTSO->block_info.async_result = ares;
1643     /* Having all async-blocked threads reside on the blocked_queue simplifies matters, so
1644      * change the status to OnDoProc & put the delayed thread on the blocked_queue.
1645      */
1646     CurrentTSO->why_blocked = BlockedOnDoProc;
1647     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1648 #else
1649     target = (R1.i / (TICK_MILLISECS*1000)) + getourtimeofday();
1650     CurrentTSO->block_info.target = target;
1651
1652     /* Insert the new thread in the sleeping queue. */
1653     prev = NULL;
1654     t = sleeping_queue;
1655     while (t != END_TSO_QUEUE && t->block_info.target < target) {
1656         prev = t;
1657         t = t->link;
1658     }
1659
1660     CurrentTSO->link = t;
1661     if (prev == NULL) {
1662         sleeping_queue = CurrentTSO;
1663     } else {
1664         prev->link = CurrentTSO;
1665     }
1666 #endif
1667     RELEASE_LOCK(&sched_mutex);
1668     JMP_(stg_block_noregs);
1669   FE_
1670 }
1671
1672 #ifdef mingw32_TARGET_OS
1673 FN_(asyncReadzh_fast)
1674 {
1675   StgAsyncIOResult* ares;
1676   unsigned int reqID;
1677   FB_
1678     /* args: R1.i = fd, R2.i = isSock, R3.i = len, R4.p = buf */
1679     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1680     CurrentTSO->why_blocked = BlockedOnRead;
1681     ACQUIRE_LOCK(&sched_mutex);
1682     /* could probably allocate this on the heap instead */
1683     ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncReadzh_fast");
1684     reqID = RET_STGCALL5(W_,addIORequest,R1.i,FALSE,R2.i,R3.i,(char*)R4.p);
1685     ares->reqID   = reqID;
1686     ares->len     = 0;
1687     ares->errCode = 0;
1688     CurrentTSO->block_info.async_result = ares;
1689     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1690     RELEASE_LOCK(&sched_mutex);
1691     JMP_(stg_block_async);
1692   FE_
1693 }
1694
1695 FN_(asyncWritezh_fast)
1696 {
1697   StgAsyncIOResult* ares;
1698   unsigned int reqID;
1699   FB_
1700     /* args: R1.i */
1701     /* args: R1.i = fd, R2.i = isSock, R3.i = len, R4.p = buf */
1702     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1703     CurrentTSO->why_blocked = BlockedOnWrite;
1704     ACQUIRE_LOCK(&sched_mutex);
1705     ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncWritezh_fast");
1706     reqID = RET_STGCALL5(W_,addIORequest,R1.i,TRUE,R2.i,R3.i,(char*)R4.p);
1707     ares->reqID   = reqID;
1708     ares->len     = 0;
1709     ares->errCode = 0;
1710     CurrentTSO->block_info.async_result = ares;
1711     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1712     RELEASE_LOCK(&sched_mutex);
1713     JMP_(stg_block_async);
1714   FE_
1715 }
1716
1717 FN_(asyncDoProczh_fast)
1718 {
1719   StgAsyncIOResult* ares;
1720   unsigned int reqID;
1721   FB_
1722     /* args: R1.i = proc, R2.i = param */
1723     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1724     CurrentTSO->why_blocked = BlockedOnDoProc;
1725     ACQUIRE_LOCK(&sched_mutex);
1726     /* could probably allocate this on the heap instead */
1727     ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncDoProczh_fast");
1728     reqID = RET_STGCALL2(W_,addDoProcRequest,R1.p,R2.p);
1729     ares->reqID   = reqID;
1730     ares->len     = 0;
1731     ares->errCode = 0;
1732     CurrentTSO->block_info.async_result = ares;
1733     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1734     RELEASE_LOCK(&sched_mutex);
1735     JMP_(stg_block_async);
1736   FE_
1737 }
1738 #endif