[project @ 2003-10-20 17:15:27 by sof]
[ghc-hetmet.git] / ghc / rts / PrimOps.hc
1 /* -----------------------------------------------------------------------------
2  * $Id: PrimOps.hc,v 1.114 2003/10/01 10:57:41 wolfgang Exp $
3  *
4  * (c) The GHC Team, 1998-2002
5  *
6  * Primitive functions / data
7  *
8  * ---------------------------------------------------------------------------*/
9
10 #include "Stg.h"
11 #include "Rts.h"
12
13 #include "RtsFlags.h"
14 #include "StgStartup.h"
15 #include "SchedAPI.h"
16 #include "Schedule.h"
17 #include "RtsUtils.h"
18 #include "Storage.h"
19 #include "BlockAlloc.h" /* tmp */
20 #include "StablePriv.h"
21 #include "StgRun.h"
22 #include "Timer.h"      /* TICK_MILLISECS */
23 #include "Prelude.h"
24 #ifndef mingw32_TARGET_OS
25 #include "Itimer.h"    /* getourtimeofday() */
26 #endif
27
28 #ifdef HAVE_SYS_TYPES_H
29 # include <sys/types.h>
30 #endif
31
32 #include <stdlib.h>
33
34 #ifdef mingw32_TARGET_OS
35 #include <windows.h>
36 #include "win32/AsyncIO.h"
37 #endif
38
39 /* ** temporary **
40
41    classes CCallable and CReturnable don't really exist, but the
42    compiler insists on generating dictionaries containing references
43    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
44    for these.  Some C compilers can't cope with zero-length static arrays,
45    so we have to make these one element long.
46 */
47
48 StgWord GHC_ZCCCallable_static_info[1];
49 StgWord GHC_ZCCReturnable_static_info[1];
50   
51 /* -----------------------------------------------------------------------------
52    Macros for Hand-written primitives.
53    -------------------------------------------------------------------------- */
54
55 /*
56  * Horrible macros for returning unboxed tuples.
57  *
58  * How an unboxed tuple is returned depends on two factors:
59  *    - the number of real registers we have available
60  *    - the boxedness of the returned fields.
61  *
62  * To return an unboxed tuple from a primitive operation, we have macros
63  * RET_<layout> where <layout> describes the boxedness of each field of the
64  * unboxed tuple:  N indicates a non-pointer field, and P indicates a pointer.
65  *
66  * We only define the cases actually used, to avoid having too much
67  * garbage in this section.  Warning: any bugs in here will be hard to
68  * track down.
69  *
70  * The return convention for an unboxed tuple is as follows:
71  *   - fit as many fields as possible in registers (as per the
72  *     function fast-entry point calling convention).
73  *   - sort the rest of the fields into pointers and non-pointers.
74  *     push the pointers on the stack, followed by the non-pointers.
75  *     (so the pointers have higher addresses).
76  */
77
78 /*------ All Regs available */
79 #if MAX_REAL_VANILLA_REG == 8
80 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
81 # define RET_N(a)     RET_P(a)
82
83 # define RET_PP(a,b)  R1.w = (W_)(a); R2.w = (W_)(b); JMP_(ENTRY_CODE(Sp[0]));
84 # define RET_NN(a,b)  RET_PP(a,b)
85 # define RET_NP(a,b)  RET_PP(a,b)
86
87 # define RET_PPP(a,b,c) \
88         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); JMP_(ENTRY_CODE(Sp[0]));
89 # define RET_NNP(a,b,c) RET_PPP(a,b,c)
90
91 # define RET_NNNP(a,b,c,d) \
92         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)d; \
93         JMP_(ENTRY_CODE(Sp[0]));
94
95 # define RET_NPNP(a,b,c,d) \
96         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)(d); \
97         JMP_(ENTRY_CODE(Sp[0]));
98
99 #elif MAX_REAL_VANILLA_REG > 2 && MAX_REAL_VANILLA_REG < 8
100 # error RET_n macros not defined for this setup.
101
102 /*------ 2 Registers available */
103 #elif MAX_REAL_VANILLA_REG == 2
104
105 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
106 # define RET_N(a)     RET_P(a)
107
108 # define RET_PP(a,b)   R1.w = (W_)(a); R2.w = (W_)(b); \
109                        JMP_(ENTRY_CODE(Sp[0]));
110 # define RET_NN(a,b)   RET_PP(a,b)
111 # define RET_NP(a,b)   RET_PP(a,b)
112
113 # define RET_PPP(a,b,c)                         \
114         R1.w = (W_)(a);                         \
115         R2.w = (W_)(b);                         \
116         Sp[-1] = (W_)(c);                       \
117         Sp -= 1;                                \
118         JMP_(ENTRY_CODE(Sp[1]));
119
120 # define RET_NNP(a,b,c)                         \
121         R1.w = (W_)(a);                         \
122         R2.w = (W_)(b);                         \
123         Sp[-1] = (W_)(c);                       \
124         Sp -= 1;                                \
125         JMP_(ENTRY_CODE(Sp[1]));
126
127 # define RET_NNNP(a,b,c,d)                      \
128         R1.w = (W_)(a);                         \
129         R2.w = (W_)(b);                         \
130         Sp[-2] = (W_)(c);                       \
131         Sp[-1] = (W_)(d);                       \
132         Sp -= 2;                                \
133         JMP_(ENTRY_CODE(Sp[2]));
134
135 # define RET_NPNP(a,b,c,d)                      \
136         R1.w = (W_)(a);                         \
137         R2.w = (W_)(b);                         \
138         Sp[-2] = (W_)(c);                       \
139         Sp[-1] = (W_)(d);                       \
140         Sp -= 2;                                \
141         JMP_(ENTRY_CODE(Sp[2]));
142
143 /*------ 1 Register available */
144 #elif MAX_REAL_VANILLA_REG == 1
145 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
146 # define RET_N(a)     RET_P(a)
147
148 # define RET_PP(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 1; \
149                        JMP_(ENTRY_CODE(Sp[1]));
150 # define RET_NN(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 2; \
151                        JMP_(ENTRY_CODE(Sp[2]));
152 # define RET_NP(a,b)   RET_PP(a,b)
153
154 # define RET_PPP(a,b,c)                         \
155         R1.w = (W_)(a);                         \
156         Sp[-2] = (W_)(b);                       \
157         Sp[-1] = (W_)(c);                       \
158         Sp -= 2;                                \
159         JMP_(ENTRY_CODE(Sp[2]));
160
161 # define RET_NNP(a,b,c)                         \
162         R1.w = (W_)(a);                         \
163         Sp[-2] = (W_)(b);                       \
164         Sp[-1] = (W_)(c);                       \
165         Sp -= 2;                                \
166         JMP_(ENTRY_CODE(Sp[2]));
167
168 # define RET_NNNP(a,b,c,d)                      \
169         R1.w = (W_)(a);                         \
170         Sp[-3] = (W_)(b);                       \
171         Sp[-2] = (W_)(c);                       \
172         Sp[-1] = (W_)(d);                       \
173         Sp -= 3;                                \
174         JMP_(ENTRY_CODE(Sp[3]));
175
176 # define RET_NPNP(a,b,c,d)                      \
177         R1.w = (W_)(a);                         \
178         Sp[-3] = (W_)(c);                       \
179         Sp[-2] = (W_)(b);                       \
180         Sp[-1] = (W_)(d);                       \
181         Sp -= 3;                                \
182         JMP_(ENTRY_CODE(Sp[3]));
183
184 #else /* 0 Regs available */
185
186 #define PUSH(o,x) Sp[-o] = (W_)(x)
187
188 #define PUSHED(m)   Sp -= (m); JMP_(ENTRY_CODE(Sp[m]));
189
190 # define RET_P(a)     PUSH(1,a); PUSHED(1)
191 # define RET_N(a)     PUSH(1,a); PUSHED(1)
192
193 # define RET_PP(a,b)   PUSH(2,a); PUSH(1,b); PUSHED(2)
194 # define RET_NN(a,b)   PUSH(2,a); PUSH(1,b); PUSHED(2)
195 # define RET_NP(a,b)   PUSH(2,a); PUSH(1,b); PUSHED(2)
196
197 # define RET_PPP(a,b,c) PUSH(3,a); PUSH(2,b); PUSH(1,c); PUSHED(3)
198 # define RET_NNP(a,b,c) PUSH(3,a); PUSH(2,b); PUSH(1,c); PUSHED(3)
199
200 # define RET_NNNP(a,b,c,d) PUSH(4,a); PUSH(3,b); PUSH(2,c); PUSH(1,d); PUSHED(4)        
201 # define RET_NPNP(a,b,c,d) PUSH(4,a); PUSH(3,c); PUSH(2,b); PUSH(1,d); PUSHED(4)        
202 #endif
203
204 /*-----------------------------------------------------------------------------
205   Array Primitives
206
207   Basically just new*Array - the others are all inline macros.
208
209   The size arg is always passed in R1, and the result returned in R1.
210
211   The slow entry point is for returning from a heap check, the saved
212   size argument must be re-loaded from the stack.
213   -------------------------------------------------------------------------- */
214
215 /* for objects that are *less* than the size of a word, make sure we
216  * round up to the nearest word for the size of the array.
217  */
218
219 #define BYTES_TO_STGWORDS(n) ((n) + sizeof(W_) - 1)/sizeof(W_)
220
221 FN_(newByteArrayzh_fast)
222  {
223    W_ size, stuff_size, n;
224    StgArrWords* p;
225    FB_
226      MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
227      n = R1.w;
228      stuff_size = BYTES_TO_STGWORDS(n);
229      size = sizeofW(StgArrWords)+ stuff_size;
230      p = (StgArrWords *)RET_STGCALL1(P_,allocate,size);
231      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
232      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
233      p->words = stuff_size;
234      TICK_RET_UNBOXED_TUP(1)
235      RET_P(p);
236    FE_
237  }
238
239 FN_(newPinnedByteArrayzh_fast)
240  {
241    W_ size, stuff_size, n;
242    StgArrWords* p;
243    FB_
244      MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
245      n = R1.w;
246      stuff_size = BYTES_TO_STGWORDS(n);
247
248      // We want an 8-byte aligned array.  allocatePinned() gives us
249      // 8-byte aligned memory by default, but we want to align the
250      // *goods* inside the ArrWords object, so we have to check the
251      // size of the ArrWords header and adjust our size accordingly.
252      size = sizeofW(StgArrWords)+ stuff_size;
253      if ((sizeof(StgArrWords) & 7) != 0) {
254          size++;
255      }
256
257      p = (StgArrWords *)RET_STGCALL1(P_,allocatePinned,size);
258      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
259
260      // Again, if the ArrWords header isn't a multiple of 8 bytes, we
261      // have to push the object forward one word so that the goods
262      // fall on an 8-byte boundary.
263      if ((sizeof(StgArrWords) & 7) != 0) {
264          ((StgPtr)p)++;
265      }
266
267      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
268      p->words = stuff_size;
269      TICK_RET_UNBOXED_TUP(1)
270      RET_P(p);
271    FE_
272  }
273
274 FN_(newArrayzh_fast)
275 {
276   W_ size, n, init;
277   StgMutArrPtrs* arr;
278   StgPtr p;
279   FB_
280     n = R1.w;
281
282     MAYBE_GC(R2_PTR,newArrayzh_fast);
283
284     size = sizeofW(StgMutArrPtrs) + n;
285     arr = (StgMutArrPtrs *)RET_STGCALL1(P_, allocate, size);
286     TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
287
288     SET_HDR(arr,&stg_MUT_ARR_PTRS_info,CCCS);
289     arr->ptrs = n;
290
291     init = R2.w;
292     for (p = (P_)arr + sizeofW(StgMutArrPtrs); 
293          p < (P_)arr + size; p++) {
294         *p = (W_)init;
295     }
296
297     TICK_RET_UNBOXED_TUP(1);
298     RET_P(arr);
299   FE_
300 }
301
302 FN_(newMutVarzh_fast)
303 {
304   StgMutVar* mv;
305   /* Args: R1.p = initialisation value */
306   FB_
307
308   HP_CHK_GEN_TICKY(sizeofW(StgMutVar), R1_PTR, newMutVarzh_fast);
309   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,1, 0); /* hack, dependent on rep. */
310   CCS_ALLOC(CCCS,sizeofW(StgMutVar));
311
312   mv = (StgMutVar *)(Hp-sizeofW(StgMutVar)+1);
313   SET_HDR(mv,&stg_MUT_VAR_info,CCCS);
314   mv->var = R1.cl;
315
316   TICK_RET_UNBOXED_TUP(1);
317   RET_P(mv);
318   FE_
319 }
320
321 FN_(atomicModifyMutVarzh_fast)
322 {
323    StgMutVar* mv;
324    StgClosure *z, *x, *y, *r;
325    FB_
326    /* Args: R1.p :: MutVar#,  R2.p :: a -> (a,b) */
327
328    /* If x is the current contents of the MutVar#, then 
329       We want to make the new contents point to
330
331          (sel_0 (f x))
332  
333       and the return value is
334
335          (sel_1 (f x))
336
337       obviously we can share (f x).
338
339          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
340          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
341          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
342    */
343
344 #define THUNK_SIZE(n) (sizeofW(StgHeader) + stg_max((n), MIN_UPD_SIZE))
345 #define SIZE (THUNK_SIZE(2) + THUNK_SIZE(1) + THUNK_SIZE(1))
346
347    HP_CHK_GEN_TICKY(SIZE, R1_PTR|R2_PTR, atomicModifyMutVarzh_fast);
348    CCS_ALLOC(CCCS,SIZE);
349
350    x = ((StgMutVar *)R1.cl)->var;
351
352    TICK_ALLOC_UP_THK(2,0); // XXX
353    z = (StgClosure *) Hp - THUNK_SIZE(2) + 1;
354    SET_HDR(z, (StgInfoTable *)&stg_ap_2_upd_info, CCCS);
355    z->payload[0] = R2.cl;
356    z->payload[1] = x;
357
358    TICK_ALLOC_UP_THK(1,1); // XXX
359    y = (StgClosure *) (StgPtr)z - THUNK_SIZE(1);
360    SET_HDR(y, &stg_sel_0_upd_info, CCCS);
361    y->payload[0] = z;
362
363    ((StgMutVar *)R1.cl)->var = y;
364
365    TICK_ALLOC_UP_THK(1,1); // XXX
366    r = (StgClosure *) (StgPtr)y - THUNK_SIZE(1);
367    SET_HDR(r, &stg_sel_1_upd_info, CCCS);
368    r->payload[0] = z;
369
370    RET_P(r);
371    FE_
372 }
373
374 /* -----------------------------------------------------------------------------
375    Foreign Object Primitives
376    -------------------------------------------------------------------------- */
377
378 FN_(mkForeignObjzh_fast)
379 {
380   /* R1.p = ptr to foreign object,
381   */
382   StgForeignObj *result;
383   FB_
384
385   HP_CHK_GEN_TICKY(sizeofW(StgForeignObj), NO_PTRS, mkForeignObjzh_fast);
386   TICK_ALLOC_PRIM(sizeofW(StgHeader),
387                   sizeofW(StgForeignObj)-sizeofW(StgHeader), 0);
388   CCS_ALLOC(CCCS,sizeofW(StgForeignObj)); /* ccs prof */
389
390   result = (StgForeignObj *) (Hp + 1 - sizeofW(StgForeignObj));
391   SET_HDR(result,&stg_FOREIGN_info,CCCS);
392   result->data = R1.p;
393
394   /* returns (# s#, ForeignObj# #) */
395   TICK_RET_UNBOXED_TUP(1);
396   RET_P(result);
397   FE_
398 }
399
400 /* These two are out-of-line for the benefit of the NCG */
401 FN_(unsafeThawArrayzh_fast)
402 {
403   FB_
404   SET_INFO((StgClosure *)R1.cl,&stg_MUT_ARR_PTRS_info);
405   recordMutable((StgMutClosure*)R1.cl);
406
407   TICK_RET_UNBOXED_TUP(1);
408   RET_P(R1.p);
409   FE_
410 }
411
412 /* -----------------------------------------------------------------------------
413    Weak Pointer Primitives
414    -------------------------------------------------------------------------- */
415
416 FN_(mkWeakzh_fast)
417 {
418   /* R1.p = key
419      R2.p = value
420      R3.p = finalizer (or NULL)
421   */
422   StgWeak *w;
423   FB_
424
425   if (R3.cl == NULL) {
426     R3.cl = &stg_NO_FINALIZER_closure;
427   }
428
429   HP_CHK_GEN_TICKY(sizeofW(StgWeak),R1_PTR|R2_PTR|R3_PTR, mkWeakzh_fast);
430   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,  // +1 is for the link field
431                   sizeofW(StgWeak)-sizeofW(StgHeader)-1, 0);
432   CCS_ALLOC(CCCS,sizeofW(StgWeak)); /* ccs prof */
433
434   w = (StgWeak *) (Hp + 1 - sizeofW(StgWeak));
435   SET_HDR(w, &stg_WEAK_info, CCCS);
436
437   w->key        = R1.cl;
438   w->value      = R2.cl;
439   w->finalizer  = R3.cl;
440
441   w->link       = weak_ptr_list;
442   weak_ptr_list = w;
443   IF_DEBUG(weak, fprintf(stderr,"New weak pointer at %p\n",w));
444
445   TICK_RET_UNBOXED_TUP(1);
446   RET_P(w);
447   FE_
448 }
449
450 FN_(finalizzeWeakzh_fast)
451 {
452   /* R1.p = weak ptr
453    */
454   StgDeadWeak *w;
455   StgClosure *f;
456   FB_
457   TICK_RET_UNBOXED_TUP(0);
458   w = (StgDeadWeak *)R1.p;
459
460   /* already dead? */
461   if (w->header.info == &stg_DEAD_WEAK_info) {
462       RET_NP(0,&stg_NO_FINALIZER_closure);
463   }
464
465   /* kill it */
466 #ifdef PROFILING
467   // @LDV profiling
468   // A weak pointer is inherently used, so we do not need to call
469   // LDV_recordDead_FILL_SLOP_DYNAMIC():
470   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
471   // or, LDV_recordDead():
472   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
473   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
474   // large as weak pointers, so there is no need to fill the slop, either.
475   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
476 #endif
477   //
478   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
479   //
480   w->header.info = &stg_DEAD_WEAK_info;
481 #ifdef PROFILING
482   // @LDV profiling
483   LDV_recordCreate((StgClosure *)w);
484 #endif
485   f = ((StgWeak *)w)->finalizer;
486   w->link = ((StgWeak *)w)->link;
487
488   /* return the finalizer */
489   if (f == &stg_NO_FINALIZER_closure) {
490       RET_NP(0,&stg_NO_FINALIZER_closure);
491   } else {
492       RET_NP(1,f);
493   }
494   FE_
495 }
496
497 FN_(deRefWeakzh_fast)
498 {
499   /* R1.p = weak ptr */
500   StgWeak* w;
501   I_       code;
502   P_       val;
503   FB_
504   w = (StgWeak*)R1.p;
505   if (w->header.info == &stg_WEAK_info) {
506     code = 1;
507     val = (P_)((StgWeak *)w)->value;
508   } else {
509     code = 0;
510     val = (P_)w;
511   }
512   RET_NP(code,val);
513   FE_
514 }
515
516 /* -----------------------------------------------------------------------------
517    Arbitrary-precision Integer operations.
518    -------------------------------------------------------------------------- */
519
520 FN_(int2Integerzh_fast)
521 {
522    /* arguments: R1 = Int# */
523
524    I_ val, s;           /* to avoid aliasing */
525    StgArrWords* p;      /* address of array result */
526    FB_
527
528    val = R1.i;
529    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, int2Integerzh_fast);
530    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
531    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
532
533    p = (StgArrWords *)Hp - 1;
534    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
535
536    /* mpz_set_si is inlined here, makes things simpler */
537    if (val < 0) { 
538         s  = -1;
539         *Hp = -val;
540    } else if (val > 0) {
541         s = 1;
542         *Hp = val;
543    } else {
544         s = 0;
545    }
546
547    /* returns (# size  :: Int#, 
548                  data  :: ByteArray# 
549                #)
550    */
551    TICK_RET_UNBOXED_TUP(2);
552    RET_NP(s,p);
553    FE_
554 }
555
556 FN_(word2Integerzh_fast)
557 {
558    /* arguments: R1 = Word# */
559
560    W_ val;              /* to avoid aliasing */
561    I_  s;
562    StgArrWords* p;      /* address of array result */
563    FB_
564
565    val = R1.w;
566    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, word2Integerzh_fast)
567    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
568    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
569
570    p = (StgArrWords *)Hp - 1;
571    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
572
573    if (val != 0) {
574         s = 1;
575         *Hp = val;
576    } else {
577         s = 0;
578    }
579
580    /* returns (# size  :: Int#, 
581                  data  :: ByteArray# 
582                #)
583    */
584    TICK_RET_UNBOXED_TUP(2);
585    RET_NP(s,p);
586    FE_
587 }
588
589
590 /*
591  * 'long long' primops for converting to/from Integers.
592  */
593
594 #ifdef SUPPORT_LONG_LONGS
595
596 FN_(int64ToIntegerzh_fast)
597 {
598    /* arguments: L1 = Int64# */
599
600    StgInt64  val; /* to avoid aliasing */
601    W_ hi;
602    I_  s, neg, words_needed;
603    StgArrWords* p;      /* address of array result */
604    FB_
605
606    val = (LI_)L1;
607    neg = 0;
608
609    if ( val >= 0x100000000LL || val <= -0x100000000LL )  { 
610        words_needed = 2;
611    } else { 
612        /* minimum is one word */
613        words_needed = 1;
614    }
615    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, int64ToIntegerzh_fast)
616    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
617    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
618
619    p = (StgArrWords *)(Hp-words_needed+1) - 1;
620    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
621
622    if ( val < 0LL ) {
623      neg = 1;
624      val = -val;
625    }
626
627    hi = (W_)((LW_)val / 0x100000000ULL);
628
629    if ( words_needed == 2 )  { 
630       s = 2;
631       Hp[-1] = (W_)val;
632       Hp[0] = hi;
633    } else if ( val != 0 ) {
634       s = 1;
635       Hp[0] = (W_)val;
636    }  else /* val==0 */   {
637       s = 0;
638    }
639    s = ( neg ? -s : s );
640
641    /* returns (# size  :: Int#, 
642                  data  :: ByteArray# 
643                #)
644    */
645    TICK_RET_UNBOXED_TUP(2);
646    RET_NP(s,p);
647    FE_
648 }
649
650 FN_(word64ToIntegerzh_fast)
651 {
652    /* arguments: L1 = Word64# */
653
654    StgWord64 val; /* to avoid aliasing */
655    StgWord hi;
656    I_  s, words_needed;
657    StgArrWords* p;      /* address of array result */
658    FB_
659
660    val = (LW_)L1;
661    if ( val >= 0x100000000ULL ) {
662       words_needed = 2;
663    } else {
664       words_needed = 1;
665    }
666    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, word64ToIntegerzh_fast)
667    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
668    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
669
670    p = (StgArrWords *)(Hp-words_needed+1) - 1;
671    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
672
673    hi = (W_)((LW_)val / 0x100000000ULL);
674    if ( val >= 0x100000000ULL ) { 
675      s = 2;
676      Hp[-1] = ((W_)val);
677      Hp[0]  = (hi);
678    } else if ( val != 0 )      {
679       s = 1;
680       Hp[0] = ((W_)val);
681    } else /* val==0 */         {
682       s = 0;
683    }
684
685    /* returns (# size  :: Int#, 
686                  data  :: ByteArray# 
687                #)
688    */
689    TICK_RET_UNBOXED_TUP(2);
690    RET_NP(s,p);
691    FE_
692 }
693
694
695 #endif /* SUPPORT_LONG_LONGS */
696
697 /* ToDo: this is shockingly inefficient */
698
699 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
700 FN_(name)                                                               \
701 {                                                                       \
702   MP_INT arg1, arg2, result;                                            \
703   I_ s1, s2;                                                            \
704   StgArrWords* d1;                                                      \
705   StgArrWords* d2;                                                      \
706   FB_                                                                   \
707                                                                         \
708   /* call doYouWantToGC() */                                            \
709   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
710                                                                         \
711   d1 = (StgArrWords *)R2.p;                                             \
712   s1 = R1.i;                                                            \
713   d2 = (StgArrWords *)R4.p;                                             \
714   s2 = R3.i;                                                            \
715                                                                         \
716   arg1._mp_alloc        = d1->words;                                    \
717   arg1._mp_size         = (s1);                                         \
718   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
719   arg2._mp_alloc        = d2->words;                                    \
720   arg2._mp_size         = (s2);                                         \
721   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
722                                                                         \
723   STGCALL1(mpz_init,&result);                                           \
724                                                                         \
725   /* Perform the operation */                                           \
726   STGCALL3(mp_fun,&result,&arg1,&arg2);                                 \
727                                                                         \
728   TICK_RET_UNBOXED_TUP(2);                                              \
729   RET_NP(result._mp_size,                                               \
730          result._mp_d-sizeofW(StgArrWords));                            \
731   FE_                                                                   \
732 }
733
734 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
735 FN_(name)                                                               \
736 {                                                                       \
737   MP_INT arg1, result;                                                  \
738   I_ s1;                                                                \
739   StgArrWords* d1;                                                      \
740   FB_                                                                   \
741                                                                         \
742   /* call doYouWantToGC() */                                            \
743   MAYBE_GC(R2_PTR, name);                                               \
744                                                                         \
745   d1 = (StgArrWords *)R2.p;                                             \
746   s1 = R1.i;                                                            \
747                                                                         \
748   arg1._mp_alloc        = d1->words;                                    \
749   arg1._mp_size         = (s1);                                         \
750   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
751                                                                         \
752   STGCALL1(mpz_init,&result);                                           \
753                                                                         \
754   /* Perform the operation */                                           \
755   STGCALL2(mp_fun,&result,&arg1);                                       \
756                                                                         \
757   TICK_RET_UNBOXED_TUP(2);                                              \
758   RET_NP(result._mp_size,                                               \
759          result._mp_d-sizeofW(StgArrWords));                            \
760   FE_                                                                   \
761 }
762
763 #define GMP_TAKE2_RET2(name,mp_fun)                                     \
764 FN_(name)                                                               \
765 {                                                                       \
766   MP_INT arg1, arg2, result1, result2;                                  \
767   I_ s1, s2;                                                            \
768   StgArrWords* d1;                                                      \
769   StgArrWords* d2;                                                      \
770   FB_                                                                   \
771                                                                         \
772   /* call doYouWantToGC() */                                            \
773   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
774                                                                         \
775   d1 = (StgArrWords *)R2.p;                                             \
776   s1 = R1.i;                                                            \
777   d2 = (StgArrWords *)R4.p;                                             \
778   s2 = R3.i;                                                            \
779                                                                         \
780   arg1._mp_alloc        = d1->words;                                    \
781   arg1._mp_size         = (s1);                                         \
782   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
783   arg2._mp_alloc        = d2->words;                                    \
784   arg2._mp_size         = (s2);                                         \
785   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
786                                                                         \
787   STGCALL1(mpz_init,&result1);                                          \
788   STGCALL1(mpz_init,&result2);                                          \
789                                                                         \
790   /* Perform the operation */                                           \
791   STGCALL4(mp_fun,&result1,&result2,&arg1,&arg2);                       \
792                                                                         \
793   TICK_RET_UNBOXED_TUP(4);                                              \
794   RET_NPNP(result1._mp_size,                                            \
795            result1._mp_d-sizeofW(StgArrWords),                          \
796            result2._mp_size,                                            \
797            result2._mp_d-sizeofW(StgArrWords));                         \
798   FE_                                                                   \
799 }
800
801 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add);
802 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub);
803 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul);
804 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd);
805 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q);
806 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r);
807 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact);
808 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and);
809 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior);
810 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor);
811 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com);
812
813 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr);
814 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr);
815
816
817 FN_(gcdIntzh_fast)
818 {
819   /* R1 = the first Int#; R2 = the second Int# */
820   mp_limb_t aa;
821   I_ r;
822   FB_
823   aa = (mp_limb_t)(R1.i);
824   r = RET_STGCALL3(StgInt, mpn_gcd_1, (mp_limb_t *)(&aa), 1, (mp_limb_t)(R2.i));
825
826   R1.i = r;
827   /* Result parked in R1, return via info-pointer at TOS */
828   JMP_(ENTRY_CODE(Sp[0]));
829   FE_
830 }
831
832 FN_(gcdIntegerIntzh_fast)
833 {
834   /* R1 = s1; R2 = d1; R3 = the int */
835   I_ r;
836   FB_
837   r = RET_STGCALL3(StgInt,mpn_gcd_1,(mp_limb_t *)(BYTE_ARR_CTS(R2.p)), R1.i, R3.i);
838
839   R1.i = r;
840   /* Result parked in R1, return via info-pointer at TOS */
841   JMP_(ENTRY_CODE(Sp[0]));
842   FE_
843 }
844
845 FN_(cmpIntegerIntzh_fast)
846 {
847   /* R1 = s1; R2 = d1; R3 = the int */
848   I_ usize;
849   I_ vsize;
850   I_ v_digit;
851   mp_limb_t u_digit;
852   FB_
853
854   usize = R1.i;
855   vsize = 0;
856   v_digit = R3.i;
857
858   // paraphrased from mpz_cmp_si() in the GMP sources
859   if (v_digit > 0) {
860       vsize = 1;
861   } else if (v_digit < 0) {
862       vsize = -1;
863       v_digit = -v_digit;
864   }
865
866   if (usize != vsize) {
867     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
868   }
869
870   if (usize == 0) {
871     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
872   }
873
874   u_digit = *(mp_limb_t *)(BYTE_ARR_CTS(R2.p));
875
876   if (u_digit == (mp_limb_t) (unsigned long) v_digit) {
877     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
878   }
879
880   if (u_digit > (mp_limb_t) (unsigned long) v_digit) {
881     R1.i = usize; 
882   } else {
883     R1.i = -usize; 
884   }
885
886   JMP_(ENTRY_CODE(Sp[0]));
887   FE_
888 }
889
890 FN_(cmpIntegerzh_fast)
891 {
892   /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
893   I_ usize;
894   I_ vsize;
895   I_ size;
896   StgPtr up, vp;
897   int cmp;
898   FB_
899
900   // paraphrased from mpz_cmp() in the GMP sources
901   usize = R1.i;
902   vsize = R3.i;
903
904   if (usize != vsize) {
905     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
906   }
907
908   if (usize == 0) {
909     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
910   }
911
912   size = abs(usize);
913
914   up = BYTE_ARR_CTS(R2.p);
915   vp = BYTE_ARR_CTS(R4.p);
916
917   cmp = RET_STGCALL3(I_, mpn_cmp, (mp_limb_t *)up, (mp_limb_t *)vp, size);
918
919   if (cmp == 0) {
920     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
921   }
922
923   if ((cmp < 0) == (usize < 0)) {
924     R1.i = 1;
925   } else {
926     R1.i = (-1); 
927   }
928   /* Result parked in R1, return via info-pointer at TOS */
929   JMP_(ENTRY_CODE(Sp[0]));
930   FE_
931 }
932
933 FN_(integer2Intzh_fast)
934 {
935   /* R1 = s; R2 = d */
936   I_ r, s;
937   FB_
938   s = R1.i;
939   if (s == 0)
940     r = 0;
941   else {
942     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
943     if (s < 0) r = -r;
944   }
945   /* Result parked in R1, return via info-pointer at TOS */
946   R1.i = r;
947   JMP_(ENTRY_CODE(Sp[0]));
948   FE_
949 }
950
951 FN_(integer2Wordzh_fast)
952 {
953   /* R1 = s; R2 = d */
954   I_ s;
955   W_ r;
956   FB_
957   s = R1.i;
958   if (s == 0)
959     r = 0;
960   else {
961     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
962     if (s < 0) r = -r;
963   }
964   /* Result parked in R1, return via info-pointer at TOS */
965   R1.w = r;
966   JMP_(ENTRY_CODE(Sp[0]));
967   FE_
968 }
969
970
971 FN_(decodeFloatzh_fast)
972
973   MP_INT mantissa;
974   I_ exponent;
975   StgArrWords* p;
976   StgFloat arg;
977   FB_
978
979   /* arguments: F1 = Float# */
980   arg = F1;
981
982   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, decodeFloatzh_fast);
983   TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
984   CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
985
986   /* Be prepared to tell Lennart-coded __decodeFloat    */
987   /* where mantissa._mp_d can be put (it does not care about the rest) */
988   p = (StgArrWords *)Hp - 1;
989   SET_ARR_HDR(p,&stg_ARR_WORDS_info,CCCS,1)
990   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
991
992   /* Perform the operation */
993   STGCALL3(__decodeFloat,&mantissa,&exponent,arg);
994
995   /* returns: (Int# (expn), Int#, ByteArray#) */
996   TICK_RET_UNBOXED_TUP(3);
997   RET_NNP(exponent,mantissa._mp_size,p);
998   FE_
999 }
1000
1001 #define DOUBLE_MANTISSA_SIZE (sizeofW(StgDouble))
1002 #define ARR_SIZE (sizeofW(StgArrWords) + DOUBLE_MANTISSA_SIZE)
1003
1004 FN_(decodeDoublezh_fast)
1005 { MP_INT mantissa;
1006   I_ exponent;
1007   StgDouble arg;
1008   StgArrWords* p;
1009   FB_
1010
1011   /* arguments: D1 = Double# */
1012   arg = D1;
1013
1014   HP_CHK_GEN_TICKY(ARR_SIZE, NO_PTRS, decodeDoublezh_fast);
1015   TICK_ALLOC_PRIM(sizeofW(StgArrWords),DOUBLE_MANTISSA_SIZE,0);
1016   CCS_ALLOC(CCCS,ARR_SIZE); /* ccs prof */
1017
1018   /* Be prepared to tell Lennart-coded __decodeDouble   */
1019   /* where mantissa.d can be put (it does not care about the rest) */
1020   p = (StgArrWords *)(Hp-ARR_SIZE+1);
1021   SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, DOUBLE_MANTISSA_SIZE);
1022   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
1023
1024   /* Perform the operation */
1025   STGCALL3(__decodeDouble,&mantissa,&exponent,arg);
1026
1027   /* returns: (Int# (expn), Int#, ByteArray#) */
1028   TICK_RET_UNBOXED_TUP(3);
1029   RET_NNP(exponent,mantissa._mp_size,p);
1030   FE_
1031 }
1032
1033 /* -----------------------------------------------------------------------------
1034  * Concurrency primitives
1035  * -------------------------------------------------------------------------- */
1036
1037 FN_(forkzh_fast)
1038 {
1039   FB_
1040   /* args: R1 = closure to spark */
1041   
1042   MAYBE_GC(R1_PTR, forkzh_fast);
1043
1044   /* create it right now, return ThreadID in R1 */
1045   R1.t = RET_STGCALL2(StgTSO *, createIOThread, 
1046                      RtsFlags.GcFlags.initialStkSize, R1.cl);
1047   STGCALL1(scheduleThread, R1.t);
1048       
1049   /* switch at the earliest opportunity */ 
1050   context_switch = 1;
1051   
1052   RET_P(R1.t);
1053   FE_
1054 }
1055
1056 FN_(yieldzh_fast)
1057 {
1058   FB_
1059   JMP_(stg_yield_noregs);
1060   FE_
1061 }
1062
1063 FN_(myThreadIdzh_fast)
1064 {
1065   /* no args. */
1066   FB_
1067   RET_P((P_)CurrentTSO);
1068   FE_
1069 }
1070
1071 FN_(labelThreadzh_fast)
1072 {
1073   FB_
1074   /* args: 
1075         R1.p = ThreadId#
1076         R2.p = Addr# */
1077 #ifdef DEBUG
1078   STGCALL2(labelThread,R1.p,(char *)R2.p);
1079 #endif
1080   JMP_(ENTRY_CODE(Sp[0]));
1081   FE_
1082 }
1083
1084 FN_(isCurrentThreadBoundzh_fast)
1085 {
1086   /* no args */
1087   I_ r;
1088   FB_
1089   r = (I_)(RET_STGCALL1(StgBool, isThreadBound, CurrentTSO));
1090   RET_N(r);
1091   FE_
1092 }
1093
1094 /* -----------------------------------------------------------------------------
1095  * MVar primitives
1096  *
1097  * take & putMVar work as follows.  Firstly, an important invariant:
1098  *
1099  *    If the MVar is full, then the blocking queue contains only
1100  *    threads blocked on putMVar, and if the MVar is empty then the
1101  *    blocking queue contains only threads blocked on takeMVar.
1102  *
1103  * takeMvar:
1104  *    MVar empty : then add ourselves to the blocking queue
1105  *    MVar full  : remove the value from the MVar, and
1106  *                 blocking queue empty     : return
1107  *                 blocking queue non-empty : perform the first blocked putMVar
1108  *                                            from the queue, and wake up the
1109  *                                            thread (MVar is now full again)
1110  *
1111  * putMVar is just the dual of the above algorithm.
1112  *
1113  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1114  * the stack of the thread waiting to do the putMVar.  See
1115  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1116  * the stack layout, and the PerformPut and PerformTake macros below.
1117  *
1118  * It is important that a blocked take or put is woken up with the
1119  * take/put already performed, because otherwise there would be a
1120  * small window of vulnerability where the thread could receive an
1121  * exception and never perform its take or put, and we'd end up with a
1122  * deadlock.
1123  *
1124  * -------------------------------------------------------------------------- */
1125
1126 FN_(isEmptyMVarzh_fast)
1127 {
1128   /* args: R1 = MVar closure */
1129   I_ r;
1130   FB_
1131   r = (I_)((GET_INFO((StgMVar*)(R1.p))) == &stg_EMPTY_MVAR_info);
1132   RET_N(r);
1133   FE_
1134 }
1135
1136
1137 FN_(newMVarzh_fast)
1138 {
1139   StgMVar *mvar;
1140
1141   FB_
1142   /* args: none */
1143
1144   HP_CHK_GEN_TICKY(sizeofW(StgMVar), NO_PTRS, newMVarzh_fast);
1145   TICK_ALLOC_PRIM(sizeofW(StgMutVar)-1, // consider head,tail,link as admin wds
1146                   1, 0);
1147   CCS_ALLOC(CCCS,sizeofW(StgMVar)); /* ccs prof */
1148   
1149   mvar = (StgMVar *) (Hp - sizeofW(StgMVar) + 1);
1150   SET_HDR(mvar,&stg_EMPTY_MVAR_info,CCCS);
1151   mvar->head = mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1152   mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1153
1154   TICK_RET_UNBOXED_TUP(1);
1155   RET_P(mvar);
1156   FE_
1157 }
1158
1159 /* If R1 isn't available, pass it on the stack */
1160 #ifdef REG_R1
1161 #define PerformTake(tso, value) ({              \
1162     (tso)->sp[1] = (W_)value;                   \
1163     (tso)->sp[0] = (W_)&stg_gc_unpt_r1_info;    \
1164   })
1165 #else
1166 #define PerformTake(tso, value) ({              \
1167     (tso)->sp[1] = (W_)value;                   \
1168     (tso)->sp[0] = (W_)&stg_ut_1_0_unreg_info;  \
1169   })
1170 #endif
1171
1172
1173 #define PerformPut(tso) ({                              \
1174     StgClosure *val = (StgClosure *)(tso)->sp[2];       \
1175     (tso)->sp += 3;                                     \
1176     val;                                                \
1177   })
1178
1179 FN_(takeMVarzh_fast)
1180 {
1181   StgMVar *mvar;
1182   StgClosure *val;
1183   const StgInfoTable *info;
1184
1185   FB_
1186   /* args: R1 = MVar closure */
1187
1188   mvar = (StgMVar *)R1.p;
1189
1190 #ifdef SMP
1191   info = LOCK_CLOSURE(mvar);
1192 #else
1193   info = GET_INFO(mvar);
1194 #endif
1195
1196   /* If the MVar is empty, put ourselves on its blocking queue,
1197    * and wait until we're woken up.
1198    */
1199   if (info == &stg_EMPTY_MVAR_info) {
1200     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1201       mvar->head = CurrentTSO;
1202     } else {
1203       mvar->tail->link = CurrentTSO;
1204     }
1205     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1206     CurrentTSO->why_blocked = BlockedOnMVar;
1207     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1208     mvar->tail = CurrentTSO;
1209
1210 #ifdef SMP
1211     /* unlock the MVar */
1212     mvar->header.info = &stg_EMPTY_MVAR_info;
1213 #endif
1214     JMP_(stg_block_takemvar);
1215   }
1216
1217   /* we got the value... */
1218   val = mvar->value;
1219
1220   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1221       /* There are putMVar(s) waiting... 
1222        * wake up the first thread on the queue
1223        */
1224       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1225
1226       /* actually perform the putMVar for the thread that we just woke up */
1227       mvar->value = PerformPut(mvar->head);
1228
1229 #if defined(GRAN) || defined(PAR)
1230       /* ToDo: check 2nd arg (mvar) is right */
1231       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1232 #else
1233       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1234 #endif
1235       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1236           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1237       }
1238 #ifdef SMP
1239       /* unlock in the SMP case */
1240       SET_INFO(mvar,&stg_FULL_MVAR_info);
1241 #endif
1242       TICK_RET_UNBOXED_TUP(1);
1243       RET_P(val);
1244   } else {
1245       /* No further putMVars, MVar is now empty */
1246
1247       /* do this last... we might have locked the MVar in the SMP case,
1248        * and writing the info pointer will unlock it.
1249        */
1250       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1251       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1252       TICK_RET_UNBOXED_TUP(1);
1253       RET_P(val);
1254   }
1255   FE_
1256 }
1257
1258 FN_(tryTakeMVarzh_fast)
1259 {
1260   StgMVar *mvar;
1261   StgClosure *val;
1262   const StgInfoTable *info;
1263
1264   FB_
1265   /* args: R1 = MVar closure */
1266
1267   mvar = (StgMVar *)R1.p;
1268
1269 #ifdef SMP
1270   info = LOCK_CLOSURE(mvar);
1271 #else
1272   info = GET_INFO(mvar);
1273 #endif
1274
1275   if (info == &stg_EMPTY_MVAR_info) {
1276
1277 #ifdef SMP
1278       /* unlock the MVar */
1279       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1280 #endif
1281
1282       /* HACK: we need a pointer to pass back, 
1283        * so we abuse NO_FINALIZER_closure
1284        */
1285       RET_NP(0, &stg_NO_FINALIZER_closure);
1286   }
1287
1288   /* we got the value... */
1289   val = mvar->value;
1290
1291   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1292       /* There are putMVar(s) waiting... 
1293        * wake up the first thread on the queue
1294        */
1295       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1296
1297       /* actually perform the putMVar for the thread that we just woke up */
1298       mvar->value = PerformPut(mvar->head);
1299
1300 #if defined(GRAN) || defined(PAR)
1301       /* ToDo: check 2nd arg (mvar) is right */
1302       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1303 #else
1304       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1305 #endif
1306       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1307           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1308       }
1309 #ifdef SMP
1310       /* unlock in the SMP case */
1311       SET_INFO(mvar,&stg_FULL_MVAR_info);
1312 #endif
1313   } else {
1314       /* No further putMVars, MVar is now empty */
1315       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1316
1317       /* do this last... we might have locked the MVar in the SMP case,
1318        * and writing the info pointer will unlock it.
1319        */
1320       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1321   }
1322
1323   TICK_RET_UNBOXED_TUP(1);
1324   RET_NP((I_)1, val);
1325   FE_
1326 }
1327
1328 FN_(putMVarzh_fast)
1329 {
1330   StgMVar *mvar;
1331   const StgInfoTable *info;
1332
1333   FB_
1334   /* args: R1 = MVar, R2 = value */
1335
1336   mvar = (StgMVar *)R1.p;
1337
1338 #ifdef SMP
1339   info = LOCK_CLOSURE(mvar);
1340 #else
1341   info = GET_INFO(mvar);
1342 #endif
1343
1344   if (info == &stg_FULL_MVAR_info) {
1345     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1346       mvar->head = CurrentTSO;
1347     } else {
1348       mvar->tail->link = CurrentTSO;
1349     }
1350     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1351     CurrentTSO->why_blocked = BlockedOnMVar;
1352     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1353     mvar->tail = CurrentTSO;
1354
1355 #ifdef SMP
1356     /* unlock the MVar */
1357     SET_INFO(mvar,&stg_FULL_MVAR_info);
1358 #endif
1359     JMP_(stg_block_putmvar);
1360   }
1361   
1362   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1363       /* There are takeMVar(s) waiting: wake up the first one
1364        */
1365       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1366
1367       /* actually perform the takeMVar */
1368       PerformTake(mvar->head, R2.cl);
1369       
1370 #if defined(GRAN) || defined(PAR)
1371       /* ToDo: check 2nd arg (mvar) is right */
1372       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1373 #else
1374       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1375 #endif
1376       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1377           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1378       }
1379 #ifdef SMP
1380       /* unlocks the MVar in the SMP case */
1381       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1382 #endif
1383       JMP_(ENTRY_CODE(Sp[0]));
1384   } else {
1385       /* No further takes, the MVar is now full. */
1386       mvar->value = R2.cl;
1387       /* unlocks the MVar in the SMP case */
1388       SET_INFO(mvar,&stg_FULL_MVAR_info);
1389       JMP_(ENTRY_CODE(Sp[0]));
1390   }
1391
1392   /* ToDo: yield afterward for better communication performance? */
1393   FE_
1394 }
1395
1396 FN_(tryPutMVarzh_fast)
1397 {
1398   StgMVar *mvar;
1399   const StgInfoTable *info;
1400
1401   FB_
1402   /* args: R1 = MVar, R2 = value */
1403
1404   mvar = (StgMVar *)R1.p;
1405
1406 #ifdef SMP
1407   info = LOCK_CLOSURE(mvar);
1408 #else
1409   info = GET_INFO(mvar);
1410 #endif
1411
1412   if (info == &stg_FULL_MVAR_info) {
1413
1414 #ifdef SMP
1415     /* unlock the MVar */
1416     mvar->header.info = &stg_FULL_MVAR_info;
1417 #endif
1418
1419     RET_N(0);
1420   }
1421   
1422   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1423       /* There are takeMVar(s) waiting: wake up the first one
1424        */
1425       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1426
1427       /* actually perform the takeMVar */
1428       PerformTake(mvar->head, R2.cl);
1429       
1430 #if defined(GRAN) || defined(PAR)
1431       /* ToDo: check 2nd arg (mvar) is right */
1432       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1433 #else
1434       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1435 #endif
1436       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1437           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1438       }
1439 #ifdef SMP
1440       /* unlocks the MVar in the SMP case */
1441       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1442 #endif
1443       JMP_(ENTRY_CODE(Sp[0]));
1444   } else {
1445       /* No further takes, the MVar is now full. */
1446       mvar->value = R2.cl;
1447       /* unlocks the MVar in the SMP case */
1448       SET_INFO(mvar,&stg_FULL_MVAR_info);
1449       JMP_(ENTRY_CODE(Sp[0]));
1450   }
1451
1452   /* ToDo: yield afterward for better communication performance? */
1453   FE_
1454 }
1455
1456 /* -----------------------------------------------------------------------------
1457    Stable pointer primitives
1458    -------------------------------------------------------------------------  */
1459
1460 FN_(makeStableNamezh_fast)
1461 {
1462   StgWord index;
1463   StgStableName *sn_obj;
1464   FB_
1465
1466   HP_CHK_GEN_TICKY(sizeofW(StgStableName), R1_PTR, makeStableNamezh_fast);
1467   TICK_ALLOC_PRIM(sizeofW(StgHeader), 
1468                   sizeofW(StgStableName)-sizeofW(StgHeader), 0);
1469   CCS_ALLOC(CCCS,sizeofW(StgStableName)); /* ccs prof */
1470   
1471   index = RET_STGCALL1(StgWord,lookupStableName,R1.p);
1472
1473   /* Is there already a StableName for this heap object? */
1474   if (stable_ptr_table[index].sn_obj == NULL) {
1475     sn_obj = (StgStableName *) (Hp - sizeofW(StgStableName) + 1);
1476     SET_HDR(sn_obj,&stg_STABLE_NAME_info,CCCS);
1477     sn_obj->sn = index;
1478     stable_ptr_table[index].sn_obj = (StgClosure *)sn_obj;
1479   } else {
1480     (StgClosure *)sn_obj = stable_ptr_table[index].sn_obj;
1481   }
1482
1483   TICK_RET_UNBOXED_TUP(1);
1484   RET_P(sn_obj);
1485 }
1486
1487
1488 FN_(makeStablePtrzh_fast)
1489 {
1490   /* Args: R1 = a */
1491   StgStablePtr sp;
1492   FB_
1493   MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1494   sp = RET_STGCALL1(StgStablePtr,getStablePtr,R1.p);
1495   RET_N(sp);
1496   FE_
1497 }
1498
1499 FN_(deRefStablePtrzh_fast)
1500 {
1501   /* Args: R1 = the stable ptr */
1502   P_ r;
1503   StgStablePtr sp;
1504   FB_
1505   sp = (StgStablePtr)R1.w;
1506   r = stable_ptr_table[(StgWord)sp].addr;
1507   RET_P(r);
1508   FE_
1509 }
1510
1511 /* -----------------------------------------------------------------------------
1512    Bytecode object primitives
1513    -------------------------------------------------------------------------  */
1514
1515 FN_(newBCOzh_fast)
1516 {
1517   /* R1.p = instrs
1518      R2.p = literals
1519      R3.p = ptrs
1520      R4.p = itbls
1521      R5.i = arity
1522      R6.p = bitmap array
1523   */
1524   StgBCO *bco;
1525   nat size;
1526   StgArrWords *bitmap_arr;
1527   FB_
1528
1529   bitmap_arr = (StgArrWords *)R6.cl;
1530   size = sizeofW(StgBCO) + bitmap_arr->words;
1531   HP_CHK_GEN_TICKY(size,R1_PTR|R2_PTR|R3_PTR|R4_PTR|R6_PTR, newBCOzh_fast);
1532   TICK_ALLOC_PRIM(size, size-sizeofW(StgHeader), 0);
1533   CCS_ALLOC(CCCS,size); /* ccs prof */
1534   bco = (StgBCO *) (Hp + 1 - size);
1535   SET_HDR(bco, (const StgInfoTable *)&stg_BCO_info, CCCS);
1536
1537   bco->instrs     = (StgArrWords*)R1.cl;
1538   bco->literals   = (StgArrWords*)R2.cl;
1539   bco->ptrs       = (StgMutArrPtrs*)R3.cl;
1540   bco->itbls      = (StgArrWords*)R4.cl;
1541   bco->arity      = R5.w;
1542   bco->size       = size;
1543
1544   // Copy the arity/bitmap info into the BCO
1545   { 
1546     int i;
1547     for (i = 0; i < bitmap_arr->words; i++) {
1548         bco->bitmap[i] = bitmap_arr->payload[i];
1549     }
1550   }
1551
1552   TICK_RET_UNBOXED_TUP(1);
1553   RET_P(bco);
1554   FE_
1555 }
1556
1557 FN_(mkApUpd0zh_fast)
1558 {
1559   // R1.p = the BCO# for the AP
1560   //
1561   StgPAP* ap;
1562   FB_
1563
1564   // This function is *only* used to wrap zero-arity BCOs in an
1565   // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1566   // saturated and always points directly to a FUN or BCO.
1567   ASSERT(get_itbl(R1.cl)->type == BCO && ((StgBCO *)R1.p)->arity == 0);
1568
1569   HP_CHK_GEN_TICKY(PAP_sizeW(0), R1_PTR, mkApUpd0zh_fast);
1570   TICK_ALLOC_PRIM(sizeofW(StgHeader), PAP_sizeW(0)-sizeofW(StgHeader), 0);
1571   CCS_ALLOC(CCCS,PAP_sizeW(0)); /* ccs prof */
1572   ap = (StgPAP *) (Hp + 1 - PAP_sizeW(0));
1573   SET_HDR(ap, &stg_AP_info, CCCS);
1574
1575   ap->n_args = 0;
1576   ap->fun = R1.cl;
1577
1578   TICK_RET_UNBOXED_TUP(1);
1579   RET_P(ap);
1580   FE_
1581 }
1582
1583 /* -----------------------------------------------------------------------------
1584    Thread I/O blocking primitives
1585    -------------------------------------------------------------------------- */
1586
1587 FN_(waitReadzh_fast)
1588 {
1589   FB_
1590     /* args: R1.i */
1591     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1592     CurrentTSO->why_blocked = BlockedOnRead;
1593     CurrentTSO->block_info.fd = R1.i;
1594     ACQUIRE_LOCK(&sched_mutex);
1595     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1596     RELEASE_LOCK(&sched_mutex);
1597     JMP_(stg_block_noregs);
1598   FE_
1599 }
1600
1601 FN_(waitWritezh_fast)
1602 {
1603   FB_
1604     /* args: R1.i */
1605     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1606     CurrentTSO->why_blocked = BlockedOnWrite;
1607     CurrentTSO->block_info.fd = R1.i;
1608     ACQUIRE_LOCK(&sched_mutex);
1609     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1610     RELEASE_LOCK(&sched_mutex);
1611     JMP_(stg_block_noregs);
1612   FE_
1613 }
1614
1615 FN_(delayzh_fast)
1616 {
1617 #ifdef mingw32_TARGET_OS
1618   StgAsyncIOResult* ares;
1619   unsigned int reqID;
1620 #else
1621   StgTSO *t, *prev;
1622   nat target;
1623 #endif
1624   FB_
1625     /* args: R1.i (microsecond delay amount) */
1626     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1627     CurrentTSO->why_blocked = BlockedOnDelay;
1628
1629     ACQUIRE_LOCK(&sched_mutex);
1630 #ifdef mingw32_TARGET_OS
1631     /* could probably allocate this on the heap instead */
1632     ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "delayzh_fast");
1633     reqID = RET_STGCALL1(W_,addDelayRequest,R1.i);
1634     ares->reqID   = reqID;
1635     ares->len     = 0;
1636     ares->errCode = 0;
1637     CurrentTSO->block_info.async_result = ares;
1638     /* Having all async-blocked threads reside on the blocked_queue simplifies matters, so
1639      * change the status to OnDoProc & put the delayed thread on the blocked_queue.
1640      */
1641     CurrentTSO->why_blocked = BlockedOnDoProc;
1642     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1643 #else
1644     target = (R1.i / (TICK_MILLISECS*1000)) + getourtimeofday();
1645     CurrentTSO->block_info.target = target;
1646
1647     /* Insert the new thread in the sleeping queue. */
1648     prev = NULL;
1649     t = sleeping_queue;
1650     while (t != END_TSO_QUEUE && t->block_info.target < target) {
1651         prev = t;
1652         t = t->link;
1653     }
1654
1655     CurrentTSO->link = t;
1656     if (prev == NULL) {
1657         sleeping_queue = CurrentTSO;
1658     } else {
1659         prev->link = CurrentTSO;
1660     }
1661 #endif
1662     RELEASE_LOCK(&sched_mutex);
1663     JMP_(stg_block_noregs);
1664   FE_
1665 }
1666
1667 #ifdef mingw32_TARGET_OS
1668 FN_(asyncReadzh_fast)
1669 {
1670   StgAsyncIOResult* ares;
1671   unsigned int reqID;
1672   FB_
1673     /* args: R1.i = fd, R2.i = isSock, R3.i = len, R4.p = buf */
1674     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1675     CurrentTSO->why_blocked = BlockedOnRead;
1676     ACQUIRE_LOCK(&sched_mutex);
1677     /* could probably allocate this on the heap instead */
1678     ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncReadzh_fast");
1679     reqID = RET_STGCALL5(W_,addIORequest,R1.i,FALSE,R2.i,R3.i,(char*)R4.p);
1680     ares->reqID   = reqID;
1681     ares->len     = 0;
1682     ares->errCode = 0;
1683     CurrentTSO->block_info.async_result = ares;
1684     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1685     RELEASE_LOCK(&sched_mutex);
1686     JMP_(stg_block_async);
1687   FE_
1688 }
1689
1690 FN_(asyncWritezh_fast)
1691 {
1692   StgAsyncIOResult* ares;
1693   unsigned int reqID;
1694   FB_
1695     /* args: R1.i */
1696     /* args: R1.i = fd, R2.i = isSock, R3.i = len, R4.p = buf */
1697     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1698     CurrentTSO->why_blocked = BlockedOnWrite;
1699     ACQUIRE_LOCK(&sched_mutex);
1700     ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncWritezh_fast");
1701     reqID = RET_STGCALL5(W_,addIORequest,R1.i,TRUE,R2.i,R3.i,(char*)R4.p);
1702     ares->reqID   = reqID;
1703     ares->len     = 0;
1704     ares->errCode = 0;
1705     CurrentTSO->block_info.async_result = ares;
1706     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1707     RELEASE_LOCK(&sched_mutex);
1708     JMP_(stg_block_async);
1709   FE_
1710 }
1711
1712 FN_(asyncDoProczh_fast)
1713 {
1714   StgAsyncIOResult* ares;
1715   unsigned int reqID;
1716   FB_
1717     /* args: R1.i = proc, R2.i = param */
1718     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1719     CurrentTSO->why_blocked = BlockedOnDoProc;
1720     ACQUIRE_LOCK(&sched_mutex);
1721     /* could probably allocate this on the heap instead */
1722     ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncDoProczh_fast");
1723     reqID = RET_STGCALL2(W_,addDoProcRequest,R1.p,R2.p);
1724     ares->reqID   = reqID;
1725     ares->len     = 0;
1726     ares->errCode = 0;
1727     CurrentTSO->block_info.async_result = ares;
1728     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1729     RELEASE_LOCK(&sched_mutex);
1730     JMP_(stg_block_async);
1731   FE_
1732 }
1733 #endif
1734