9b3f3a43f2109b2ddc562916f13b19123ca07689
[ghc-hetmet.git] / ghc / rts / PrimOps.hc
1 /* -----------------------------------------------------------------------------
2  * $Id: PrimOps.hc,v 1.113 2003/09/21 22:20:55 wolfgang Exp $
3  *
4  * (c) The GHC Team, 1998-2002
5  *
6  * Primitive functions / data
7  *
8  * ---------------------------------------------------------------------------*/
9
10 #include "Stg.h"
11 #include "Rts.h"
12
13 #include "RtsFlags.h"
14 #include "StgStartup.h"
15 #include "SchedAPI.h"
16 #include "Schedule.h"
17 #include "RtsUtils.h"
18 #include "Storage.h"
19 #include "BlockAlloc.h" /* tmp */
20 #include "StablePriv.h"
21 #include "StgRun.h"
22 #include "Timer.h"      /* TICK_MILLISECS */
23 #include "Prelude.h"
24 #ifndef mingw32_TARGET_OS
25 #include "Itimer.h"    /* getourtimeofday() */
26 #endif
27
28 #ifdef HAVE_SYS_TYPES_H
29 # include <sys/types.h>
30 #endif
31
32 #include <stdlib.h>
33
34 #ifdef mingw32_TARGET_OS
35 #include <windows.h>
36 #include "win32/AsyncIO.h"
37 #endif
38
39 /* ** temporary **
40
41    classes CCallable and CReturnable don't really exist, but the
42    compiler insists on generating dictionaries containing references
43    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
44    for these.  Some C compilers can't cope with zero-length static arrays,
45    so we have to make these one element long.
46 */
47
48 StgWord GHC_ZCCCallable_static_info[1];
49 StgWord GHC_ZCCReturnable_static_info[1];
50   
51 /* -----------------------------------------------------------------------------
52    Macros for Hand-written primitives.
53    -------------------------------------------------------------------------- */
54
55 /*
56  * Horrible macros for returning unboxed tuples.
57  *
58  * How an unboxed tuple is returned depends on two factors:
59  *    - the number of real registers we have available
60  *    - the boxedness of the returned fields.
61  *
62  * To return an unboxed tuple from a primitive operation, we have macros
63  * RET_<layout> where <layout> describes the boxedness of each field of the
64  * unboxed tuple:  N indicates a non-pointer field, and P indicates a pointer.
65  *
66  * We only define the cases actually used, to avoid having too much
67  * garbage in this section.  Warning: any bugs in here will be hard to
68  * track down.
69  *
70  * The return convention for an unboxed tuple is as follows:
71  *   - fit as many fields as possible in registers (as per the
72  *     function fast-entry point calling convention).
73  *   - sort the rest of the fields into pointers and non-pointers.
74  *     push the pointers on the stack, followed by the non-pointers.
75  *     (so the pointers have higher addresses).
76  */
77
78 /*------ All Regs available */
79 #if MAX_REAL_VANILLA_REG == 8
80 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
81 # define RET_N(a)     RET_P(a)
82
83 # define RET_PP(a,b)  R1.w = (W_)(a); R2.w = (W_)(b); JMP_(ENTRY_CODE(Sp[0]));
84 # define RET_NN(a,b)  RET_PP(a,b)
85 # define RET_NP(a,b)  RET_PP(a,b)
86
87 # define RET_PPP(a,b,c) \
88         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); JMP_(ENTRY_CODE(Sp[0]));
89 # define RET_NNP(a,b,c) RET_PPP(a,b,c)
90
91 # define RET_NNNP(a,b,c,d) \
92         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)d; \
93         JMP_(ENTRY_CODE(Sp[0]));
94
95 # define RET_NPNP(a,b,c,d) \
96         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)(d); \
97         JMP_(ENTRY_CODE(Sp[0]));
98
99 #elif MAX_REAL_VANILLA_REG > 2 && MAX_REAL_VANILLA_REG < 8
100 # error RET_n macros not defined for this setup.
101
102 /*------ 2 Registers available */
103 #elif MAX_REAL_VANILLA_REG == 2
104
105 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
106 # define RET_N(a)     RET_P(a)
107
108 # define RET_PP(a,b)   R1.w = (W_)(a); R2.w = (W_)(b); \
109                        JMP_(ENTRY_CODE(Sp[0]));
110 # define RET_NN(a,b)   RET_PP(a,b)
111 # define RET_NP(a,b)   RET_PP(a,b)
112
113 # define RET_PPP(a,b,c)                         \
114         R1.w = (W_)(a);                         \
115         R2.w = (W_)(b);                         \
116         Sp[-1] = (W_)(c);                       \
117         Sp -= 1;                                \
118         JMP_(ENTRY_CODE(Sp[1]));
119
120 # define RET_NNP(a,b,c)                         \
121         R1.w = (W_)(a);                         \
122         R2.w = (W_)(b);                         \
123         Sp[-1] = (W_)(c);                       \
124         Sp -= 1;                                \
125         JMP_(ENTRY_CODE(Sp[1]));
126
127 # define RET_NNNP(a,b,c,d)                      \
128         R1.w = (W_)(a);                         \
129         R2.w = (W_)(b);                         \
130         Sp[-2] = (W_)(c);                       \
131         Sp[-1] = (W_)(d);                       \
132         Sp -= 2;                                \
133         JMP_(ENTRY_CODE(Sp[2]));
134
135 # define RET_NPNP(a,b,c,d)                      \
136         R1.w = (W_)(a);                         \
137         R2.w = (W_)(b);                         \
138         Sp[-2] = (W_)(c);                       \
139         Sp[-1] = (W_)(d);                       \
140         Sp -= 2;                                \
141         JMP_(ENTRY_CODE(Sp[2]));
142
143 /*------ 1 Register available */
144 #elif MAX_REAL_VANILLA_REG == 1
145 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
146 # define RET_N(a)     RET_P(a)
147
148 # define RET_PP(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 1; \
149                        JMP_(ENTRY_CODE(Sp[1]));
150 # define RET_NN(a,b)   R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 2; \
151                        JMP_(ENTRY_CODE(Sp[2]));
152 # define RET_NP(a,b)   RET_PP(a,b)
153
154 # define RET_PPP(a,b,c)                         \
155         R1.w = (W_)(a);                         \
156         Sp[-2] = (W_)(b);                       \
157         Sp[-1] = (W_)(c);                       \
158         Sp -= 2;                                \
159         JMP_(ENTRY_CODE(Sp[2]));
160
161 # define RET_NNP(a,b,c)                         \
162         R1.w = (W_)(a);                         \
163         Sp[-2] = (W_)(b);                       \
164         Sp[-1] = (W_)(c);                       \
165         Sp -= 2;                                \
166         JMP_(ENTRY_CODE(Sp[2]));
167
168 # define RET_NNNP(a,b,c,d)                      \
169         R1.w = (W_)(a);                         \
170         Sp[-3] = (W_)(b);                       \
171         Sp[-2] = (W_)(c);                       \
172         Sp[-1] = (W_)(d);                       \
173         Sp -= 3;                                \
174         JMP_(ENTRY_CODE(Sp[3]));
175
176 # define RET_NPNP(a,b,c,d)                      \
177         R1.w = (W_)(a);                         \
178         Sp[-3] = (W_)(c);                       \
179         Sp[-2] = (W_)(b);                       \
180         Sp[-1] = (W_)(d);                       \
181         Sp -= 3;                                \
182         JMP_(ENTRY_CODE(Sp[3]));
183
184 #else /* 0 Regs available */
185
186 #define PUSH(o,x) Sp[-o] = (W_)(x)
187
188 #define PUSHED(m)   Sp -= (m); JMP_(ENTRY_CODE(Sp[m]));
189
190 # define RET_P(a)     PUSH(1,a); PUSHED(1)
191 # define RET_N(a)     PUSH(1,a); PUSHED(1)
192
193 # define RET_PP(a,b)   PUSH(2,a); PUSH(1,b); PUSHED(2)
194 # define RET_NN(a,b)   PUSH(2,a); PUSH(1,b); PUSHED(2)
195 # define RET_NP(a,b)   PUSH(2,a); PUSH(1,b); PUSHED(2)
196
197 # define RET_PPP(a,b,c) PUSH(3,a); PUSH(2,b); PUSH(1,c); PUSHED(3)
198 # define RET_NNP(a,b,c) PUSH(3,a); PUSH(2,b); PUSH(1,c); PUSHED(3)
199
200 # define RET_NNNP(a,b,c,d) PUSH(4,a); PUSH(3,b); PUSH(2,c); PUSH(1,d); PUSHED(4)        
201 # define RET_NPNP(a,b,c,d) PUSH(4,a); PUSH(3,c); PUSH(2,b); PUSH(1,d); PUSHED(4)        
202 #endif
203
204 /*-----------------------------------------------------------------------------
205   Array Primitives
206
207   Basically just new*Array - the others are all inline macros.
208
209   The size arg is always passed in R1, and the result returned in R1.
210
211   The slow entry point is for returning from a heap check, the saved
212   size argument must be re-loaded from the stack.
213   -------------------------------------------------------------------------- */
214
215 /* for objects that are *less* than the size of a word, make sure we
216  * round up to the nearest word for the size of the array.
217  */
218
219 #define BYTES_TO_STGWORDS(n) ((n) + sizeof(W_) - 1)/sizeof(W_)
220
221 FN_(newByteArrayzh_fast)
222  {
223    W_ size, stuff_size, n;
224    StgArrWords* p;
225    FB_
226      MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
227      n = R1.w;
228      stuff_size = BYTES_TO_STGWORDS(n);
229      size = sizeofW(StgArrWords)+ stuff_size;
230      p = (StgArrWords *)RET_STGCALL1(P_,allocate,size);
231      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
232      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
233      p->words = stuff_size;
234      TICK_RET_UNBOXED_TUP(1)
235      RET_P(p);
236    FE_
237  }
238
239 FN_(newPinnedByteArrayzh_fast)
240  {
241    W_ size, stuff_size, n;
242    StgArrWords* p;
243    FB_
244      MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
245      n = R1.w;
246      stuff_size = BYTES_TO_STGWORDS(n);
247
248      // We want an 8-byte aligned array.  allocatePinned() gives us
249      // 8-byte aligned memory by default, but we want to align the
250      // *goods* inside the ArrWords object, so we have to check the
251      // size of the ArrWords header and adjust our size accordingly.
252      size = sizeofW(StgArrWords)+ stuff_size;
253      if ((sizeof(StgArrWords) & 7) != 0) {
254          size++;
255      }
256
257      p = (StgArrWords *)RET_STGCALL1(P_,allocatePinned,size);
258      TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
259
260      // Again, if the ArrWords header isn't a multiple of 8 bytes, we
261      // have to push the object forward one word so that the goods
262      // fall on an 8-byte boundary.
263      if ((sizeof(StgArrWords) & 7) != 0) {
264          ((StgPtr)p)++;
265      }
266
267      SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
268      p->words = stuff_size;
269      TICK_RET_UNBOXED_TUP(1)
270      RET_P(p);
271    FE_
272  }
273
274 FN_(newArrayzh_fast)
275 {
276   W_ size, n, init;
277   StgMutArrPtrs* arr;
278   StgPtr p;
279   FB_
280     n = R1.w;
281
282     MAYBE_GC(R2_PTR,newArrayzh_fast);
283
284     size = sizeofW(StgMutArrPtrs) + n;
285     arr = (StgMutArrPtrs *)RET_STGCALL1(P_, allocate, size);
286     TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
287
288     SET_HDR(arr,&stg_MUT_ARR_PTRS_info,CCCS);
289     arr->ptrs = n;
290
291     init = R2.w;
292     for (p = (P_)arr + sizeofW(StgMutArrPtrs); 
293          p < (P_)arr + size; p++) {
294         *p = (W_)init;
295     }
296
297     TICK_RET_UNBOXED_TUP(1);
298     RET_P(arr);
299   FE_
300 }
301
302 FN_(newMutVarzh_fast)
303 {
304   StgMutVar* mv;
305   /* Args: R1.p = initialisation value */
306   FB_
307
308   HP_CHK_GEN_TICKY(sizeofW(StgMutVar), R1_PTR, newMutVarzh_fast);
309   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,1, 0); /* hack, dependent on rep. */
310   CCS_ALLOC(CCCS,sizeofW(StgMutVar));
311
312   mv = (StgMutVar *)(Hp-sizeofW(StgMutVar)+1);
313   SET_HDR(mv,&stg_MUT_VAR_info,CCCS);
314   mv->var = R1.cl;
315
316   TICK_RET_UNBOXED_TUP(1);
317   RET_P(mv);
318   FE_
319 }
320
321 FN_(atomicModifyMutVarzh_fast)
322 {
323    StgMutVar* mv;
324    StgClosure *z, *x, *y, *r;
325    FB_
326    /* Args: R1.p :: MutVar#,  R2.p :: a -> (a,b) */
327
328    /* If x is the current contents of the MutVar#, then 
329       We want to make the new contents point to
330
331          (sel_0 (f x))
332  
333       and the return value is
334
335          (sel_1 (f x))
336
337       obviously we can share (f x).
338
339          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
340          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
341          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
342    */
343
344 #define THUNK_SIZE(n) (sizeofW(StgHeader) + stg_max((n), MIN_UPD_SIZE))
345 #define SIZE (THUNK_SIZE(2) + THUNK_SIZE(1) + THUNK_SIZE(1))
346
347    HP_CHK_GEN_TICKY(SIZE, R1_PTR|R2_PTR, atomicModifyMutVarzh_fast);
348    CCS_ALLOC(CCCS,SIZE);
349
350    x = ((StgMutVar *)R1.cl)->var;
351
352    TICK_ALLOC_UP_THK(2,0); // XXX
353    z = (StgClosure *) Hp - THUNK_SIZE(2) + 1;
354    SET_HDR(z, (StgInfoTable *)&stg_ap_2_upd_info, CCCS);
355    z->payload[0] = R2.cl;
356    z->payload[1] = x;
357
358    TICK_ALLOC_UP_THK(1,1); // XXX
359    y = (StgClosure *) (StgPtr)z - THUNK_SIZE(1);
360    SET_HDR(y, &stg_sel_0_upd_info, CCCS);
361    y->payload[0] = z;
362
363    ((StgMutVar *)R1.cl)->var = y;
364
365    TICK_ALLOC_UP_THK(1,1); // XXX
366    r = (StgClosure *) (StgPtr)y - THUNK_SIZE(1);
367    SET_HDR(r, &stg_sel_1_upd_info, CCCS);
368    r->payload[0] = z;
369
370    RET_P(r);
371    FE_
372 }
373
374 /* -----------------------------------------------------------------------------
375    Foreign Object Primitives
376    -------------------------------------------------------------------------- */
377
378 FN_(mkForeignObjzh_fast)
379 {
380   /* R1.p = ptr to foreign object,
381   */
382   StgForeignObj *result;
383   FB_
384
385   HP_CHK_GEN_TICKY(sizeofW(StgForeignObj), NO_PTRS, mkForeignObjzh_fast);
386   TICK_ALLOC_PRIM(sizeofW(StgHeader),
387                   sizeofW(StgForeignObj)-sizeofW(StgHeader), 0);
388   CCS_ALLOC(CCCS,sizeofW(StgForeignObj)); /* ccs prof */
389
390   result = (StgForeignObj *) (Hp + 1 - sizeofW(StgForeignObj));
391   SET_HDR(result,&stg_FOREIGN_info,CCCS);
392   result->data = R1.p;
393
394   /* returns (# s#, ForeignObj# #) */
395   TICK_RET_UNBOXED_TUP(1);
396   RET_P(result);
397   FE_
398 }
399
400 /* These two are out-of-line for the benefit of the NCG */
401 FN_(unsafeThawArrayzh_fast)
402 {
403   FB_
404   SET_INFO((StgClosure *)R1.cl,&stg_MUT_ARR_PTRS_info);
405   recordMutable((StgMutClosure*)R1.cl);
406
407   TICK_RET_UNBOXED_TUP(1);
408   RET_P(R1.p);
409   FE_
410 }
411
412 /* -----------------------------------------------------------------------------
413    Weak Pointer Primitives
414    -------------------------------------------------------------------------- */
415
416 FN_(mkWeakzh_fast)
417 {
418   /* R1.p = key
419      R2.p = value
420      R3.p = finalizer (or NULL)
421   */
422   StgWeak *w;
423   FB_
424
425   if (R3.cl == NULL) {
426     R3.cl = &stg_NO_FINALIZER_closure;
427   }
428
429   HP_CHK_GEN_TICKY(sizeofW(StgWeak),R1_PTR|R2_PTR|R3_PTR, mkWeakzh_fast);
430   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,  // +1 is for the link field
431                   sizeofW(StgWeak)-sizeofW(StgHeader)-1, 0);
432   CCS_ALLOC(CCCS,sizeofW(StgWeak)); /* ccs prof */
433
434   w = (StgWeak *) (Hp + 1 - sizeofW(StgWeak));
435   SET_HDR(w, &stg_WEAK_info, CCCS);
436
437   w->key        = R1.cl;
438   w->value      = R2.cl;
439   w->finalizer  = R3.cl;
440
441   w->link       = weak_ptr_list;
442   weak_ptr_list = w;
443   IF_DEBUG(weak, fprintf(stderr,"New weak pointer at %p\n",w));
444
445   TICK_RET_UNBOXED_TUP(1);
446   RET_P(w);
447   FE_
448 }
449
450 FN_(finalizzeWeakzh_fast)
451 {
452   /* R1.p = weak ptr
453    */
454   StgDeadWeak *w;
455   StgClosure *f;
456   FB_
457   TICK_RET_UNBOXED_TUP(0);
458   w = (StgDeadWeak *)R1.p;
459
460   /* already dead? */
461   if (w->header.info == &stg_DEAD_WEAK_info) {
462       RET_NP(0,&stg_NO_FINALIZER_closure);
463   }
464
465   /* kill it */
466 #ifdef PROFILING
467   // @LDV profiling
468   // A weak pointer is inherently used, so we do not need to call
469   // LDV_recordDead_FILL_SLOP_DYNAMIC():
470   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
471   // or, LDV_recordDead():
472   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
473   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
474   // large as weak pointers, so there is no need to fill the slop, either.
475   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
476 #endif
477   //
478   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
479   //
480   w->header.info = &stg_DEAD_WEAK_info;
481 #ifdef PROFILING
482   // @LDV profiling
483   LDV_recordCreate((StgClosure *)w);
484 #endif
485   f = ((StgWeak *)w)->finalizer;
486   w->link = ((StgWeak *)w)->link;
487
488   /* return the finalizer */
489   if (f == &stg_NO_FINALIZER_closure) {
490       RET_NP(0,&stg_NO_FINALIZER_closure);
491   } else {
492       RET_NP(1,f);
493   }
494   FE_
495 }
496
497 FN_(deRefWeakzh_fast)
498 {
499   /* R1.p = weak ptr */
500   StgWeak* w;
501   I_       code;
502   P_       val;
503   FB_
504   w = (StgWeak*)R1.p;
505   if (w->header.info == &stg_WEAK_info) {
506     code = 1;
507     val = (P_)((StgWeak *)w)->value;
508   } else {
509     code = 0;
510     val = (P_)w;
511   }
512   RET_NP(code,val);
513   FE_
514 }
515
516 /* -----------------------------------------------------------------------------
517    Arbitrary-precision Integer operations.
518    -------------------------------------------------------------------------- */
519
520 FN_(int2Integerzh_fast)
521 {
522    /* arguments: R1 = Int# */
523
524    I_ val, s;           /* to avoid aliasing */
525    StgArrWords* p;      /* address of array result */
526    FB_
527
528    val = R1.i;
529    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, int2Integerzh_fast);
530    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
531    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
532
533    p = (StgArrWords *)Hp - 1;
534    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
535
536    /* mpz_set_si is inlined here, makes things simpler */
537    if (val < 0) { 
538         s  = -1;
539         *Hp = -val;
540    } else if (val > 0) {
541         s = 1;
542         *Hp = val;
543    } else {
544         s = 0;
545    }
546
547    /* returns (# size  :: Int#, 
548                  data  :: ByteArray# 
549                #)
550    */
551    TICK_RET_UNBOXED_TUP(2);
552    RET_NP(s,p);
553    FE_
554 }
555
556 FN_(word2Integerzh_fast)
557 {
558    /* arguments: R1 = Word# */
559
560    W_ val;              /* to avoid aliasing */
561    I_  s;
562    StgArrWords* p;      /* address of array result */
563    FB_
564
565    val = R1.w;
566    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, word2Integerzh_fast)
567    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
568    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
569
570    p = (StgArrWords *)Hp - 1;
571    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);
572
573    if (val != 0) {
574         s = 1;
575         *Hp = val;
576    } else {
577         s = 0;
578    }
579
580    /* returns (# size  :: Int#, 
581                  data  :: ByteArray# 
582                #)
583    */
584    TICK_RET_UNBOXED_TUP(2);
585    RET_NP(s,p);
586    FE_
587 }
588
589
590 /*
591  * 'long long' primops for converting to/from Integers.
592  */
593
594 #ifdef SUPPORT_LONG_LONGS
595
596 FN_(int64ToIntegerzh_fast)
597 {
598    /* arguments: L1 = Int64# */
599
600    StgInt64  val; /* to avoid aliasing */
601    W_ hi;
602    I_  s, neg, words_needed;
603    StgArrWords* p;      /* address of array result */
604    FB_
605
606    val = (LI_)L1;
607    neg = 0;
608
609    if ( val >= 0x100000000LL || val <= -0x100000000LL )  { 
610        words_needed = 2;
611    } else { 
612        /* minimum is one word */
613        words_needed = 1;
614    }
615    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, int64ToIntegerzh_fast)
616    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
617    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
618
619    p = (StgArrWords *)(Hp-words_needed+1) - 1;
620    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
621
622    if ( val < 0LL ) {
623      neg = 1;
624      val = -val;
625    }
626
627    hi = (W_)((LW_)val / 0x100000000ULL);
628
629    if ( words_needed == 2 )  { 
630       s = 2;
631       Hp[-1] = (W_)val;
632       Hp[0] = hi;
633    } else if ( val != 0 ) {
634       s = 1;
635       Hp[0] = (W_)val;
636    }  else /* val==0 */   {
637       s = 0;
638    }
639    s = ( neg ? -s : s );
640
641    /* returns (# size  :: Int#, 
642                  data  :: ByteArray# 
643                #)
644    */
645    TICK_RET_UNBOXED_TUP(2);
646    RET_NP(s,p);
647    FE_
648 }
649
650 FN_(word64ToIntegerzh_fast)
651 {
652    /* arguments: L1 = Word64# */
653
654    StgWord64 val; /* to avoid aliasing */
655    StgWord hi;
656    I_  s, words_needed;
657    StgArrWords* p;      /* address of array result */
658    FB_
659
660    val = (LW_)L1;
661    if ( val >= 0x100000000ULL ) {
662       words_needed = 2;
663    } else {
664       words_needed = 1;
665    }
666    HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, word64ToIntegerzh_fast)
667    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
668    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
669
670    p = (StgArrWords *)(Hp-words_needed+1) - 1;
671    SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);
672
673    hi = (W_)((LW_)val / 0x100000000ULL);
674    if ( val >= 0x100000000ULL ) { 
675      s = 2;
676      Hp[-1] = ((W_)val);
677      Hp[0]  = (hi);
678    } else if ( val != 0 )      {
679       s = 1;
680       Hp[0] = ((W_)val);
681    } else /* val==0 */         {
682       s = 0;
683    }
684
685    /* returns (# size  :: Int#, 
686                  data  :: ByteArray# 
687                #)
688    */
689    TICK_RET_UNBOXED_TUP(2);
690    RET_NP(s,p);
691    FE_
692 }
693
694
695 #endif /* SUPPORT_LONG_LONGS */
696
697 /* ToDo: this is shockingly inefficient */
698
699 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
700 FN_(name)                                                               \
701 {                                                                       \
702   MP_INT arg1, arg2, result;                                            \
703   I_ s1, s2;                                                            \
704   StgArrWords* d1;                                                      \
705   StgArrWords* d2;                                                      \
706   FB_                                                                   \
707                                                                         \
708   /* call doYouWantToGC() */                                            \
709   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
710                                                                         \
711   d1 = (StgArrWords *)R2.p;                                             \
712   s1 = R1.i;                                                            \
713   d2 = (StgArrWords *)R4.p;                                             \
714   s2 = R3.i;                                                            \
715                                                                         \
716   arg1._mp_alloc        = d1->words;                                    \
717   arg1._mp_size         = (s1);                                         \
718   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
719   arg2._mp_alloc        = d2->words;                                    \
720   arg2._mp_size         = (s2);                                         \
721   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
722                                                                         \
723   STGCALL1(mpz_init,&result);                                           \
724                                                                         \
725   /* Perform the operation */                                           \
726   STGCALL3(mp_fun,&result,&arg1,&arg2);                                 \
727                                                                         \
728   TICK_RET_UNBOXED_TUP(2);                                              \
729   RET_NP(result._mp_size,                                               \
730          result._mp_d-sizeofW(StgArrWords));                            \
731   FE_                                                                   \
732 }
733
734 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
735 FN_(name)                                                               \
736 {                                                                       \
737   MP_INT arg1, result;                                                  \
738   I_ s1;                                                                \
739   StgArrWords* d1;                                                      \
740   FB_                                                                   \
741                                                                         \
742   /* call doYouWantToGC() */                                            \
743   MAYBE_GC(R2_PTR, name);                                               \
744                                                                         \
745   d1 = (StgArrWords *)R2.p;                                             \
746   s1 = R1.i;                                                            \
747                                                                         \
748   arg1._mp_alloc        = d1->words;                                    \
749   arg1._mp_size         = (s1);                                         \
750   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
751                                                                         \
752   STGCALL1(mpz_init,&result);                                           \
753                                                                         \
754   /* Perform the operation */                                           \
755   STGCALL2(mp_fun,&result,&arg1);                                       \
756                                                                         \
757   TICK_RET_UNBOXED_TUP(2);                                              \
758   RET_NP(result._mp_size,                                               \
759          result._mp_d-sizeofW(StgArrWords));                            \
760   FE_                                                                   \
761 }
762
763 #define GMP_TAKE2_RET2(name,mp_fun)                                     \
764 FN_(name)                                                               \
765 {                                                                       \
766   MP_INT arg1, arg2, result1, result2;                                  \
767   I_ s1, s2;                                                            \
768   StgArrWords* d1;                                                      \
769   StgArrWords* d2;                                                      \
770   FB_                                                                   \
771                                                                         \
772   /* call doYouWantToGC() */                                            \
773   MAYBE_GC(R2_PTR | R4_PTR, name);                                      \
774                                                                         \
775   d1 = (StgArrWords *)R2.p;                                             \
776   s1 = R1.i;                                                            \
777   d2 = (StgArrWords *)R4.p;                                             \
778   s2 = R3.i;                                                            \
779                                                                         \
780   arg1._mp_alloc        = d1->words;                                    \
781   arg1._mp_size         = (s1);                                         \
782   arg1._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d1));     \
783   arg2._mp_alloc        = d2->words;                                    \
784   arg2._mp_size         = (s2);                                         \
785   arg2._mp_d            = (unsigned long int *) (BYTE_ARR_CTS(d2));     \
786                                                                         \
787   STGCALL1(mpz_init,&result1);                                          \
788   STGCALL1(mpz_init,&result2);                                          \
789                                                                         \
790   /* Perform the operation */                                           \
791   STGCALL4(mp_fun,&result1,&result2,&arg1,&arg2);                       \
792                                                                         \
793   TICK_RET_UNBOXED_TUP(4);                                              \
794   RET_NPNP(result1._mp_size,                                            \
795            result1._mp_d-sizeofW(StgArrWords),                          \
796            result2._mp_size,                                            \
797            result2._mp_d-sizeofW(StgArrWords));                         \
798   FE_                                                                   \
799 }
800
801 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add);
802 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub);
803 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul);
804 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd);
805 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q);
806 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r);
807 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact);
808 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and);
809 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior);
810 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor);
811 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com);
812
813 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr);
814 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr);
815
816
817 FN_(gcdIntzh_fast)
818 {
819   /* R1 = the first Int#; R2 = the second Int# */
820   mp_limb_t aa;
821   I_ r;
822   FB_
823   aa = (mp_limb_t)(R1.i);
824   r = RET_STGCALL3(StgInt, mpn_gcd_1, (mp_limb_t *)(&aa), 1, (mp_limb_t)(R2.i));
825
826   R1.i = r;
827   /* Result parked in R1, return via info-pointer at TOS */
828   JMP_(ENTRY_CODE(Sp[0]));
829   FE_
830 }
831
832 FN_(gcdIntegerIntzh_fast)
833 {
834   /* R1 = s1; R2 = d1; R3 = the int */
835   I_ r;
836   FB_
837   r = RET_STGCALL3(StgInt,mpn_gcd_1,(mp_limb_t *)(BYTE_ARR_CTS(R2.p)), R1.i, R3.i);
838
839   R1.i = r;
840   /* Result parked in R1, return via info-pointer at TOS */
841   JMP_(ENTRY_CODE(Sp[0]));
842   FE_
843 }
844
845 FN_(cmpIntegerIntzh_fast)
846 {
847   /* R1 = s1; R2 = d1; R3 = the int */
848   I_ usize;
849   I_ vsize;
850   I_ v_digit;
851   mp_limb_t u_digit;
852   FB_
853
854   usize = R1.i;
855   vsize = 0;
856   v_digit = R3.i;
857
858   // paraphrased from mpz_cmp_si() in the GMP sources
859   if (v_digit > 0) {
860       vsize = 1;
861   } else if (v_digit < 0) {
862       vsize = -1;
863       v_digit = -v_digit;
864   }
865
866   if (usize != vsize) {
867     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
868   }
869
870   if (usize == 0) {
871     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
872   }
873
874   u_digit = *(mp_limb_t *)(BYTE_ARR_CTS(R2.p));
875
876   if (u_digit == (mp_limb_t) (unsigned long) v_digit) {
877     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
878   }
879
880   if (u_digit > (mp_limb_t) (unsigned long) v_digit) {
881     R1.i = usize; 
882   } else {
883     R1.i = -usize; 
884   }
885
886   JMP_(ENTRY_CODE(Sp[0]));
887   FE_
888 }
889
890 FN_(cmpIntegerzh_fast)
891 {
892   /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
893   I_ usize;
894   I_ vsize;
895   I_ size;
896   StgPtr up, vp;
897   int cmp;
898   FB_
899
900   // paraphrased from mpz_cmp() in the GMP sources
901   usize = R1.i;
902   vsize = R3.i;
903
904   if (usize != vsize) {
905     R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));
906   }
907
908   if (usize == 0) {
909     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
910   }
911
912   size = abs(usize);
913
914   up = BYTE_ARR_CTS(R2.p);
915   vp = BYTE_ARR_CTS(R4.p);
916
917   cmp = RET_STGCALL3(I_, mpn_cmp, (mp_limb_t *)up, (mp_limb_t *)vp, size);
918
919   if (cmp == 0) {
920     R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));
921   }
922
923   if ((cmp < 0) == (usize < 0)) {
924     R1.i = 1;
925   } else {
926     R1.i = (-1); 
927   }
928   /* Result parked in R1, return via info-pointer at TOS */
929   JMP_(ENTRY_CODE(Sp[0]));
930   FE_
931 }
932
933 FN_(integer2Intzh_fast)
934 {
935   /* R1 = s; R2 = d */
936   I_ r, s;
937   FB_
938   s = R1.i;
939   if (s == 0)
940     r = 0;
941   else {
942     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
943     if (s < 0) r = -r;
944   }
945   /* Result parked in R1, return via info-pointer at TOS */
946   R1.i = r;
947   JMP_(ENTRY_CODE(Sp[0]));
948   FE_
949 }
950
951 FN_(integer2Wordzh_fast)
952 {
953   /* R1 = s; R2 = d */
954   I_ s;
955   W_ r;
956   FB_
957   s = R1.i;
958   if (s == 0)
959     r = 0;
960   else {
961     r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];
962     if (s < 0) r = -r;
963   }
964   /* Result parked in R1, return via info-pointer at TOS */
965   R1.w = r;
966   JMP_(ENTRY_CODE(Sp[0]));
967   FE_
968 }
969
970
971 FN_(decodeFloatzh_fast)
972
973   MP_INT mantissa;
974   I_ exponent;
975   StgArrWords* p;
976   StgFloat arg;
977   FB_
978
979   /* arguments: F1 = Float# */
980   arg = F1;
981
982   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, decodeFloatzh_fast);
983   TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
984   CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
985
986   /* Be prepared to tell Lennart-coded __decodeFloat    */
987   /* where mantissa._mp_d can be put (it does not care about the rest) */
988   p = (StgArrWords *)Hp - 1;
989   SET_ARR_HDR(p,&stg_ARR_WORDS_info,CCCS,1)
990   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
991
992   /* Perform the operation */
993   STGCALL3(__decodeFloat,&mantissa,&exponent,arg);
994
995   /* returns: (Int# (expn), Int#, ByteArray#) */
996   TICK_RET_UNBOXED_TUP(3);
997   RET_NNP(exponent,mantissa._mp_size,p);
998   FE_
999 }
1000
1001 #define DOUBLE_MANTISSA_SIZE (sizeofW(StgDouble))
1002 #define ARR_SIZE (sizeofW(StgArrWords) + DOUBLE_MANTISSA_SIZE)
1003
1004 FN_(decodeDoublezh_fast)
1005 { MP_INT mantissa;
1006   I_ exponent;
1007   StgDouble arg;
1008   StgArrWords* p;
1009   FB_
1010
1011   /* arguments: D1 = Double# */
1012   arg = D1;
1013
1014   HP_CHK_GEN_TICKY(ARR_SIZE, NO_PTRS, decodeDoublezh_fast);
1015   TICK_ALLOC_PRIM(sizeofW(StgArrWords),DOUBLE_MANTISSA_SIZE,0);
1016   CCS_ALLOC(CCCS,ARR_SIZE); /* ccs prof */
1017
1018   /* Be prepared to tell Lennart-coded __decodeDouble   */
1019   /* where mantissa.d can be put (it does not care about the rest) */
1020   p = (StgArrWords *)(Hp-ARR_SIZE+1);
1021   SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, DOUBLE_MANTISSA_SIZE);
1022   mantissa._mp_d = (void *)BYTE_ARR_CTS(p);
1023
1024   /* Perform the operation */
1025   STGCALL3(__decodeDouble,&mantissa,&exponent,arg);
1026
1027   /* returns: (Int# (expn), Int#, ByteArray#) */
1028   TICK_RET_UNBOXED_TUP(3);
1029   RET_NNP(exponent,mantissa._mp_size,p);
1030   FE_
1031 }
1032
1033 /* -----------------------------------------------------------------------------
1034  * Concurrency primitives
1035  * -------------------------------------------------------------------------- */
1036
1037 FN_(forkzh_fast)
1038 {
1039   FB_
1040   /* args: R1 = closure to spark */
1041   
1042   MAYBE_GC(R1_PTR, forkzh_fast);
1043
1044   /* create it right now, return ThreadID in R1 */
1045   R1.t = RET_STGCALL2(StgTSO *, createIOThread, 
1046                      RtsFlags.GcFlags.initialStkSize, R1.cl);
1047   STGCALL1(scheduleThread, R1.t);
1048       
1049   /* switch at the earliest opportunity */ 
1050   context_switch = 1;
1051   
1052   RET_P(R1.t);
1053   FE_
1054 }
1055
1056 FN_(forkProcesszh_fast)
1057 {
1058   pid_t pid;
1059
1060   FB_
1061   /* args: none */
1062   /* result: Pid */
1063
1064   R1.i = RET_STGCALL1(StgInt, forkProcess, CurrentTSO);
1065
1066   RET_N(R1.i);
1067   FE_
1068 }
1069
1070 FN_(yieldzh_fast)
1071 {
1072   FB_
1073   JMP_(stg_yield_noregs);
1074   FE_
1075 }
1076
1077 FN_(myThreadIdzh_fast)
1078 {
1079   /* no args. */
1080   FB_
1081   RET_P((P_)CurrentTSO);
1082   FE_
1083 }
1084
1085 FN_(labelThreadzh_fast)
1086 {
1087   FB_
1088   /* args: 
1089         R1.p = ThreadId#
1090         R2.p = Addr# */
1091 #ifdef DEBUG
1092   STGCALL2(labelThread,R1.p,(char *)R2.p);
1093 #endif
1094   JMP_(ENTRY_CODE(Sp[0]));
1095   FE_
1096 }
1097
1098 FN_(isCurrentThreadBoundzh_fast)
1099 {
1100   /* no args */
1101   I_ r;
1102   FB_
1103   r = (I_)(RET_STGCALL1(StgBool, isThreadBound, CurrentTSO));
1104   RET_N(r);
1105   FE_
1106 }
1107
1108 /* -----------------------------------------------------------------------------
1109  * MVar primitives
1110  *
1111  * take & putMVar work as follows.  Firstly, an important invariant:
1112  *
1113  *    If the MVar is full, then the blocking queue contains only
1114  *    threads blocked on putMVar, and if the MVar is empty then the
1115  *    blocking queue contains only threads blocked on takeMVar.
1116  *
1117  * takeMvar:
1118  *    MVar empty : then add ourselves to the blocking queue
1119  *    MVar full  : remove the value from the MVar, and
1120  *                 blocking queue empty     : return
1121  *                 blocking queue non-empty : perform the first blocked putMVar
1122  *                                            from the queue, and wake up the
1123  *                                            thread (MVar is now full again)
1124  *
1125  * putMVar is just the dual of the above algorithm.
1126  *
1127  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1128  * the stack of the thread waiting to do the putMVar.  See
1129  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1130  * the stack layout, and the PerformPut and PerformTake macros below.
1131  *
1132  * It is important that a blocked take or put is woken up with the
1133  * take/put already performed, because otherwise there would be a
1134  * small window of vulnerability where the thread could receive an
1135  * exception and never perform its take or put, and we'd end up with a
1136  * deadlock.
1137  *
1138  * -------------------------------------------------------------------------- */
1139
1140 FN_(isEmptyMVarzh_fast)
1141 {
1142   /* args: R1 = MVar closure */
1143   I_ r;
1144   FB_
1145   r = (I_)((GET_INFO((StgMVar*)(R1.p))) == &stg_EMPTY_MVAR_info);
1146   RET_N(r);
1147   FE_
1148 }
1149
1150
1151 FN_(newMVarzh_fast)
1152 {
1153   StgMVar *mvar;
1154
1155   FB_
1156   /* args: none */
1157
1158   HP_CHK_GEN_TICKY(sizeofW(StgMVar), NO_PTRS, newMVarzh_fast);
1159   TICK_ALLOC_PRIM(sizeofW(StgMutVar)-1, // consider head,tail,link as admin wds
1160                   1, 0);
1161   CCS_ALLOC(CCCS,sizeofW(StgMVar)); /* ccs prof */
1162   
1163   mvar = (StgMVar *) (Hp - sizeofW(StgMVar) + 1);
1164   SET_HDR(mvar,&stg_EMPTY_MVAR_info,CCCS);
1165   mvar->head = mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1166   mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1167
1168   TICK_RET_UNBOXED_TUP(1);
1169   RET_P(mvar);
1170   FE_
1171 }
1172
1173 /* If R1 isn't available, pass it on the stack */
1174 #ifdef REG_R1
1175 #define PerformTake(tso, value) ({              \
1176     (tso)->sp[1] = (W_)value;                   \
1177     (tso)->sp[0] = (W_)&stg_gc_unpt_r1_info;    \
1178   })
1179 #else
1180 #define PerformTake(tso, value) ({              \
1181     (tso)->sp[1] = (W_)value;                   \
1182     (tso)->sp[0] = (W_)&stg_ut_1_0_unreg_info;  \
1183   })
1184 #endif
1185
1186
1187 #define PerformPut(tso) ({                              \
1188     StgClosure *val = (StgClosure *)(tso)->sp[2];       \
1189     (tso)->sp += 3;                                     \
1190     val;                                                \
1191   })
1192
1193 FN_(takeMVarzh_fast)
1194 {
1195   StgMVar *mvar;
1196   StgClosure *val;
1197   const StgInfoTable *info;
1198
1199   FB_
1200   /* args: R1 = MVar closure */
1201
1202   mvar = (StgMVar *)R1.p;
1203
1204 #ifdef SMP
1205   info = LOCK_CLOSURE(mvar);
1206 #else
1207   info = GET_INFO(mvar);
1208 #endif
1209
1210   /* If the MVar is empty, put ourselves on its blocking queue,
1211    * and wait until we're woken up.
1212    */
1213   if (info == &stg_EMPTY_MVAR_info) {
1214     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1215       mvar->head = CurrentTSO;
1216     } else {
1217       mvar->tail->link = CurrentTSO;
1218     }
1219     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1220     CurrentTSO->why_blocked = BlockedOnMVar;
1221     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1222     mvar->tail = CurrentTSO;
1223
1224 #ifdef SMP
1225     /* unlock the MVar */
1226     mvar->header.info = &stg_EMPTY_MVAR_info;
1227 #endif
1228     JMP_(stg_block_takemvar);
1229   }
1230
1231   /* we got the value... */
1232   val = mvar->value;
1233
1234   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1235       /* There are putMVar(s) waiting... 
1236        * wake up the first thread on the queue
1237        */
1238       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1239
1240       /* actually perform the putMVar for the thread that we just woke up */
1241       mvar->value = PerformPut(mvar->head);
1242
1243 #if defined(GRAN) || defined(PAR)
1244       /* ToDo: check 2nd arg (mvar) is right */
1245       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1246 #else
1247       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1248 #endif
1249       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1250           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1251       }
1252 #ifdef SMP
1253       /* unlock in the SMP case */
1254       SET_INFO(mvar,&stg_FULL_MVAR_info);
1255 #endif
1256       TICK_RET_UNBOXED_TUP(1);
1257       RET_P(val);
1258   } else {
1259       /* No further putMVars, MVar is now empty */
1260
1261       /* do this last... we might have locked the MVar in the SMP case,
1262        * and writing the info pointer will unlock it.
1263        */
1264       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1265       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1266       TICK_RET_UNBOXED_TUP(1);
1267       RET_P(val);
1268   }
1269   FE_
1270 }
1271
1272 FN_(tryTakeMVarzh_fast)
1273 {
1274   StgMVar *mvar;
1275   StgClosure *val;
1276   const StgInfoTable *info;
1277
1278   FB_
1279   /* args: R1 = MVar closure */
1280
1281   mvar = (StgMVar *)R1.p;
1282
1283 #ifdef SMP
1284   info = LOCK_CLOSURE(mvar);
1285 #else
1286   info = GET_INFO(mvar);
1287 #endif
1288
1289   if (info == &stg_EMPTY_MVAR_info) {
1290
1291 #ifdef SMP
1292       /* unlock the MVar */
1293       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1294 #endif
1295
1296       /* HACK: we need a pointer to pass back, 
1297        * so we abuse NO_FINALIZER_closure
1298        */
1299       RET_NP(0, &stg_NO_FINALIZER_closure);
1300   }
1301
1302   /* we got the value... */
1303   val = mvar->value;
1304
1305   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1306       /* There are putMVar(s) waiting... 
1307        * wake up the first thread on the queue
1308        */
1309       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1310
1311       /* actually perform the putMVar for the thread that we just woke up */
1312       mvar->value = PerformPut(mvar->head);
1313
1314 #if defined(GRAN) || defined(PAR)
1315       /* ToDo: check 2nd arg (mvar) is right */
1316       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1317 #else
1318       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1319 #endif
1320       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1321           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1322       }
1323 #ifdef SMP
1324       /* unlock in the SMP case */
1325       SET_INFO(mvar,&stg_FULL_MVAR_info);
1326 #endif
1327   } else {
1328       /* No further putMVars, MVar is now empty */
1329       mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1330
1331       /* do this last... we might have locked the MVar in the SMP case,
1332        * and writing the info pointer will unlock it.
1333        */
1334       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1335   }
1336
1337   TICK_RET_UNBOXED_TUP(1);
1338   RET_NP((I_)1, val);
1339   FE_
1340 }
1341
1342 FN_(putMVarzh_fast)
1343 {
1344   StgMVar *mvar;
1345   const StgInfoTable *info;
1346
1347   FB_
1348   /* args: R1 = MVar, R2 = value */
1349
1350   mvar = (StgMVar *)R1.p;
1351
1352 #ifdef SMP
1353   info = LOCK_CLOSURE(mvar);
1354 #else
1355   info = GET_INFO(mvar);
1356 #endif
1357
1358   if (info == &stg_FULL_MVAR_info) {
1359     if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1360       mvar->head = CurrentTSO;
1361     } else {
1362       mvar->tail->link = CurrentTSO;
1363     }
1364     CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1365     CurrentTSO->why_blocked = BlockedOnMVar;
1366     CurrentTSO->block_info.closure = (StgClosure *)mvar;
1367     mvar->tail = CurrentTSO;
1368
1369 #ifdef SMP
1370     /* unlock the MVar */
1371     SET_INFO(mvar,&stg_FULL_MVAR_info);
1372 #endif
1373     JMP_(stg_block_putmvar);
1374   }
1375   
1376   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1377       /* There are takeMVar(s) waiting: wake up the first one
1378        */
1379       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1380
1381       /* actually perform the takeMVar */
1382       PerformTake(mvar->head, R2.cl);
1383       
1384 #if defined(GRAN) || defined(PAR)
1385       /* ToDo: check 2nd arg (mvar) is right */
1386       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1387 #else
1388       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1389 #endif
1390       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1391           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1392       }
1393 #ifdef SMP
1394       /* unlocks the MVar in the SMP case */
1395       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1396 #endif
1397       JMP_(ENTRY_CODE(Sp[0]));
1398   } else {
1399       /* No further takes, the MVar is now full. */
1400       mvar->value = R2.cl;
1401       /* unlocks the MVar in the SMP case */
1402       SET_INFO(mvar,&stg_FULL_MVAR_info);
1403       JMP_(ENTRY_CODE(Sp[0]));
1404   }
1405
1406   /* ToDo: yield afterward for better communication performance? */
1407   FE_
1408 }
1409
1410 FN_(tryPutMVarzh_fast)
1411 {
1412   StgMVar *mvar;
1413   const StgInfoTable *info;
1414
1415   FB_
1416   /* args: R1 = MVar, R2 = value */
1417
1418   mvar = (StgMVar *)R1.p;
1419
1420 #ifdef SMP
1421   info = LOCK_CLOSURE(mvar);
1422 #else
1423   info = GET_INFO(mvar);
1424 #endif
1425
1426   if (info == &stg_FULL_MVAR_info) {
1427
1428 #ifdef SMP
1429     /* unlock the MVar */
1430     mvar->header.info = &stg_FULL_MVAR_info;
1431 #endif
1432
1433     RET_N(0);
1434   }
1435   
1436   if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1437       /* There are takeMVar(s) waiting: wake up the first one
1438        */
1439       ASSERT(mvar->head->why_blocked == BlockedOnMVar);
1440
1441       /* actually perform the takeMVar */
1442       PerformTake(mvar->head, R2.cl);
1443       
1444 #if defined(GRAN) || defined(PAR)
1445       /* ToDo: check 2nd arg (mvar) is right */
1446       mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
1447 #else
1448       mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
1449 #endif
1450       if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
1451           mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1452       }
1453 #ifdef SMP
1454       /* unlocks the MVar in the SMP case */
1455       SET_INFO(mvar,&stg_EMPTY_MVAR_info);
1456 #endif
1457       JMP_(ENTRY_CODE(Sp[0]));
1458   } else {
1459       /* No further takes, the MVar is now full. */
1460       mvar->value = R2.cl;
1461       /* unlocks the MVar in the SMP case */
1462       SET_INFO(mvar,&stg_FULL_MVAR_info);
1463       JMP_(ENTRY_CODE(Sp[0]));
1464   }
1465
1466   /* ToDo: yield afterward for better communication performance? */
1467   FE_
1468 }
1469
1470 /* -----------------------------------------------------------------------------
1471    Stable pointer primitives
1472    -------------------------------------------------------------------------  */
1473
1474 FN_(makeStableNamezh_fast)
1475 {
1476   StgWord index;
1477   StgStableName *sn_obj;
1478   FB_
1479
1480   HP_CHK_GEN_TICKY(sizeofW(StgStableName), R1_PTR, makeStableNamezh_fast);
1481   TICK_ALLOC_PRIM(sizeofW(StgHeader), 
1482                   sizeofW(StgStableName)-sizeofW(StgHeader), 0);
1483   CCS_ALLOC(CCCS,sizeofW(StgStableName)); /* ccs prof */
1484   
1485   index = RET_STGCALL1(StgWord,lookupStableName,R1.p);
1486
1487   /* Is there already a StableName for this heap object? */
1488   if (stable_ptr_table[index].sn_obj == NULL) {
1489     sn_obj = (StgStableName *) (Hp - sizeofW(StgStableName) + 1);
1490     SET_HDR(sn_obj,&stg_STABLE_NAME_info,CCCS);
1491     sn_obj->sn = index;
1492     stable_ptr_table[index].sn_obj = (StgClosure *)sn_obj;
1493   } else {
1494     (StgClosure *)sn_obj = stable_ptr_table[index].sn_obj;
1495   }
1496
1497   TICK_RET_UNBOXED_TUP(1);
1498   RET_P(sn_obj);
1499 }
1500
1501
1502 FN_(makeStablePtrzh_fast)
1503 {
1504   /* Args: R1 = a */
1505   StgStablePtr sp;
1506   FB_
1507   MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1508   sp = RET_STGCALL1(StgStablePtr,getStablePtr,R1.p);
1509   RET_N(sp);
1510   FE_
1511 }
1512
1513 FN_(deRefStablePtrzh_fast)
1514 {
1515   /* Args: R1 = the stable ptr */
1516   P_ r;
1517   StgStablePtr sp;
1518   FB_
1519   sp = (StgStablePtr)R1.w;
1520   r = stable_ptr_table[(StgWord)sp].addr;
1521   RET_P(r);
1522   FE_
1523 }
1524
1525 /* -----------------------------------------------------------------------------
1526    Bytecode object primitives
1527    -------------------------------------------------------------------------  */
1528
1529 FN_(newBCOzh_fast)
1530 {
1531   /* R1.p = instrs
1532      R2.p = literals
1533      R3.p = ptrs
1534      R4.p = itbls
1535      R5.i = arity
1536      R6.p = bitmap array
1537   */
1538   StgBCO *bco;
1539   nat size;
1540   StgArrWords *bitmap_arr;
1541   FB_
1542
1543   bitmap_arr = (StgArrWords *)R6.cl;
1544   size = sizeofW(StgBCO) + bitmap_arr->words;
1545   HP_CHK_GEN_TICKY(size,R1_PTR|R2_PTR|R3_PTR|R4_PTR|R6_PTR, newBCOzh_fast);
1546   TICK_ALLOC_PRIM(size, size-sizeofW(StgHeader), 0);
1547   CCS_ALLOC(CCCS,size); /* ccs prof */
1548   bco = (StgBCO *) (Hp + 1 - size);
1549   SET_HDR(bco, (const StgInfoTable *)&stg_BCO_info, CCCS);
1550
1551   bco->instrs     = (StgArrWords*)R1.cl;
1552   bco->literals   = (StgArrWords*)R2.cl;
1553   bco->ptrs       = (StgMutArrPtrs*)R3.cl;
1554   bco->itbls      = (StgArrWords*)R4.cl;
1555   bco->arity      = R5.w;
1556   bco->size       = size;
1557
1558   // Copy the arity/bitmap info into the BCO
1559   { 
1560     int i;
1561     for (i = 0; i < bitmap_arr->words; i++) {
1562         bco->bitmap[i] = bitmap_arr->payload[i];
1563     }
1564   }
1565
1566   TICK_RET_UNBOXED_TUP(1);
1567   RET_P(bco);
1568   FE_
1569 }
1570
1571 FN_(mkApUpd0zh_fast)
1572 {
1573   // R1.p = the BCO# for the AP
1574   //
1575   StgPAP* ap;
1576   FB_
1577
1578   // This function is *only* used to wrap zero-arity BCOs in an
1579   // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1580   // saturated and always points directly to a FUN or BCO.
1581   ASSERT(get_itbl(R1.cl)->type == BCO && ((StgBCO *)R1.p)->arity == 0);
1582
1583   HP_CHK_GEN_TICKY(PAP_sizeW(0), R1_PTR, mkApUpd0zh_fast);
1584   TICK_ALLOC_PRIM(sizeofW(StgHeader), PAP_sizeW(0)-sizeofW(StgHeader), 0);
1585   CCS_ALLOC(CCCS,PAP_sizeW(0)); /* ccs prof */
1586   ap = (StgPAP *) (Hp + 1 - PAP_sizeW(0));
1587   SET_HDR(ap, &stg_AP_info, CCCS);
1588
1589   ap->n_args = 0;
1590   ap->fun = R1.cl;
1591
1592   TICK_RET_UNBOXED_TUP(1);
1593   RET_P(ap);
1594   FE_
1595 }
1596
1597 /* -----------------------------------------------------------------------------
1598    Thread I/O blocking primitives
1599    -------------------------------------------------------------------------- */
1600
1601 FN_(waitReadzh_fast)
1602 {
1603   FB_
1604     /* args: R1.i */
1605     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1606     CurrentTSO->why_blocked = BlockedOnRead;
1607     CurrentTSO->block_info.fd = R1.i;
1608     ACQUIRE_LOCK(&sched_mutex);
1609     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1610     RELEASE_LOCK(&sched_mutex);
1611     JMP_(stg_block_noregs);
1612   FE_
1613 }
1614
1615 FN_(waitWritezh_fast)
1616 {
1617   FB_
1618     /* args: R1.i */
1619     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1620     CurrentTSO->why_blocked = BlockedOnWrite;
1621     CurrentTSO->block_info.fd = R1.i;
1622     ACQUIRE_LOCK(&sched_mutex);
1623     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1624     RELEASE_LOCK(&sched_mutex);
1625     JMP_(stg_block_noregs);
1626   FE_
1627 }
1628
1629 FN_(delayzh_fast)
1630 {
1631 #ifdef mingw32_TARGET_OS
1632   StgAsyncIOResult* ares;
1633   unsigned int reqID;
1634 #else
1635   StgTSO *t, *prev;
1636   nat target;
1637 #endif
1638   FB_
1639     /* args: R1.i (microsecond delay amount) */
1640     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1641     CurrentTSO->why_blocked = BlockedOnDelay;
1642
1643     ACQUIRE_LOCK(&sched_mutex);
1644 #ifdef mingw32_TARGET_OS
1645     /* could probably allocate this on the heap instead */
1646     ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "delayzh_fast");
1647     reqID = RET_STGCALL1(W_,addDelayRequest,R1.i);
1648     ares->reqID   = reqID;
1649     ares->len     = 0;
1650     ares->errCode = 0;
1651     CurrentTSO->block_info.async_result = ares;
1652     /* Having all async-blocked threads reside on the blocked_queue simplifies matters, so
1653      * change the status to OnDoProc & put the delayed thread on the blocked_queue.
1654      */
1655     CurrentTSO->why_blocked = BlockedOnDoProc;
1656     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1657 #else
1658     target = (R1.i / (TICK_MILLISECS*1000)) + getourtimeofday();
1659     CurrentTSO->block_info.target = target;
1660
1661     /* Insert the new thread in the sleeping queue. */
1662     prev = NULL;
1663     t = sleeping_queue;
1664     while (t != END_TSO_QUEUE && t->block_info.target < target) {
1665         prev = t;
1666         t = t->link;
1667     }
1668
1669     CurrentTSO->link = t;
1670     if (prev == NULL) {
1671         sleeping_queue = CurrentTSO;
1672     } else {
1673         prev->link = CurrentTSO;
1674     }
1675 #endif
1676     RELEASE_LOCK(&sched_mutex);
1677     JMP_(stg_block_noregs);
1678   FE_
1679 }
1680
1681 #ifdef mingw32_TARGET_OS
1682 FN_(asyncReadzh_fast)
1683 {
1684   StgAsyncIOResult* ares;
1685   unsigned int reqID;
1686   FB_
1687     /* args: R1.i = fd, R2.i = isSock, R3.i = len, R4.p = buf */
1688     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1689     CurrentTSO->why_blocked = BlockedOnRead;
1690     ACQUIRE_LOCK(&sched_mutex);
1691     /* could probably allocate this on the heap instead */
1692     ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncReadzh_fast");
1693     reqID = RET_STGCALL5(W_,addIORequest,R1.i,FALSE,R2.i,R3.i,(char*)R4.p);
1694     ares->reqID   = reqID;
1695     ares->len     = 0;
1696     ares->errCode = 0;
1697     CurrentTSO->block_info.async_result = ares;
1698     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1699     RELEASE_LOCK(&sched_mutex);
1700     JMP_(stg_block_async);
1701   FE_
1702 }
1703
1704 FN_(asyncWritezh_fast)
1705 {
1706   StgAsyncIOResult* ares;
1707   unsigned int reqID;
1708   FB_
1709     /* args: R1.i */
1710     /* args: R1.i = fd, R2.i = isSock, R3.i = len, R4.p = buf */
1711     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1712     CurrentTSO->why_blocked = BlockedOnWrite;
1713     ACQUIRE_LOCK(&sched_mutex);
1714     ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncWritezh_fast");
1715     reqID = RET_STGCALL5(W_,addIORequest,R1.i,TRUE,R2.i,R3.i,(char*)R4.p);
1716     ares->reqID   = reqID;
1717     ares->len     = 0;
1718     ares->errCode = 0;
1719     CurrentTSO->block_info.async_result = ares;
1720     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1721     RELEASE_LOCK(&sched_mutex);
1722     JMP_(stg_block_async);
1723   FE_
1724 }
1725
1726 FN_(asyncDoProczh_fast)
1727 {
1728   StgAsyncIOResult* ares;
1729   unsigned int reqID;
1730   FB_
1731     /* args: R1.i = proc, R2.i = param */
1732     ASSERT(CurrentTSO->why_blocked == NotBlocked);
1733     CurrentTSO->why_blocked = BlockedOnDoProc;
1734     ACQUIRE_LOCK(&sched_mutex);
1735     /* could probably allocate this on the heap instead */
1736     ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncDoProczh_fast");
1737     reqID = RET_STGCALL2(W_,addDoProcRequest,R1.p,R2.p);
1738     ares->reqID   = reqID;
1739     ares->len     = 0;
1740     ares->errCode = 0;
1741     CurrentTSO->block_info.async_result = ares;
1742     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1743     RELEASE_LOCK(&sched_mutex);
1744     JMP_(stg_block_async);
1745   FE_
1746 }
1747 #endif
1748