1 /* -----------------------------------------------------------------------------
2 * $Id: PrimOps.hc,v 1.103 2002/12/11 15:36:45 simonmar Exp $
4 * (c) The GHC Team, 1998-2002
6 * Primitive functions / data
8 * ---------------------------------------------------------------------------*/
14 #include "StgStartup.h"
19 #include "BlockAlloc.h" /* tmp */
20 #include "StablePriv.h"
25 #ifdef HAVE_SYS_TYPES_H
26 # include <sys/types.h>
33 classes CCallable and CReturnable don't really exist, but the
34 compiler insists on generating dictionaries containing references
35 to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
36 for these. Some C compilers can't cope with zero-length static arrays,
37 so we have to make these one element long.
/* One-element dummies: some C compilers reject zero-length static arrays
 * (see the comment above). Only the symbols matter, never the contents. */
StgWord GHC_ZCCCallable_static_info[1];
StgWord GHC_ZCCReturnable_static_info[1];
43 /* -----------------------------------------------------------------------------
44 Macros for Hand-written primitives.
45 -------------------------------------------------------------------------- */
48 * Horrible macros for returning unboxed tuples.
50 * How an unboxed tuple is returned depends on two factors:
51 * - the number of real registers we have available
52 * - the boxedness of the returned fields.
54 * To return an unboxed tuple from a primitive operation, we have macros
55 * RET_<layout> where <layout> describes the boxedness of each field of the
56 * unboxed tuple: N indicates a non-pointer field, and P indicates a pointer.
58 * We only define the cases actually used, to avoid having too much
59 * garbage in this section. Warning: any bugs in here will be hard to
62 * The return convention for an unboxed tuple is as follows:
63 * - fit as many fields as possible in registers (as per the
64 * function fast-entry point calling convention).
65 * - sort the rest of the fields into pointers and non-pointers.
66 * push the pointers on the stack, followed by the non-pointers.
67 * (so the pointers have higher addresses).
70 /*------ All Regs available */
71 #if MAX_REAL_VANILLA_REG == 8
72 # define RET_P(a) R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
73 # define RET_N(a) RET_P(a)
75 # define RET_PP(a,b) R1.w = (W_)(a); R2.w = (W_)(b); JMP_(ENTRY_CODE(Sp[0]));
76 # define RET_NN(a,b) RET_PP(a,b)
77 # define RET_NP(a,b) RET_PP(a,b)
79 # define RET_PPP(a,b,c) \
80 R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); JMP_(ENTRY_CODE(Sp[0]));
81 # define RET_NNP(a,b,c) RET_PPP(a,b,c)
83 # define RET_NNNP(a,b,c,d) \
84 R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)d; \
85 JMP_(ENTRY_CODE(Sp[0]));
87 # define RET_NPNP(a,b,c,d) \
88 R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)(d); \
89 JMP_(ENTRY_CODE(Sp[0]));
/* 3..7 registers: no macro set written for these configurations. */
#elif MAX_REAL_VANILLA_REG > 2 && MAX_REAL_VANILLA_REG < 8
# error RET_n macros not defined for this setup.

/*------ 2 Registers available */
#elif MAX_REAL_VANILLA_REG == 2

# define RET_P(a) R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
# define RET_N(a) RET_P(a)

# define RET_PP(a,b) R1.w = (W_)(a); R2.w = (W_)(b); \
		     JMP_(ENTRY_CODE(Sp[0]));
# define RET_NN(a,b) RET_PP(a,b)
# define RET_NP(a,b) RET_PP(a,b)

/* Three or more fields: first two in R1/R2, the rest spill to the
 * stack (hence the non-zero Sp offset in ENTRY_CODE below). */
# define RET_PPP(a,b,c) \
	JMP_(ENTRY_CODE(Sp[1]));

# define RET_NNP(a,b,c) \
	JMP_(ENTRY_CODE(Sp[1]));

# define RET_NNNP(a,b,c,d) \
	JMP_(ENTRY_CODE(Sp[2]));

# define RET_NPNP(a,b,c,d) \
	JMP_(ENTRY_CODE(Sp[2]));
/*------ 1 Register available */
#elif MAX_REAL_VANILLA_REG == 1
# define RET_P(a) R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
# define RET_N(a) RET_P(a)

# define RET_PP(a,b) R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 1; \
		     JMP_(ENTRY_CODE(Sp[1]));
/* NOTE(review): RET_NN drops Sp by 2 while storing only one word at
 * Sp[-1]; presumably the extra slot is the historical non-pointer
 * arg-tag word — confirm against the return convention before touching. */
# define RET_NN(a,b) R1.w = (W_)(a); Sp[-1] = (W_)(b); Sp -= 2; \
		     JMP_(ENTRY_CODE(Sp[2]));
# define RET_NP(a,b) RET_PP(a,b)

# define RET_PPP(a,b,c) \
	JMP_(ENTRY_CODE(Sp[2]));

# define RET_NNP(a,b,c) \
	JMP_(ENTRY_CODE(Sp[2]));

# define RET_NNNP(a,b,c,d) \
	JMP_(ENTRY_CODE(Sp[3]));

# define RET_NPNP(a,b,c,d) \
	JMP_(ENTRY_CODE(Sp[3]));
176 #else /* 0 Regs available */
178 #define PUSH(o,x) Sp[-o] = (W_)(x)
180 #define PUSHED(m) Sp -= (m); JMP_(ENTRY_CODE(Sp[m]));
182 # define RET_P(a) PUSH(1,a); PUSHED(1)
183 # define RET_N(a) PUSH(1,a); PUSHED(2)
185 # define RET_PP(a,b) PUSH(2,a); PUSH(1,b); PUSHED(2)
186 # define RET_NN(a,b) PUSH(2,a); PUSH(1,b); PUSHED(2)
187 # define RET_NP(a,b) PUSH(2,a); PUSH(1,b); PUSHED(2)
189 # define RET_PPP(a,b,c) PUSH(3,a); PUSH(2,b); PUSH(1,c); PUSHED(3)
190 # define RET_NNP(a,b,c) PUSH(3,a); PUSH(2,b); PUSH(1,c); PUSHED(3)
192 # define RET_NNNP(a,b,c,d) PUSH(4,a); PUSH(3,b); PUSH(2,c); PUSH(1,d); PUSHED(4)
193 # define RET_NPNP(a,b,c,d) PUSH(4,a); PUSH(3,c); PUSH(2,b); PUSH(1,d); PUSHED(4)
196 /*-----------------------------------------------------------------------------
199 Basically just new*Array - the others are all inline macros.
201 The size arg is always passed in R1, and the result returned in R1.
203 The slow entry point is for returning from a heap check, the saved
204 size argument must be re-loaded from the stack.
205 -------------------------------------------------------------------------- */
207 /* for objects that are *less* than the size of a word, make sure we
208 * round up to the nearest word for the size of the array.
211 #define BYTES_TO_STGWORDS(n) ((n) + sizeof(W_) - 1)/sizeof(W_)
/* newByteArray#: allocate a fresh ARR_WORDS closure.
 * Size argument (bytes) arrives in R1; result returned in R1
 * (see the section comment above). */
FN_(newByteArrayzh_fast)
  W_ size, stuff_size, n;
  /* may trigger GC before we allocate; no live pointers to save */
  MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
  stuff_size = BYTES_TO_STGWORDS(n);
  size = sizeofW(StgArrWords)+ stuff_size;
  p = (StgArrWords *)RET_STGCALL1(P_,allocate,size);
  TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);
  SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
  p->words = stuff_size;
  TICK_RET_UNBOXED_TUP(1)
/* newPinnedByteArray#: like newByteArray# but the object never moves,
 * and the payload is kept 8-byte aligned (see comments below). */
FN_(newPinnedByteArrayzh_fast)
  W_ size, stuff_size, n;
  MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
  stuff_size = BYTES_TO_STGWORDS(n);

  // We want an 8-byte aligned array. allocatePinned() gives us
  // 8-byte aligned memory by default, but we want to align the
  // *goods* inside the ArrWords object, so we have to check the
  // size of the ArrWords header and adjust our size accordingly.
  size = sizeofW(StgArrWords)+ stuff_size;
  if ((sizeof(StgArrWords) & 7) != 0) {

  p = (StgArrWords *)RET_STGCALL1(P_,allocatePinned,size);
  TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0);

  // Again, if the ArrWords header isn't a multiple of 8 bytes, we
  // have to push the object forward one word so that the goods
  // fall on an 8-byte boundary.
  if ((sizeof(StgArrWords) & 7) != 0) {

  SET_HDR(p, &stg_ARR_WORDS_info, CCCS);
  p->words = stuff_size;
  TICK_RET_UNBOXED_TUP(1)
  /* (body of newArrayzh_fast — see MAYBE_GC argument)
   * Allocates a MUT_ARR_PTRS of n elements; the loop below fills
   * every slot with the initialisation value. */
  MAYBE_GC(R2_PTR,newArrayzh_fast);

  size = sizeofW(StgMutArrPtrs) + n;
  arr = (StgMutArrPtrs *)RET_STGCALL1(P_, allocate, size);
  TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);

  SET_HDR(arr,&stg_MUT_ARR_PTRS_info,CCCS);

  /* initialise every payload slot */
  for (p = (P_)arr + sizeofW(StgMutArrPtrs);
       p < (P_)arr + size; p++) {

  TICK_RET_UNBOXED_TUP(1);
/* newMutVar#: allocate a MUT_VAR holding the value in R1. */
FN_(newMutVarzh_fast)
  /* Args: R1.p = initialisation value */
  HP_CHK_GEN_TICKY(sizeofW(StgMutVar), R1_PTR, newMutVarzh_fast);
  TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,1, 0); /* hack, dependent on rep. */
  CCS_ALLOC(CCCS,sizeofW(StgMutVar));
  /* carve the object out of the just-checked heap space */
  mv = (StgMutVar *)(Hp-sizeofW(StgMutVar)+1);
  SET_HDR(mv,&stg_MUT_VAR_info,CCCS);
  TICK_RET_UNBOXED_TUP(1);
/* atomicModifyMutVar#: replace the MutVar's contents with a thunk
 * (fst (f x)) and return a thunk (snd (f x)); only the application
 * thunk z = f x is shared — see the layout sketch below. */
FN_(atomicModifyMutVarzh_fast)
  StgClosure *z, *x, *y, *r;
  /* Args: R1.p :: MutVar#, R2.p :: a -> (a,b) */
  /* If x is the current contents of the MutVar#, then
     We want to make the new contents point to
     and the return value is
     obviously we can share (f x).
     z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
     y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
     r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
#define THUNK_SIZE(n) (sizeofW(StgHeader) + stg_max((n), MIN_UPD_SIZE))
#define SIZE (THUNK_SIZE(2) + THUNK_SIZE(1) + THUNK_SIZE(1))

  /* one heap check covers all three thunks */
  HP_CHK_GEN_TICKY(SIZE, R1_PTR|R2_PTR, atomicModifyMutVarzh_fast);
  CCS_ALLOC(CCCS,SIZE);

  x = ((StgMutVar *)R1.cl)->var;

  TICK_ALLOC_UP_THK(2,0); // XXX
  /* NOTE(review): arithmetic below is in units of the casted pointer
   * type; it relies on sizeof(StgClosure)/sizeof(StgPtr target) being
   * one word — confirm for profiling builds before changing. */
  z = (StgClosure *) Hp - THUNK_SIZE(2) + 1;
  SET_HDR(z, (StgInfoTable *)&stg_ap_2_upd_info, CCCS);
  z->payload[0] = R2.cl;

  TICK_ALLOC_UP_THK(1,1); // XXX
  y = (StgClosure *) (StgPtr)z - THUNK_SIZE(1);
  SET_HDR(y, &stg_sel_0_upd_info, CCCS);

  /* publish the new contents before building the result thunk */
  ((StgMutVar *)R1.cl)->var = y;

  TICK_ALLOC_UP_THK(1,1); // XXX
  r = (StgClosure *) (StgPtr)y - THUNK_SIZE(1);
  SET_HDR(r, &stg_sel_1_upd_info, CCCS);

  JMP_(ENTRY_CODE(Sp[0]));
367 /* -----------------------------------------------------------------------------
368 Foreign Object Primitives
369 -------------------------------------------------------------------------- */
/* mkForeignObj#: wrap the raw pointer in R1 in a FOREIGN closure. */
FN_(mkForeignObjzh_fast)
  /* R1.p = ptr to foreign object, */
  StgForeignObj *result;

  HP_CHK_GEN_TICKY(sizeofW(StgForeignObj), NO_PTRS, mkForeignObjzh_fast);
  TICK_ALLOC_PRIM(sizeofW(StgHeader),
		  sizeofW(StgForeignObj)-sizeofW(StgHeader), 0);
  CCS_ALLOC(CCCS,sizeofW(StgForeignObj)); /* ccs prof */

  result = (StgForeignObj *) (Hp + 1 - sizeofW(StgForeignObj));
  SET_HDR(result,&stg_FOREIGN_info,CCCS);

  /* returns (# s#, ForeignObj# #) */
  TICK_RET_UNBOXED_TUP(1);
/* These two are out-of-line for the benefit of the NCG */
/* unsafeThawArray#: flip a frozen array back to MUT_ARR_PTRS and put it
 * on the mutable list so the GC tracks future writes. */
FN_(unsafeThawArrayzh_fast)
  SET_INFO((StgClosure *)R1.cl,&stg_MUT_ARR_PTRS_info);
  recordMutable((StgMutClosure*)R1.cl);

  TICK_RET_UNBOXED_TUP(1);
405 /* -----------------------------------------------------------------------------
406 Weak Pointer Primitives
407 -------------------------------------------------------------------------- */
413 R3.p = finalizer (or NULL)
  /* (body of mkWeakzh_fast) A NULL finalizer is replaced by the
   * NO_FINALIZER sentinel so the field is always a valid closure. */
  R3.cl = &stg_NO_FINALIZER_closure;

  HP_CHK_GEN_TICKY(sizeofW(StgWeak),R1_PTR|R2_PTR|R3_PTR, mkWeakzh_fast);
  TICK_ALLOC_PRIM(sizeofW(StgHeader)+1, // +1 is for the link field
		  sizeofW(StgWeak)-sizeofW(StgHeader)-1, 0);
  CCS_ALLOC(CCCS,sizeofW(StgWeak)); /* ccs prof */

  w = (StgWeak *) (Hp + 1 - sizeofW(StgWeak));
  SET_HDR(w, &stg_WEAK_info, CCCS);

  w->finalizer = R3.cl;

  /* chain onto the global list the GC walks */
  w->link = weak_ptr_list;

  IF_DEBUG(weak, fprintf(stderr,"New weak pointer at %p\n",w));

  TICK_RET_UNBOXED_TUP(1);
/* finalizeWeak#: kill the weak pointer (mark it DEAD_WEAK) and hand its
 * finalizer back to the caller; returns (# 0, noFinalizer #) when there
 * is nothing to run. */
FN_(finalizzeWeakzh_fast)
  TICK_RET_UNBOXED_TUP(0);
  w = (StgDeadWeak *)R1.p;

  /* already dead: nothing to do, no finalizer to return */
  if (w->header.info == &stg_DEAD_WEAK_info) {
    RET_NP(0,&stg_NO_FINALIZER_closure);

  // A weak pointer is inherently used, so we do not need to call
  // LDV_recordDead_FILL_SLOP_DYNAMIC():
  //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
  // or, LDV_recordDead():
  //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
  // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
  // large as weak pointers, so there is no need to fill the slop, either.
  // See stg_DEAD_WEAK_info in StgMiscClosures.hc.

  // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
  w->header.info = &stg_DEAD_WEAK_info;
  LDV_recordCreate((StgClosure *)w);

  f = ((StgWeak *)w)->finalizer;
  w->link = ((StgWeak *)w)->link;

  /* return the finalizer */
  if (f == &stg_NO_FINALIZER_closure) {
    RET_NP(0,&stg_NO_FINALIZER_closure);
/* deRefWeak#: if the weak pointer is still alive return its value;
 * (rest of both branches outside this view). */
FN_(deRefWeakzh_fast)
  /* R1.p = weak ptr */
  if (w->header.info == &stg_WEAK_info) {
    val = (P_)((StgWeak *)w)->value;
509 /* -----------------------------------------------------------------------------
510 Arbitrary-precision Integer operations.
511 -------------------------------------------------------------------------- */
/* int2Integer#: build a 1-limb GMP-style Integer from an Int#;
 * returns (# size, ByteArray# #). */
FN_(int2Integerzh_fast)
  /* arguments: R1 = Int# */
  I_ val, s;	/* to avoid aliasing */
  StgArrWords* p;	/* address of array result */

  HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, int2Integerzh_fast);
  TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
  CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */

  p = (StgArrWords *)Hp - 1;
  SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);

  /* mpz_set_si is inlined here, makes things simpler */
  } else if (val > 0) {

  /* returns (# size :: Int#,
  TICK_RET_UNBOXED_TUP(2);
/* word2Integer#: same as int2Integer# but for an unsigned Word#,
 * so the size is never negative. */
FN_(word2Integerzh_fast)
  /* arguments: R1 = Word# */
  W_ val;	/* to avoid aliasing */
  StgArrWords* p;	/* address of array result */

  HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, word2Integerzh_fast)
  TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
  CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */

  p = (StgArrWords *)Hp - 1;
  SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1);

  /* returns (# size :: Int#,
  TICK_RET_UNBOXED_TUP(2);
584 * 'long long' primops for converting to/from Integers.
587 #ifdef SUPPORT_LONG_LONGS
/* int64ToInteger#: convert an Int64# to an Integer; needs two limbs
 * when |val| >= 2^32 (32-bit words), one otherwise. */
FN_(int64ToIntegerzh_fast)
  /* arguments: L1 = Int64# */
  StgInt64 val;	/* to avoid aliasing */
  I_ s, neg, words_needed;
  StgArrWords* p;	/* address of array result */

  /* two words needed once the value overflows one 32-bit limb */
  if ( val >= 0x100000000LL || val <= -0x100000000LL ) {
  /* minimum is one word */

  HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, int64ToIntegerzh_fast)
  TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
  CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */

  p = (StgArrWords *)(Hp-words_needed+1) - 1;
  SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);

  hi = (W_)((LW_)val / 0x100000000ULL);

  if ( words_needed == 2 ) {
  } else if ( val != 0 ) {
  } else /* val==0 */ {
  /* restore the sign on the limb count */
  s = ( neg ? -s : s );

  /* returns (# size :: Int#,
  TICK_RET_UNBOXED_TUP(2);
/* word64ToInteger#: unsigned variant of int64ToInteger#; size is
 * always non-negative. */
FN_(word64ToIntegerzh_fast)
  /* arguments: L1 = Word64# */
  StgWord64 val;	/* to avoid aliasing */
  StgArrWords* p;	/* address of array result */

  /* two limbs once the value exceeds 32 bits */
  if ( val >= 0x100000000ULL ) {

  HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, word64ToIntegerzh_fast)
  TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
  CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */

  p = (StgArrWords *)(Hp-words_needed+1) - 1;
  SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed);

  hi = (W_)((LW_)val / 0x100000000ULL);
  if ( val >= 0x100000000ULL ) {
  } else if ( val != 0 ) {
  } else /* val==0 */ {

  /* returns (# size :: Int#,
  TICK_RET_UNBOXED_TUP(2);
688 #endif /* SUPPORT_LONG_LONGS */
/* ToDo: this is shockingly inefficient */

/* GMP_TAKE2_RET1(name, mp_fun): defines an out-of-line primop "name"
 * taking two Integers (R1/R2 = size1/limbs1, R3/R4 = size2/limbs2),
 * marshalling them into stack-allocated MP_INTs, calling mp_fun, and
 * returning the (size, limbs) pair of the result.  The result's limb
 * block is a heap ARR_WORDS allocated by GMP's RTS allocator, hence the
 * sizeofW(StgArrWords) back-adjustment in the RET_NP below.
 * Note: comments cannot be placed inside the backslash-continued body. */
#define GMP_TAKE2_RET1(name,mp_fun) \
  MP_INT arg1, arg2, result; \
  /* call doYouWantToGC() */ \
  MAYBE_GC(R2_PTR | R4_PTR, name); \
  d1 = (StgArrWords *)R2.p; \
  d2 = (StgArrWords *)R4.p; \
  arg1._mp_alloc = d1->words; \
  arg1._mp_size  = (s1); \
  arg1._mp_d     = (unsigned long int *) (BYTE_ARR_CTS(d1)); \
  arg2._mp_alloc = d2->words; \
  arg2._mp_size  = (s2); \
  arg2._mp_d     = (unsigned long int *) (BYTE_ARR_CTS(d2)); \
  STGCALL1(mpz_init,&result); \
  /* Perform the operation */ \
  STGCALL3(mp_fun,&result,&arg1,&arg2); \
  TICK_RET_UNBOXED_TUP(2); \
  RET_NP(result._mp_size, \
         result._mp_d-sizeofW(StgArrWords)); \
/* GMP_TAKE1_RET1(name, mp_fun): unary version of GMP_TAKE2_RET1 —
 * one Integer in (R1 = size, R2 = limbs), one Integer out. */
#define GMP_TAKE1_RET1(name,mp_fun) \
  MP_INT arg1, result; \
  /* call doYouWantToGC() */ \
  MAYBE_GC(R2_PTR, name); \
  d1 = (StgArrWords *)R2.p; \
  arg1._mp_alloc = d1->words; \
  arg1._mp_size  = (s1); \
  arg1._mp_d     = (unsigned long int *) (BYTE_ARR_CTS(d1)); \
  STGCALL1(mpz_init,&result); \
  /* Perform the operation */ \
  STGCALL2(mp_fun,&result,&arg1); \
  TICK_RET_UNBOXED_TUP(2); \
  RET_NP(result._mp_size, \
         result._mp_d-sizeofW(StgArrWords)); \
/* GMP_TAKE2_RET2(name, mp_fun): two Integers in, two Integers out
 * (e.g. quotRem / divMod); mp_fun writes both results, returned as
 * an NPNP unboxed 4-tuple. */
#define GMP_TAKE2_RET2(name,mp_fun) \
  MP_INT arg1, arg2, result1, result2; \
  /* call doYouWantToGC() */ \
  MAYBE_GC(R2_PTR | R4_PTR, name); \
  d1 = (StgArrWords *)R2.p; \
  d2 = (StgArrWords *)R4.p; \
  arg1._mp_alloc = d1->words; \
  arg1._mp_size  = (s1); \
  arg1._mp_d     = (unsigned long int *) (BYTE_ARR_CTS(d1)); \
  arg2._mp_alloc = d2->words; \
  arg2._mp_size  = (s2); \
  arg2._mp_d     = (unsigned long int *) (BYTE_ARR_CTS(d2)); \
  STGCALL1(mpz_init,&result1); \
  STGCALL1(mpz_init,&result2); \
  /* Perform the operation */ \
  STGCALL4(mp_fun,&result1,&result2,&arg1,&arg2); \
  TICK_RET_UNBOXED_TUP(4); \
  RET_NPNP(result1._mp_size, \
           result1._mp_d-sizeofW(StgArrWords), \
           result2._mp_d-sizeofW(StgArrWords)); \
/* Instantiate the out-of-line Integer primops from the macros above;
 * each line pairs a primop entry point with the GMP routine it wraps. */
GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add);
GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub);
GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul);
GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd);
GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q);
GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r);
GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact);
GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and);
GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior);
GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor);
GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com);

GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr);
GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr);
  /* (body of the gcdInt# primop) gcd of two Int#s via GMP's single-limb
   * mpn_gcd_1: the first operand is passed as a 1-limb "number". */
  /* R1 = the first Int#; R2 = the second Int# */
  aa = (mp_limb_t)(R1.i);
  r = RET_STGCALL3(StgInt, mpn_gcd_1, (mp_limb_t *)(&aa), 1, (mp_limb_t)(R2.i));

  /* Result parked in R1, return via info-pointer at TOS */
  JMP_(ENTRY_CODE(Sp[0]));
/* gcdIntegerInt#: gcd of an Integer (size s1, limbs d1) and an Int#. */
FN_(gcdIntegerIntzh_fast)
  /* R1 = s1; R2 = d1; R3 = the int */
  r = RET_STGCALL3(StgInt,mpn_gcd_1,(mp_limb_t *)(BYTE_ARR_CTS(R2.p)), R1.i, R3.i);

  /* Result parked in R1, return via info-pointer at TOS */
  JMP_(ENTRY_CODE(Sp[0]));
/* cmpIntegerInt#: three-way compare of an Integer against an Int#,
 * returning <0 / 0 / >0 in R1. */
FN_(cmpIntegerIntzh_fast)
  /* R1 = s1; R2 = d1; R3 = the int */

  // paraphrased from mpz_cmp_si() in the GMP sources
  } else if (v_digit < 0) {

  /* sign/limb-count comparison decides without touching the limbs */
  if (usize != vsize) {
    R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));

  R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));

  /* single-limb magnitude comparison */
  u_digit = *(mp_limb_t *)(BYTE_ARR_CTS(R2.p));

  if (u_digit == (mp_limb_t) (unsigned long) v_digit) {
    R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));

  if (u_digit > (mp_limb_t) (unsigned long) v_digit) {

  JMP_(ENTRY_CODE(Sp[0]));
/* cmpInteger#: three-way compare of two Integers; result in R1. */
FN_(cmpIntegerzh_fast)
  /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */

  // paraphrased from mpz_cmp() in the GMP sources

  /* differing sign/limb-count decides immediately */
  if (usize != vsize) {
    R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0]));

  R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));

  /* equal sizes: compare the limb arrays */
  up = BYTE_ARR_CTS(R2.p);
  vp = BYTE_ARR_CTS(R4.p);

  cmp = RET_STGCALL3(I_, mpn_cmp, (mp_limb_t *)up, (mp_limb_t *)vp, size);

  R1.i = 0; JMP_(ENTRY_CODE(Sp[0]));

  /* flip the magnitude comparison when both numbers are negative */
  if ((cmp < 0) == (usize < 0)) {

  /* Result parked in R1, return via info-pointer at TOS */
  JMP_(ENTRY_CODE(Sp[0]));
/* integer2Int#: truncate an Integer to an Int# (low limb, sign applied
 * outside this view). */
FN_(integer2Intzh_fast)
  r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];

  /* Result parked in R1, return via info-pointer at TOS */
  JMP_(ENTRY_CODE(Sp[0]));
/* integer2Word#: truncate an Integer to a Word# (low limb). */
FN_(integer2Wordzh_fast)
  r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0];

  /* Result parked in R1, return via info-pointer at TOS */
  JMP_(ENTRY_CODE(Sp[0]));
/* decodeFloat#: split a Float# into (exponent, mantissa-size,
 * mantissa ByteArray#) via the out-of-line __decodeFloat. */
FN_(decodeFloatzh_fast)
  /* arguments: F1 = Float# */

  HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, decodeFloatzh_fast);
  TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
  CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */

  /* Be prepared to tell Lennart-coded __decodeFloat */
  /* where mantissa._mp_d can be put (it does not care about the rest) */
  p = (StgArrWords *)Hp - 1;
  SET_ARR_HDR(p,&stg_ARR_WORDS_info,CCCS,1)
  mantissa._mp_d = (void *)BYTE_ARR_CTS(p);

  /* Perform the operation */
  STGCALL3(__decodeFloat,&mantissa,&exponent,arg);

  /* returns: (Int# (expn), Int#, ByteArray#) */
  TICK_RET_UNBOXED_TUP(3);
  RET_NNP(exponent,mantissa._mp_size,p);
/* Mantissa of a Double# occupies a whole StgDouble worth of words. */
#define DOUBLE_MANTISSA_SIZE (sizeofW(StgDouble))
#define ARR_SIZE (sizeofW(StgArrWords) + DOUBLE_MANTISSA_SIZE)

/* decodeDouble#: as decodeFloat#, but for Double# via __decodeDouble. */
FN_(decodeDoublezh_fast)
  /* arguments: D1 = Double# */

  HP_CHK_GEN_TICKY(ARR_SIZE, NO_PTRS, decodeDoublezh_fast);
  TICK_ALLOC_PRIM(sizeofW(StgArrWords),DOUBLE_MANTISSA_SIZE,0);
  CCS_ALLOC(CCCS,ARR_SIZE); /* ccs prof */

  /* Be prepared to tell Lennart-coded __decodeDouble */
  /* where mantissa.d can be put (it does not care about the rest) */
  p = (StgArrWords *)(Hp-ARR_SIZE+1);
  SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, DOUBLE_MANTISSA_SIZE);
  mantissa._mp_d = (void *)BYTE_ARR_CTS(p);

  /* Perform the operation */
  STGCALL3(__decodeDouble,&mantissa,&exponent,arg);

  /* returns: (Int# (expn), Int#, ByteArray#) */
  TICK_RET_UNBOXED_TUP(3);
  RET_NNP(exponent,mantissa._mp_size,p);
1026 /* -----------------------------------------------------------------------------
1027 * Concurrency primitives
1028 * -------------------------------------------------------------------------- */
  /* (body of forkzh_fast) Create and schedule a new IO thread running
   * the closure in R1; its ThreadId# is returned in R1. */
  /* args: R1 = closure to spark */
  MAYBE_GC(R1_PTR, forkzh_fast);

  /* create it right now, return ThreadID in R1 */
  R1.t = RET_STGCALL2(StgTSO *, createIOThread,
		      RtsFlags.GcFlags.initialStkSize, R1.cl);
  STGCALL1(scheduleThread, R1.t);

  /* switch at the earliest opportunity */
/* forkProcess#: fork(2) the whole process; the child keeps only the
 * current TSO.  The pid (0 in the child) comes back in R1. */
FN_(forkProcesszh_fast)
  R1.i = RET_STGCALL1(StgInt, forkProcess, CurrentTSO);

  JMP_(ENTRY_CODE(Sp[0]));
1067 JMP_(stg_yield_noregs);
/* myThreadId#: return the current TSO as a ThreadId#. */
FN_(myThreadIdzh_fast)
  RET_P((P_)CurrentTSO);
/* labelThread#: attach a debug label (R2, a C string) to thread R1. */
FN_(labelThreadzh_fast)
  STGCALL2(labelThread,R1.p,(char *)R2.p);

  JMP_(ENTRY_CODE(Sp[0]));
1093 /* -----------------------------------------------------------------------------
1096 * take & putMVar work as follows. Firstly, an important invariant:
1098 * If the MVar is full, then the blocking queue contains only
1099 * threads blocked on putMVar, and if the MVar is empty then the
1100 * blocking queue contains only threads blocked on takeMVar.
1103 * MVar empty : then add ourselves to the blocking queue
1104 * MVar full : remove the value from the MVar, and
1105 * blocking queue empty : return
1106 * blocking queue non-empty : perform the first blocked putMVar
1107 * from the queue, and wake up the
1108 * thread (MVar is now full again)
1110 * putMVar is just the dual of the above algorithm.
1112 * How do we "perform a putMVar"? Well, we have to fiddle around with
1113 * the stack of the thread waiting to do the putMVar. See
1114 * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1115 * the stack layout, and the PerformPut and PerformTake macros below.
1117 * It is important that a blocked take or put is woken up with the
1118 * take/put already performed, because otherwise there would be a
1119 * small window of vulnerability where the thread could receive an
1120 * exception and never perform its take or put, and we'd end up with a
1123 * -------------------------------------------------------------------------- */
/* isEmptyMVar#: non-blocking test — just inspect the info pointer. */
FN_(isEmptyMVarzh_fast)
  /* args: R1 = MVar closure */
  r = (I_)((GET_INFO((StgMVar*)(R1.p))) == &stg_EMPTY_MVAR_info);
1143 HP_CHK_GEN_TICKY(sizeofW(StgMVar), NO_PTRS, newMVarzh_fast);
1144 TICK_ALLOC_PRIM(sizeofW(StgMutVar)-1, // consider head,tail,link as admin wds
1146 CCS_ALLOC(CCCS,sizeofW(StgMVar)); /* ccs prof */
1148 mvar = (StgMVar *) (Hp - sizeofW(StgMVar) + 1);
1149 SET_HDR(mvar,&stg_EMPTY_MVAR_info,CCCS);
1150 mvar->head = mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;
1151 mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
1153 TICK_RET_UNBOXED_TUP(1);
/* PerformTake/PerformPut rewrite a *blocked* thread's stack so that its
 * pending takeMVar/putMVar is already complete when it is woken (see
 * the invariant discussion above and stg_block_*mvar in HeapStackCheck.c).
 * Two PerformTake variants: registerised (value returned via R1) and
 * unregisterised (value + unboxed-tuple info frame on the stack).
 * Comments cannot go inside the backslash-continued bodies. */
/* If R1 isn't available, pass it on the stack */
#define PerformTake(tso, value) ({ \
    (tso)->sp[1] = (W_)value; \
    (tso)->sp[0] = (W_)&stg_gc_unpt_r1_info; \
#define PerformTake(tso, value) ({ \
    (tso)->sp[1] = (W_)value; \
    (tso)->sp[0] = (W_)&stg_ut_1_0_unreg_info; \
#define PerformPut(tso) ({ \
    StgClosure *val = (StgClosure *)(tso)->sp[2]; \
/* takeMVar#: block until the MVar is full, take its value, and if a
 * putMVar is queued behind us, perform that put and wake its thread.
 * LOCK_CLOSURE/SET_INFO double as the SMP lock/unlock — the order of
 * info-pointer writes is load-bearing; do not reorder. */
FN_(takeMVarzh_fast)
  const StgInfoTable *info;

  /* args: R1 = MVar closure */
  mvar = (StgMVar *)R1.p;

  info = LOCK_CLOSURE(mvar);

  info = GET_INFO(mvar);

  /* If the MVar is empty, put ourselves on its blocking queue,
   * and wait until we're woken up.
  if (info == &stg_EMPTY_MVAR_info) {
    if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
      mvar->head = CurrentTSO;
      mvar->tail->link = CurrentTSO;
    CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
    CurrentTSO->why_blocked = BlockedOnMVar;
    CurrentTSO->block_info.closure = (StgClosure *)mvar;
    mvar->tail = CurrentTSO;

    /* unlock the MVar */
    mvar->header.info = &stg_EMPTY_MVAR_info;
    JMP_(stg_block_takemvar);

  /* we got the value... */

  if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
    /* There are putMVar(s) waiting...
     * wake up the first thread on the queue
    ASSERT(mvar->head->why_blocked == BlockedOnMVar);

    /* actually perform the putMVar for the thread that we just woke up */
    mvar->value = PerformPut(mvar->head);

#if defined(GRAN) || defined(PAR)
    /* ToDo: check 2nd arg (mvar) is right */
    mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
    mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
    if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
      mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;

    /* unlock in the SMP case */
    SET_INFO(mvar,&stg_FULL_MVAR_info);
    TICK_RET_UNBOXED_TUP(1);

  /* No further putMVars, MVar is now empty */

  /* do this last... we might have locked the MVar in the SMP case,
   * and writing the info pointer will unlock it.
  SET_INFO(mvar,&stg_EMPTY_MVAR_info);
  mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;
  TICK_RET_UNBOXED_TUP(1);
/* tryTakeMVar#: non-blocking takeMVar — returns (# 0, _ #) when empty
 * instead of queueing the current thread; otherwise identical to the
 * success path of takeMVar# above. */
FN_(tryTakeMVarzh_fast)
  const StgInfoTable *info;

  /* args: R1 = MVar closure */
  mvar = (StgMVar *)R1.p;

  info = LOCK_CLOSURE(mvar);

  info = GET_INFO(mvar);

  if (info == &stg_EMPTY_MVAR_info) {

    /* unlock the MVar */
    SET_INFO(mvar,&stg_EMPTY_MVAR_info);

    /* HACK: we need a pointer to pass back,
     * so we abuse NO_FINALIZER_closure
    RET_NP(0, &stg_NO_FINALIZER_closure);

  /* we got the value... */

  if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
    /* There are putMVar(s) waiting...
     * wake up the first thread on the queue
    ASSERT(mvar->head->why_blocked == BlockedOnMVar);

    /* actually perform the putMVar for the thread that we just woke up */
    mvar->value = PerformPut(mvar->head);

#if defined(GRAN) || defined(PAR)
    /* ToDo: check 2nd arg (mvar) is right */
    mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
    mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
    if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
      mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;

    /* unlock in the SMP case */
    SET_INFO(mvar,&stg_FULL_MVAR_info);

  /* No further putMVars, MVar is now empty */
  mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure;

  /* do this last... we might have locked the MVar in the SMP case,
   * and writing the info pointer will unlock it.
  SET_INFO(mvar,&stg_EMPTY_MVAR_info);

  TICK_RET_UNBOXED_TUP(1);
  /* (body of the putMVar# primop — dual of takeMVar# above: block while
   * FULL, otherwise store the value or hand it straight to the first
   * queued taker via PerformTake.) */
  const StgInfoTable *info;

  /* args: R1 = MVar, R2 = value */
  mvar = (StgMVar *)R1.p;

  info = LOCK_CLOSURE(mvar);

  info = GET_INFO(mvar);

  if (info == &stg_FULL_MVAR_info) {
    if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
      mvar->head = CurrentTSO;
      mvar->tail->link = CurrentTSO;
    CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure;
    CurrentTSO->why_blocked = BlockedOnMVar;
    CurrentTSO->block_info.closure = (StgClosure *)mvar;
    mvar->tail = CurrentTSO;

    /* unlock the MVar */
    SET_INFO(mvar,&stg_FULL_MVAR_info);
    JMP_(stg_block_putmvar);

  if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
    /* There are takeMVar(s) waiting: wake up the first one
    ASSERT(mvar->head->why_blocked == BlockedOnMVar);

    /* actually perform the takeMVar */
    PerformTake(mvar->head, R2.cl);

#if defined(GRAN) || defined(PAR)
    /* ToDo: check 2nd arg (mvar) is right */
    mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
    mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
    if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
      mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;

    /* unlocks the MVar in the SMP case */
    SET_INFO(mvar,&stg_EMPTY_MVAR_info);
    JMP_(ENTRY_CODE(Sp[0]));

  /* No further takes, the MVar is now full. */
  mvar->value = R2.cl;
  /* unlocks the MVar in the SMP case */
  SET_INFO(mvar,&stg_FULL_MVAR_info);
  JMP_(ENTRY_CODE(Sp[0]));

  /* ToDo: yield afterward for better communication performance? */
/* tryPutMVar#: non-blocking putMVar — fails (returns 0) when the MVar
 * is already full instead of queueing the current thread. */
FN_(tryPutMVarzh_fast)
  const StgInfoTable *info;

  /* args: R1 = MVar, R2 = value */
  mvar = (StgMVar *)R1.p;

  info = LOCK_CLOSURE(mvar);

  info = GET_INFO(mvar);

  if (info == &stg_FULL_MVAR_info) {

    /* unlock the MVar */
    mvar->header.info = &stg_FULL_MVAR_info;

  if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) {
    /* There are takeMVar(s) waiting: wake up the first one
    ASSERT(mvar->head->why_blocked == BlockedOnMVar);

    /* actually perform the takeMVar */
    PerformTake(mvar->head, R2.cl);

#if defined(GRAN) || defined(PAR)
    /* ToDo: check 2nd arg (mvar) is right */
    mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
    mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
    if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) {
      mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure;

    /* unlocks the MVar in the SMP case */
    SET_INFO(mvar,&stg_EMPTY_MVAR_info);
    JMP_(ENTRY_CODE(Sp[0]));

  /* No further takes, the MVar is now full. */
  mvar->value = R2.cl;
  /* unlocks the MVar in the SMP case */
  SET_INFO(mvar,&stg_FULL_MVAR_info);
  JMP_(ENTRY_CODE(Sp[0]));

  /* ToDo: yield afterward for better communication performance? */
1455 /* -----------------------------------------------------------------------------
1456 Stable pointer primitives
1457 ------------------------------------------------------------------------- */
/* makeStableName#: return the (interned) StableName for the closure in
 * R1, allocating a new STABLE_NAME object only on first request. */
FN_(makeStableNamezh_fast)
  StgStableName *sn_obj;

  HP_CHK_GEN_TICKY(sizeofW(StgStableName), R1_PTR, makeStableNamezh_fast);
  TICK_ALLOC_PRIM(sizeofW(StgHeader), 
		  sizeofW(StgStableName)-sizeofW(StgHeader), 0);
  CCS_ALLOC(CCCS,sizeofW(StgStableName)); /* ccs prof */

  index = RET_STGCALL1(StgWord,lookupStableName,R1.p);

  /* Is there already a StableName for this heap object? */
  if (stable_ptr_table[index].sn_obj == NULL) {
    sn_obj = (StgStableName *) (Hp - sizeofW(StgStableName) + 1);
    SET_HDR(sn_obj,&stg_STABLE_NAME_info,CCCS);
    stable_ptr_table[index].sn_obj = (StgClosure *)sn_obj;
    /* cast-as-lvalue is a GCC extension used throughout this file */
    (StgClosure *)sn_obj = stable_ptr_table[index].sn_obj;

  TICK_RET_UNBOXED_TUP(1);
/* makeStablePtr#: register the closure in R1 in the stable-pointer
 * table and return its StablePtr#. */
FN_(makeStablePtrzh_fast)
  MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
  sp = RET_STGCALL1(StgStablePtr,getStablePtr,R1.p);
/* deRefStablePtr#: look the stable pointer up in the table and return
 * the closure it names. */
FN_(deRefStablePtrzh_fast)
  /* Args: R1 = the stable ptr */
  sp = (StgStablePtr)R1.w;
  r = stable_ptr_table[(StgWord)sp].addr;
1510 /* -----------------------------------------------------------------------------
1511 Bytecode object primitives
1512 ------------------------------------------------------------------------- */
  /* (body of newBCOzh_fast) Allocate a BCO from its four components:
   * R1 = instrs, R2 = literals, R3 = pointers, R4 = itbls. */
  HP_CHK_GEN_TICKY(sizeofW(StgBCO),R1_PTR|R2_PTR|R3_PTR|R4_PTR, newBCOzh_fast);
  TICK_ALLOC_PRIM(sizeofW(StgHeader), sizeofW(StgBCO)-sizeofW(StgHeader), 0);
  CCS_ALLOC(CCCS,sizeofW(StgBCO)); /* ccs prof */
  bco = (StgBCO *) (Hp + 1 - sizeofW(StgBCO));
  SET_HDR(bco, (const StgInfoTable *)&stg_BCO_info, CCCS);

  bco->instrs = (StgArrWords*)R1.cl;
  bco->literals = (StgArrWords*)R2.cl;
  bco->ptrs = (StgMutArrPtrs*)R3.cl;
  bco->itbls = (StgArrWords*)R4.cl;

  TICK_RET_UNBOXED_TUP(1);
/* mkApUpd0#: wrap a zero-arity BCO in an updatable AP thunk. */
FN_(mkApUpd0zh_fast)
  // R1.p = the BCO# for the AP

  // This function is *only* used to wrap zero-arity BCOs in an
  // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
  // saturated and always points directly to a FUN or BCO.
  ASSERT(get_itbl(R1.cl)->type == BCO && BCO_ARITY(R1.p) == 0);

  HP_CHK_GEN_TICKY(PAP_sizeW(0), R1_PTR, mkApUpd0zh_fast);
  TICK_ALLOC_PRIM(sizeofW(StgHeader), PAP_sizeW(0)-sizeofW(StgHeader), 0);
  CCS_ALLOC(CCCS,PAP_sizeW(0)); /* ccs prof */
  ap = (StgPAP *) (Hp + 1 - PAP_sizeW(0));
  SET_HDR(ap, &stg_AP_info, CCCS);

  TICK_RET_UNBOXED_TUP(1);
1566 /* -----------------------------------------------------------------------------
1567 Thread I/O blocking primitives
1568 -------------------------------------------------------------------------- */
/* waitRead#: block the current thread until the file descriptor in R1
 * is readable; the scheduler's select loop wakes it up. */
FN_(waitReadzh_fast)
  ASSERT(CurrentTSO->why_blocked == NotBlocked);
  CurrentTSO->why_blocked = BlockedOnRead;
  CurrentTSO->block_info.fd = R1.i;
  /* blocked queue is shared with the scheduler — guard it */
  ACQUIRE_LOCK(&sched_mutex);
  APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
  RELEASE_LOCK(&sched_mutex);
  JMP_(stg_block_noregs);
/* waitWrite#: as waitRead#, but until the fd in R1 is writable. */
FN_(waitWritezh_fast)
  ASSERT(CurrentTSO->why_blocked == NotBlocked);
  CurrentTSO->why_blocked = BlockedOnWrite;
  CurrentTSO->block_info.fd = R1.i;
  /* blocked queue is shared with the scheduler — guard it */
  ACQUIRE_LOCK(&sched_mutex);
  APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
  RELEASE_LOCK(&sched_mutex);
  JMP_(stg_block_noregs);
  /* (body of the delay primop) Convert the R1 microsecond argument to
   * scheduler ticks and insert the current thread into the sleeping
   * queue, which is kept sorted by wake-up target time. */
  ASSERT(CurrentTSO->why_blocked == NotBlocked);
  CurrentTSO->why_blocked = BlockedOnDelay;

  ACQUIRE_LOCK(&sched_mutex);

  target = (R1.i / (TICK_MILLISECS*1000)) + getourtimeofday();
  CurrentTSO->block_info.target = target;

  /* Insert the new thread in the sleeping queue. */
  /* walk to the first entry waking later than us */
  while (t != END_TSO_QUEUE && t->block_info.target < target) {

  CurrentTSO->link = t;
  /* empty-queue / mid-queue insertion cases */
  sleeping_queue = CurrentTSO;
  prev->link = CurrentTSO;

  RELEASE_LOCK(&sched_mutex);
  JMP_(stg_block_noregs);