X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Frts%2FPrimOps.hc;h=45717e5d1ad7c13b777c9e60e479b328e9323147;hb=7d71bf0b4e294a7cb62037aedd087519ead9ade8;hp=b571db3819ccafc1c0ce16f8dd17a9134cff66c0;hpb=f5448f5c5efe0630cb865ee0d21691a23ea932d3;p=ghc-hetmet.git diff --git a/ghc/rts/PrimOps.hc b/ghc/rts/PrimOps.hc index b571db3..45717e5 100644 --- a/ghc/rts/PrimOps.hc +++ b/ghc/rts/PrimOps.hc @@ -1,12 +1,13 @@ /* ----------------------------------------------------------------------------- - * $Id: PrimOps.hc,v 1.56 2000/11/07 10:42:57 simonmar Exp $ + * $Id: PrimOps.hc,v 1.110 2003/07/23 13:39:11 simonmar Exp $ * - * (c) The GHC Team, 1998-2000 + * (c) The GHC Team, 1998-2002 * * Primitive functions / data * * ---------------------------------------------------------------------------*/ +#include "Stg.h" #include "Rts.h" #include "RtsFlags.h" @@ -17,23 +18,36 @@ #include "Storage.h" #include "BlockAlloc.h" /* tmp */ #include "StablePriv.h" -#include "HeapStackCheck.h" #include "StgRun.h" -#include "Itimer.h" +#include "Timer.h" /* TICK_MILLISECS */ #include "Prelude.h" +#ifndef mingw32_TARGET_OS +#include "Itimer.h" /* getourtimeofday() */ +#endif + +#ifdef HAVE_SYS_TYPES_H +# include +#endif + +#include + +#ifdef mingw32_TARGET_OS +#include +#include "win32/AsyncIO.h" +#endif /* ** temporary ** classes CCallable and CReturnable don't really exist, but the compiler insists on generating dictionaries containing references to GHC_ZcCCallable_static_info etc., so we provide dummy symbols - for these. + for these. Some C compilers can't cope with zero-length static arrays, + so we have to make these one element long. */ -W_ GHC_ZCCCallable_static_info[0]; -W_ GHC_ZCCReturnable_static_info[0]; - - +StgWord GHC_ZCCCallable_static_info[1]; +StgWord GHC_ZCCReturnable_static_info[1]; + /* ----------------------------------------------------------------------------- Macros for Hand-written primitives. -------------------------------------------------------------------------- */ @@ -52,10 +66,17 @@ W_ GHC_ZCCReturnable_static_info[0]; * We only define the cases actually used, to avoid having too much * garbage in this section. Warning: any bugs in here will be hard to * track down. + * + * The return convention for an unboxed tuple is as follows: + * - fit as many fields as possible in registers (as per the + * function fast-entry point calling convention). + * - sort the rest of the fields into pointers and non-pointers. + * push the pointers on the stack, followed by the non-pointers. + * (so the pointers have higher addresses). */ /*------ All Regs available */ -#if defined(REG_R8) +#if MAX_REAL_VANILLA_REG == 8 # define RET_P(a) R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0])); # define RET_N(a) RET_P(a) @@ -75,17 +96,11 @@ W_ GHC_ZCCReturnable_static_info[0]; R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)(d); \ JMP_(ENTRY_CODE(Sp[0])); -# define RET_NNPNNP(a,b,c,d,e,f) \ - R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); \ - R4.w = (W_)(d); R5.w = (W_)(e); R6.w = (W_)(f); \ - JMP_(ENTRY_CODE(Sp[0])); - -#elif defined(REG_R7) || defined(REG_R6) || defined(REG_R5) || \ - defined(REG_R4) || defined(REG_R3) +#elif MAX_REAL_VANILLA_REG > 2 && MAX_REAL_VANILLA_REG < 8 # error RET_n macros not defined for this setup. 
/*------ 2 Registers available */ -#elif defined(REG_R2) +#elif MAX_REAL_VANILLA_REG == 2 # define RET_P(a) R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0])); # define RET_N(a) RET_P(a) @@ -95,45 +110,38 @@ W_ GHC_ZCCReturnable_static_info[0]; # define RET_NN(a,b) RET_PP(a,b) # define RET_NP(a,b) RET_PP(a,b) -# define RET_PPP(a,b,c) \ - R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \ +# define RET_PPP(a,b,c) \ + R1.w = (W_)(a); \ + R2.w = (W_)(b); \ + Sp[-1] = (W_)(c); \ + Sp -= 1; \ JMP_(ENTRY_CODE(Sp[1])); -# define RET_NNP(a,b,c) \ - R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \ + +# define RET_NNP(a,b,c) \ + R1.w = (W_)(a); \ + R2.w = (W_)(b); \ + Sp[-1] = (W_)(c); \ + Sp -= 1; \ JMP_(ENTRY_CODE(Sp[1])); # define RET_NNNP(a,b,c,d) \ R1.w = (W_)(a); \ R2.w = (W_)(b); \ - /* Sp[-3] = ARGTAG(1); */ \ Sp[-2] = (W_)(c); \ Sp[-1] = (W_)(d); \ - Sp -= 3; \ - JMP_(ENTRY_CODE(Sp[3])); + Sp -= 2; \ + JMP_(ENTRY_CODE(Sp[2])); # define RET_NPNP(a,b,c,d) \ R1.w = (W_)(a); \ R2.w = (W_)(b); \ - /* Sp[-3] = ARGTAG(1); */ \ Sp[-2] = (W_)(c); \ Sp[-1] = (W_)(d); \ - Sp -= 3; \ - JMP_(ENTRY_CODE(Sp[3])); - -# define RET_NNPNNP(a,b,c,d,e,f) \ - R1.w = (W_)(a); \ - R2.w = (W_)(b); \ - Sp[-6] = (W_)(c); \ - /* Sp[-5] = ARGTAG(1); */ \ - Sp[-4] = (W_)(d); \ - /* Sp[-3] = ARGTAG(1); */ \ - Sp[-2] = (W_)(e); \ - Sp[-1] = (W_)(f); \ - Sp -= 6; \ - JMP_(ENTRY_CODE(Sp[6])); + Sp -= 2; \ + JMP_(ENTRY_CODE(Sp[2])); /*------ 1 Register available */ -#elif defined(REG_R1) +#elif MAX_REAL_VANILLA_REG == 1 # define RET_P(a) R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0])); # define RET_N(a) RET_P(a) @@ -143,88 +151,54 @@ W_ GHC_ZCCReturnable_static_info[0]; JMP_(ENTRY_CODE(Sp[2])); # define RET_NP(a,b) RET_PP(a,b) -# define RET_PPP(a,b,c) \ - R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 2; \ +# define RET_PPP(a,b,c) \ + R1.w = (W_)(a); \ + Sp[-2] = (W_)(b); \ + Sp[-1] = (W_)(c); \ + Sp -= 2; \ + JMP_(ENTRY_CODE(Sp[2])); + +# define RET_NNP(a,b,c) \ + R1.w = (W_)(a); \ + Sp[-2] = (W_)(b); \ + Sp[-1] = (W_)(c); \ + Sp -= 2; \ JMP_(ENTRY_CODE(Sp[2])); -# define RET_NNP(a,b,c) \ - R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 3; \ - JMP_(ENTRY_CODE(Sp[3])); # define RET_NNNP(a,b,c,d) \ R1.w = (W_)(a); \ - /* Sp[-5] = ARGTAG(1); */ \ - Sp[-4] = (W_)(b); \ - /* Sp[-3] = ARGTAG(1); */ \ + Sp[-3] = (W_)(b); \ Sp[-2] = (W_)(c); \ Sp[-1] = (W_)(d); \ - Sp -= 5; \ - JMP_(ENTRY_CODE(Sp[5])); + Sp -= 3; \ + JMP_(ENTRY_CODE(Sp[3])); # define RET_NPNP(a,b,c,d) \ R1.w = (W_)(a); \ - Sp[-4] = (W_)(b); \ - /* Sp[-3] = ARGTAG(1); */ \ - Sp[-2] = (W_)(c); \ + Sp[-3] = (W_)(c); \ + Sp[-2] = (W_)(b); \ Sp[-1] = (W_)(d); \ - Sp -= 4; \ - JMP_(ENTRY_CODE(Sp[4])); - -# define RET_NNPNNP(a,b,c,d,e,f) \ - R1.w = (W_)(a); \ - Sp[-1] = (W_)(f); \ - Sp[-2] = (W_)(e); \ - /* Sp[-3] = ARGTAG(1); */ \ - Sp[-4] = (W_)(d); \ - /* Sp[-5] = ARGTAG(1); */ \ - Sp[-6] = (W_)(c); \ - Sp[-7] = (W_)(b); \ - /* Sp[-8] = ARGTAG(1); */ \ - Sp -= 8; \ - JMP_(ENTRY_CODE(Sp[8])); + Sp -= 3; \ + JMP_(ENTRY_CODE(Sp[3])); #else /* 0 Regs available */ -#define PUSH_P(o,x) Sp[-o] = (W_)(x) - -#ifdef DEBUG -#define PUSH_N(o,x) Sp[1-o] = (W_)(x); Sp[-o] = ARG_TAG(1); -#else -#define PUSH_N(o,x) Sp[1-o] = (W_)(x); -#endif +#define PUSH(o,x) Sp[-o] = (W_)(x) #define PUSHED(m) Sp -= (m); JMP_(ENTRY_CODE(Sp[m])); -/* Here's how to construct these macros: - * - * N = number of N's in the name; - * P = number of P's in the name; - * s = N * 2 + P; - * while (nonNull(name)) { - * if (nextChar == 'P') { - * PUSH_P(s,_); - * s -= 1; - * } else { - 
* PUSH_N(s,_); - * s -= 2 - * } - * } - * PUSHED(N * 2 + P); - */ - -# define RET_P(a) PUSH_P(1,a); PUSHED(1) -# define RET_N(a) PUSH_N(2,a); PUSHED(2) +# define RET_P(a) PUSH(1,a); PUSHED(1) +# define RET_N(a) PUSH(1,a); PUSHED(1) -# define RET_PP(a,b) PUSH_P(2,a); PUSH_P(1,b); PUSHED(2) -# define RET_NN(a,b) PUSH_N(4,a); PUSH_N(2,b); PUSHED(4) -# define RET_NP(a,b) PUSH_N(3,a); PUSH_P(1,b); PUSHED(3) +# define RET_PP(a,b) PUSH(2,a); PUSH(1,b); PUSHED(2) +# define RET_NN(a,b) PUSH(2,a); PUSH(1,b); PUSHED(2) +# define RET_NP(a,b) PUSH(2,a); PUSH(1,b); PUSHED(2) -# define RET_PPP(a,b,c) PUSH_P(3,a); PUSH_P(2,b); PUSH_P(1,c); PUSHED(3) -# define RET_NNP(a,b,c) PUSH_N(5,a); PUSH_N(3,b); PUSH_P(1,c); PUSHED(5) - -# define RET_NNNP(a,b,c,d) PUSH_N(7,a); PUSH_N(5,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(7) -# define RET_NPNP(a,b,c,d) PUSH_N(6,a); PUSH_P(4,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(6) -# define RET_NNPNNP(a,b,c,d,e,f) PUSH_N(10,a); PUSH_N(8,b); PUSH_P(6,c); PUSH_N(5,d); PUSH_N(3,e); PUSH_P(1,f); PUSHED(10) +# define RET_PPP(a,b,c) PUSH(3,a); PUSH(2,b); PUSH(1,c); PUSHED(3) +# define RET_NNP(a,b,c) PUSH(3,a); PUSH(2,b); PUSH(1,c); PUSHED(3) +# define RET_NNNP(a,b,c,d) PUSH(4,a); PUSH(3,b); PUSH(2,c); PUSH(1,d); PUSHED(4) +# define RET_NPNP(a,b,c,d) PUSH(4,a); PUSH(3,c); PUSH(2,b); PUSH(1,d); PUSHED(4) #endif /*----------------------------------------------------------------------------- @@ -244,33 +218,58 @@ W_ GHC_ZCCReturnable_static_info[0]; #define BYTES_TO_STGWORDS(n) ((n) + sizeof(W_) - 1)/sizeof(W_) -#define newByteArray(ty,scale) \ - FN_(new##ty##Arrayzh_fast) \ - { \ - W_ stuff_size, size, n; \ - StgArrWords* p; \ - FB_ \ - MAYBE_GC(NO_PTRS,new##ty##Arrayzh_fast); \ - n = R1.w; \ - stuff_size = BYTES_TO_STGWORDS(n*scale); \ - size = sizeofW(StgArrWords)+ stuff_size; \ - p = (StgArrWords *)RET_STGCALL1(P_,allocate,size); \ - TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0); \ - SET_HDR(p, &ARR_WORDS_info, CCCS); \ - p->words = stuff_size; \ - TICK_RET_UNBOXED_TUP(1) \ - RET_P(p); \ - FE_ \ +FN_(newByteArrayzh_fast) + { + W_ size, stuff_size, n; + StgArrWords* p; + FB_ + MAYBE_GC(NO_PTRS,newByteArrayzh_fast); + n = R1.w; + stuff_size = BYTES_TO_STGWORDS(n); + size = sizeofW(StgArrWords)+ stuff_size; + p = (StgArrWords *)RET_STGCALL1(P_,allocate,size); + TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0); + SET_HDR(p, &stg_ARR_WORDS_info, CCCS); + p->words = stuff_size; + TICK_RET_UNBOXED_TUP(1) + RET_P(p); + FE_ } -newByteArray(Char, 1) -/* Char arrays really contain only 8-bit bytes for compatibility. */ -newByteArray(Int, sizeof(I_)) -newByteArray(Word, sizeof(W_)) -newByteArray(Addr, sizeof(P_)) -newByteArray(Float, sizeof(StgFloat)) -newByteArray(Double, sizeof(StgDouble)) -newByteArray(StablePtr, sizeof(StgStablePtr)) +FN_(newPinnedByteArrayzh_fast) + { + W_ size, stuff_size, n; + StgArrWords* p; + FB_ + MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast); + n = R1.w; + stuff_size = BYTES_TO_STGWORDS(n); + + // We want an 8-byte aligned array. allocatePinned() gives us + // 8-byte aligned memory by default, but we want to align the + // *goods* inside the ArrWords object, so we have to check the + // size of the ArrWords header and adjust our size accordingly. 
+ size = sizeofW(StgArrWords)+ stuff_size; + if ((sizeof(StgArrWords) & 7) != 0) { + size++; + } + + p = (StgArrWords *)RET_STGCALL1(P_,allocatePinned,size); + TICK_ALLOC_PRIM(sizeofW(StgArrWords),stuff_size,0); + + // Again, if the ArrWords header isn't a multiple of 8 bytes, we + // have to push the object forward one word so that the goods + // fall on an 8-byte boundary. + if ((sizeof(StgArrWords) & 7) != 0) { + ((StgPtr)p)++; + } + + SET_HDR(p, &stg_ARR_WORDS_info, CCCS); + p->words = stuff_size; + TICK_RET_UNBOXED_TUP(1) + RET_P(p); + FE_ + } FN_(newArrayzh_fast) { @@ -286,7 +285,7 @@ FN_(newArrayzh_fast) arr = (StgMutArrPtrs *)RET_STGCALL1(P_, allocate, size); TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0); - SET_HDR(arr,&MUT_ARR_PTRS_info,CCCS); + SET_HDR(arr,&stg_MUT_ARR_PTRS_info,CCCS); arr->ptrs = n; init = R2.w; @@ -306,12 +305,12 @@ FN_(newMutVarzh_fast) /* Args: R1.p = initialisation value */ FB_ - HP_CHK_GEN_TICKY(sizeofW(StgMutVar), R1_PTR, newMutVarzh_fast,); + HP_CHK_GEN_TICKY(sizeofW(StgMutVar), R1_PTR, newMutVarzh_fast); TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,1, 0); /* hack, dependent on rep. */ CCS_ALLOC(CCCS,sizeofW(StgMutVar)); mv = (StgMutVar *)(Hp-sizeofW(StgMutVar)+1); - SET_HDR(mv,&MUT_VAR_info,CCCS); + SET_HDR(mv,&stg_MUT_VAR_info,CCCS); mv->var = R1.cl; TICK_RET_UNBOXED_TUP(1); @@ -319,12 +318,63 @@ FN_(newMutVarzh_fast) FE_ } +FN_(atomicModifyMutVarzh_fast) +{ + StgMutVar* mv; + StgClosure *z, *x, *y, *r; + FB_ + /* Args: R1.p :: MutVar#, R2.p :: a -> (a,b) */ + + /* If x is the current contents of the MutVar#, then + We want to make the new contents point to + + (sel_0 (f x)) + + and the return value is + + (sel_1 (f x)) + + obviously we can share (f x). + + z = [stg_ap_2 f x] (max (HS + 2) MIN_UPD_SIZE) + y = [stg_sel_0 z] (max (HS + 1) MIN_UPD_SIZE) + r = [stg_sel_1 z] (max (HS + 1) MIN_UPD_SIZE) + */ + +#define THUNK_SIZE(n) (sizeofW(StgHeader) + stg_max((n), MIN_UPD_SIZE)) +#define SIZE (THUNK_SIZE(2) + THUNK_SIZE(1) + THUNK_SIZE(1)) + + HP_CHK_GEN_TICKY(SIZE, R1_PTR|R2_PTR, atomicModifyMutVarzh_fast); + CCS_ALLOC(CCCS,SIZE); + + x = ((StgMutVar *)R1.cl)->var; + + TICK_ALLOC_UP_THK(2,0); // XXX + z = (StgClosure *) Hp - THUNK_SIZE(2) + 1; + SET_HDR(z, (StgInfoTable *)&stg_ap_2_upd_info, CCCS); + z->payload[0] = R2.cl; + z->payload[1] = x; + + TICK_ALLOC_UP_THK(1,1); // XXX + y = (StgClosure *) (StgPtr)z - THUNK_SIZE(1); + SET_HDR(y, &stg_sel_0_upd_info, CCCS); + y->payload[0] = z; + + ((StgMutVar *)R1.cl)->var = y; + + TICK_ALLOC_UP_THK(1,1); // XXX + r = (StgClosure *) (StgPtr)y - THUNK_SIZE(1); + SET_HDR(r, &stg_sel_1_upd_info, CCCS); + r->payload[0] = z; + + RET_P(r); + FE_ +} + /* ----------------------------------------------------------------------------- Foreign Object Primitives - -------------------------------------------------------------------------- */ -#ifndef PAR FN_(mkForeignObjzh_fast) { /* R1.p = ptr to foreign object, @@ -332,13 +382,13 @@ FN_(mkForeignObjzh_fast) StgForeignObj *result; FB_ - HP_CHK_GEN_TICKY(sizeofW(StgForeignObj), NO_PTRS, mkForeignObjzh_fast,); + HP_CHK_GEN_TICKY(sizeofW(StgForeignObj), NO_PTRS, mkForeignObjzh_fast); TICK_ALLOC_PRIM(sizeofW(StgHeader), sizeofW(StgForeignObj)-sizeofW(StgHeader), 0); CCS_ALLOC(CCCS,sizeofW(StgForeignObj)); /* ccs prof */ result = (StgForeignObj *) (Hp + 1 - sizeofW(StgForeignObj)); - SET_HDR(result,&FOREIGN_info,CCCS); + SET_HDR(result,&stg_FOREIGN_info,CCCS); result->data = R1.p; /* returns (# s#, ForeignObj# #) */ @@ -346,13 +396,12 @@ FN_(mkForeignObjzh_fast) RET_P(result); FE_ } 
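
As an aside, the 8-byte alignment dance in newPinnedByteArrayzh_fast above is easy to get wrong, so here is a minimal stand-alone C sketch of the same arithmetic. This is not RTS code: HEADER_WORDS, alloc_words() and new_pinned_byte_array() are made-up names, alloc_words() plays the role of allocatePinned(), and the sketch assumes the underlying allocator hands back memory that is at least 8-byte aligned (as allocatePinned is expected to).

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-alone sketch (not RTS code) of the alignment arithmetic in
 * newPinnedByteArrayzh_fast.  alloc_words() stands in for
 * allocatePinned() and is assumed to return 8-byte aligned memory,
 * which malloc() provides on mainstream ABIs. */
#define WORD_BYTES   sizeof(uintptr_t)
#define HEADER_WORDS 3                        /* e.g. an odd-sized header */
#define HEADER_BYTES (HEADER_WORDS * WORD_BYTES)

static uintptr_t *alloc_words(size_t n)
{
    uintptr_t *p = malloc(n * WORD_BYTES);
    assert(((uintptr_t)p & 7) == 0);          /* allocator alignment assumption */
    return p;
}

static uintptr_t *new_pinned_byte_array(size_t payload_words)
{
    size_t size = HEADER_WORDS + payload_words;
    uintptr_t *obj;

    /* If the header is not a multiple of 8 bytes, reserve one extra word
     * so there is room to shift the whole object forward one word ...  */
    if ((HEADER_BYTES & 7) != 0)
        size++;

    obj = alloc_words(size);

    /* ... and then do the shift, so the payload (obj + HEADER_WORDS)
     * lands on an 8-byte boundary even though the header does not.     */
    if ((HEADER_BYTES & 7) != 0)
        obj++;

    assert(((uintptr_t)(obj + HEADER_WORDS) & 7) == 0);
    return obj;                               /* leaked in this toy example */
}

int main(void)
{
    uintptr_t *arr = new_pinned_byte_array(16);
    printf("payload starts at %p\n", (void *)(arr + HEADER_WORDS));
    return 0;
}

In this sketch the correction only ever fires when a word is 4 bytes and the header is an odd number of words; on a 64-bit word size the header is already a multiple of 8 and both branches are dead.
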
-#endif /* These two are out-of-line for the benefit of the NCG */ FN_(unsafeThawArrayzh_fast) { FB_ - SET_INFO((StgClosure *)R1.cl,&MUT_ARR_PTRS_info); + SET_INFO((StgClosure *)R1.cl,&stg_MUT_ARR_PTRS_info); recordMutable((StgMutClosure*)R1.cl); TICK_RET_UNBOXED_TUP(1); @@ -364,8 +413,6 @@ FN_(unsafeThawArrayzh_fast) Weak Pointer Primitives -------------------------------------------------------------------------- */ -#ifndef PAR - FN_(mkWeakzh_fast) { /* R1.p = key @@ -376,16 +423,16 @@ FN_(mkWeakzh_fast) FB_ if (R3.cl == NULL) { - R3.cl = &NO_FINALIZER_closure; + R3.cl = &stg_NO_FINALIZER_closure; } - HP_CHK_GEN_TICKY(sizeofW(StgWeak),R1_PTR|R2_PTR|R3_PTR, mkWeakzh_fast,); + HP_CHK_GEN_TICKY(sizeofW(StgWeak),R1_PTR|R2_PTR|R3_PTR, mkWeakzh_fast); TICK_ALLOC_PRIM(sizeofW(StgHeader)+1, // +1 is for the link field sizeofW(StgWeak)-sizeofW(StgHeader)-1, 0); CCS_ALLOC(CCCS,sizeofW(StgWeak)); /* ccs prof */ w = (StgWeak *) (Hp + 1 - sizeofW(StgWeak)); - SET_HDR(w, &WEAK_info, CCCS); + SET_HDR(w, &stg_WEAK_info, CCCS); w->key = R1.cl; w->value = R2.cl; @@ -411,25 +458,60 @@ FN_(finalizzeWeakzh_fast) w = (StgDeadWeak *)R1.p; /* already dead? */ - if (w->header.info == &DEAD_WEAK_info) { - RET_NP(0,&NO_FINALIZER_closure); + if (w->header.info == &stg_DEAD_WEAK_info) { + RET_NP(0,&stg_NO_FINALIZER_closure); } /* kill it */ - w->header.info = &DEAD_WEAK_info; +#ifdef PROFILING + // @LDV profiling + // A weak pointer is inherently used, so we do not need to call + // LDV_recordDead_FILL_SLOP_DYNAMIC(): + // LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w); + // or, LDV_recordDead(): + // LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader)); + // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as + // large as weak pointers, so there is no need to fill the slop, either. + // See stg_DEAD_WEAK_info in StgMiscClosures.hc. +#endif + // + // Todo: maybe use SET_HDR() and remove LDV_recordCreate()? + // + w->header.info = &stg_DEAD_WEAK_info; +#ifdef PROFILING + // @LDV profiling + LDV_recordCreate((StgClosure *)w); +#endif f = ((StgWeak *)w)->finalizer; w->link = ((StgWeak *)w)->link; /* return the finalizer */ - if (f == &NO_FINALIZER_closure) { - RET_NP(0,&NO_FINALIZER_closure); + if (f == &stg_NO_FINALIZER_closure) { + RET_NP(0,&stg_NO_FINALIZER_closure); } else { RET_NP(1,f); } FE_ } -#endif /* !PAR */ +FN_(deRefWeakzh_fast) +{ + /* R1.p = weak ptr */ + StgWeak* w; + I_ code; + P_ val; + FB_ + w = (StgWeak*)R1.p; + if (w->header.info == &stg_WEAK_info) { + code = 1; + val = (P_)((StgWeak *)w)->value; + } else { + code = 0; + val = (P_)w; + } + RET_NP(code,val); + FE_ +} /* ----------------------------------------------------------------------------- Arbitrary-precision Integer operations. 
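
To make the control flow in mkWeakzh_fast, finalizzeWeakzh_fast and deRefWeakzh_fast above easier to follow, here is a toy C model of the same state machine. It is purely illustrative: the struct, enum and function names are invented, NULL stands in for stg_NO_FINALIZER_closure, and the "info" field stands in for the closure's info pointer.

#include <stdio.h>

/* Toy model (not RTS code) of the weak-pointer state machine. */
enum weak_state { WEAK_LIVE, WEAK_DEAD };

struct weak {
    enum weak_state info;        /* stg_WEAK_info vs stg_DEAD_WEAK_info      */
    void *key;
    void *value;
    void (*finalizer)(void);     /* NULL plays the role of NO_FINALIZER      */
};

/* deRefWeak#: 1 plus the value while the weak pointer is alive, 0 otherwise. */
static int deref_weak(struct weak *w, void **val)
{
    if (w->info == WEAK_LIVE) { *val = w->value; return 1; }
    *val = NULL;
    return 0;
}

/* finalizeWeak#: kill the weak pointer and hand back its finalizer, if any;
 * a second call finds it already dead and returns nothing to run. */
static int finalize_weak(struct weak *w, void (**fin)(void))
{
    if (w->info == WEAK_DEAD) { *fin = NULL; return 0; }   /* already dead */
    w->info = WEAK_DEAD;
    *fin = w->finalizer;
    return *fin != NULL;
}

static void say_goodbye(void) { puts("finalizer ran"); }

int main(void)
{
    struct weak w = { WEAK_LIVE, NULL, "payload", say_goodbye };
    void *v;
    void (*fin)(void);

    printf("alive: %d\n", deref_weak(&w, &v));   /* 1 */
    if (finalize_weak(&w, &fin)) fin();          /* runs the finalizer once  */
    printf("alive: %d\n", deref_weak(&w, &v));   /* 0 */
    return 0;
}

The point is the same one the RTS code makes by checking for stg_DEAD_WEAK_info first: once the info pointer has been flipped to the dead state, deRef reports failure and a repeated finalize is a no-op, so a finalizer can never be handed out twice.
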
@@ -444,12 +526,12 @@ FN_(int2Integerzh_fast) FB_ val = R1.i; - HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, int2Integerzh_fast,); + HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, int2Integerzh_fast); TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0); CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */ p = (StgArrWords *)Hp - 1; - SET_ARR_HDR(p, &ARR_WORDS_info, CCCS, 1); + SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1); /* mpz_set_si is inlined here, makes things simpler */ if (val < 0) { @@ -481,12 +563,12 @@ FN_(word2Integerzh_fast) FB_ val = R1.w; - HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, word2Integerzh_fast,) + HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, word2Integerzh_fast) TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0); CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */ p = (StgArrWords *)Hp - 1; - SET_ARR_HDR(p, &ARR_WORDS_info, CCCS, 1); + SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, 1); if (val != 0) { s = 1; @@ -530,22 +612,22 @@ FN_(int64ToIntegerzh_fast) /* minimum is one word */ words_needed = 1; } - HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, int64ToIntegerzh_fast,) + HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, int64ToIntegerzh_fast) TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0); CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */ p = (StgArrWords *)(Hp-words_needed+1) - 1; - SET_ARR_HDR(p, &ARR_WORDS_info, CCCS, words_needed); + SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed); if ( val < 0LL ) { neg = 1; val = -val; - } + } hi = (W_)((LW_)val / 0x100000000ULL); if ( words_needed == 2 ) { - s = 2; + s = 2; Hp[-1] = (W_)val; Hp[0] = hi; } else if ( val != 0 ) { @@ -581,12 +663,12 @@ FN_(word64ToIntegerzh_fast) } else { words_needed = 1; } - HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, word64ToIntegerzh_fast,) + HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, word64ToIntegerzh_fast) TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0); CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */ p = (StgArrWords *)(Hp-words_needed+1) - 1; - SET_ARR_HDR(p, &ARR_WORDS_info, CCCS, words_needed); + SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, words_needed); hi = (W_)((LW_)val / 0x100000000ULL); if ( val >= 0x100000000ULL ) { @@ -610,7 +692,7 @@ FN_(word64ToIntegerzh_fast) } -#endif /* HAVE_LONG_LONG */ +#endif /* SUPPORT_LONG_LONGS */ /* ToDo: this is shockingly inefficient */ @@ -731,7 +813,161 @@ GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com); GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr); GMP_TAKE2_RET2(divModIntegerzh_fast, mpz_fdiv_qr); -#ifndef FLOATS_AS_DOUBLES + +FN_(gcdIntzh_fast) +{ + /* R1 = the first Int#; R2 = the second Int# */ + mp_limb_t aa; + I_ r; + FB_ + aa = (mp_limb_t)(R1.i); + r = RET_STGCALL3(StgInt, mpn_gcd_1, (mp_limb_t *)(&aa), 1, (mp_limb_t)(R2.i)); + + R1.i = r; + /* Result parked in R1, return via info-pointer at TOS */ + JMP_(ENTRY_CODE(Sp[0])); + FE_ +} + +FN_(gcdIntegerIntzh_fast) +{ + /* R1 = s1; R2 = d1; R3 = the int */ + I_ r; + FB_ + r = RET_STGCALL3(StgInt,mpn_gcd_1,(mp_limb_t *)(BYTE_ARR_CTS(R2.p)), R1.i, R3.i); + + R1.i = r; + /* Result parked in R1, return via info-pointer at TOS */ + JMP_(ENTRY_CODE(Sp[0])); + FE_ +} + +FN_(cmpIntegerIntzh_fast) +{ + /* R1 = s1; R2 = d1; R3 = the int */ + I_ usize; + I_ vsize; + I_ v_digit; + mp_limb_t u_digit; + FB_ + + usize = R1.i; + vsize = 0; + v_digit = R3.i; + + // paraphrased from mpz_cmp_si() in the GMP sources + if (v_digit > 0) { + vsize = 1; + } else if (v_digit < 0) { + vsize = -1; 
+ v_digit = -v_digit; + } + + if (usize != vsize) { + R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0])); + } + + if (usize == 0) { + R1.i = 0; JMP_(ENTRY_CODE(Sp[0])); + } + + u_digit = *(mp_limb_t *)(BYTE_ARR_CTS(R2.p)); + + if (u_digit == (mp_limb_t) (unsigned long) v_digit) { + R1.i = 0; JMP_(ENTRY_CODE(Sp[0])); + } + + if (u_digit > (mp_limb_t) (unsigned long) v_digit) { + R1.i = usize; + } else { + R1.i = -usize; + } + + JMP_(ENTRY_CODE(Sp[0])); + FE_ +} + +FN_(cmpIntegerzh_fast) +{ + /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */ + I_ usize; + I_ vsize; + I_ size; + StgPtr up, vp; + int cmp; + FB_ + + // paraphrased from mpz_cmp() in the GMP sources + usize = R1.i; + vsize = R3.i; + + if (usize != vsize) { + R1.i = usize - vsize; JMP_(ENTRY_CODE(Sp[0])); + } + + if (usize == 0) { + R1.i = 0; JMP_(ENTRY_CODE(Sp[0])); + } + + size = abs(usize); + + up = BYTE_ARR_CTS(R2.p); + vp = BYTE_ARR_CTS(R4.p); + + cmp = RET_STGCALL3(I_, mpn_cmp, (mp_limb_t *)up, (mp_limb_t *)vp, size); + + if (cmp == 0) { + R1.i = 0; JMP_(ENTRY_CODE(Sp[0])); + } + + if ((cmp < 0) == (usize < 0)) { + R1.i = 1; + } else { + R1.i = (-1); + } + /* Result parked in R1, return via info-pointer at TOS */ + JMP_(ENTRY_CODE(Sp[0])); + FE_ +} + +FN_(integer2Intzh_fast) +{ + /* R1 = s; R2 = d */ + I_ r, s; + FB_ + s = R1.i; + if (s == 0) + r = 0; + else { + r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0]; + if (s < 0) r = -r; + } + /* Result parked in R1, return via info-pointer at TOS */ + R1.i = r; + JMP_(ENTRY_CODE(Sp[0])); + FE_ +} + +FN_(integer2Wordzh_fast) +{ + /* R1 = s; R2 = d */ + I_ s; + W_ r; + FB_ + s = R1.i; + if (s == 0) + r = 0; + else { + r = ((mp_limb_t *) (BYTE_ARR_CTS(R2.p)))[0]; + if (s < 0) r = -r; + } + /* Result parked in R1, return via info-pointer at TOS */ + R1.w = r; + JMP_(ENTRY_CODE(Sp[0])); + FE_ +} + + FN_(decodeFloatzh_fast) { MP_INT mantissa; @@ -743,14 +979,14 @@ FN_(decodeFloatzh_fast) /* arguments: F1 = Float# */ arg = F1; - HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, decodeFloatzh_fast,); + HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, decodeFloatzh_fast); TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0); CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */ /* Be prepared to tell Lennart-coded __decodeFloat */ /* where mantissa._mp_d can be put (it does not care about the rest) */ p = (StgArrWords *)Hp - 1; - SET_ARR_HDR(p,&ARR_WORDS_info,CCCS,1) + SET_ARR_HDR(p,&stg_ARR_WORDS_info,CCCS,1) mantissa._mp_d = (void *)BYTE_ARR_CTS(p); /* Perform the operation */ @@ -761,7 +997,6 @@ FN_(decodeFloatzh_fast) RET_NNP(exponent,mantissa._mp_size,p); FE_ } -#endif /* !FLOATS_AS_DOUBLES */ #define DOUBLE_MANTISSA_SIZE (sizeofW(StgDouble)) #define ARR_SIZE (sizeofW(StgArrWords) + DOUBLE_MANTISSA_SIZE) @@ -776,14 +1011,14 @@ FN_(decodeDoublezh_fast) /* arguments: D1 = Double# */ arg = D1; - HP_CHK_GEN_TICKY(ARR_SIZE, NO_PTRS, decodeDoublezh_fast,); + HP_CHK_GEN_TICKY(ARR_SIZE, NO_PTRS, decodeDoublezh_fast); TICK_ALLOC_PRIM(sizeofW(StgArrWords),DOUBLE_MANTISSA_SIZE,0); CCS_ALLOC(CCCS,ARR_SIZE); /* ccs prof */ /* Be prepared to tell Lennart-coded __decodeDouble */ /* where mantissa.d can be put (it does not care about the rest) */ p = (StgArrWords *)(Hp-ARR_SIZE+1); - SET_ARR_HDR(p, &ARR_WORDS_info, CCCS, DOUBLE_MANTISSA_SIZE); + SET_ARR_HDR(p, &stg_ARR_WORDS_info, CCCS, DOUBLE_MANTISSA_SIZE); mantissa._mp_d = (void *)BYTE_ARR_CTS(p); /* Perform the operation */ @@ -808,13 +1043,27 @@ FN_(forkzh_fast) /* create it right now, return ThreadID in R1 */ R1.t = RET_STGCALL2(StgTSO *, createIOThread, - 
RtsFlags.GcFlags.initialStkSize, R1.cl); + RtsFlags.GcFlags.initialStkSize, R1.cl); STGCALL1(scheduleThread, R1.t); /* switch at the earliest opportunity */ context_switch = 1; - JMP_(ENTRY_CODE(Sp[0])); + RET_P(R1.t); + FE_ +} + +FN_(forkProcesszh_fast) +{ + pid_t pid; + + FB_ + /* args: none */ + /* result: Pid */ + + R1.i = RET_STGCALL1(StgInt, forkProcess, CurrentTSO); + + RET_N(R1.i); FE_ } @@ -825,6 +1074,71 @@ FN_(yieldzh_fast) FE_ } +FN_(myThreadIdzh_fast) +{ + /* no args. */ + FB_ + RET_P((P_)CurrentTSO); + FE_ +} + +FN_(labelThreadzh_fast) +{ + FB_ + /* args: + R1.p = ThreadId# + R2.p = Addr# */ +#ifdef DEBUG + STGCALL2(labelThread,R1.p,(char *)R2.p); +#endif + JMP_(ENTRY_CODE(Sp[0])); + FE_ +} + + +/* ----------------------------------------------------------------------------- + * MVar primitives + * + * take & putMVar work as follows. Firstly, an important invariant: + * + * If the MVar is full, then the blocking queue contains only + * threads blocked on putMVar, and if the MVar is empty then the + * blocking queue contains only threads blocked on takeMVar. + * + * takeMvar: + * MVar empty : then add ourselves to the blocking queue + * MVar full : remove the value from the MVar, and + * blocking queue empty : return + * blocking queue non-empty : perform the first blocked putMVar + * from the queue, and wake up the + * thread (MVar is now full again) + * + * putMVar is just the dual of the above algorithm. + * + * How do we "perform a putMVar"? Well, we have to fiddle around with + * the stack of the thread waiting to do the putMVar. See + * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for + * the stack layout, and the PerformPut and PerformTake macros below. + * + * It is important that a blocked take or put is woken up with the + * take/put already performed, because otherwise there would be a + * small window of vulnerability where the thread could receive an + * exception and never perform its take or put, and we'd end up with a + * deadlock. 
+ * + * -------------------------------------------------------------------------- */ + +FN_(isEmptyMVarzh_fast) +{ + /* args: R1 = MVar closure */ + I_ r; + FB_ + r = (I_)((GET_INFO((StgMVar*)(R1.p))) == &stg_EMPTY_MVAR_info); + RET_N(r); + FE_ +} + + FN_(newMVarzh_fast) { StgMVar *mvar; @@ -832,21 +1146,41 @@ FN_(newMVarzh_fast) FB_ /* args: none */ - HP_CHK_GEN_TICKY(sizeofW(StgMVar), NO_PTRS, newMVarzh_fast,); + HP_CHK_GEN_TICKY(sizeofW(StgMVar), NO_PTRS, newMVarzh_fast); TICK_ALLOC_PRIM(sizeofW(StgMutVar)-1, // consider head,tail,link as admin wds 1, 0); CCS_ALLOC(CCCS,sizeofW(StgMVar)); /* ccs prof */ mvar = (StgMVar *) (Hp - sizeofW(StgMVar) + 1); - SET_HDR(mvar,&EMPTY_MVAR_info,CCCS); - mvar->head = mvar->tail = (StgTSO *)&END_TSO_QUEUE_closure; - mvar->value = (StgClosure *)&END_TSO_QUEUE_closure; + SET_HDR(mvar,&stg_EMPTY_MVAR_info,CCCS); + mvar->head = mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure; + mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure; TICK_RET_UNBOXED_TUP(1); RET_P(mvar); FE_ } +/* If R1 isn't available, pass it on the stack */ +#ifdef REG_R1 +#define PerformTake(tso, value) ({ \ + (tso)->sp[1] = (W_)value; \ + (tso)->sp[0] = (W_)&stg_gc_unpt_r1_info; \ + }) +#else +#define PerformTake(tso, value) ({ \ + (tso)->sp[1] = (W_)value; \ + (tso)->sp[0] = (W_)&stg_ut_1_0_unreg_info; \ + }) +#endif + + +#define PerformPut(tso) ({ \ + StgClosure *val = (StgClosure *)(tso)->sp[2]; \ + (tso)->sp += 3; \ + val; \ + }) + FN_(takeMVarzh_fast) { StgMVar *mvar; @@ -867,34 +1201,62 @@ FN_(takeMVarzh_fast) /* If the MVar is empty, put ourselves on its blocking queue, * and wait until we're woken up. */ - if (info == &EMPTY_MVAR_info) { - if (mvar->head == (StgTSO *)&END_TSO_QUEUE_closure) { + if (info == &stg_EMPTY_MVAR_info) { + if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) { mvar->head = CurrentTSO; } else { mvar->tail->link = CurrentTSO; } - CurrentTSO->link = (StgTSO *)&END_TSO_QUEUE_closure; + CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure; CurrentTSO->why_blocked = BlockedOnMVar; CurrentTSO->block_info.closure = (StgClosure *)mvar; mvar->tail = CurrentTSO; #ifdef SMP /* unlock the MVar */ - mvar->header.info = &EMPTY_MVAR_info; + mvar->header.info = &stg_EMPTY_MVAR_info; #endif - BLOCK(R1_PTR, takeMVarzh_fast); + JMP_(stg_block_takemvar); } + /* we got the value... */ val = mvar->value; - mvar->value = (StgClosure *)&END_TSO_QUEUE_closure; - /* do this last... we might have locked the MVar in the SMP case, - * and writing the info pointer will unlock it. - */ - SET_INFO(mvar,&EMPTY_MVAR_info); + if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) { + /* There are putMVar(s) waiting... + * wake up the first thread on the queue + */ + ASSERT(mvar->head->why_blocked == BlockedOnMVar); - TICK_RET_UNBOXED_TUP(1); - RET_P(val); + /* actually perform the putMVar for the thread that we just woke up */ + mvar->value = PerformPut(mvar->head); + +#if defined(GRAN) || defined(PAR) + /* ToDo: check 2nd arg (mvar) is right */ + mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar); +#else + mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head); +#endif + if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) { + mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure; + } +#ifdef SMP + /* unlock in the SMP case */ + SET_INFO(mvar,&stg_FULL_MVAR_info); +#endif + TICK_RET_UNBOXED_TUP(1); + RET_P(val); + } else { + /* No further putMVars, MVar is now empty */ + + /* do this last... 
we might have locked the MVar in the SMP case, + * and writing the info pointer will unlock it. + */ + SET_INFO(mvar,&stg_EMPTY_MVAR_info); + mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure; + TICK_RET_UNBOXED_TUP(1); + RET_P(val); + } FE_ } @@ -915,27 +1277,56 @@ FN_(tryTakeMVarzh_fast) info = GET_INFO(mvar); #endif - if (info == &EMPTY_MVAR_info) { + if (info == &stg_EMPTY_MVAR_info) { #ifdef SMP - /* unlock the MVar */ - mvar->header.info = &EMPTY_MVAR_info; + /* unlock the MVar */ + SET_INFO(mvar,&stg_EMPTY_MVAR_info); #endif - /* HACK: we need a pointer to pass back, so we abuse NO_FINALIZER_closure */ - RET_NP(0, &NO_FINALIZER_closure); + /* HACK: we need a pointer to pass back, + * so we abuse NO_FINALIZER_closure + */ + RET_NP(0, &stg_NO_FINALIZER_closure); } + /* we got the value... */ val = mvar->value; - mvar->value = (StgClosure *)&END_TSO_QUEUE_closure; - /* do this last... we might have locked the MVar in the SMP case, - * and writing the info pointer will unlock it. - */ - SET_INFO(mvar,&EMPTY_MVAR_info); + if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) { + /* There are putMVar(s) waiting... + * wake up the first thread on the queue + */ + ASSERT(mvar->head->why_blocked == BlockedOnMVar); + + /* actually perform the putMVar for the thread that we just woke up */ + mvar->value = PerformPut(mvar->head); + +#if defined(GRAN) || defined(PAR) + /* ToDo: check 2nd arg (mvar) is right */ + mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar); +#else + mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head); +#endif + if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) { + mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure; + } +#ifdef SMP + /* unlock in the SMP case */ + SET_INFO(mvar,&stg_FULL_MVAR_info); +#endif + } else { + /* No further putMVars, MVar is now empty */ + mvar->value = (StgClosure *)&stg_END_TSO_QUEUE_closure; + + /* do this last... we might have locked the MVar in the SMP case, + * and writing the info pointer will unlock it. + */ + SET_INFO(mvar,&stg_EMPTY_MVAR_info); + } TICK_RET_UNBOXED_TUP(1); - RET_NP(1,val); + RET_NP((I_)1, val); FE_ } @@ -955,41 +1346,115 @@ FN_(putMVarzh_fast) info = GET_INFO(mvar); #endif - if (info == &FULL_MVAR_info) { -#ifdef INTERPRETER - fprintf(stderr, "fatal: put on a full MVar in Hugs; aborting\n" ); - exit(1); -#else - R1.cl = (StgClosure *)PutFullMVar_closure; - JMP_(raisezh_fast); + if (info == &stg_FULL_MVAR_info) { + if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) { + mvar->head = CurrentTSO; + } else { + mvar->tail->link = CurrentTSO; + } + CurrentTSO->link = (StgTSO *)&stg_END_TSO_QUEUE_closure; + CurrentTSO->why_blocked = BlockedOnMVar; + CurrentTSO->block_info.closure = (StgClosure *)mvar; + mvar->tail = CurrentTSO; + +#ifdef SMP + /* unlock the MVar */ + SET_INFO(mvar,&stg_FULL_MVAR_info); #endif + JMP_(stg_block_putmvar); } - mvar->value = R2.cl; + if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) { + /* There are takeMVar(s) waiting: wake up the first one + */ + ASSERT(mvar->head->why_blocked == BlockedOnMVar); - /* wake up the first thread on the queue, it will continue with the - * takeMVar operation and mark the MVar empty again. 
- */ - if (mvar->head != (StgTSO *)&END_TSO_QUEUE_closure) { - ASSERT(mvar->head->why_blocked == BlockedOnMVar); -#if defined(GRAN) - mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar); -#elif defined(PAR) - // ToDo: check 2nd arg (mvar) is right - mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar); + /* actually perform the takeMVar */ + PerformTake(mvar->head, R2.cl); + +#if defined(GRAN) || defined(PAR) + /* ToDo: check 2nd arg (mvar) is right */ + mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar); #else - mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head); + mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head); #endif - if (mvar->head == (StgTSO *)&END_TSO_QUEUE_closure) { - mvar->tail = (StgTSO *)&END_TSO_QUEUE_closure; - } + if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) { + mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure; + } +#ifdef SMP + /* unlocks the MVar in the SMP case */ + SET_INFO(mvar,&stg_EMPTY_MVAR_info); +#endif + JMP_(ENTRY_CODE(Sp[0])); + } else { + /* No further takes, the MVar is now full. */ + mvar->value = R2.cl; + /* unlocks the MVar in the SMP case */ + SET_INFO(mvar,&stg_FULL_MVAR_info); + JMP_(ENTRY_CODE(Sp[0])); } - /* unlocks the MVar in the SMP case */ - SET_INFO(mvar,&FULL_MVAR_info); + /* ToDo: yield afterward for better communication performance? */ + FE_ +} - /* ToDo: yield here for better communication performance? */ - JMP_(ENTRY_CODE(Sp[0])); +FN_(tryPutMVarzh_fast) +{ + StgMVar *mvar; + const StgInfoTable *info; + + FB_ + /* args: R1 = MVar, R2 = value */ + + mvar = (StgMVar *)R1.p; + +#ifdef SMP + info = LOCK_CLOSURE(mvar); +#else + info = GET_INFO(mvar); +#endif + + if (info == &stg_FULL_MVAR_info) { + +#ifdef SMP + /* unlock the MVar */ + mvar->header.info = &stg_FULL_MVAR_info; +#endif + + RET_N(0); + } + + if (mvar->head != (StgTSO *)&stg_END_TSO_QUEUE_closure) { + /* There are takeMVar(s) waiting: wake up the first one + */ + ASSERT(mvar->head->why_blocked == BlockedOnMVar); + + /* actually perform the takeMVar */ + PerformTake(mvar->head, R2.cl); + +#if defined(GRAN) || defined(PAR) + /* ToDo: check 2nd arg (mvar) is right */ + mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar); +#else + mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head); +#endif + if (mvar->head == (StgTSO *)&stg_END_TSO_QUEUE_closure) { + mvar->tail = (StgTSO *)&stg_END_TSO_QUEUE_closure; + } +#ifdef SMP + /* unlocks the MVar in the SMP case */ + SET_INFO(mvar,&stg_EMPTY_MVAR_info); +#endif + JMP_(ENTRY_CODE(Sp[0])); + } else { + /* No further takes, the MVar is now full. */ + mvar->value = R2.cl; + /* unlocks the MVar in the SMP case */ + SET_INFO(mvar,&stg_FULL_MVAR_info); + JMP_(ENTRY_CODE(Sp[0])); + } + + /* ToDo: yield afterward for better communication performance? */ FE_ } @@ -1003,7 +1468,7 @@ FN_(makeStableNamezh_fast) StgStableName *sn_obj; FB_ - HP_CHK_GEN_TICKY(sizeofW(StgStableName), R1_PTR, makeStableNamezh_fast,); + HP_CHK_GEN_TICKY(sizeofW(StgStableName), R1_PTR, makeStableNamezh_fast); TICK_ALLOC_PRIM(sizeofW(StgHeader), sizeofW(StgStableName)-sizeofW(StgHeader), 0); CCS_ALLOC(CCCS,sizeofW(StgStableName)); /* ccs prof */ @@ -1013,7 +1478,7 @@ FN_(makeStableNamezh_fast) /* Is there already a StableName for this heap object? 
*/ if (stable_ptr_table[index].sn_obj == NULL) { sn_obj = (StgStableName *) (Hp - sizeofW(StgStableName) + 1); - sn_obj->header.info = &STABLE_NAME_info; + SET_HDR(sn_obj,&stg_STABLE_NAME_info,CCCS); sn_obj->sn = index; stable_ptr_table[index].sn_obj = (StgClosure *)sn_obj; } else { @@ -1024,6 +1489,102 @@ FN_(makeStableNamezh_fast) RET_P(sn_obj); } + +FN_(makeStablePtrzh_fast) +{ + /* Args: R1 = a */ + StgStablePtr sp; + FB_ + MAYBE_GC(R1_PTR, makeStablePtrzh_fast); + sp = RET_STGCALL1(StgStablePtr,getStablePtr,R1.p); + RET_N(sp); + FE_ +} + +FN_(deRefStablePtrzh_fast) +{ + /* Args: R1 = the stable ptr */ + P_ r; + StgStablePtr sp; + FB_ + sp = (StgStablePtr)R1.w; + r = stable_ptr_table[(StgWord)sp].addr; + RET_P(r); + FE_ +} + +/* ----------------------------------------------------------------------------- + Bytecode object primitives + ------------------------------------------------------------------------- */ + +FN_(newBCOzh_fast) +{ + /* R1.p = instrs + R2.p = literals + R3.p = ptrs + R4.p = itbls + R5.i = arity + R6.p = bitmap array + */ + StgBCO *bco; + nat size; + StgArrWords *bitmap_arr; + FB_ + + bitmap_arr = (StgArrWords *)R6.cl; + size = sizeofW(StgBCO) + bitmap_arr->words; + HP_CHK_GEN_TICKY(size,R1_PTR|R2_PTR|R3_PTR|R4_PTR|R6_PTR, newBCOzh_fast); + TICK_ALLOC_PRIM(size, size-sizeofW(StgHeader), 0); + CCS_ALLOC(CCCS,size); /* ccs prof */ + bco = (StgBCO *) (Hp + 1 - size); + SET_HDR(bco, (const StgInfoTable *)&stg_BCO_info, CCCS); + + bco->instrs = (StgArrWords*)R1.cl; + bco->literals = (StgArrWords*)R2.cl; + bco->ptrs = (StgMutArrPtrs*)R3.cl; + bco->itbls = (StgArrWords*)R4.cl; + bco->arity = R5.w; + bco->size = size; + + // Copy the arity/bitmap info into the BCO + { + int i; + for (i = 0; i < bitmap_arr->words; i++) { + bco->bitmap[i] = bitmap_arr->payload[i]; + } + } + + TICK_RET_UNBOXED_TUP(1); + RET_P(bco); + FE_ +} + +FN_(mkApUpd0zh_fast) +{ + // R1.p = the BCO# for the AP + // + StgPAP* ap; + FB_ + + // This function is *only* used to wrap zero-arity BCOs in an + // updatable wrapper (see ByteCodeLink.lhs). An AP thunk is always + // saturated and always points directly to a FUN or BCO. 
+ ASSERT(get_itbl(R1.cl)->type == BCO && ((StgBCO *)R1.p)->arity == 0); + + HP_CHK_GEN_TICKY(PAP_sizeW(0), R1_PTR, mkApUpd0zh_fast); + TICK_ALLOC_PRIM(sizeofW(StgHeader), PAP_sizeW(0)-sizeofW(StgHeader), 0); + CCS_ALLOC(CCCS,PAP_sizeW(0)); /* ccs prof */ + ap = (StgPAP *) (Hp + 1 - PAP_sizeW(0)); + SET_HDR(ap, &stg_AP_info, CCCS); + + ap->n_args = 0; + ap->fun = R1.cl; + + TICK_RET_UNBOXED_TUP(1); + RET_P(ap); + FE_ +} + /* ----------------------------------------------------------------------------- Thread I/O blocking primitives -------------------------------------------------------------------------- */ @@ -1058,16 +1619,34 @@ FN_(waitWritezh_fast) FN_(delayzh_fast) { +#ifdef mingw32_TARGET_OS + StgAsyncIOResult* ares; + unsigned int reqID; +#else StgTSO *t, *prev; nat target; +#endif FB_ /* args: R1.i */ ASSERT(CurrentTSO->why_blocked == NotBlocked); CurrentTSO->why_blocked = BlockedOnDelay; ACQUIRE_LOCK(&sched_mutex); - - target = (R1.i / (TICK_MILLISECS*1000)) + timestamp + ticks_since_timestamp; +#ifdef mingw32_TARGET_OS + /* could probably allocate this on the heap instead */ + ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncWritezh_fast"); + reqID = RET_STGCALL1(W_,addDelayRequest,R1.i); + ares->reqID = reqID; + ares->len = 0; + ares->errCode = 0; + CurrentTSO->block_info.async_result = ares; + /* Having all async-blocked threads reside on the blocked_queue simplifies matters, so + * change the status to OnDoProc & put the delayed thread on the blocked_queue. + */ + CurrentTSO->why_blocked = BlockedOnDoProc; + APPEND_TO_BLOCKED_QUEUE(CurrentTSO); +#else + target = (R1.i / (TICK_MILLISECS*1000)) + getourtimeofday(); CurrentTSO->block_info.target = target; /* Insert the new thread in the sleeping queue. 
*/ @@ -1084,9 +1663,76 @@ FN_(delayzh_fast) } else { prev->link = CurrentTSO; } - +#endif RELEASE_LOCK(&sched_mutex); JMP_(stg_block_noregs); FE_ } +#ifdef mingw32_TARGET_OS +FN_(asyncReadzh_fast) +{ + StgAsyncIOResult* ares; + unsigned int reqID; + FB_ + /* args: R1.i = fd, R2.i = isSock, R3.i = len, R4.p = buf */ + ASSERT(CurrentTSO->why_blocked == NotBlocked); + CurrentTSO->why_blocked = BlockedOnRead; + ACQUIRE_LOCK(&sched_mutex); + /* could probably allocate this on the heap instead */ + ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncReadzh_fast"); + reqID = RET_STGCALL5(W_,addIORequest,R1.i,FALSE,R2.i,R3.i,(char*)R4.p); + ares->reqID = reqID; + ares->len = 0; + ares->errCode = 0; + CurrentTSO->block_info.async_result = ares; + APPEND_TO_BLOCKED_QUEUE(CurrentTSO); + RELEASE_LOCK(&sched_mutex); + JMP_(stg_block_async); + FE_ +} + +FN_(asyncWritezh_fast) +{ + StgAsyncIOResult* ares; + unsigned int reqID; + FB_ + /* args: R1.i */ + /* args: R1.i = fd, R2.i = isSock, R3.i = len, R4.p = buf */ + ASSERT(CurrentTSO->why_blocked == NotBlocked); + CurrentTSO->why_blocked = BlockedOnWrite; + ACQUIRE_LOCK(&sched_mutex); + ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncWritezh_fast"); + reqID = RET_STGCALL5(W_,addIORequest,R1.i,TRUE,R2.i,R3.i,(char*)R4.p); + ares->reqID = reqID; + ares->len = 0; + ares->errCode = 0; + CurrentTSO->block_info.async_result = ares; + APPEND_TO_BLOCKED_QUEUE(CurrentTSO); + RELEASE_LOCK(&sched_mutex); + JMP_(stg_block_async); + FE_ +} + +FN_(asyncDoProczh_fast) +{ + StgAsyncIOResult* ares; + unsigned int reqID; + FB_ + /* args: R1.i = proc, R2.i = param */ + ASSERT(CurrentTSO->why_blocked == NotBlocked); + CurrentTSO->why_blocked = BlockedOnDoProc; + ACQUIRE_LOCK(&sched_mutex); + /* could probably allocate this on the heap instead */ + ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncDoProczh_fast"); + reqID = RET_STGCALL2(W_,addDoProcRequest,R1.p,R2.p); + ares->reqID = reqID; + ares->len = 0; + ares->errCode = 0; + CurrentTSO->block_info.async_result = ares; + APPEND_TO_BLOCKED_QUEUE(CurrentTSO); + RELEASE_LOCK(&sched_mutex); + JMP_(stg_block_async); + FE_ +} +#endif
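
For reference, the non-mingw32 branch of delayzh_fast above keeps sleeping threads on a queue sorted by wake-up time. The following stand-alone C sketch shows the same insertion step; struct tso, sleeping_queue and insert_sleeper are stand-in names, not the real RTS definitions.

#include <stdio.h>
#include <stdlib.h>

/* Toy version (not RTS code) of the sorted insert into the sleeping queue. */
struct tso {
    unsigned int target;         /* wake-up time in ticks */
    struct tso *link;
};

static struct tso *sleeping_queue = NULL;

static void insert_sleeper(struct tso *t)
{
    struct tso **prevp = &sleeping_queue;

    /* Walk until we find the first thread that wakes up later than us. */
    while (*prevp != NULL && (*prevp)->target <= t->target)
        prevp = &(*prevp)->link;

    t->link = *prevp;
    *prevp = t;
}

int main(void)
{
    unsigned int targets[] = { 30, 10, 20 };
    for (int i = 0; i < 3; i++) {
        struct tso *t = malloc(sizeof *t);
        t->target = targets[i];
        insert_sleeper(t);
    }
    for (struct tso *t = sleeping_queue; t != NULL; t = t->link)
        printf("wake at %u\n", t->target);      /* 10, 20, 30 */
    return 0;
}

Keeping the queue ordered this way means the scheduler only has to compare the current time against the head of the list to decide whether any delayed thread is due to be woken.
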