%
% (c) The GRASP/AQUA Project, Glasgow University, 1993-1994
%
\section[StgMacros]{C macros used in GHC-generated \tr{.hc} files}

\begin{code}
#ifndef STGMACROS_H
#define STGMACROS_H
\end{code}

%************************************************************************
%*									*
\subsection[StgMacros-abbrev]{Abbreviatory(?) and general macros}
%*									*
%************************************************************************

Mere abbreviations:
\begin{code}
/* for function declarations */
#define STGFUN(f)  F_ f(STG_NO_ARGS)
#define STATICFUN(f) static F_ f(STG_NO_ARGS)

/* for functions/data that are really external to this module */
#define EXTFUN(f)	extern F_ f(STG_NO_ARGS)
#define EXTDATA(d)	extern W_ d[]
#define EXTDATA_RO(d)	extern const W_ d[] /* read-only */

/* for fwd decls to functions/data somewhere else in this module */
/* (identical for the mo') */
#define INTFUN(f)	static F_ f(STG_NO_ARGS)
#define INTDATA(d)	extern W_ d[]
#define INTDATA_RO(d)	extern const W_ d[] /* read-only */

/* short forms of most of the above */

#define FN_(f)		F_ f(STG_NO_ARGS)
#define IFN_(f)		static F_ f(STG_NO_ARGS)
#define EF_(f)		extern F_ f(STG_NO_ARGS)
#define ED_(d)		extern W_ d[]
#define ED_RO_(d)	extern const W_ d[] /* read-only */
#define IF_(f)		static F_ f(STG_NO_ARGS)

/* GCC is uncooperative about the next one: */
/* But, the "extern" prevents initialisation... ADR */
#if defined(__GNUC__)
#define ID_(d)		extern W_ d[]
#define ID_RO_(d)	extern const W_ d[] /* read-only */
#else
#define ID_(d)		static W_ d[]
#define ID_RO_(d)	static const W_ d[] /* read-only */
#endif /* not GCC */
\end{code}

General things; note: general-but-``machine-dependent'' macros are
given in \tr{StgMachDeps.lh}.
\begin{code}
#define STG_MAX(a,b)	(((a)>=(b)) ? (a) : (b))

/*
Macros to combine two short words into a single
word and split such a word back into two.

Dependent on machine word size :-)
*/

#define	COMBINE_WORDS(word,short1,short2)		\
	do {						\
	    ((packed_shorts *)&(word))->wu.s1 = short1;	\
	    ((packed_shorts *)&(word))->wu.s2 = short2;	\
	} while(0)

#define SPLIT_WORD(word,short1,short2)			\
	do {						\
	    short1 = ((packed_shorts *)&(word))->wu.s1;	\
	    short2 = ((packed_shorts *)&(word))->wu.s2;	\
	} while(0)

\end{code}

%************************************************************************
%*									*
\subsection[StgMacros-gen-stg]{General STGish macros}
%*									*
%************************************************************************

Common sizes of vector-return tables.

Claim: don't need fwd decls for return pts in \tr{VECTBL*}, because
the AbsC flattener ensures that things come out sufficiently
``backwards''.

\begin{code}
#ifdef __STG_REV_TBLS__
#define UNVECTBL(staticp,label,a)   /* nothing */
#else
#define UNVECTBL(staticp,label,a) \
EXTFUN(a); \
staticp const W_ label[] = { \
  (W_) a \
};
#endif
\end{code}

\begin{code}
#if defined(USE_SPLIT_MARKERS)
#define __STG_SPLIT_MARKER(n) FN_(CAT2(__stg_split_marker,n)){ }
#else
#define __STG_SPLIT_MARKER(n) /* nothing */
#endif
\end{code}

%************************************************************************
%*									*
\subsection[StgMacros-exceptions]{Exception-checking macros}
%*									*
%************************************************************************

Argument-satisfaction check, stack(s) overflow check, heap overflow
check.

The @SUBTRACT(upper, lower)@ macros return a positive result in words
indicating the amount by which upper is above lower on the stack.

\begin{code}
#define SUBTRACT_A_STK( upper, lower ) AREL( (lower) - (upper) )
#define SUBTRACT_B_STK( upper, lower ) BREL( (lower) - (upper) )
\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-arg-satis]{Argument-satisfaction checks}
%*									*
%************************************************************************

@ARGS_CHK(n)@ sees of there are @n@ words of args on the A/B stack.
If not, it jumps to @UpdatePAP@.

@ARGS_CHK@ args are pre-directionified.
Notice that we do the comparisons in the form (x < a+n), for
some constant n.  This generates more efficient code (with GCC at least)
than (x-a < n).

\begin{code}
#define ARGS_CHK_A(n)						\
	SET_ACTIVITY(ACT_ARGS_CHK); /* SPAT counting */		\
	if (SuA /*SUBTRACT_A_STK( SpA, SuA )*/ < (SpA+(n))) {	\
		JMP_( UpdatePAP );				\
	}							\
	SET_ACTIVITY(ACT_TAILCALL)

#define ARGS_CHK_A_LOAD_NODE(n, closure_addr)			\
	SET_ACTIVITY(ACT_ARGS_CHK); /* SPAT counting */		\
	if (SuA /*SUBTRACT_A_STK( SpA, SuA )*/ < (SpA+(n))) {	\
		Node = (P_) closure_addr;			\
		JMP_( UpdatePAP );				\
	}							\
	SET_ACTIVITY(ACT_TAILCALL)


#define ARGS_CHK_B(n)						\
	SET_ACTIVITY(ACT_ARGS_CHK); /* SPAT counting */		\
	if (SpB /*SUBTRACT_B_STK( SpB, SuB )*/ < (SuB-(n))) {	\
		JMP_( UpdatePAP );				\
	}							\
	SET_ACTIVITY(ACT_TAILCALL)


#define ARGS_CHK_B_LOAD_NODE(n, closure_addr)			\
	SET_ACTIVITY(ACT_ARGS_CHK); /* SPAT counting */		\
	if (SpB /*SUBTRACT_B_STK( SpB, SuB )*/ < (SuB-(n))) {	\
		Node = (P_) closure_addr;			\
		JMP_( UpdatePAP );				\
	}							\
	SET_ACTIVITY(ACT_TAILCALL)

\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-stk-chks]{Stack-overflow check}
%*									*
%************************************************************************

@STK_CHK(a,b)@ [misc args omitted...] checks that we can allocate @a@
words of A stack and @b@ words of B stack.  If not, it calls
@StackOverflow@ (which dies).

(It will be different in the parallel case.)

NB: args @a@ and @b@ are pre-direction-ified!
\begin{code}
extern I_ SqueezeUpdateFrames PROTO((P_, P_, P_));

#if ! defined(CONCURRENT)

extern void StackOverflow(STG_NO_ARGS) STG_NORETURN;

#if STACK_CHECK_BY_PAGE_FAULT

#define STACK_OVERFLOW(liveness,hda,hdb,spa,spb,rtype,reenter)	\
    /* use memory protection instead; still need ticky-ness */

#else

#define STACK_OVERFLOW(liveness,hda,hdb,spa,spb,rtype,reenter)	\
    ULTRASAFESTGCALL0(void,(void *),StackOverflow)

#endif /* not using page-faulting */

#else /* threaded */

extern I_ StackOverflow PROTO((W_, W_));

/*
 * On a uniprocessor, we do *NOT* context switch on a stack overflow 
 * (though we may GC).  Therefore, we never have to reenter node.
 */

#define STACK_OVERFLOW(liveness,hda,hdb,spa,spb,rtype,reenter) \
    DO_STACKOVERFLOW((hda+hdb)<<2|((rtype)<<1)|(reenter),((spa)<<20)|((spb)<<8)|(liveness))

#define STACK_OVERFLOW_HEADROOM(args,y)	    ((args) >> 2)
#define STACK_OVERFLOW_PRIM_RETURN(args,y)  ((args) & 2)
#define STACK_OVERFLOW_REENTER(args,y)	    ((args) & 1)

#define STACK_OVERFLOW_AWORDS(x,args)	    (((args) >> 20) & 0x0fff)
#define STACK_OVERFLOW_BWORDS(x,args)	    (((args) >> 8) & 0x0fff)
#define STACK_OVERFLOW_LIVENESS(x,args)	    ((args) & 0xff)

#endif	/* CONCURRENT */

#define STK_CHK(liveness_mask,a_headroom,b_headroom,spa,spb,ret_type,reenter)\
do {								\
  DO_ASTK_HWM(); /* ticky-ticky profiling */			\
  DO_BSTK_HWM();						\
  /* SET_ACTIVITY(ACT_STK_CHK); /? SPAT counting -- no, using page faulting */	\
  if (STKS_OVERFLOW_OP((a_headroom) + 1, (b_headroom) + 1)) {	\
    STACK_OVERFLOW(liveness_mask,a_headroom,b_headroom,spa,spb,ret_type,reenter);\
  }								\
}while(0)
\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-arity-chks]{Arity checks (for debugging)}
%*									*
%************************************************************************

This is a debugging feature.  Each call to fast-entry-point code sets
@ExpectedArity@ to some value, and the callee then checks that the
value is as expected.

\begin{code}
#if defined(__DO_ARITY_CHKS__)

extern I_			 ExpectedArity;
extern void ArityError PROTO((I_)) STG_NORETURN;

#define SET_ARITY(n) do { ExpectedArity = (n); } while(0)
#define CHK_ARITY(n)			\
	do {				\
	if (ExpectedArity != (n)) {	\
	    ULTRASAFESTGCALL1(void,(void *, I_),ArityError,n);	\
	}}while(0)

#else /* ! __DO_ARITY_CHKS__: normal case */

#define SET_ARITY(n) /* nothing */
#define CHK_ARITY(n) /* nothing */

#endif /* ! __DO_ARITY_CHKS__ */
\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-heap-chks]{Heap-overflow checks}
%*									*
%************************************************************************

Please see the general discussion/commentary about ``what really
happens in a GC,'' in \tr{SMinterface.lh}.

\begin{code}
extern void PerformGC PROTO((W_));
void RealPerformGC PROTO((W_ liveness, W_ reqsize, W_  always_reenter_node, rtsBool do_full_collection));
void checkInCCallGC(STG_NO_ARGS);

#ifndef PAR
void StgPerformGarbageCollection(STG_NO_ARGS);
#endif

#ifndef CONCURRENT

#define OR_MSG_PENDING	/* never */

#define HEAP_OVERFLOW(liveness,n,reenter)	\
    do {					\
    SET_ACTIVITY(ACT_GC); /* SPAT profiling */	\
    DO_GC((((W_)n)<<8)|(liveness));		\
    SET_ACTIVITY(ACT_GC_STOP);			\
    } while (0)

#define REQSIZE_BITMASK	((1L << ((BITS_IN(W_) - 8 + 1))) - 1)
#define HEAP_OVERFLOW_REQSIZE(args)	(((args) >> 8) & REQSIZE_BITMASK)
#define HEAP_OVERFLOW_REENTER(args)	0
#define HEAP_OVERFLOW_LIVENESS(args)	((args) & 0xff)

#else /* CONCURRENT */

extern void ReallyPerformThreadGC PROTO((W_, rtsBool));

#define HEAP_OVERFLOW(liveness,n,reenter) 	\
    do {					\
    SET_ACTIVITY(ACT_GC); /* SPAT profiling */	\
    DO_GC((((W_)(n))<<9)|((reenter)<<8)|(liveness)); \
    SET_ACTIVITY(ACT_GC_STOP);			\
    } while (0)

#define REQSIZE_BITMASK	((1L << ((BITS_IN(W_) - 9 + 1))) - 1)
#define HEAP_OVERFLOW_REQSIZE(args)	(((args) >> 9) & REQSIZE_BITMASK)
#define HEAP_OVERFLOW_REENTER(args)	(((args) >> 8) & 0x1)
#define HEAP_OVERFLOW_LIVENESS(args)	((args) & 0xff)

#ifndef PAR

#define OR_MSG_PENDING	/* never */

#else 

extern int PacketsWaiting;		/*Probes for incoming messages*/
extern int heapChkCounter;		/*Not currently used! We check for messages when*/
					/*a thread is resheduled PWT*/
/* #define OR_MSG_PENDING	|| (--heapChkCounter == 0 && PacketsWaiting())*/
#define OR_MSG_PENDING	/* never */

#endif	/* PAR */
#endif	/* CONCURRENT */

#if 0 /* alpha_TARGET_ARCH */
#define CACHE_LINE  4	/* words */
#define LINES_AHEAD 3
#define PRE_FETCH(n)					\
do {							\
 StgInt j;						\
 j = ((STG_VOLATILE StgInt *) Hp)[LINES_AHEAD * CACHE_LINE];	\
} while(0);
#define EXTRA_HEAP_WORDS (CACHE_LINE * LINES_AHEAD)
#else
#define PRE_FETCH(reg)
#define EXTRA_HEAP_WORDS 0
#endif

#if defined(GRAN)
#define HEAP_CHK(liveness_mask,n,reenter)			\
	do {							\
	/* TICKY_PARANOIA(__FILE__, __LINE__); */		\
	/* THREAD_CONTEXT_SWITCH(liveness_mask,reenter); */	        \
	ALLOC_HEAP(n); /* ticky profiling */			\
        GRAN_ALLOC_HEAP(n,liveness_mask); /* Granularity Simulation */ \
	SET_ACTIVITY(ACT_HEAP_CHK); /* SPAT counting */		\
	if (((Hp = Hp + (n)) > HpLim)) {			\
	    /* Old:  STGCALL3_GC(PerformGC,liveness_mask,n,StgFalse); */\
	    HEAP_OVERFLOW(liveness_mask,n,StgFalse); \
	}							\
	SET_ACTIVITY(ACT_REDN); /* back to normal reduction */	\
	}while(0)

#else

#define HEAP_CHK(liveness_mask,n,reenter)		\
do {							\
  /* TICKY_PARANOIA(__FILE__, __LINE__); */		\
  PRE_FETCH(n);						\
  ALLOC_HEAP(n); /* ticky profiling */			\
  SET_ACTIVITY(ACT_HEAP_CHK); /* SPAT counting */	\
  if (((Hp = Hp + (n)) > HpLim) OR_INTERVAL_EXPIRED OR_CONTEXT_SWITCH OR_MSG_PENDING) {	\
    HEAP_OVERFLOW(liveness_mask,n,reenter);		\
  }							\
} while(0)

#endif  /* GRAN */

#ifdef CONCURRENT

#define HEAP_CHK_AND_RESTORE_N(liveness_mask,n,reenter)	\
do {							\
  /* TICKY_PARANOIA(__FILE__, __LINE__); */		\
  PRE_FETCH(n);						\
  ALLOC_HEAP(n); /* ticky profiling */			\
  SET_ACTIVITY(ACT_HEAP_CHK); /* SPAT counting */	\
  if (((Hp = Hp + (n)) > HpLim) OR_INTERVAL_EXPIRED OR_CONTEXT_SWITCH OR_MSG_PENDING) {	\
    HEAP_OVERFLOW(liveness_mask,n,reenter);		\
    n = TSO_ARG1(CurrentTSO);				\
  }							\
  SET_ACTIVITY(ACT_REDN); /* back to normal reduction */\
} while(0)

#else

#define HEAP_CHK_AND_RESTORE_N(liveness_mask,n,reenter)	    \
    HEAP_CHK(liveness_mask,n,reenter)

#endif

\end{code}


%************************************************************************
%*									*
\subsection[StgMacros-prim-ops]{Primitive operations}
%*									*
%************************************************************************

One thing to be {\em very careful about} with these macros that assign
to results is that the assignment must come {\em last}.	 Some of the
other arguments may be in terms of addressing modes that get clobbered
by the assignment.  (Dirty imperative programming RULES!)

The order here is roughly that in \tr{compiler/prelude/PrimOps.lhs}.

%************************************************************************
%*									*
\subsubsection[StgMacros-compare-primops]{Primitive comparison ops on basic types}
%*									*
%************************************************************************

We cast the chars in case one of them is a literal (so C things work right
even for 8-bit chars).
\begin{code}
#define gtCharZh(r,a,b)	r=(I_)((a)> (b))
#define geCharZh(r,a,b)	r=(I_)((a)>=(b))
#define eqCharZh(r,a,b)	r=(I_)((a)==(b))
#define neCharZh(r,a,b)	r=(I_)((a)!=(b))
#define ltCharZh(r,a,b)	r=(I_)((a)< (b))
#define leCharZh(r,a,b)	r=(I_)((a)<=(b))

#define gtIntZh(r,a,b)	r=(I_)((a) >(b))
#define geIntZh(r,a,b)	r=(I_)((a)>=(b))
#define eqIntZh(r,a,b)	r=(I_)((a)==(b))
#define neIntZh(r,a,b)	r=(I_)((a)!=(b))
#define ltIntZh(r,a,b)	r=(I_)((a) <(b))
#define leIntZh(r,a,b)	r=(I_)((a)<=(b))

#define gtWordZh(r,a,b)	r=(I_)((a) >(b))
#define geWordZh(r,a,b)	r=(I_)((a)>=(b))
#define eqWordZh(r,a,b)	r=(I_)((a)==(b))
#define neWordZh(r,a,b)	r=(I_)((a)!=(b))
#define ltWordZh(r,a,b)	r=(I_)((a) <(b))
#define leWordZh(r,a,b)	r=(I_)((a)<=(b))

#define gtAddrZh(r,a,b)	r=(I_)((a) >(b))
#define geAddrZh(r,a,b)	r=(I_)((a)>=(b))
#define eqAddrZh(r,a,b)	r=(I_)((a)==(b))
#define neAddrZh(r,a,b)	r=(I_)((a)!=(b))
#define ltAddrZh(r,a,b)	r=(I_)((a) <(b))
#define leAddrZh(r,a,b)	r=(I_)((a)<=(b))

#define gtFloatZh(r,a,b)  r=(I_)((a)> (b))
#define geFloatZh(r,a,b)  r=(I_)((a)>=(b))
#define eqFloatZh(r,a,b)  r=(I_)((a)==(b))
#define neFloatZh(r,a,b)  r=(I_)((a)!=(b))
#define ltFloatZh(r,a,b)  r=(I_)((a)< (b))
#define leFloatZh(r,a,b)  r=(I_)((a)<=(b))

#define gtDoubleZh(r,a,b) r=(I_)((a)> (b))
#define geDoubleZh(r,a,b) r=(I_)((a)>=(b))
#define eqDoubleZh(r,a,b) r=(I_)((a)==(b))
#define neDoubleZh(r,a,b) r=(I_)((a)!=(b))
#define ltDoubleZh(r,a,b) r=(I_)((a)< (b))
#define leDoubleZh(r,a,b) r=(I_)((a)<=(b))
\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-char-primops]{Primitive @Char#@ ops (and @LitString#@ish things, too)}
%*									*
%************************************************************************

We cast the chars in case one of them is a literal (so C things work right
even for 8-bit chars).
\begin{code}
#define ordZh(r,a)	r=(I_)((W_) (a))
#define chrZh(r,a)	r=(StgChar)((W_)(a))
\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-int-primops]{Primitive @Int#@ ops}
%*									*
%************************************************************************

\begin{code}
I_ stg_div PROTO((I_ a, I_ b));

#define plusIntZh(r,a,b)	r=(a)+(b)
#define minusIntZh(r,a,b)	r=(a)-(b)
#define timesIntZh(r,a,b)	r=(a)*(b)
#define quotIntZh(r,a,b)	r=(a)/(b)
#define divIntZh(r,a,b)		r=ULTRASAFESTGCALL2(I_,(void *, I_, I_),stg_div,(a),(b))
#define remIntZh(r,a,b)		r=(a)%(b)
#define negateIntZh(r,a)	r=-(a)
\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-word-primops]{Primitive @Word#@ ops}
%*									*
%************************************************************************

\begin{code}
#define andZh(r,a,b)	r=(a)&(b)
#define orZh(r,a,b)	r=(a)|(b)
#define notZh(r,a)	r=~(a)

#define shiftLZh(r,a,b)	  r=(a)<<(b)
#define shiftRAZh(r,a,b)  r=(a)>>(b)
#define shiftRLZh(r,a,b)  r=(a)>>(b)
#define iShiftLZh(r,a,b)  r=(a)<<(b)
#define iShiftRAZh(r,a,b) r=(a)>>(b)
#define iShiftRLZh(r,a,b) r=(a)>>(b)

#define int2WordZh(r,a) r=(W_)(a)
#define word2IntZh(r,a) r=(I_)(a)
\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-addr-primops]{Primitive @Addr#@ ops}
%*									*
%************************************************************************

\begin{code}
#define int2AddrZh(r,a) r=(A_)(a)
#define addr2IntZh(r,a) r=(I_)(a)
\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-float-primops]{Primitive @Float#@ ops}
%*									*
%************************************************************************

\begin{code}
#define plusFloatZh(r,a,b)	r=(a)+(b)
#define minusFloatZh(r,a,b)	r=(a)-(b)
#define timesFloatZh(r,a,b)	r=(a)*(b)
#define divideFloatZh(r,a,b)	r=(a)/(b)
#define negateFloatZh(r,a)	r=-(a)

#define int2FloatZh(r,a)	r=(StgFloat)(a)
#define float2IntZh(r,a)	r=(I_)(a)

#define expFloatZh(r,a)		r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),exp,a)
#define logFloatZh(r,a)		r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),log,a)
#define sqrtFloatZh(r,a)	r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),sqrt,a)
#define sinFloatZh(r,a)		r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),sin,a)
#define cosFloatZh(r,a)		r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),cos,a)
#define tanFloatZh(r,a)		r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),tan,a)
#define asinFloatZh(r,a)	r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),asin,a)
#define acosFloatZh(r,a)	r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),acos,a)
#define atanFloatZh(r,a)	r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),atan,a)
#define sinhFloatZh(r,a)	r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),sinh,a)
#define coshFloatZh(r,a)	r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),cosh,a)
#define tanhFloatZh(r,a)	r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),tanh,a)
#define powerFloatZh(r,a,b)	r=(StgFloat) SAFESTGCALL2(StgDouble,(void *, StgDouble,StgDouble),pow,a,b)

/* encoding/decoding given w/ Integer stuff */
\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-double-primops]{Primitive @Double#@ ops}
%*									*
%************************************************************************

\begin{code}
#define plusDoubleZh(r,a,b)	r=(a)+(b)
#define minusDoubleZh(r,a,b)	r=(a)-(b)
#define timesDoubleZh(r,a,b)	r=(a)*(b)
#define divideDoubleZh(r,a,b)	r=(a)/(b)
#define negateDoubleZh(r,a)	r=-(a)

#define int2DoubleZh(r,a)	r=(StgDouble)(a)
#define double2IntZh(r,a)	r=(I_)(a)

#define float2DoubleZh(r,a)	r=(StgDouble)(a)
#define double2FloatZh(r,a)	r=(StgFloat)(a)

#define expDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),exp,a)
#define logDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),log,a)
#define sqrtDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),sqrt,a)
#define sinDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),sin,a)
#define cosDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),cos,a)
#define tanDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),tan,a)
#define asinDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),asin,a)
#define acosDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),acos,a)
#define atanDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),atan,a)
#define sinhDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),sinh,a)
#define coshDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),cosh,a)
#define tanhDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),tanh,a)
#define powerDoubleZh(r,a,b)	r=(StgDouble) SAFESTGCALL2(StgDouble,(void *, StgDouble,StgDouble),pow,a,b)
\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-integer-primops]{Primitive @Integer@-related ops (GMP stuff)}
%*									*
%************************************************************************

Dirty macros we use for the real business.

INVARIANT: When one of these macros is called, the only live data is
tidily on the STG stacks or in the STG registers (the code generator
ensures this).	If there are any pointer-arguments, they will be in
the first \tr{Ret*} registers (e.g., \tr{da} arg of \tr{gmpTake1Return1}).

OK, here are the real macros:
\begin{code}
#define gmpTake1Return1(size_chk_macro, liveness, mpz_op, ar,sr,dr, aa,sa,da)	\
{ MP_INT arg;									\
  MP_INT result;								\
  I_ space = size_chk_macro(sa);						\
										\
  /* Check that there will be enough heap & make Hp visible to GMP allocator */	\
  GMP_HEAP_LOOKAHEAD(liveness,space);						\
										\
  /* Now we can initialise (post possible GC) */				\
  arg.alloc	= (aa);								\
  arg.size	= (sa);								\
  arg.d		= (unsigned long int *) (BYTE_ARR_CTS(da));			\
										\
  SAFESTGCALL1(void,(void *, MP_INT *),mpz_init,&result);			\
										\
  /* Perform the operation */							\
  SAFESTGCALL2(void,(void *, MP_INT *, MP_INT *),mpz_op,&result,&arg);		\
										\
  GMP_HEAP_HANDBACK();		/* restore Hp */				\
  (ar) = result.alloc;								\
  (sr) = result.size;								\
  (dr) = (B_) (result.d - DATA_HS);						\
  /* pt to *beginning* of object (GMP has been monkeying around in the middle) */ \
}


#define gmpTake2Return1(size_chk_macro, liveness, mpz_op, ar,sr,dr, a1,s1,d1, a2,s2,d2)\
{ MP_INT arg1;									\
  MP_INT arg2;									\
  MP_INT result;								\
  I_ space = size_chk_macro(s1,s2);						\
										\
  /* Check that there will be enough heap & make Hp visible to GMP allocator */	\
  GMP_HEAP_LOOKAHEAD(liveness,space);						\
										\
  /* Now we can initialise (post possible GC) */				\
  arg1.alloc	= (a1);								\
  arg1.size	= (s1);								\
  arg1.d	= (unsigned long int *) (BYTE_ARR_CTS(d1));			\
  arg2.alloc	= (a2);								\
  arg2.size	= (s2);								\
  arg2.d	= (unsigned long int *) (BYTE_ARR_CTS(d2));			\
										\
  SAFESTGCALL1(void,(void *, MP_INT *),mpz_init,&result);			\
										\
  /* Perform the operation */							\
  SAFESTGCALL3(void,(void *, MP_INT *, MP_INT *, MP_INT *),mpz_op,&result,&arg1,&arg2); \
										\
  GMP_HEAP_HANDBACK();		/* restore Hp */				\
  (ar) = result.alloc;								\
  (sr) = result.size;								\
  (dr) = (B_) (result.d - DATA_HS);						\
  /* pt to *beginning* of object (GMP has been monkeying around in the middle) */ \
}

#define gmpTake2Return2(size_chk_macro, liveness, mpz_op, ar1,sr1,dr1, ar2,sr2,dr2, a1,s1,d1, a2,s2,d2) \
{ MP_INT arg1;									\
  MP_INT arg2;									\
  MP_INT result1;								\
  MP_INT result2;								\
  I_ space = size_chk_macro(s1,s2);						\
										\
  /* Check that there will be enough heap & make Hp visible to GMP allocator */	\
  GMP_HEAP_LOOKAHEAD(liveness,space);						\
										\
  /* Now we can initialise (post possible GC) */				\
  arg1.alloc	= (a1);								\
  arg1.size	= (s1);								\
  arg1.d	= (unsigned long int *) (BYTE_ARR_CTS(d1));			\
  arg2.alloc	= (a2);								\
  arg2.size	= (s2);								\
  arg2.d	= (unsigned long int *) (BYTE_ARR_CTS(d2));			\
										\
  SAFESTGCALL1(void,(void *, MP_INT *),mpz_init,&result1);			\
  SAFESTGCALL1(void,(void *, MP_INT *),mpz_init,&result2);			\
										\
  /* Perform the operation */							\
  SAFESTGCALL4(void,(void *, MP_INT *, MP_INT *, MP_INT *, MP_INT *),mpz_op,&result1,&result2,&arg1,&arg2); \
										\
  GMP_HEAP_HANDBACK();		/* restore Hp */				\
  (ar1) = result1.alloc;							\
  (sr1) = result1.size;								\
  (dr1) = (B_) (result1.d - DATA_HS);						\
  (ar2) = result2.alloc;							\
  (sr2) = result2.size;								\
  (dr2) = (B_) (result2.d - DATA_HS);						\
}
\end{code}

Some handy size-munging macros: sometimes gratuitously {\em conservative}.
The \tr{+16} is to allow for the initial allocation of \tr{MP_INT} results.
The \tr{__abs} stuff is because negative-ness of GMP things is encoded
in their ``size''...
\begin{code}
#define __abs(a)		(( (a) >= 0 ) ? (a) : (-(a)))
#define GMP_SIZE_ONE()		(2 + DATA_HS + 16)
#define GMP_SAME_SIZE(a)	(__abs(a) + DATA_HS + 16)
#define GMP_MAX_SIZE(a,b)	((__abs(a) > __abs(b) ? __abs(a) : __abs(b)) + 1 + DATA_HS + 16)
				/* NB: the +1 is for the carry (or whatever) */
#define GMP_2MAX_SIZE(a,b)	(2 * GMP_MAX_SIZE(a,b))
#define GMP_ADD_SIZES(a,b)	(__abs(a) + __abs(b) + 1 + DATA_HS + 16)
				/* the +1 may just be paranoia */
\end{code}

For the Integer/GMP stuff, we have macros that {\em look ahead} for
some space, but don't actually grab it.

If there are live pointers at the time of the lookahead, the caller
must make sure they are in \tr{Ret1}, \tr{Ret2}, ..., so they can be
handled normally.  We achieve this by having the code generator {\em
always} pass args to may-invoke-GC primitives in registers, using the
normal pointers-first policy.  This means that, if we do go to garbage
collection, everything is already in the Right Place.

Saving and restoring Hp register so the MP allocator can see them. If we are
performing liftime profiling need to save and restore HpLim as well so that
it can be bumped if allocation occurs.

The second argument to @GMP_HEAP_LOOKAHEAD@ must be an lvalue so that
it can be restored from @TSO_ARG1@ after a failed @HEAP_CHK@ in
threaded land.

\begin{code}
#define GMP_HEAP_LOOKAHEAD(liveness,n)			\
	do {						\
	HEAP_CHK_AND_RESTORE_N(liveness,n,0);		\
	Hp = Hp - (n);					\
	UN_ALLOC_HEAP(n);	/* Undo ticky-ticky */	\
	SAVE_Hp = Hp;		/* Hand over the hp */	\
	DEBUG_SetGMPAllocBudget(n)			\
	OptSaveHpLimRegister()				\
	}while(0)

#define GMP_HEAP_HANDBACK()				\
	Hp = SAVE_Hp;					\
	DEBUG_ResetGMPAllocBudget()			\
	OptRestoreHpLimRegister()
\end{code}

\begin{code}
void *stgAllocForGMP   PROTO((size_t size_in_bytes));
void *stgReallocForGMP PROTO((void *ptr, size_t old_size, size_t new_size));
void stgDeallocForGMP  PROTO((void *ptr, size_t size));

#ifdef ALLOC_DEBUG
extern StgInt DEBUG_GMPAllocBudget;
#define DEBUG_SetGMPAllocBudget(n)  DEBUG_GMPAllocBudget = (n);
#define DEBUG_ResetGMPAllocBudget() DEBUG_GMPAllocBudget = 0;
#else
#define DEBUG_SetGMPAllocBudget(n)  /*nothing*/
#define DEBUG_ResetGMPAllocBudget() /*nothing*/
#endif
\end{code}

\begin{code}
#if defined (LIFE_PROFILE)

#define OptSaveHpLimRegister() \
	SAVE_HpLim = HpLim
#define OptRestoreHpLimRegister() \
	HpLim = SAVE_HpLim

#else  /* ! LIFE_PROFILE */

#define OptSaveHpLimRegister()	    /* nothing */
#define OptRestoreHpLimRegister()   /* nothing */

#endif /* ! LIFE_PROFILE */
\end{code}

The real business (defining Integer primops):
\begin{code}
#define negateIntegerZh(ar,sr,dr, liveness, aa,sa,da) \
	gmpTake1Return1(GMP_SAME_SIZE, liveness, mpz_neg, ar,sr,dr, aa,sa,da)

#define plusIntegerZh(ar,sr,dr, liveness, a1,s1,d1, a2,s2,d2) \
	gmpTake2Return1(GMP_MAX_SIZE, liveness, mpz_add, ar,sr,dr, a1,s1,d1, a2,s2,d2)
#define minusIntegerZh(ar,sr,dr, liveness, a1,s1,d1, a2,s2,d2) \
	gmpTake2Return1(GMP_MAX_SIZE, liveness, mpz_sub, ar,sr,dr, a1,s1,d1, a2,s2,d2)
#define timesIntegerZh(ar,sr,dr, liveness, a1,s1,d1, a2,s2,d2) \
	gmpTake2Return1(GMP_ADD_SIZES, liveness, mpz_mul, ar,sr,dr, a1,s1,d1, a2,s2,d2)

/* div, mod, quot, rem are defined w/ quotRem & divMod */

#define quotRemIntegerZh(ar1,sr1,dr1, ar2,sr2,dr2, liveness, a1,s1,d1, a2,s2,d2) \
	gmpTake2Return2(GMP_2MAX_SIZE, liveness, mpz_divmod, ar1,sr1,dr1, ar2,sr2,dr2, a1,s1,d1, a2,s2,d2)
#define divModIntegerZh(ar1,sr1,dr1, ar2,sr2,dr2, liveness,  a1,s1,d1, a2,s2,d2) \
	gmpTake2Return2(GMP_2MAX_SIZE, liveness, mpz_mdivmod, ar1,sr1,dr1, ar2,sr2,dr2, a1,s1,d1, a2,s2,d2)
\end{code}

Comparison ops (@<@, @>=@, etc.) are defined in terms of the cmp
fellow (returns -ve, 0, or +ve).
\begin{code}
#define cmpIntegerZh(r, hp, a1,s1,d1, a2,s2,d2) /* calls mpz_cmp */		\
{ MP_INT arg1;									\
  MP_INT arg2;									\
  /* Does not allocate memory */						\
										\
  arg1.alloc	= (a1);								\
  arg1.size	= (s1);								\
  arg1.d	= (unsigned long int *) (BYTE_ARR_CTS(d1));			\
  arg2.alloc	= (a2);								\
  arg2.size	= (s2);								\
  arg2.d	= (unsigned long int *) (BYTE_ARR_CTS(d2));			\
										\
  (r) = SAFESTGCALL2(I_,(void *, MP_INT *, MP_INT *),mpz_cmp,&arg1,&arg2);	\
}
\end{code}

Coercions:

\begin{code}
#define integer2IntZh(r, hp, aa,sa,da)						\
{ MP_INT arg;									\
  /* Does not allocate memory */						\
										\
  arg.alloc	= (aa);								\
  arg.size	= (sa);								\
  arg.d		= (unsigned long int *) (BYTE_ARR_CTS(da));			\
										\
  (r) = SAFESTGCALL1(I_,(void *, MP_INT *),mpz_get_si,&arg);			\
}

/* Since we're forced to know a little bit about MP_INT layout to do this with
   pre-allocated heap, we just inline the whole of mpz_init_set_si here.
	** DIRE WARNING.  if mpz_init_set_si changes, so does this! ***
*/

#define int2IntegerZh(ar,sr,dr, hp, i)						\
{ StgInt val; /* to snaffle arg to avoid aliasing */				\
										\
  val = (i);  /* snaffle... */							\
										\
  SET_DATA_HDR((hp),ArrayOfData_info,CCC,DATA_VHS+MIN_MP_INT_SIZE,0);		\
										\
  if	  ((val) < 0) { (sr) = -1; (hp)[DATA_HS] = -(val); }			\
  else if ((val) > 0) { (sr) =	1; (hp)[DATA_HS] =  (val); }			\
  else /* val==0 */   { (sr) =	0; }						\
  (ar) = 1;									\
  (dr) = (B_)(hp);		/* dr is an StgByteArray */			\
}

#define word2IntegerZh(ar,sr,dr, hp, i)						\
{ StgWord val; /* to snaffle arg to avoid aliasing */				\
										\
  val = (i);  /* snaffle... */							\
										\
  SET_DATA_HDR((hp),ArrayOfData_info,CCC,DATA_VHS+MIN_MP_INT_SIZE,0);		\
										\
  if ((val) != 0)     { (sr) =	1; (hp)[DATA_HS] =  (val); }			\
  else /* val==0 */   { (sr) =	0; }						\
  (ar) = 1;									\
  (dr) = (B_)(hp);		/* dr is an StgByteArray */			\
}

\end{code}

Then there are a few oddments to make life easier:
\begin{code}
/*
   DIRE WARNING.
   The "str" argument must be a literal C string.

	addr2Integer( ..., "foo")   OK!

	x = "foo";
	addr2Integer( ..., x)	    NO! NO!
*/

#define addr2IntegerZh(ar,sr,dr, liveness, str)					\
{ MP_INT result;								\
  /* taking the number of bytes/8 as the number of words of lookahead		\
     is plenty conservative */							\
  I_ space = GMP_SAME_SIZE(sizeof(str) / 8 + 1);				\
										\
  GMP_HEAP_LOOKAHEAD(liveness, space);						\
										\
  /* Perform the operation */							\
  if (SAFESTGCALL3(I_,(void *, MP_INT *, char *, int), mpz_init_set_str,&result,(str),/*base*/10)) \
      abort();									\
										\
  GMP_HEAP_HANDBACK();		/* restore Hp */				\
  (ar) = result.alloc;								\
  (sr) = result.size;								\
  (dr) = (B_) (result.d - DATA_HS);						\
  /* pt to *beginning* of object (GMP has been monkeying around in the middle) */ \
}
\end{code}

Encoding and decoding float-ish things is pretty Integer-ish.  We use
these pretty magical support functions, essentially stolen from Lennart:
\begin{code}
StgFloat  __encodeFloat	 PROTO((MP_INT *, I_));
void  __decodeFloat  PROTO((MP_INT * /*result1*/,
				I_ * /*result2*/,
				StgFloat));

StgDouble __encodeDouble PROTO((MP_INT *, I_));
void  __decodeDouble PROTO((MP_INT * /*result1*/,
				I_ * /*result2*/,
				StgDouble));
\end{code}

Some floating-point format info, made with the \tr{enquire} program
(version~4.3) [comes with gcc].
\begin{code}
/* this should be done by CPU architecture, insofar as possible [WDP] */

#if sparc_TARGET_ARCH	\
 || alpha_TARGET_ARCH	\
 || hppa1_1_TARGET_ARCH	\
 || i386_TARGET_ARCH	\
 || i486_TARGET_ARCH	\
 || m68k_TARGET_ARCH	\
 || mipsel_TARGET_ARCH	\
 || mipseb_TARGET_ARCH	\
 || rs6000_TARGET_ARCH

/* yes, it is IEEE floating point */
#include "ieee-flpt.h"

#if alpha_dec_osf1_TARGET	\
 || i386_TARGET_ARCH		\
 || i486_TARGET_ARCH		\
 || mipsel_TARGET_ARCH

#undef BIGENDIAN /* little-endian weirdos... */
#else
#define BIGENDIAN 1
#endif

#else /* unknown floating-point format */

******* ERROR *********** Any ideas about floating-point format?

#endif /* unknown floating-point */
\end{code}

\begin{code}
#if alpha_dec_osf1_TARGET
#define encodeFloatZh(r, hp, aa,sa,da, expon)	encodeDoubleZh(r, hp, aa,sa,da, expon)
#else
#define encodeFloatZh(r, hp, aa,sa,da, expon)	\
{ MP_INT arg;					\
  /* Does not allocate memory */		\
						\
  arg.alloc	= aa;				\
  arg.size	= sa;				\
  arg.d		= (unsigned long int *) (BYTE_ARR_CTS(da)); \
						\
  r = SAFESTGCALL2(StgFloat,(void *, MP_INT *, I_), __encodeFloat,&arg,(expon));	\
}
#endif /* ! alpha */

#define encodeDoubleZh(r, hp, aa,sa,da, expon)	\
{ MP_INT arg;					\
  /* Does not allocate memory */		\
						\
  arg.alloc	= aa;				\
  arg.size	= sa;				\
  arg.d		= (unsigned long int *) (BYTE_ARR_CTS(da)); \
						\
  r = SAFESTGCALL2(StgDouble,(void *, MP_INT *, I_), __encodeDouble,&arg,(expon));\
}

#if alpha_dec_osf1_TARGET
#define decodeFloatZh(exponr, ar,sr,dr, hp, f)	decodeDoubleZh(exponr, ar,sr,dr, hp, f)
#else
#define decodeFloatZh(exponr, ar,sr,dr, hp, f)				\
{ MP_INT mantissa;							\
  I_ exponent;								\
  StgFloat arg = (f);							\
									\
  /* Be prepared to tell Lennart-coded __decodeFloat	*/		\
  /* where mantissa.d can be put (it does not care about the rest) */	\
  SET_DATA_HDR(hp,ArrayOfData_info,CCC,DATA_VHS+MIN_MP_INT_SIZE,0);	\
  mantissa.d = (hp) + DATA_HS;						\
									\
  /* Perform the operation */						\
  SAFESTGCALL3(void,(void *, MP_INT *, I_ *, StgFloat),__decodeFloat,&mantissa,&exponent,arg);		\
  exponr= exponent;							\
  ar	= mantissa.alloc;						\
  sr	= mantissa.size;						\
  dr	= (B_)(hp);							\
}
#endif /* !alpha */

#define decodeDoubleZh(exponr, ar,sr,dr, hp, f)				\
{ MP_INT mantissa;							\
  I_ exponent;								\
  StgDouble arg = (f);							\
									\
  /* Be prepared to tell Lennart-coded __decodeDouble	*/		\
  /* where mantissa.d can be put (it does not care about the rest) */	\
  SET_DATA_HDR(hp,ArrayOfData_info,CCC,DATA_VHS+MIN_MP_INT_SIZE,0);	\
  mantissa.d = (hp) + DATA_HS;						\
									\
  /* Perform the operation */						\
  SAFESTGCALL3(void,(void *, MP_INT *, I_ *, StgDouble),__decodeDouble,&mantissa,&exponent,arg);		\
  exponr= exponent;							\
  ar	= mantissa.alloc;						\
  sr	= mantissa.size;						\
  dr	= (B_)(hp);							\
}
\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-mv-floats]{Moving floats and doubles around (e.g., to/from stacks)}
%*									*
%************************************************************************

With GCC, we use magic non-standard inlining; for other compilers, we
just use functions (see also \tr{runtime/prims/PrimArith.lc}).

(The @OMIT_...@ is only used in compiling some of the RTS, none of
which uses these anyway.)

\begin{code}
#if alpha_TARGET_ARCH	\
 || i386_TARGET_ARCH	\
 || i486_TARGET_ARCH	\
 || m68k_TARGET_ARCH

#define ASSIGN_FLT(dst, src) *(StgFloat *)(dst) = (src);
#define PK_FLT(src) (*(StgFloat *)(src))

#define ASSIGN_DBL(dst, src) *(StgDouble *)(dst) = (src);
#define PK_DBL(src) (*(StgDouble *)(src))

#else	/* not m68k || alpha || i[34]86 */

/* Special handling for machines with troublesome alignment constraints */

#define FLOAT_ALIGNMENT_TROUBLES    TRUE

#if ! defined(__GNUC__) || ! defined(__STG_GCC_REGS__)

extern void	    ASSIGN_DBL PROTO((W_ [], StgDouble));
extern StgDouble    PK_DBL     PROTO((W_ []));
extern void	    ASSIGN_FLT PROTO((W_ [], StgFloat));
extern StgFloat	    PK_FLT     PROTO((W_ []));

#else /* yes, its __GNUC__ && we really want them */

#if sparc_TARGET_ARCH

#define ASSIGN_FLT(dst, src) *(StgFloat *)(dst) = (src);
#define PK_FLT(src) (*(StgFloat *)(src))

#define ASSIGN_DBL(dst,src) \
      __asm__("st %2,%0\n\tst %R2,%1" : "=m" (((P_)(dst))[0]), \
	"=m" (((P_)(dst))[1]) : "f" (src));

#define PK_DBL(src) \
    ( { register double d; \
      __asm__("ld %1,%0\n\tld %2,%R0" : "=f" (d) : \
	"m" (((P_)(src))[0]), "m" (((P_)(src))[1])); d; \
    } )

#else /* ! sparc */

extern STG_INLINE
void
ASSIGN_DBL(W_ p_dest[], StgDouble src)
{
    double_thing y;
    y.d = src;
    p_dest[0] = y.du.dhi;
    p_dest[1] = y.du.dlo;
}

/* GCC also works with this version, but it generates
   the same code as the previous one, and is not ANSI

#define ASSIGN_DBL( p_dest, src ) \
	*p_dest = ((double_thing) src).du.dhi; \
	*(p_dest+1) = ((double_thing) src).du.dlo \
*/

extern STG_INLINE
StgDouble
PK_DBL(W_ p_src[])
{
    double_thing y;
    y.du.dhi = p_src[0];
    y.du.dlo = p_src[1];
    return(y.d);
}

extern STG_INLINE
void
ASSIGN_FLT(W_ p_dest[], StgFloat src)
{
    float_thing y;
    y.f = src;
    *p_dest = y.fu;
}

extern STG_INLINE
StgFloat
PK_FLT(W_ p_src[])
{
    float_thing y;
    y.fu = *p_src;
    return(y.f);
}

#endif /* ! sparc */

#endif /* __GNUC__ */

#endif /* not __m68k__ */
\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-array-primops]{Primitive arrays}
%*									*
%************************************************************************

We regularly use this macro to fish the ``contents'' part
out of a DATA or TUPLE closure, which is what is used for
non-ptr and ptr arrays (respectively).

BYTE_ARR_CTS returns a @C_ *@!

We {\em ASSUME} we can use the same macro for both!!
\begin{code}

#ifdef DEBUG
#define BYTE_ARR_CTS(a)					\
 ({ ASSERT(INFO_PTR(a) == (W_) ArrayOfData_info);	\
    ((C_ *) (((StgPtr) (a))+DATA_HS)); })
#define PTRS_ARR_CTS(a)					\
 ({ ASSERT((INFO_PTR(a) == (W_) ArrayOfPtrs_info)	\
	|| (INFO_PTR(a) == (W_) ImMutArrayOfPtrs_info));\
    ((a)+MUTUPLE_HS);} )
#else
#define BYTE_ARR_CTS(a)		((char *) (((StgPtr) (a))+DATA_HS))
#define PTRS_ARR_CTS(a)		((a)+MUTUPLE_HS)
#endif

/* sigh */
extern I_ genSymZh(STG_NO_ARGS);
extern I_ resetGenSymZh(STG_NO_ARGS);
extern I_ incSeqWorldZh(STG_NO_ARGS);

/* sigh again: without these some (notably "float") willnae work */
extern I_ long2bytes__	 PROTO((long,	unsigned char *));
extern I_ int2bytes__	 PROTO((int,	unsigned char *));
extern I_ short2bytes__	 PROTO((short,	unsigned char *));
extern I_ float2bytes__	 PROTO((float,	unsigned char *));
extern I_ double2bytes__ PROTO((double, unsigned char *));

/* these may not be necessary; and they create warnings (WDP) */
extern I_ bytes2long__	 PROTO((P_, I_ *));
extern I_ bytes2int__	 PROTO((P_, I_ *));
extern I_ bytes2short__	 PROTO((P_, I_ *));
extern I_ bytes2float__	 PROTO((P_, StgFloat *));
extern I_ bytes2double__ PROTO((P_, StgDouble *));

extern I_ byteArrayHasNUL__ PROTO((const char *, I_));
\end{code}

OK, the easy ops first: (all except \tr{newArr*}:

VERY IMPORTANT!	 The read/write/index primitive ops
on @ByteArray#@s index the array using a {\em BYTE} offset, even
if the thing begin gotten out is a multi-byte @Int#@, @Float#@ etc.
This is because you might be trying to take apart a C struct, where
the offset from the start of the struct isn't a multiple of the
size of the thing you're getting.  Hence the @(char *)@ casts.

In the case of messing with @StgAddrs@ (@A_@), which are really \tr{void *},
we cast to @P_@, because you can't index off an uncast \tr{void *}.

In the case of @Array#@ (which contain pointers), the offset is in units
of one ptr (not bytes).

\begin{code}
#define sameMutableArrayZh(r,a,b)	r=(I_)((a)==(b))
#define sameMutableByteArrayZh(r,a,b)	r=(I_)((B_)(a)==(B_)(b))

#define readArrayZh(r,a,i)	 r=((PP_) PTRS_ARR_CTS(a))[(i)]

#define readCharArrayZh(r,a,i)	 indexCharOffAddrZh(r,BYTE_ARR_CTS(a),i)
#define readIntArrayZh(r,a,i)	 indexIntOffAddrZh(r,BYTE_ARR_CTS(a),i)
#define readAddrArrayZh(r,a,i)	 indexAddrOffAddrZh(r,BYTE_ARR_CTS(a),i)
#define readFloatArrayZh(r,a,i)	 indexFloatOffAddrZh(r,BYTE_ARR_CTS(a),i)
#define readDoubleArrayZh(r,a,i) indexDoubleOffAddrZh(r,BYTE_ARR_CTS(a),i)

/* result ("r") arg ignored in write macros! */
#define writeArrayZh(a,i,v)	((PP_) PTRS_ARR_CTS(a))[(i)]=(v)

#define writeCharArrayZh(a,i,v)	  ((C_ *)(BYTE_ARR_CTS(a)))[i] = (v)
#define writeIntArrayZh(a,i,v)	  ((I_ *)(BYTE_ARR_CTS(a)))[i] = (v)
#define writeAddrArrayZh(a,i,v)	  ((PP_)(BYTE_ARR_CTS(a)))[i] = (v)
#define writeFloatArrayZh(a,i,v)  \
	ASSIGN_FLT((P_) (((StgFloat *)(BYTE_ARR_CTS(a))) + i),v)
#define writeDoubleArrayZh(a,i,v) \
	ASSIGN_DBL((P_) (((StgDouble *)(BYTE_ARR_CTS(a))) + i),v)

#define indexArrayZh(r,a,i)	  r=((PP_) PTRS_ARR_CTS(a))[(i)]

#define indexCharArrayZh(r,a,i)	  indexCharOffAddrZh(r,BYTE_ARR_CTS(a),i)
#define indexIntArrayZh(r,a,i)	  indexIntOffAddrZh(r,BYTE_ARR_CTS(a),i)
#define indexAddrArrayZh(r,a,i)	  indexAddrOffAddrZh(r,BYTE_ARR_CTS(a),i)
#define indexFloatArrayZh(r,a,i)  indexFloatOffAddrZh(r,BYTE_ARR_CTS(a),i)
#define indexDoubleArrayZh(r,a,i) indexDoubleOffAddrZh(r,BYTE_ARR_CTS(a),i)

#define indexCharOffAddrZh(r,a,i)   r= ((C_ *)(a))[i]
#define indexIntOffAddrZh(r,a,i)    r= ((I_ *)(a))[i]
#define indexAddrOffAddrZh(r,a,i)   r= ((PP_)(a))[i]
#define indexFloatOffAddrZh(r,a,i)  r= PK_FLT((P_) (((StgFloat *)(a)) + i))
#define indexDoubleOffAddrZh(r,a,i) r= PK_DBL((P_) (((StgDouble *)(a)) + i))

/* Freezing arrays-of-ptrs requires changing an info table, for the
   benefit of the generational collector.  It needs to scavenge mutable
   objects, even if they are in old space.  When they become immutable,
   they can be removed from this scavenge list.	 */
#define unsafeFreezeArrayZh(r,a)				\
	do {						\
	P_ result;					\
	result=(P_) (a);				\
	FREEZE_MUT_HDR(result,ImMutArrayOfPtrs_info);	\
	r = result;					\
	}while(0)

#define unsafeFreezeByteArrayZh(r,a)	r=(B_)(a)
\end{code}

Now the \tr{newArr*} ops:

\begin{code}
/*
--------------------
Will: ToDo: we need to find suitable places to put this comment, and the
"in-general" one which follows.

************ Nota Bene.	 The "n" in this macro is guaranteed to
be a register, *not* (say) Node[1].  That means that it is guaranteed
to survive GC, provided only that the register is kept unaltered.
This is important, because "n" is used after the HEAP_CHK.

In general, *all* parameters to these primitive-op macros are always
registers.  (Will: For exactly *which* primitive-op macros is this guaranteed?
Exactly those which can trigger GC?)
------------------------

NOTE: the above may now be OLD (WDP 94/02/10)
*/
\end{code}

For char arrays, the size is in {\em BYTES}.

\begin{code}
#define newCharArrayZh(r,liveness,n)	newByteArray(r,liveness,(n) * sizeof(C_))
#define newIntArrayZh(r,liveness,n)	newByteArray(r,liveness,(n) * sizeof(I_))
#define newAddrArrayZh(r,liveness,n)	newByteArray(r,liveness,(n) * sizeof(P_))
#define newFloatArrayZh(r,liveness,n)	newByteArray(r,liveness,(n) * sizeof(StgFloat))
#define newDoubleArrayZh(r,liveness,n)	newByteArray(r,liveness,(n) * sizeof(StgDouble))

#define newByteArray(r,liveness,n)				\
{								\
  P_ result;							\
  I_ size;							\
								\
  HEAP_CHK(liveness,DATA_HS+BYTES_TO_STGWORDS(n),0);		\
  size = BYTES_TO_STGWORDS(n);					\
  ALLOC_PRIM(DATA_HS,size,0,DATA_HS+size) /* ticky ticky */;	\
  CC_ALLOC(CCC,DATA_HS+size,ARR_K);				\
								\
  result = Hp-(DATA_HS+size)+1;					\
  SET_DATA_HDR(result,ArrayOfData_info,CCC,DATA_VHS+size,0);	\
  r = (B_) result;						\
}
\end{code}

Arrays of pointers need to be initialised; uses \tr{TUPLES}!
The initialisation value is guaranteed to be in a register,
and will be indicated by the liveness mask, so it's ok to do
a \tr{HEAP_CHK}, which may trigger GC.

\begin{code}
/* The new array initialization routine for the NCG */
void newArrZh_init PROTO((P_ result, I_ n, P_ init));

#define newArrayZh(r,liveness,n,init)			\
{							\
  P_ p;							\
  P_ result;						\
							\
  HEAP_CHK(liveness, MUTUPLE_HS+(n),0);			\
  ALLOC_PRIM(MUTUPLE_HS,(n),0,MUTUPLE_HS+(n)) /* ticky ticky */; \
  CC_ALLOC(CCC,MUTUPLE_HS+(n),ARR_K); /* cc prof */	\
							\
  result = Hp + 1 - (MUTUPLE_HS+(n));			\
  SET_MUTUPLE_HDR(result,ArrayOfPtrs_info,CCC,MUTUPLE_VHS+(n),0) \
  for (p = result+MUTUPLE_HS; p < (result+MUTUPLE_HS+(n)); p++) { \
	*p = (W_) (init);				\
  }							\
  SET_ACTIVITY(ACT_REDN); /* back to normal reduction */\
							\
  r = result;						\
}
\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-SynchVar-primops]{Synchronizing Variables PrimOps}
%*									*
%************************************************************************

\begin{code}
ED_(Nil_closure);

#define newSynchVarZh(r, hp) 				\
{							\
  ALLOC_PRIM(MUTUPLE_HS,3,0,MUTUPLE_HS+3) /* ticky ticky */; \
  CC_ALLOC(CCC,MUTUPLE_HS+3,ARR_K); /* cc prof */	\
  SET_SVAR_HDR(hp,EmptySVar_info,CCC);			\
  SVAR_HEAD(hp) = SVAR_TAIL(hp) = SVAR_VALUE(hp) = Nil_closure;	\
  r = hp;						\
}
\end{code}

\begin{code}
#ifdef CONCURRENT

extern void Yield PROTO((W_));

#define takeMVarZh(r, liveness, node)    		\
{							\
  while (INFO_PTR(node) != (W_) FullSVar_info) {	\
    if (SVAR_HEAD(node) == Nil_closure) 	    	\
      SVAR_HEAD(node) = CurrentTSO; 	    	    	\
    else    	    	    	    	    	    	\
      TSO_LINK(SVAR_TAIL(node)) = CurrentTSO;	    	\
    TSO_LINK(CurrentTSO) = (P_) Nil_closure;	    	\
    SVAR_TAIL(node) = CurrentTSO;   	    	    	\
    DO_YIELD(liveness << 1);				\
  } 	    	    	    	    	    	    	\
  SET_INFO_PTR(node, EmptySVar_info);			\
  r = SVAR_VALUE(node);	    	    	    	    	\
  SVAR_VALUE(node) = Nil_closure;   	    	    	\
}

#else

#define takeMVarZh(r, liveness, node)			\
{							\
  if (INFO_PTR(node) != (W_) FullSVar_info) {		\
    /* Don't wrap the calls; we're done with STG land */\
    fflush(stdout); 	    	    	    	    	\
    fprintf(stderr, "takeMVar#: MVar is empty.\n");    	\
    EXIT(EXIT_FAILURE);	    	    	    	    	\
  } 	    	    	    	    	    	    	\
  SET_INFO_PTR(node, EmptySVar_info);			\
  r = SVAR_VALUE(node);	    	    	    	    	\
  SVAR_VALUE(node) = Nil_closure;   	    	    	\
}

#endif
\end{code}

\begin{code}
#ifdef CONCURRENT

#ifdef GRAN

/* Only difference to the !GRAN def: RunnableThreadsHd has been replaced by */
/* ThreadQueueHd i.e. the tso is added at the end of the thread queue on */
/* the CurrentProc. This means we have an implicit context switch after */
/* putMVar even if unfair scheduling is used in GranSim (default)!  -- HWL */

#define putMVarZh(node, value)	    			\
{							\
  P_ tso;   	    	    	    	    	    	\
  if (INFO_PTR(node) == (W_) FullSVar_info) {		\
    /* Don't wrap the calls; we're done with STG land */\
    fflush(stdout); 	    	    	    	    	\
    fprintf(stderr, "putMVar#: MVar already full.\n");	\
    EXIT(EXIT_FAILURE);	    	    	    	    	\
  } 	    	    	    	    	    	    	\
  SET_INFO_PTR(node, FullSVar_info);			\
  SVAR_VALUE(node) = value; 	    	    	    	\
  tso = SVAR_HEAD(node);    	    	    	    	\
  if (tso != (P_) Nil_closure) {    	    	    	\
    if (DO_QP_PROF) 	    	    	    	    	\
      STGCALL3(void,(void *, char *, P_, P_),QP_Event2,do_qp_prof > 1 ? "RA" : "RG",tso,CurrentTSO);	\
    if (ThreadQueueHd == Nil_closure)   	    	\
      ThreadQueueHd = tso;    	    	    	\
    else    	    	    	    	    	    	\
      TSO_LINK(ThreadQueueTl) = tso;  	    	\
    ThreadQueueTl = tso;	    	    	    	\
    SVAR_HEAD(node) = TSO_LINK(tso);  	    	    	\
    TSO_LINK(tso) = (P_) Nil_closure; 	    	    	\
    if(SVAR_HEAD(node) == (P_) Nil_closure)   	    	\
      SVAR_TAIL(node) = (P_) Nil_closure;	    	\
  } 	    	    	    	    	    	    	\
}

#else /* !GRAN */

#define putMVarZh(node, value)	    			\
{							\
  P_ tso;   	    	    	    	    	    	\
  if (INFO_PTR(node) == (W_) FullSVar_info) {		\
    /* Don't wrap the calls; we're done with STG land */\
    fflush(stdout); 	    	    	    	    	\
    fprintf(stderr, "putMVar#: MVar already full.\n");	\
    EXIT(EXIT_FAILURE);	    	    	    	    	\
  } 	    	    	    	    	    	    	\
  SET_INFO_PTR(node, FullSVar_info);			\
  SVAR_VALUE(node) = value; 	    	    	    	\
  tso = SVAR_HEAD(node);    	    	    	    	\
  if (tso != (P_) Nil_closure) {    	    	    	\
    if (DO_QP_PROF) 	    	    	    	    	\
      STGCALL3(void,(void *, char *, P_, P_),QP_Event2,do_qp_prof > 1 ? "RA" : "RG",tso,CurrentTSO);	\
    if (RunnableThreadsHd == Nil_closure)   	    	\
      RunnableThreadsHd = tso;    	    	    	\
    else    	    	    	    	    	    	\
      TSO_LINK(RunnableThreadsTl) = tso;  	    	\
    RunnableThreadsTl = tso;	    	    	    	\
    SVAR_HEAD(node) = TSO_LINK(tso);  	    	    	\
    TSO_LINK(tso) = (P_) Nil_closure; 	    	    	\
    if(SVAR_HEAD(node) == (P_) Nil_closure)   	    	\
      SVAR_TAIL(node) = (P_) Nil_closure;	    	\
  } 	    	    	    	    	    	    	\
}

#endif  /* GRAN */

#else

#define putMVarZh(node, value)    			\
{							\
  P_ tso;   	    	    	    	    	    	\
  if (INFO_PTR(node) == (W_) FullSVar_info) {		\
    /* Don't wrap the calls; we're done with STG land */\
    fflush(stdout); 	    	    	    	    	\
    fprintf(stderr, "putMVar#: MVar already full.\n");	\
    EXIT(EXIT_FAILURE);	    	    	    	    	\
  } 	    	    	    	    	    	    	\
  SET_INFO_PTR(node, FullSVar_info);			\
  SVAR_VALUE(node) = value; 	    	    	    	\
}

#endif
\end{code}

\begin{code}
#ifdef CONCURRENT

#define readIVarZh(r, liveness, node)			\
{							\
  if (INFO_PTR(node) != (W_) ImMutArrayOfPtrs_info) {	\
    if (SVAR_HEAD(node) == Nil_closure) 	    	\
      SVAR_HEAD(node) = CurrentTSO; 	    	    	\
    else    	    	    	    	    	    	\
      TSO_LINK(SVAR_TAIL(node)) = CurrentTSO;	    	\
    TSO_LINK(CurrentTSO) = (P_) Nil_closure;	    	\
    SVAR_TAIL(node) = CurrentTSO;   	    	    	\
    DO_YIELD(liveness << 1);				\
  } 	    	    	    	    	    	    	\
  r = SVAR_VALUE(node);	    	    	    	    	\
}

#else

#define readIVarZh(r, liveness, node)			\
{							\
  if (INFO_PTR(node) != (W_) ImMutArrayOfPtrs_info) {	\
    /* Don't wrap the calls; we're done with STG land */\
    fflush(stdout); 	    	    	    	    	\
    fprintf(stderr, "readIVar#: IVar is empty.\n");    	\
    EXIT(EXIT_FAILURE);	    	    	    	    	\
  } 	    	    	    	    	    	    	\
  r = SVAR_VALUE(node);	    	    	    	    	\
}

#endif
\end{code}

\begin{code}
#ifdef CONCURRENT

#ifdef GRAN

/* Only difference to the !GRAN def: RunnableThreadsHd has been replaced by */
/* ThreadQueueHd i.e. the tso is added at the end of the thread queue on */
/* the CurrentProc. This means we have an implicit context switch after */
/* writeIVar even if unfair scheduling is used in GranSim (default)!  -- HWL */

#define writeIVarZh(node, value)    			\
{							\
  P_ tso;   	    	    	    	    	    	\
  if (INFO_PTR(node) == (W_) ImMutArrayOfPtrs_info) {	\
    /* Don't wrap the calls; we're done with STG land */\
    fflush(stdout); 	    	    	    	    	\
    fprintf(stderr, "writeIVar#: IVar already full.\n");\
    EXIT(EXIT_FAILURE);	    	    	    	    	\
  } 	    	    	    	    	    	    	\
  tso = SVAR_HEAD(node);    	    	    	    	\
  if (tso != (P_) Nil_closure) {    	    	    	\
    if (ThreadQueueHd == Nil_closure)   	    	\
      ThreadQueueHd = tso;    	    	    	\
    else    	    	    	    	    	    	\
      TSO_LINK(ThreadQueueTl) = tso;  	    	\
    while(TSO_LINK(tso) != Nil_closure) {   	    	\
      if (DO_QP_PROF)	    	    	    	    	\
        STGCALL3(void,(void *, char *, P_, P_),QP_Event2,do_qp_prof > 1 ? "RA" : "RG",tso,CurrentTSO);	\
      tso = TSO_LINK(tso);  	    	    	    	\
    }	    	    	    	    	    	    	\
    if (DO_QP_PROF) 	    	    	    	    	\
      STGCALL3(void,(void *, char *, P_, P_),QP_Event2,do_qp_prof > 1 ? "RA" : "RG",tso,CurrentTSO);	\
    ThreadQueueTl = tso;	    	    	    	\
  } 	    	    	    	    	    	    	\
  /* Don't use freeze, since it's conditional on GC */	\
  SET_INFO_PTR(node, ImMutArrayOfPtrs_info);		\
  MUTUPLE_CLOSURE_SIZE(node) = (MUTUPLE_VHS+1);    	\
  SVAR_VALUE(node) = value; 	    	    	    	\
}

#else /* !GRAN */

#define writeIVarZh(node, value)    			\
{							\
  P_ tso;   	    	    	    	    	    	\
  if (INFO_PTR(node) == (W_) ImMutArrayOfPtrs_info) {	\
    /* Don't wrap the calls; we're done with STG land */\
    fflush(stdout); 	    	    	    	    	\
    fprintf(stderr, "writeIVar#: IVar already full.\n");\
    EXIT(EXIT_FAILURE);	    	    	    	    	\
  } 	    	    	    	    	    	    	\
  tso = SVAR_HEAD(node);    	    	    	    	\
  if (tso != (P_) Nil_closure) {    	    	    	\
    if (RunnableThreadsHd == Nil_closure)   	    	\
      RunnableThreadsHd = tso;    	    	    	\
    else    	    	    	    	    	    	\
      TSO_LINK(RunnableThreadsTl) = tso;  	    	\
    while(TSO_LINK(tso) != Nil_closure) {   	    	\
      if (DO_QP_PROF)	    	    	    	    	\
        STGCALL3(void,(void *, char *, P_, P_),QP_Event2,do_qp_prof > 1 ? "RA" : "RG",tso,CurrentTSO);	\
      tso = TSO_LINK(tso);  	    	    	    	\
    }	    	    	    	    	    	    	\
    if (DO_QP_PROF) 	    	    	    	    	\
      STGCALL3(void,(void *, char *, P_, P_),QP_Event2,do_qp_prof > 1 ? "RA" : "RG",tso,CurrentTSO);	\
    RunnableThreadsTl = tso;	    	    	    	\
  } 	    	    	    	    	    	    	\
  /* Don't use freeze, since it's conditional on GC */	\
  SET_INFO_PTR(node, ImMutArrayOfPtrs_info);		\
  MUTUPLE_CLOSURE_SIZE(node) = (MUTUPLE_VHS+1);    	\
  SVAR_VALUE(node) = value; 	    	    	    	\
}

#endif  /* GRAN */

#else

#define writeIVarZh(node, value)    			\
{							\
  P_ tso;   	    	    	    	    	    	\
  if (INFO_PTR(node) == (W_) ImMutArrayOfPtrs_info) {	\
    /* Don't wrap the calls; we're done with STG land */\
    fflush(stdout); 	    	    	    	    	\
    fprintf(stderr, "writeIVar#: IVar already full.\n");\
    EXIT(EXIT_FAILURE);	    	    	    	    	\
  } 	    	    	    	    	    	    	\
  /* Don't use freeze, since it's conditional on GC */	\
  SET_INFO_PTR(node, ImMutArrayOfPtrs_info);		\
  MUTUPLE_CLOSURE_SIZE(node) = (MUTUPLE_VHS+1);    	\
  SVAR_VALUE(node) = value; 	    	    	    	\
}

#endif
\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-Wait-primops]{Delay/Wait PrimOps}
%*									*
%************************************************************************

\begin{code}
#ifdef CONCURRENT

/* ToDo: for GRAN */

#define delayZh(liveness, us)				\
  {							\
    if (WaitingThreadsTl == Nil_closure)		\
      WaitingThreadsHd = CurrentTSO;			\
    else						\
      TSO_LINK(WaitingThreadsTl) = CurrentTSO;		\
    WaitingThreadsTl = CurrentTSO;			\
    TSO_LINK(CurrentTSO) = Nil_closure;			\
    TSO_EVENT(CurrentTSO) = (W_) ((us) < 1 ? 1 : (us)); \
    DO_YIELD(liveness << 1);				\
  }

#else

#define delayZh(liveness, us)				\
  {							\
    fflush(stdout); 	    	    	    	    	\
    fprintf(stderr, "delay#: unthreaded build.\n");    	\
    EXIT(EXIT_FAILURE);	    	    	    	    	\
  }

#endif

#ifdef CONCURRENT

/* ToDo: something for GRAN */

#define waitZh(liveness, fd)				\
  {							\
    if (WaitingThreadsTl == Nil_closure)		\
      WaitingThreadsHd = CurrentTSO;			\
    else						\
      TSO_LINK(WaitingThreadsTl) = CurrentTSO;		\
    WaitingThreadsTl = CurrentTSO;			\
    TSO_LINK(CurrentTSO) = Nil_closure;			\
    TSO_EVENT(CurrentTSO) = (W_) (-(fd));		\
    DO_YIELD(liveness << 1);				\
  }

#else

#define waitZh(liveness, fd)				\
  {							\
    fflush(stdout); 	    	    	    	    	\
    fprintf(stderr, "wait#: unthreaded build.\n");    	\
    EXIT(EXIT_FAILURE);	    	    	    	    	\
  }

#endif

\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-IO-primops]{Primitive I/O, error-handling primops}
%*									*
%************************************************************************

\begin{code}
extern P_ TopClosure;
EXTFUN(ErrorIO_innards);
EXTFUN(__std_entry_error__);

#define errorIOZh(a)		\
    do { TopClosure=(a);	\
	 (void) SAFESTGCALL1(I_,(void *, FILE *),fflush,stdout); \
	 (void) SAFESTGCALL1(I_,(void *, FILE *),fflush,stderr); \
	 JMP_(ErrorIO_innards); \
    } while(0)

#if !defined(CALLER_SAVES_SYSTEM)
/* can use the macros */
#define stg_getc(stream)	getc((FILE *) (stream))
#define stg_putc(c,stream)	putc((c),((FILE *) (stream)))
#else
/* must not use the macros (they contain embedded calls to _filbuf/whatnot) */
#define stg_getc(stream)	SAFESTGCALL1(I_,(void *, FILE *),fgetc,(FILE *) (stream))
#define stg_putc(c,stream)	SAFESTGCALL2(I_,(void *, char, FILE *),fputc,(c),((FILE *) (stream)))
#endif

int initialize_virtual_timer(int us);
int install_segv_handler(STG_NO_ARGS);
int install_vtalrm_handler(STG_NO_ARGS);
void initUserSignals(STG_NO_ARGS);
void blockUserSignals(STG_NO_ARGS);
void unblockUserSignals(STG_NO_ARGS);
IF_RTS(void blockVtAlrmSignal(STG_NO_ARGS);)
IF_RTS(void unblockVtAlrmSignal(STG_NO_ARGS);)

#ifdef _POSIX_SOURCE
extern I_ sig_install PROTO((I_, I_, sigset_t *));
#define stg_sig_ignore(s,m)	SAFESTGCALL3(I_,(void *, I_, I_),sig_install,s,STG_SIG_IGN,(sigset_t *)m)
#define stg_sig_default(s,m)	SAFESTGCALL3(I_,(void *, I_, I_),sig_install,s,STG_SIG_DFL,(sigset_t *)m)
#define stg_sig_catch(s,sp,m)	SAFESTGCALL3(I_,(void *, I_, I_),sig_install,s,sp,(sigset_t *)m)
#else
extern I_ sig_install PROTO((I_, I_));
#define stg_sig_ignore(s,m)	SAFESTGCALL2(I_,(void *, I_, I_),sig_install,s,STG_SIG_IGN)
#define stg_sig_default(s,m)	SAFESTGCALL2(I_,(void *, I_, I_),sig_install,s,STG_SIG_DFL)
#define stg_sig_catch(s,sp,m)	SAFESTGCALL2(I_,(void *, I_, I_),sig_install,s,sp)
#endif

#define STG_SIG_DFL	(-1)
#define STG_SIG_IGN	(-2)
#define STG_SIG_ERR	(-3)

StgInt getErrorHandler(STG_NO_ARGS);
#ifndef PAR
void raiseError PROTO((StgStablePtr handler));
StgInt catchError PROTO((StgStablePtr newErrorHandler));
#endif
void decrementErrorCount(STG_NO_ARGS);

#define stg_catchError(sp)      SAFESTGCALL1(I_,(void *, StgStablePtr),catchError,sp)
#define stg_decrementErrorCount() SAFESTGCALL0(void,(void *),decrementErrorCount)
\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-stable-ptr]{Primitive ops for manipulating stable pointers}
%*									*
%************************************************************************


The type of these should be:

\begin{verbatim}
makeStablePointer#  :: a -> State# _RealWorld -> StateAndStablePtr# _RealWorld a
deRefStablePointer# :: StablePtr# a -> State# _RealWorld -> StateAndPtr _RealWorld a
\end{verbatim}

Since world-tokens are no longer explicitly passed around, the
implementations have a few less arguments/results.

The simpler one is @deRefStablePointer#@ (which is only a primop
because it is more polymorphic than is allowed of a ccall).

\begin{code}
#ifdef PAR

#define deRefStablePtrZh(ri,sp)					    \
do {								    \
    fflush(stdout);						    \
    fprintf(stderr, "deRefStablePtr#: no stable pointer support.\n");\
    EXIT(EXIT_FAILURE);						    \
} while(0)

#else /* !PAR */

extern StgPtr _deRefStablePointer PROTO((StgInt, StgPtr));

#define deRefStablePtrZh(ri,sp) \
   ri = SAFESTGCALL2(I_,(void *, I_, P_),_deRefStablePointer,sp,StorageMgrInfo.StablePointerTable);

\end{code}

Declarations for other stable pointer operations.

\begin{code}
void	freeStablePointer	PROTO((I_ stablePtr));

void	enterStablePtr		PROTO((StgStablePtr, StgFunPtr));
void	performIO		PROTO((StgStablePtr));
I_	enterInt		PROTO((StgStablePtr));
I_	enterFloat		PROTO((StgStablePtr));
P_	deRefStablePointer	PROTO((StgStablePtr));
IF_RTS(I_ catchSoftHeapOverflow	PROTO((StgStablePtr, I_));)
IF_RTS(I_ getSoftHeapOverflowHandler(STG_NO_ARGS);)
IF_RTS(extern StgStablePtr softHeapOverflowHandler;)
IF_RTS(void shutdownHaskell(STG_NO_ARGS);)
IF_RTS(extern I_ noBlackHoles;)
IF_RTS(extern I_ SM_word_stk_size;)

EXTFUN(stopPerformIODirectReturn);
EXTFUN(startPerformIO);
EXTFUN(stopEnterIntDirectReturn);
EXTFUN(startEnterInt);
EXTFUN(stopEnterFloatDirectReturn);
EXTFUN(startEnterFloat);

void enterStablePtr PROTO((StgStablePtr stableIndex, StgFunPtr startCode));

#endif /* !PAR */

IF_RTS(extern I_ ErrorIO_call_count;)
\end{code}

Somewhat harder is @makeStablePointer#@ --- it is usually simple but
if we're unlucky, it will have to allocate a new table and copy the
old bit over.  Since we might, very occasionally, have to call the
garbage collector, this has to be a macro... sigh!

NB @newSP@ is required because it is entirely possible that
@stablePtr@ and @unstablePtr@ are aliases and so we can't do the
assignment to @stablePtr@ until we've finished with @unstablePtr@.

Another obscure piece of coding is the recalculation of the size of
the table.  We do this just in case Jim's threads decide they want to
context switch---in which case any stack-allocated variables may get
trashed.  (If only there was a special heap check which didn't
consider context switching...)

\begin{code}
#ifndef PAR

/* Calculate SP Table size from number of pointers */
#define SPTSizeFromNoPtrs( newNP ) (DYN_VHS + 1 + 2 * (newNP))

/* Calculate number of pointers in new table from number in old table:
   any strictly increasing expression will do here */
#define CalcNewNoSPtrs( i ) ((i)*2 + 100)

extern void enlargeSPTable PROTO((P_, P_));

#define makeStablePtrZh(stablePtr,liveness,unstablePtr)		    \
do {								    \
  EXTDATA_RO(StablePointerTable_info);				    \
  EXTDATA(UnusedSP);						    \
  StgStablePtr newSP;						    \
								    \
  if (SPT_EMPTY(StorageMgrInfo.StablePointerTable)) { /* free stack is empty */ \
    { /* Variables used before the heap check */		    \
      I_ OldNoPtrs = SPT_NoPTRS( StorageMgrInfo.StablePointerTable ); \
      I_ NewNoPtrs = CalcNewNoSPtrs( OldNoPtrs );		    \
      I_ NewSize = SPTSizeFromNoPtrs( NewNoPtrs );		    \
      HEAP_CHK(liveness, _FHS+NewSize, 0);			    \
    }								    \
    { /* Variables used after the heap check - same values */	    \
      I_ OldNoPtrs = SPT_NoPTRS( StorageMgrInfo.StablePointerTable ); \
      I_ NewNoPtrs = CalcNewNoSPtrs( OldNoPtrs );		    \
      I_ NewSize = SPTSizeFromNoPtrs( NewNoPtrs );		    \
      P_ SPTable = Hp + 1 - (_FHS + NewSize);			    \
								    \
      CC_ALLOC(CCC, _FHS+NewSize, SPT_K); /* cc prof */		    \
      SET_DYN_HDR(SPTable,StablePointerTable_info,CCC,NewSize,NewNoPtrs);\
      SAFESTGCALL2(void, (void *, P_, P_), enlargeSPTable, SPTable, StorageMgrInfo.StablePointerTable);	     \
      StorageMgrInfo.StablePointerTable = SPTable;		    \
    }								    \
  }								    \
								    \
  newSP = SPT_POP(StorageMgrInfo.StablePointerTable);		    \
  SPT_SPTR(StorageMgrInfo.StablePointerTable, newSP) = unstablePtr; \
  stablePtr = newSP;						    \
} while (0)

#else

#define makeStablePtrZh(stablePtr,liveness,unstablePtr)		    \
do {								    \
    fflush(stdout);						    \
    fprintf(stderr, "makeStablePtr#: no stable pointer support.\n");\
    EXIT(EXIT_FAILURE);						    \
} while(0)

#endif /* !PAR */
\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-unsafePointerEquality]{Primitive `op' for breaking referential transparency}
%*									*
%************************************************************************

The type of this is @reallyUnsafePtrEquality :: a -> a -> Int#@ so we
can expect three parameters: the two arguments and a "register" to put
the result into.

Message to Will: This primop breaks referential transparency so badly
you might want to leave it out.	 On the other hand, if you hide it
away in an appropriate monad, it's perfectly safe. [ADR]

Note that this primop is non-deterministic: different results can be
obtained depending on just what the garbage collector (and code
optimiser??) has done.	However, we can guarantee that if two objects
are pointer-equal, they have the same denotation --- the converse most
certainly doesn't hold.

ToDo ADR: The degree of non-determinism could be greatly reduced by
following indirections.

\begin{code}
#define reallyUnsafePtrEqualityZh(r,a,b) r=((StgPtr)(a) == (StgPtr)(b))
\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-parallel-primop]{Primitive `op' for sparking (etc)}
%*									*
%************************************************************************

Assuming local sparking in some form, we can now inline the spark request.

We build a doubly-linked list in the heap, so that we can handle FIFO
or LIFO scheduling as we please.

Anything with tag >= 0 is in WHNF, so we discard it.

\begin{code}
#ifdef CONCURRENT

ED_(Nil_closure);
ED_(True_closure);

#if defined(GRAN)
#define parZh(r,hp,node,rest)				\
	PARZh(r,hp,node,rest,0,0)

#define parAtZh(r,hp,node,where,identifier,rest)	\
	parATZh(r,hp,node,where,identifier,rest,1)

#define parAtForNowZh(r,hp,node,where,identifier,rest)	\
	parATZh(r,hp,node,where,identifier,rest,0)

#define parATZh(r,hp,node,where,identifier,rest,local)	\
{							\
  sparkq result;						\
  if (SHOULD_SPARK(node)) {				\
    result = NewSpark((P_)node,identifier,local);	\
    SAFESTGCALL3(void,(W_),GranSimSparkAt,result,where,identifier);	\
  } else if (do_qp_prof) {				\
    I_ tid = threadId++;				\
    SAFESTGCALL2(void,(I_, P_),QP_Event0,tid,node);	\
  }							\
  r = (rest);						\
}

#define parLocalZh(r,hp,node,identifier,rest)		\
	PARZh(r,hp,node,rest,identifier,1)

#define parGlobalZh(r,hp,node,identifier,rest)		\
	PARZh(r,hp,node,rest,identifier,0)

#define PARZh(r,hp,node,rest,identifier,local)		\
{							\
  sparkq result;						\
  if (SHOULD_SPARK(node)) {				\
    result = NewSpark((P_)node,identifier,local);	\
    ADD_TO_SPARK_QUEUE(result);				\
    SAFESTGCALL2(void,(W_),GranSimSpark,local,(P_)node);	\
    /* context_switch = 1;  not needed any more -- HWL */					\
  } else if (do_qp_prof) {				\
    I_ tid = threadId++;				\
    SAFESTGCALL2(void,(I_, P_),QP_Event0,tid,node);	\
  }							\
  r = (rest);						\
}

#else  /* !GRAN */

extern I_ required_thread_count;

#ifdef PAR
#define COUNT_SPARK     TSO_GLOBALSPARKS(CurrentTSO)++
#else
#define COUNT_SPARK
#endif

/* 
   Note that we must bump the required thread count NOW, rather
   than when the thread is actually created.  
 */

#define forkZh(r,liveness,node)				\
{							\
  while (PendingSparksTl[REQUIRED_POOL] == PendingSparksLim[REQUIRED_POOL]) \
    DO_YIELD((liveness << 1) | 1);			\
  COUNT_SPARK;						\
  if (SHOULD_SPARK(node)) {				\
    *PendingSparksTl[REQUIRED_POOL]++ = (P_)(node);	\
  } else if (DO_QP_PROF) {				\
    I_ tid = threadId++;				\
    SAFESTGCALL2(void,(I_, P_),QP_Event0,tid,node);	\
  }							\
  required_thread_count++;				\
  context_switch = 1;					\
  r = 1; /* Should not be necessary */			\
}

#define parZh(r,node)					\
{							\
  COUNT_SPARK;						\
  if (SHOULD_SPARK(node) &&				\
   PendingSparksTl[ADVISORY_POOL] < PendingSparksLim[ADVISORY_POOL]) {	\
    *PendingSparksTl[ADVISORY_POOL]++ = (P_)(node);	\
  } else if (DO_QP_PROF) {				\
    I_ tid = threadId++;				\
    SAFESTGCALL2(void,(I_, P_),QP_Event0,tid,node);	\
  }							\
  r = 1; /* Should not be necessary */			\
}

\end{code}

The following seq# code should only be used in unoptimized code.
Be warned: it's a potential bug-farm.

First we push two words on the B stack: the current value of RetReg 
(which may or may not be live), and a continuation snatched largely out
of thin air (it's a point within this code block).  Then we set RetReg
to the special polymorphic return code for seq, load up Node with the
closure to be evaluated, and we're off.  When the eval returns to the
polymorphic seq return point, the two words are popped off the B stack,
RetReg is restored, and we jump to the continuation, completing the
primop and going on our merry way.

\begin{code}

ED_RO_(vtbl_seq);

#define seqZh(r,liveness,node)		    \
  ({					    \
    __label__ cont;			    \
    STK_CHK(liveness,0,2,0,0,0,0);	    \
    SpB -= BREL(2);			    \
    SpB[BREL(0)] = (W_) RetReg;		    \
    SpB[BREL(1)] = (W_) &&cont;		    \
    RetReg = (StgRetAddr) vtbl_seq;	    \
    Node = node;			    \
    ENT_VIA_NODE();			    \
    InfoPtr = (D_)(INFO_PTR(Node));	    \
    JMP_(ENTRY_CODE(InfoPtr));		    \
    cont:				    \
    r = 1; /* Should be unnecessary */	    \
  })

#endif  /* GRAN */ 
#endif	/* CONCURRENT */
\end{code}

%************************************************************************
%*									*
\subsubsection[StgMacros-malloc-ptrs]{Malloc Pointers}
%*									*
%************************************************************************

This macro is used to construct a MallocPtr on the heap after a ccall.
Since MallocPtr's are like arrays in many ways, this is heavily based
on the stuff for arrays above.

What this does is plug the pointer (which will be in a local
variable), into a fresh heap object and then sets a result (which will
be a register) to point to the fresh heap object.

Question: what's this "SET_ACTIVITY" stuff - should I be doing this
too?  (It's if you want to use the SPAT profiling tools to
characterize program behavior by ``activity'' -- tail-calling,
heap-checking, etc. -- see RednCounts.lh.  It is quite specialized.
WDP 95/1)

\begin{code}
#ifndef PAR

StgInt eqMallocPtr PROTO((StgMallocPtr p1, StgMallocPtr p2));
void FreeMallocPtr PROTO((StgMallocPtr mp));

#define constructMallocPtr(liveness, r, mptr)		\
do {							\
  P_ result;						\
							\
  HEAP_CHK(liveness, _FHS + MallocPtr_SIZE,0);		\
  CC_ALLOC(CCC,_FHS + MallocPtr_SIZE,MallocPtr_K); /* cc prof */   \
								   \
  result = Hp + 1 - (_FHS + MallocPtr_SIZE);			   \
  SET_MallocPtr_HDR(result,MallocPtr_info,CCC,_FHS + MallocPtr_SIZE,0); \
  MallocPtr_CLOSURE_DATA(result) = mptr;                	   \
  MallocPtr_CLOSURE_LINK(result) = StorageMgrInfo.MallocPtrList;   \
  StorageMgrInfo.MallocPtrList = result;			   \
							\
/*							\
  printf("DEBUG: MallocPtr(0x%x) = <0x%x, 0x%x, 0x%x, 0x%x>\n",	\
      result,						\
      result[0],result[1],				\
      result[2],result[3]);				\
*/							\
  CHECK_MallocPtr_CLOSURE( result );			\
  VALIDATE_MallocPtrList( StorageMgrInfo.MallocPtrList ); \
							\
  (r) = (P_) result;					\
} while (0)

#else
#define constructMallocPtr(liveness, r, mptr)			    \
do {								    \
    fflush(stdout);						    \
    fprintf(stderr, "constructMallocPtr: no malloc pointer support.\n");\
    EXIT(EXIT_FAILURE);						    \
} while(0)

#endif /* !PAR */
\end{code}


End-of-file's multi-slurp protection:
\begin{code}
#endif /* ! STGMACROS_H */
\end{code}