1 /* -----------------------------------------------------------------------------
2 * $Id: StgPrimFloat.c,v 1.7 2001/07/23 23:29:47 ken Exp $
4 * (c) The GHC Team, 1998-2000
6 * Miscellaneous support for floating-point primitives
8 * ---------------------------------------------------------------------------*/
13 * Encoding and decoding Doubles. Code based on the HBC code
18 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_INT
20 #ifdef _LONG_LONG_LIMB
21 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG_LONG
23 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG
27 #if SIZEOF_LIMB_T == 4
28 #define GMP_BASE 4294967296.0
29 #elif SIZEOF_LIMB_T == 8
30 #define GMP_BASE 18446744073709551616.0
32 #error Cannot cope with SIZEOF_LIMB_T -- please add definition of GMP_BASE
35 #define DNBIGIT ((SIZEOF_DOUBLE+SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
36 #define FNBIGIT ((SIZEOF_FLOAT +SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
38 #if IEEE_FLOATING_POINT
39 #define MY_DMINEXP ((DBL_MIN_EXP) - (DBL_MANT_DIG) - 1)
40 /* DMINEXP is defined in values.h on Linux (for example) */
41 #define DHIGHBIT 0x00100000
42 #define DMSBIT 0x80000000
44 #define MY_FMINEXP ((FLT_MIN_EXP) - (FLT_MANT_DIG) - 1)
45 #define FHIGHBIT 0x00800000
46 #define FMSBIT 0x80000000
49 #ifdef WORDS_BIGENDIAN
57 #define __abs(a) (( (a) >= 0 ) ? (a) : (-(a)))
60 __encodeDouble (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
63 const mp_limb_t *const arr = (const mp_limb_t *)ba;
66 /* Convert MP_INT to a double; knows a lot about internal rep! */
67 for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
68 r = (r * GMP_BASE) + arr[i];
70 /* Now raise to the exponent */
71 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
74 /* sign is encoded in the size */
81 /* Special version for small Integers */
83 __int_encodeDouble (I_ j, I_ e)
87 r = (StgDouble)__abs(j);
89 /* Now raise to the exponent */
90 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
93 /* sign is encoded in the size */
101 __encodeFloat (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
104 const mp_limb_t *arr = (const mp_limb_t *)ba;
107 /* Convert MP_INT to a float; knows a lot about internal rep! */
108 for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
109 r = (r * GMP_BASE) + arr[i];
111 /* Now raise to the exponent */
112 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
115 /* sign is encoded in the size */
122 /* Special version for small Integers */
124 __int_encodeFloat (I_ j, I_ e)
128 r = (StgFloat)__abs(j);
130 /* Now raise to the exponent */
131 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
134 /* sign is encoded in the size */
141 /* This only supports IEEE floating point */
144 __decodeDouble (MP_INT *man, I_ *exp, StgDouble dbl)
146 /* Do some bit fiddling on IEEE */
147 unsigned int low, high; /* assuming 32 bit ints */
149 union { double d; unsigned int i[2]; } u; /* assuming 32 bit ints, 64 bit double */
151 ASSERT(sizeof(unsigned int ) == 4 );
152 ASSERT(sizeof(dbl ) == SIZEOF_DOUBLE);
153 ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
154 ASSERT(DNBIGIT*SIZEOF_LIMB_T >= SIZEOF_DOUBLE);
156 u.d = dbl; /* grab chunks of the double */
160 /* we know the MP_INT* passed in has size zero, so we realloc
163 man->_mp_alloc = DNBIGIT;
165 if (low == 0 && (high & ~DMSBIT) == 0) {
169 man->_mp_size = DNBIGIT;
170 iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
174 if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
178 /* A denorm, normalize the mantissa */
179 while (! (high & DHIGHBIT)) {
189 man->_mp_d[0] = (mp_limb_t)low;
190 man->_mp_d[1] = (mp_limb_t)high;
193 man->_mp_d[0] = ((mp_limb_t)high) << 32 | (mp_limb_t)low;
195 #error Cannot cope with DNBIGIT
199 man->_mp_size = -man->_mp_size;
204 __decodeFloat (MP_INT *man, I_ *exp, StgFloat flt)
206 /* Do some bit fiddling on IEEE */
207 int high, sign; /* assuming 32 bit ints */
208 union { float f; int i; } u; /* assuming 32 bit float and int */
210 ASSERT(sizeof(int ) == 4 );
211 ASSERT(sizeof(flt ) == SIZEOF_FLOAT );
212 ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
213 ASSERT(FNBIGIT*SIZEOF_LIMB_T >= SIZEOF_FLOAT );
215 u.f = flt; /* grab the float */
218 /* we know the MP_INT* passed in has size zero, so we realloc
221 man->_mp_alloc = FNBIGIT;
223 if ((high & ~FMSBIT) == 0) {
227 man->_mp_size = FNBIGIT;
228 *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
232 if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
236 /* A denorm, normalize the mantissa */
237 while (! (high & FHIGHBIT)) {
243 man->_mp_d[0] = (mp_limb_t)high;
245 #error Cannot cope with FNBIGIT
248 man->_mp_size = -man->_mp_size;
252 /* Convenient union types for checking the layout of IEEE 754 types -
253 based on defs in GNU libc <ieee754.h>
256 union stg_ieee754_flt
262 unsigned int negative:1;
263 unsigned int exponent:8;
264 unsigned int mantissa:23;
266 unsigned int mantissa:23;
267 unsigned int exponent:8;
268 unsigned int negative:1;
274 unsigned int negative:1;
275 unsigned int exponent:8;
276 unsigned int quiet_nan:1;
277 unsigned int mantissa:22;
279 unsigned int mantissa:22;
280 unsigned int quiet_nan:1;
281 unsigned int exponent:8;
282 unsigned int negative:1;
289 To recap, here's the representation of a double precision
290 IEEE floating point number:
292 sign 63 sign bit (0==positive, 1==negative)
293 exponent 62-52 exponent (biased by 1023)
294 fraction 51-0 fraction (bits to right of binary point)
297 union stg_ieee754_dbl
303 unsigned int negative:1;
304 unsigned int exponent:11;
305 unsigned int mantissa0:20;
306 unsigned int mantissa1:32;
308 unsigned int mantissa1:32;
309 unsigned int mantissa0:20;
310 unsigned int exponent:11;
311 unsigned int negative:1;
314 /* This format makes it easier to see if a NaN is a signalling NaN. */
318 unsigned int negative:1;
319 unsigned int exponent:11;
320 unsigned int quiet_nan:1;
321 unsigned int mantissa0:19;
322 unsigned int mantissa1:32;
324 unsigned int mantissa1:32;
325 unsigned int mantissa0:19;
326 unsigned int quiet_nan:1;
327 unsigned int exponent:11;
328 unsigned int negative:1;
334 * Predicates for testing for extreme IEEE fp values. Used
335 * by the bytecode evaluator and the Prelude.
339 /* In case you don't suppport IEEE, you'll just get dummy defs.. */
340 #ifdef IEEE_FLOATING_POINT
343 isDoubleNaN(StgDouble d)
345 union stg_ieee754_dbl u;
350 u.ieee.exponent == 2047 /* 2^11 - 1 */ && /* Is the exponent all ones? */
351 (u.ieee.mantissa0 != 0 || u.ieee.mantissa1 != 0)
352 /* and the mantissa non-zero? */
357 isDoubleInfinite(StgDouble d)
359 union stg_ieee754_dbl u;
363 /* Inf iff exponent is all ones, mantissa all zeros */
365 u.ieee.exponent == 2047 /* 2^11 - 1 */ &&
366 u.ieee.mantissa0 == 0 &&
367 u.ieee.mantissa1 == 0
372 isDoubleDenormalized(StgDouble d)
374 union stg_ieee754_dbl u;
378 /* A (single/double/quad) precision floating point number
381 - mantissa is non-zero.
382 - (don't care about setting of sign bit.)
386 u.ieee.exponent == 0 &&
387 (u.ieee.mantissa0 != 0 ||
388 u.ieee.mantissa1 != 0)
394 isDoubleNegativeZero(StgDouble d)
396 union stg_ieee754_dbl u;
399 /* sign (bit 63) set (only) => negative zero */
402 u.ieee.negative == 1 &&
403 u.ieee.exponent == 0 &&
404 u.ieee.mantissa0 == 0 &&
405 u.ieee.mantissa1 == 0);
408 /* Same tests, this time for StgFloats. */
411 To recap, here's the representation of a single precision
412 IEEE floating point number:
414 sign 31 sign bit (0 == positive, 1 == negative)
415 exponent 30-23 exponent (biased by 127)
416 fraction 22-0 fraction (bits to right of binary point)
421 isFloatNaN(StgFloat f)
423 union stg_ieee754_flt u;
426 /* Floating point NaN iff exponent is all ones, mantissa is
427 non-zero (but see below.) */
429 u.ieee.exponent == 255 /* 2^8 - 1 */ &&
430 u.ieee.mantissa != 0);
434 isFloatInfinite(StgFloat f)
436 union stg_ieee754_flt u;
439 /* A float is Inf iff exponent is max (all ones),
440 and mantissa is min(all zeros.) */
442 u.ieee.exponent == 255 /* 2^8 - 1 */ &&
443 u.ieee.mantissa == 0);
447 isFloatDenormalized(StgFloat f)
449 union stg_ieee754_flt u;
452 /* A (single/double/quad) precision floating point number
455 - mantissa is non-zero.
456 - (don't care about setting of sign bit.)
460 u.ieee.exponent == 0 &&
461 u.ieee.mantissa != 0);
465 isFloatNegativeZero(StgFloat f)
467 union stg_ieee754_flt u;
470 /* sign (bit 31) set (only) => negative zero */
473 u.ieee.exponent == 0 &&
474 u.ieee.mantissa == 0);
477 #else /* ! IEEE_FLOATING_POINT */
479 /* Dummy definitions of predicates - they all return false */
480 StgInt isDoubleNaN(d) StgDouble d; { return 0; }
481 StgInt isDoubleInfinite(d) StgDouble d; { return 0; }
482 StgInt isDoubleDenormalized(d) StgDouble d; { return 0; }
483 StgInt isDoubleNegativeZero(d) StgDouble d; { return 0; }
484 StgInt isFloatNaN(f) StgFloat f; { return 0; }
485 StgInt isFloatInfinite(f) StgFloat f; { return 0; }
486 StgInt isFloatDenormalized(f) StgFloat f; { return 0; }
487 StgInt isFloatNegativeZero(f) StgFloat f; { return 0; }
489 #endif /* ! IEEE_FLOATING_POINT */