1 /* -----------------------------------------------------------------------------
3 * (c) Lennart Augustsson
4 * (c) The GHC Team, 1998-2000
6 * Miscellaneous support for floating-point primitives
8 * ---------------------------------------------------------------------------*/
10 #include "PosixSource.h"
16 * Encoding and decoding Doubles. Code based on the HBC code
21 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_INT
23 #ifdef _LONG_LONG_LIMB
24 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG_LONG
26 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG
30 #if SIZEOF_LIMB_T == 4
31 #define GMP_BASE 4294967296.0
32 #elif SIZEOF_LIMB_T == 8
33 #define GMP_BASE 18446744073709551616.0
35 #error Cannot cope with SIZEOF_LIMB_T -- please add definition of GMP_BASE
38 #define DNBIGIT ((SIZEOF_DOUBLE+SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
39 #define FNBIGIT ((SIZEOF_FLOAT +SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
41 #if IEEE_FLOATING_POINT
42 #define MY_DMINEXP ((DBL_MIN_EXP) - (DBL_MANT_DIG) - 1)
43 /* DMINEXP is defined in values.h on Linux (for example) */
44 #define DHIGHBIT 0x00100000
45 #define DMSBIT 0x80000000
47 #define MY_FMINEXP ((FLT_MIN_EXP) - (FLT_MANT_DIG) - 1)
48 #define FHIGHBIT 0x00800000
49 #define FMSBIT 0x80000000
52 #if defined(WORDS_BIGENDIAN) || defined(FLOAT_WORDS_BIGENDIAN)
60 #define __abs(a) (( (a) >= 0 ) ? (a) : (-(a)))
63 __encodeDouble (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
66 const mp_limb_t *const arr = (const mp_limb_t *)ba;
69 /* Convert MP_INT to a double; knows a lot about internal rep! */
70 for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
71 r = (r * GMP_BASE) + arr[i];
73 /* Now raise to the exponent */
74 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
77 /* sign is encoded in the size */
85 __2Int_encodeDouble (I_ j_high, I_ j_low, I_ e)
89 /* assuming 32 bit ints */
90 ASSERT(sizeof(int ) == 4 );
92 r = (StgDouble)((unsigned int)j_high);
93 r *= 4294967296.0; /* exp2f(32); */
94 r += (StgDouble)((unsigned int)j_low);
96 /* Now raise to the exponent */
97 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
100 /* sign is encoded in the size */
107 /* Special version for words */
109 __word_encodeDouble (W_ j, I_ e)
115 /* Now raise to the exponent */
116 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
122 /* Special version for small Integers */
124 __int_encodeDouble (I_ j, I_ e)
128 r = (StgDouble)__abs(j);
130 /* Now raise to the exponent */
131 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
134 /* sign is encoded in the size */
142 __encodeFloat (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
145 const mp_limb_t *arr = (const mp_limb_t *)ba;
148 /* Convert MP_INT to a float; knows a lot about internal rep! */
149 for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
150 r = (r * GMP_BASE) + arr[i];
152 /* Now raise to the exponent */
153 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
156 /* sign is encoded in the size */
163 /* Special version for small Integers */
165 __int_encodeFloat (I_ j, I_ e)
169 r = (StgFloat)__abs(j);
171 /* Now raise to the exponent */
172 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
175 /* sign is encoded in the size */
182 /* Special version for small positive Integers */
184 __word_encodeFloat (W_ j, I_ e)
190 /* Now raise to the exponent */
191 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
197 /* This only supports IEEE floating point */
200 __decodeDouble (MP_INT *man, I_ *exp, StgDouble dbl)
202 /* Do some bit fiddling on IEEE */
203 unsigned int low, high; /* assuming 32 bit ints */
205 union { double d; unsigned int i[2]; } u; /* assuming 32 bit ints, 64 bit double */
207 ASSERT(sizeof(unsigned int ) == 4 );
208 ASSERT(sizeof(dbl ) == SIZEOF_DOUBLE);
209 ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
210 ASSERT(DNBIGIT*SIZEOF_LIMB_T >= SIZEOF_DOUBLE);
212 u.d = dbl; /* grab chunks of the double */
216 /* we know the MP_INT* passed in has size zero, so we realloc
219 man->_mp_alloc = DNBIGIT;
221 if (low == 0 && (high & ~DMSBIT) == 0) {
225 man->_mp_size = DNBIGIT;
226 iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
230 if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
234 /* A denorm, normalize the mantissa */
235 while (! (high & DHIGHBIT)) {
245 man->_mp_d[0] = (mp_limb_t)low;
246 man->_mp_d[1] = (mp_limb_t)high;
249 man->_mp_d[0] = ((mp_limb_t)high) << 32 | (mp_limb_t)low;
251 #error Cannot cope with DNBIGIT
255 man->_mp_size = -man->_mp_size;
260 __decodeDouble_2Int (I_ *man_sign, W_ *man_high, W_ *man_low, I_ *exp, StgDouble dbl)
262 /* Do some bit fiddling on IEEE */
263 unsigned int low, high; /* assuming 32 bit ints */
265 union { double d; unsigned int i[2]; } u; /* assuming 32 bit ints, 64 bit double */
267 ASSERT(sizeof(unsigned int ) == 4 );
268 ASSERT(sizeof(dbl ) == 8 );
269 ASSERT(sizeof(dbl ) == SIZEOF_DOUBLE);
271 u.d = dbl; /* grab chunks of the double */
275 if (low == 0 && (high & ~DMSBIT) == 0) {
280 iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
284 if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
288 /* A denorm, normalize the mantissa */
289 while (! (high & DHIGHBIT)) {
300 *man_sign = (sign < 0) ? -1 : 1;
305 __decodeFloat (MP_INT *man, I_ *exp, StgFloat flt)
307 /* Do some bit fiddling on IEEE */
308 int high, sign; /* assuming 32 bit ints */
309 union { float f; int i; } u; /* assuming 32 bit float and int */
311 ASSERT(sizeof(int ) == 4 );
312 ASSERT(sizeof(flt ) == SIZEOF_FLOAT );
313 ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
314 ASSERT(FNBIGIT*SIZEOF_LIMB_T >= SIZEOF_FLOAT );
316 u.f = flt; /* grab the float */
319 /* we know the MP_INT* passed in has size zero, so we realloc
322 man->_mp_alloc = FNBIGIT;
324 if ((high & ~FMSBIT) == 0) {
328 man->_mp_size = FNBIGIT;
329 *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
333 if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
337 /* A denorm, normalize the mantissa */
338 while (! (high & FHIGHBIT)) {
344 man->_mp_d[0] = (mp_limb_t)high;
346 #error Cannot cope with FNBIGIT
349 man->_mp_size = -man->_mp_size;
353 /* Convenient union types for checking the layout of IEEE 754 types -
354 based on defs in GNU libc <ieee754.h>
358 __decodeFloat_Int (I_ *man, I_ *exp, StgFloat flt)
360 /* Do some bit fiddling on IEEE */
361 int high, sign; /* assuming 32 bit ints */
362 union { float f; int i; } u; /* assuming 32 bit float and int */
364 ASSERT(sizeof(int ) == 4 );
365 ASSERT(sizeof(flt ) == 4 );
366 ASSERT(sizeof(flt ) == SIZEOF_FLOAT );
368 u.f = flt; /* grab the float */
371 if ((high & ~FMSBIT) == 0) {
375 *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
379 if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
383 /* A denorm, normalize the mantissa */
384 while (! (high & FHIGHBIT)) {
395 union stg_ieee754_flt
401 unsigned int negative:1;
402 unsigned int exponent:8;
403 unsigned int mantissa:23;
405 unsigned int mantissa:23;
406 unsigned int exponent:8;
407 unsigned int negative:1;
413 unsigned int negative:1;
414 unsigned int exponent:8;
415 unsigned int quiet_nan:1;
416 unsigned int mantissa:22;
418 unsigned int mantissa:22;
419 unsigned int quiet_nan:1;
420 unsigned int exponent:8;
421 unsigned int negative:1;
428 To recap, here's the representation of a double precision
429 IEEE floating point number:
431 sign 63 sign bit (0==positive, 1==negative)
432 exponent 62-52 exponent (biased by 1023)
433 fraction 51-0 fraction (bits to right of binary point)
436 union stg_ieee754_dbl
442 unsigned int negative:1;
443 unsigned int exponent:11;
444 unsigned int mantissa0:20;
445 unsigned int mantissa1:32;
447 #if FLOAT_WORDS_BIGENDIAN
448 unsigned int mantissa0:20;
449 unsigned int exponent:11;
450 unsigned int negative:1;
451 unsigned int mantissa1:32;
453 unsigned int mantissa1:32;
454 unsigned int mantissa0:20;
455 unsigned int exponent:11;
456 unsigned int negative:1;
460 /* This format makes it easier to see if a NaN is a signalling NaN. */
464 unsigned int negative:1;
465 unsigned int exponent:11;
466 unsigned int quiet_nan:1;
467 unsigned int mantissa0:19;
468 unsigned int mantissa1:32;
470 #if FLOAT_WORDS_BIGENDIAN
471 unsigned int mantissa0:19;
472 unsigned int quiet_nan:1;
473 unsigned int exponent:11;
474 unsigned int negative:1;
475 unsigned int mantissa1:32;
477 unsigned int mantissa1:32;
478 unsigned int mantissa0:19;
479 unsigned int quiet_nan:1;
480 unsigned int exponent:11;
481 unsigned int negative:1;
488 * Predicates for testing for extreme IEEE fp values. Used
489 * by the bytecode evaluator and the Prelude.
493 /* In case you don't suppport IEEE, you'll just get dummy defs.. */
494 #ifdef IEEE_FLOATING_POINT
497 isDoubleNaN(StgDouble d)
499 union stg_ieee754_dbl u;
504 u.ieee.exponent == 2047 /* 2^11 - 1 */ && /* Is the exponent all ones? */
505 (u.ieee.mantissa0 != 0 || u.ieee.mantissa1 != 0)
506 /* and the mantissa non-zero? */
511 isDoubleInfinite(StgDouble d)
513 union stg_ieee754_dbl u;
517 /* Inf iff exponent is all ones, mantissa all zeros */
519 u.ieee.exponent == 2047 /* 2^11 - 1 */ &&
520 u.ieee.mantissa0 == 0 &&
521 u.ieee.mantissa1 == 0
526 isDoubleDenormalized(StgDouble d)
528 union stg_ieee754_dbl u;
532 /* A (single/double/quad) precision floating point number
535 - mantissa is non-zero.
536 - (don't care about setting of sign bit.)
540 u.ieee.exponent == 0 &&
541 (u.ieee.mantissa0 != 0 ||
542 u.ieee.mantissa1 != 0)
548 isDoubleNegativeZero(StgDouble d)
550 union stg_ieee754_dbl u;
553 /* sign (bit 63) set (only) => negative zero */
556 u.ieee.negative == 1 &&
557 u.ieee.exponent == 0 &&
558 u.ieee.mantissa0 == 0 &&
559 u.ieee.mantissa1 == 0);
562 /* Same tests, this time for StgFloats. */
565 To recap, here's the representation of a single precision
566 IEEE floating point number:
568 sign 31 sign bit (0 == positive, 1 == negative)
569 exponent 30-23 exponent (biased by 127)
570 fraction 22-0 fraction (bits to right of binary point)
575 isFloatNaN(StgFloat f)
577 union stg_ieee754_flt u;
580 /* Floating point NaN iff exponent is all ones, mantissa is
581 non-zero (but see below.) */
583 u.ieee.exponent == 255 /* 2^8 - 1 */ &&
584 u.ieee.mantissa != 0);
588 isFloatInfinite(StgFloat f)
590 union stg_ieee754_flt u;
593 /* A float is Inf iff exponent is max (all ones),
594 and mantissa is min(all zeros.) */
596 u.ieee.exponent == 255 /* 2^8 - 1 */ &&
597 u.ieee.mantissa == 0);
601 isFloatDenormalized(StgFloat f)
603 union stg_ieee754_flt u;
606 /* A (single/double/quad) precision floating point number
609 - mantissa is non-zero.
610 - (don't care about setting of sign bit.)
614 u.ieee.exponent == 0 &&
615 u.ieee.mantissa != 0);
619 isFloatNegativeZero(StgFloat f)
621 union stg_ieee754_flt u;
624 /* sign (bit 31) set (only) => negative zero */
627 u.ieee.exponent == 0 &&
628 u.ieee.mantissa == 0);
631 #else /* ! IEEE_FLOATING_POINT */
633 /* Dummy definitions of predicates - they all return false */
634 StgInt isDoubleNaN(d) StgDouble d; { return 0; }
635 StgInt isDoubleInfinite(d) StgDouble d; { return 0; }
636 StgInt isDoubleDenormalized(d) StgDouble d; { return 0; }
637 StgInt isDoubleNegativeZero(d) StgDouble d; { return 0; }
638 StgInt isFloatNaN(f) StgFloat f; { return 0; }
639 StgInt isFloatInfinite(f) StgFloat f; { return 0; }
640 StgInt isFloatDenormalized(f) StgFloat f; { return 0; }
641 StgInt isFloatNegativeZero(f) StgFloat f; { return 0; }
643 #endif /* ! IEEE_FLOATING_POINT */