1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team, 1998-2000
5 * Miscellaneous support for floating-point primitives
7 * ---------------------------------------------------------------------------*/
9 #include "PosixSource.h"
15 * Encoding and decoding Doubles. Code based on the HBC code
20 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_INT
22 #ifdef _LONG_LONG_LIMB
23 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG_LONG
25 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG
29 #if SIZEOF_LIMB_T == 4
30 #define GMP_BASE 4294967296.0
31 #elif SIZEOF_LIMB_T == 8
32 #define GMP_BASE 18446744073709551616.0
34 #error Cannot cope with SIZEOF_LIMB_T -- please add definition of GMP_BASE
37 #define DNBIGIT ((SIZEOF_DOUBLE+SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
38 #define FNBIGIT ((SIZEOF_FLOAT +SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
40 #if IEEE_FLOATING_POINT
41 #define MY_DMINEXP ((DBL_MIN_EXP) - (DBL_MANT_DIG) - 1)
42 /* DMINEXP is defined in values.h on Linux (for example) */
43 #define DHIGHBIT 0x00100000
44 #define DMSBIT 0x80000000
46 #define MY_FMINEXP ((FLT_MIN_EXP) - (FLT_MANT_DIG) - 1)
47 #define FHIGHBIT 0x00800000
48 #define FMSBIT 0x80000000
51 #if defined(WORDS_BIGENDIAN) || defined(FLOAT_WORDS_BIGENDIAN)
59 #define __abs(a) (( (a) >= 0 ) ? (a) : (-(a)))
62 __encodeDouble (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
65 const mp_limb_t *const arr = (const mp_limb_t *)ba;
68 /* Convert MP_INT to a double; knows a lot about internal rep! */
69 for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
70 r = (r * GMP_BASE) + arr[i];
72 /* Now raise to the exponent */
73 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
76 /* sign is encoded in the size */
84 __2Int_encodeDouble (I_ j_high, I_ j_low, I_ e)
88 /* assuming 32 bit ints */
89 ASSERT(sizeof(int ) == 4 );
91 r = (StgDouble)((unsigned int)j_high);
92 r *= 4294967296.0; /* exp2f(32); */
93 r += (StgDouble)((unsigned int)j_low);
95 /* Now raise to the exponent */
96 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
99 /* sign is encoded in the size */
106 /* Special version for words */
108 __word_encodeDouble (W_ j, I_ e)
114 /* Now raise to the exponent */
115 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
121 /* Special version for small Integers */
123 __int_encodeDouble (I_ j, I_ e)
127 r = (StgDouble)__abs(j);
129 /* Now raise to the exponent */
130 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
133 /* sign is encoded in the size */
141 __encodeFloat (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
144 const mp_limb_t *arr = (const mp_limb_t *)ba;
147 /* Convert MP_INT to a float; knows a lot about internal rep! */
148 for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
149 r = (r * GMP_BASE) + arr[i];
151 /* Now raise to the exponent */
152 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
155 /* sign is encoded in the size */
162 /* Special version for small Integers */
164 __int_encodeFloat (I_ j, I_ e)
168 r = (StgFloat)__abs(j);
170 /* Now raise to the exponent */
171 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
174 /* sign is encoded in the size */
181 /* Special version for small positive Integers */
183 __word_encodeFloat (W_ j, I_ e)
189 /* Now raise to the exponent */
190 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
196 /* This only supports IEEE floating point */
199 __decodeDouble (MP_INT *man, I_ *exp, StgDouble dbl)
201 /* Do some bit fiddling on IEEE */
202 unsigned int low, high; /* assuming 32 bit ints */
204 union { double d; unsigned int i[2]; } u; /* assuming 32 bit ints, 64 bit double */
206 ASSERT(sizeof(unsigned int ) == 4 );
207 ASSERT(sizeof(dbl ) == SIZEOF_DOUBLE);
208 ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
209 ASSERT(DNBIGIT*SIZEOF_LIMB_T >= SIZEOF_DOUBLE);
211 u.d = dbl; /* grab chunks of the double */
215 /* we know the MP_INT* passed in has size zero, so we realloc
218 man->_mp_alloc = DNBIGIT;
220 if (low == 0 && (high & ~DMSBIT) == 0) {
224 man->_mp_size = DNBIGIT;
225 iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
229 if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
233 /* A denorm, normalize the mantissa */
234 while (! (high & DHIGHBIT)) {
244 man->_mp_d[0] = (mp_limb_t)low;
245 man->_mp_d[1] = (mp_limb_t)high;
248 man->_mp_d[0] = ((mp_limb_t)high) << 32 | (mp_limb_t)low;
250 #error Cannot cope with DNBIGIT
254 man->_mp_size = -man->_mp_size;
259 __decodeDouble_2Int (I_ *man_sign, W_ *man_high, W_ *man_low, I_ *exp, StgDouble dbl)
261 /* Do some bit fiddling on IEEE */
262 unsigned int low, high; /* assuming 32 bit ints */
264 union { double d; unsigned int i[2]; } u; /* assuming 32 bit ints, 64 bit double */
266 ASSERT(sizeof(unsigned int ) == 4 );
267 ASSERT(sizeof(dbl ) == 8 );
268 ASSERT(sizeof(dbl ) == SIZEOF_DOUBLE);
270 u.d = dbl; /* grab chunks of the double */
274 if (low == 0 && (high & ~DMSBIT) == 0) {
279 iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
283 if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
287 /* A denorm, normalize the mantissa */
288 while (! (high & DHIGHBIT)) {
299 *man_sign = (sign < 0) ? -1 : 1;
304 __decodeFloat (MP_INT *man, I_ *exp, StgFloat flt)
306 /* Do some bit fiddling on IEEE */
307 int high, sign; /* assuming 32 bit ints */
308 union { float f; int i; } u; /* assuming 32 bit float and int */
310 ASSERT(sizeof(int ) == 4 );
311 ASSERT(sizeof(flt ) == SIZEOF_FLOAT );
312 ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
313 ASSERT(FNBIGIT*SIZEOF_LIMB_T >= SIZEOF_FLOAT );
315 u.f = flt; /* grab the float */
318 /* we know the MP_INT* passed in has size zero, so we realloc
321 man->_mp_alloc = FNBIGIT;
323 if ((high & ~FMSBIT) == 0) {
327 man->_mp_size = FNBIGIT;
328 *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
332 if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
336 /* A denorm, normalize the mantissa */
337 while (! (high & FHIGHBIT)) {
343 man->_mp_d[0] = (mp_limb_t)high;
345 #error Cannot cope with FNBIGIT
348 man->_mp_size = -man->_mp_size;
352 /* Convenient union types for checking the layout of IEEE 754 types -
353 based on defs in GNU libc <ieee754.h>
357 __decodeFloat_Int (I_ *man, I_ *exp, StgFloat flt)
359 /* Do some bit fiddling on IEEE */
360 int high, sign; /* assuming 32 bit ints */
361 union { float f; int i; } u; /* assuming 32 bit float and int */
363 ASSERT(sizeof(int ) == 4 );
364 ASSERT(sizeof(flt ) == 4 );
365 ASSERT(sizeof(flt ) == SIZEOF_FLOAT );
367 u.f = flt; /* grab the float */
370 if ((high & ~FMSBIT) == 0) {
374 *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
378 if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
382 /* A denorm, normalize the mantissa */
383 while (! (high & FHIGHBIT)) {
394 union stg_ieee754_flt
400 unsigned int negative:1;
401 unsigned int exponent:8;
402 unsigned int mantissa:23;
404 unsigned int mantissa:23;
405 unsigned int exponent:8;
406 unsigned int negative:1;
412 unsigned int negative:1;
413 unsigned int exponent:8;
414 unsigned int quiet_nan:1;
415 unsigned int mantissa:22;
417 unsigned int mantissa:22;
418 unsigned int quiet_nan:1;
419 unsigned int exponent:8;
420 unsigned int negative:1;
427 To recap, here's the representation of a double precision
428 IEEE floating point number:
430 sign 63 sign bit (0==positive, 1==negative)
431 exponent 62-52 exponent (biased by 1023)
432 fraction 51-0 fraction (bits to right of binary point)
435 union stg_ieee754_dbl
441 unsigned int negative:1;
442 unsigned int exponent:11;
443 unsigned int mantissa0:20;
444 unsigned int mantissa1:32;
446 #if FLOAT_WORDS_BIGENDIAN
447 unsigned int mantissa0:20;
448 unsigned int exponent:11;
449 unsigned int negative:1;
450 unsigned int mantissa1:32;
452 unsigned int mantissa1:32;
453 unsigned int mantissa0:20;
454 unsigned int exponent:11;
455 unsigned int negative:1;
459 /* This format makes it easier to see if a NaN is a signalling NaN. */
463 unsigned int negative:1;
464 unsigned int exponent:11;
465 unsigned int quiet_nan:1;
466 unsigned int mantissa0:19;
467 unsigned int mantissa1:32;
469 #if FLOAT_WORDS_BIGENDIAN
470 unsigned int mantissa0:19;
471 unsigned int quiet_nan:1;
472 unsigned int exponent:11;
473 unsigned int negative:1;
474 unsigned int mantissa1:32;
476 unsigned int mantissa1:32;
477 unsigned int mantissa0:19;
478 unsigned int quiet_nan:1;
479 unsigned int exponent:11;
480 unsigned int negative:1;
487 * Predicates for testing for extreme IEEE fp values. Used
488 * by the bytecode evaluator and the Prelude.
492 /* In case you don't suppport IEEE, you'll just get dummy defs.. */
493 #ifdef IEEE_FLOATING_POINT
496 isDoubleNaN(StgDouble d)
498 union stg_ieee754_dbl u;
503 u.ieee.exponent == 2047 /* 2^11 - 1 */ && /* Is the exponent all ones? */
504 (u.ieee.mantissa0 != 0 || u.ieee.mantissa1 != 0)
505 /* and the mantissa non-zero? */
510 isDoubleInfinite(StgDouble d)
512 union stg_ieee754_dbl u;
516 /* Inf iff exponent is all ones, mantissa all zeros */
518 u.ieee.exponent == 2047 /* 2^11 - 1 */ &&
519 u.ieee.mantissa0 == 0 &&
520 u.ieee.mantissa1 == 0
525 isDoubleDenormalized(StgDouble d)
527 union stg_ieee754_dbl u;
531 /* A (single/double/quad) precision floating point number
534 - mantissa is non-zero.
535 - (don't care about setting of sign bit.)
539 u.ieee.exponent == 0 &&
540 (u.ieee.mantissa0 != 0 ||
541 u.ieee.mantissa1 != 0)
547 isDoubleNegativeZero(StgDouble d)
549 union stg_ieee754_dbl u;
552 /* sign (bit 63) set (only) => negative zero */
555 u.ieee.negative == 1 &&
556 u.ieee.exponent == 0 &&
557 u.ieee.mantissa0 == 0 &&
558 u.ieee.mantissa1 == 0);
561 /* Same tests, this time for StgFloats. */
564 To recap, here's the representation of a single precision
565 IEEE floating point number:
567 sign 31 sign bit (0 == positive, 1 == negative)
568 exponent 30-23 exponent (biased by 127)
569 fraction 22-0 fraction (bits to right of binary point)
574 isFloatNaN(StgFloat f)
576 union stg_ieee754_flt u;
579 /* Floating point NaN iff exponent is all ones, mantissa is
580 non-zero (but see below.) */
582 u.ieee.exponent == 255 /* 2^8 - 1 */ &&
583 u.ieee.mantissa != 0);
587 isFloatInfinite(StgFloat f)
589 union stg_ieee754_flt u;
592 /* A float is Inf iff exponent is max (all ones),
593 and mantissa is min(all zeros.) */
595 u.ieee.exponent == 255 /* 2^8 - 1 */ &&
596 u.ieee.mantissa == 0);
600 isFloatDenormalized(StgFloat f)
602 union stg_ieee754_flt u;
605 /* A (single/double/quad) precision floating point number
608 - mantissa is non-zero.
609 - (don't care about setting of sign bit.)
613 u.ieee.exponent == 0 &&
614 u.ieee.mantissa != 0);
618 isFloatNegativeZero(StgFloat f)
620 union stg_ieee754_flt u;
623 /* sign (bit 31) set (only) => negative zero */
626 u.ieee.exponent == 0 &&
627 u.ieee.mantissa == 0);
630 #else /* ! IEEE_FLOATING_POINT */
632 /* Dummy definitions of predicates - they all return false */
633 StgInt isDoubleNaN(d) StgDouble d; { return 0; }
634 StgInt isDoubleInfinite(d) StgDouble d; { return 0; }
635 StgInt isDoubleDenormalized(d) StgDouble d; { return 0; }
636 StgInt isDoubleNegativeZero(d) StgDouble d; { return 0; }
637 StgInt isFloatNaN(f) StgFloat f; { return 0; }
638 StgInt isFloatInfinite(f) StgFloat f; { return 0; }
639 StgInt isFloatDenormalized(f) StgFloat f; { return 0; }
640 StgInt isFloatNegativeZero(f) StgFloat f; { return 0; }
642 #endif /* ! IEEE_FLOATING_POINT */