rts/StgPrimFloat.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2000
   4  *
   5  * Miscellaneous support for floating-point primitives
   6  *
   7  * ---------------------------------------------------------------------------*/
   8
   9 #include "PosixSource.h"
  10 #include "Rts.h"
  11
  12 #include <math.h>
  13
  14 /*
  15  * Encoding and decoding Doubles.  Code based on the HBC code
  16  * (lib/fltcode.c).
  17  */
  18
  19 #ifdef _SHORT_LIMB
  20 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_INT
  21 #else
  22 #ifdef _LONG_LONG_LIMB
  23 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG_LONG
  24 #else
  25 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG
  26 #endif
  27 #endif
  28
  29 #if SIZEOF_LIMB_T == 4
  30 #define GMP_BASE 4294967296.0
  31 #elif SIZEOF_LIMB_T == 8
  32 #define GMP_BASE 18446744073709551616.0
  33 #else
  34 #error Cannot cope with SIZEOF_LIMB_T -- please add definition of GMP_BASE
  35 #endif
  36
  37 #define DNBIGIT  ((SIZEOF_DOUBLE+SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
  38 #define FNBIGIT  ((SIZEOF_FLOAT +SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
  39
  40 #if IEEE_FLOATING_POINT
  41 #define MY_DMINEXP  ((DBL_MIN_EXP) - (DBL_MANT_DIG) - 1)
  42 /* DMINEXP is defined in values.h on Linux (for example) */
  43 #define DHIGHBIT 0x00100000
  44 #define DMSBIT   0x80000000
  45
  46 #define MY_FMINEXP  ((FLT_MIN_EXP) - (FLT_MANT_DIG) - 1)
  47 #define FHIGHBIT 0x00800000
  48 #define FMSBIT   0x80000000
  49 #endif
  50
  51 #if defined(WORDS_BIGENDIAN) || defined(FLOAT_WORDS_BIGENDIAN)
  52 #define L 1
  53 #define H 0
  54 #else
  55 #define L 0
  56 #define H 1
  57 #endif
  58
  59 #define __abs(a)                (( (a) >= 0 ) ? (a) : (-(a)))
  60
  61 StgDouble
  62 __encodeDouble (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
  63 {
  64     StgDouble r;
  65     const mp_limb_t *const arr = (const mp_limb_t *)ba;
  66     I_ i;
  67
  68     /* Convert MP_INT to a double; knows a lot about internal rep! */
  69     for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
  70         r = (r * GMP_BASE) + arr[i];
  71
  72     /* Now raise to the exponent */
  73     if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
  74         r = ldexp(r, e);
  75
  76     /* sign is encoded in the size */
  77     if (size < 0)
  78         r = -r;
  79
  80     return r;
  81 }
  82
  83 StgDouble
  84 __2Int_encodeDouble (I_ j_high, I_ j_low, I_ e)
  85 {
  86   StgDouble r;
  87
  88   /* assuming 32 bit ints */
  89   ASSERT(sizeof(int          ) == 4            );
  90
  91   r = (StgDouble)((unsigned int)j_high);
  92   r *= 4294967296.0; /* exp2f(32); */
  93   r += (StgDouble)((unsigned int)j_low);
  94
  95   /* Now raise to the exponent */
  96   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
  97     r = ldexp(r, e);
  98
  99   /* sign is encoded in the size */
 100   if (j_high < 0)
 101     r = -r;
 102
 103   return r;
 104 }
 105
 106 /* Special version for words */
 107 StgDouble
 108 __word_encodeDouble (W_ j, I_ e)
 109 {
 110   StgDouble r;
 111
 112   r = (StgDouble)j;
 113
 114   /* Now raise to the exponent */
 115   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 116     r = ldexp(r, e);
 117
 118   return r;
 119 }
 120
 121 /* Special version for small Integers */
 122 StgDouble
 123 __int_encodeDouble (I_ j, I_ e)
 124 {
 125   StgDouble r;
 126
 127   r = (StgDouble)__abs(j);
 128
 129   /* Now raise to the exponent */
 130   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 131     r = ldexp(r, e);
 132
 133   /* sign is encoded in the size */
 134   if (j < 0)
 135     r = -r;
 136
 137   return r;
 138 }
 139
 140 StgFloat
 141 __encodeFloat (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
 142 {
 143     StgFloat r;
 144     const mp_limb_t *arr = (const mp_limb_t *)ba;
 145     I_ i;
 146
 147     /* Convert MP_INT to a float; knows a lot about internal rep! */
 148     for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
 149         r = (r * GMP_BASE) + arr[i];
 150
 151     /* Now raise to the exponent */
 152     if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 153         r = ldexp(r, e);
 154
 155     /* sign is encoded in the size */
 156     if (size < 0)
 157         r = -r;
 158
 159     return r;
 160 }
 161
 162 /* Special version for small Integers */
 163 StgFloat
 164 __int_encodeFloat (I_ j, I_ e)
 165 {
 166   StgFloat r;
 167
 168   r = (StgFloat)__abs(j);
 169
 170   /* Now raise to the exponent */
 171   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 172     r = ldexp(r, e);
 173
 174   /* sign is encoded in the size */
 175   if (j < 0)
 176     r = -r;
 177
 178   return r;
 179 }
 180
 181 /* Special version for small positive Integers */
 182 StgFloat
 183 __word_encodeFloat (W_ j, I_ e)
 184 {
 185   StgFloat r;
 186
 187   r = (StgFloat)j;
 188
 189   /* Now raise to the exponent */
 190   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 191     r = ldexp(r, e);
 192
 193   return r;
 194 }
 195
 196 /* This only supports IEEE floating point */
 197
 198 void
 199 __decodeDouble (MP_INT *man, I_ *exp, StgDouble dbl)
 200 {
 201     /* Do some bit fiddling on IEEE */
 202     unsigned int low, high;             /* assuming 32 bit ints */
 203     int sign, iexp;
 204     union { double d; unsigned int i[2]; } u;   /* assuming 32 bit ints, 64 bit double */
 205
 206     ASSERT(sizeof(unsigned int ) == 4            );
 207     ASSERT(sizeof(dbl          ) == SIZEOF_DOUBLE);
 208     ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
 209     ASSERT(DNBIGIT*SIZEOF_LIMB_T >= SIZEOF_DOUBLE);
 210
 211     u.d = dbl;      /* grab chunks of the double */
 212     low = u.i[L];
 213     high = u.i[H];
 214
 215     /* we know the MP_INT* passed in has size zero, so we realloc
 216         no matter what.
 217     */
 218     man->_mp_alloc = DNBIGIT;
 219
 220     if (low == 0 && (high & ~DMSBIT) == 0) {
 221         man->_mp_size = 0;
 222         *exp = 0L;
 223     } else {
 224         man->_mp_size = DNBIGIT;
 225         iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
 226         sign = high;
 227
 228         high &= DHIGHBIT-1;
 229         if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
 230             high |= DHIGHBIT;
 231         else {
 232             iexp++;
 233             /* A denorm, normalize the mantissa */
 234             while (! (high & DHIGHBIT)) {
 235                 high <<= 1;
 236                 if (low & DMSBIT)
 237                     high++;
 238                 low <<= 1;
 239                 iexp--;
 240             }
 241         }
 242         *exp = (I_) iexp;
 243 #if DNBIGIT == 2
 244         man->_mp_d[0] = (mp_limb_t)low;
 245         man->_mp_d[1] = (mp_limb_t)high;
 246 #else
 247 #if DNBIGIT == 1
 248         man->_mp_d[0] = ((mp_limb_t)high) << 32 | (mp_limb_t)low;
 249 #else
 250 #error Cannot cope with DNBIGIT
 251 #endif
 252 #endif
 253         if (sign < 0)
 254             man->_mp_size = -man->_mp_size;
 255     }
 256 }
 257
 258 void
 259 __decodeDouble_2Int (I_ *man_sign, W_ *man_high, W_ *man_low, I_ *exp, StgDouble dbl)
 260 {
 261     /* Do some bit fiddling on IEEE */
 262     unsigned int low, high;             /* assuming 32 bit ints */
 263     int sign, iexp;
 264     union { double d; unsigned int i[2]; } u;   /* assuming 32 bit ints, 64 bit double */
 265
 266     ASSERT(sizeof(unsigned int ) == 4            );
 267     ASSERT(sizeof(dbl          ) == 8            );
 268     ASSERT(sizeof(dbl          ) == SIZEOF_DOUBLE);
 269
 270     u.d = dbl;      /* grab chunks of the double */
 271     low = u.i[L];
 272     high = u.i[H];
 273
 274     if (low == 0 && (high & ~DMSBIT) == 0) {
 275         *man_low = 0;
 276         *man_high = 0;
 277         *exp = 0L;
 278     } else {
 279         iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
 280         sign = high;
 281
 282         high &= DHIGHBIT-1;
 283         if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
 284             high |= DHIGHBIT;
 285         else {
 286             iexp++;
 287             /* A denorm, normalize the mantissa */
 288             while (! (high & DHIGHBIT)) {
 289                 high <<= 1;
 290                 if (low & DMSBIT)
 291                     high++;
 292                 low <<= 1;
 293                 iexp--;
 294             }
 295         }
 296         *exp = (I_) iexp;
 297         *man_low = low;
 298         *man_high = high;
 299         *man_sign = (sign < 0) ? -1 : 1;
 300     }
 301 }
 302
 303 void
 304 __decodeFloat (MP_INT *man, I_ *exp, StgFloat flt)
 305 {
 306     /* Do some bit fiddling on IEEE */
 307     int high, sign;                 /* assuming 32 bit ints */
 308     union { float f; int i; } u;    /* assuming 32 bit float and int */
 309
 310     ASSERT(sizeof(int          ) == 4            );
 311     ASSERT(sizeof(flt          ) == SIZEOF_FLOAT );
 312     ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
 313     ASSERT(FNBIGIT*SIZEOF_LIMB_T >= SIZEOF_FLOAT );
 314
 315     u.f = flt;      /* grab the float */
 316     high = u.i;
 317
 318     /* we know the MP_INT* passed in has size zero, so we realloc
 319         no matter what.
 320     */
 321     man->_mp_alloc = FNBIGIT;
 322
 323     if ((high & ~FMSBIT) == 0) {
 324         man->_mp_size = 0;
 325         *exp = 0;
 326     } else {
 327         man->_mp_size = FNBIGIT;
 328         *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
 329         sign = high;
 330
 331         high &= FHIGHBIT-1;
 332         if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
 333             high |= FHIGHBIT;
 334         else {
 335             (*exp)++;
 336             /* A denorm, normalize the mantissa */
 337             while (! (high & FHIGHBIT)) {
 338                 high <<= 1;
 339                 (*exp)--;
 340             }
 341         }
 342 #if FNBIGIT == 1
 343         man->_mp_d[0] = (mp_limb_t)high;
 344 #else
 345 #error Cannot cope with FNBIGIT
 346 #endif
 347         if (sign < 0)
 348             man->_mp_size = -man->_mp_size;
 349     }
 350 }
 351
 352 /* Convenient union types for checking the layout of IEEE 754 types -
 353    based on defs in GNU libc <ieee754.h>
 354 */
 355
 356 void
 357 __decodeFloat_Int (I_ *man, I_ *exp, StgFloat flt)
 358 {
 359     /* Do some bit fiddling on IEEE */
 360     int high, sign;                 /* assuming 32 bit ints */
 361     union { float f; int i; } u;    /* assuming 32 bit float and int */
 362
 363     ASSERT(sizeof(int          ) == 4            );
 364     ASSERT(sizeof(flt          ) == 4            );
 365     ASSERT(sizeof(flt          ) == SIZEOF_FLOAT );
 366
 367     u.f = flt;      /* grab the float */
 368     high = u.i;
 369
 370     if ((high & ~FMSBIT) == 0) {
 371         *man = 0;
 372         *exp = 0;
 373     } else {
 374         *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
 375         sign = high;
 376
 377         high &= FHIGHBIT-1;
 378         if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
 379             high |= FHIGHBIT;
 380         else {
 381             (*exp)++;
 382             /* A denorm, normalize the mantissa */
 383             while (! (high & FHIGHBIT)) {
 384                 high <<= 1;
 385                 (*exp)--;
 386             }
 387         }
 388         *man = high;
 389         if (sign < 0)
 390             *man = - *man;
 391     }
 392 }
 393
 394 union stg_ieee754_flt
 395 {
 396    float f;
 397    struct {
 398
 399 #if WORDS_BIGENDIAN
 400         unsigned int negative:1;
 401         unsigned int exponent:8;
 402         unsigned int mantissa:23;
 403 #else
 404         unsigned int mantissa:23;
 405         unsigned int exponent:8;
 406         unsigned int negative:1;
 407 #endif
 408    } ieee;
 409    struct {
 410
 411 #if WORDS_BIGENDIAN
 412         unsigned int negative:1;
 413         unsigned int exponent:8;
 414         unsigned int quiet_nan:1;
 415         unsigned int mantissa:22;
 416 #else
 417         unsigned int mantissa:22;
 418         unsigned int quiet_nan:1;
 419         unsigned int exponent:8;
 420         unsigned int negative:1;
 421 #endif
 422    } ieee_nan;
 423 };
 424
 425 /*
 426
 427  To recap, here's the representation of a double precision
 428  IEEE floating point number:
 429
 430  sign         63           sign bit (0==positive, 1==negative)
 431  exponent     62-52        exponent (biased by 1023)
 432  fraction     51-0         fraction (bits to right of binary point)
 433 */
 434
 435 union stg_ieee754_dbl
 436 {
 437    double d;
 438    struct {
 439
 440 #if WORDS_BIGENDIAN
 441         unsigned int negative:1;
 442         unsigned int exponent:11;
 443         unsigned int mantissa0:20;
 444         unsigned int mantissa1:32;
 445 #else
 446 #if FLOAT_WORDS_BIGENDIAN
 447         unsigned int mantissa0:20;
 448         unsigned int exponent:11;
 449         unsigned int negative:1;
 450         unsigned int mantissa1:32;
 451 #else
 452         unsigned int mantissa1:32;
 453         unsigned int mantissa0:20;
 454         unsigned int exponent:11;
 455         unsigned int negative:1;
 456 #endif
 457 #endif
 458    } ieee;
 459     /* This format makes it easier to see if a NaN is a signalling NaN.  */
 460    struct {
 461
 462 #if WORDS_BIGENDIAN
 463         unsigned int negative:1;
 464         unsigned int exponent:11;
 465         unsigned int quiet_nan:1;
 466         unsigned int mantissa0:19;
 467         unsigned int mantissa1:32;
 468 #else
 469 #if FLOAT_WORDS_BIGENDIAN
 470         unsigned int mantissa0:19;
 471         unsigned int quiet_nan:1;
 472         unsigned int exponent:11;
 473         unsigned int negative:1;
 474         unsigned int mantissa1:32;
 475 #else
 476         unsigned int mantissa1:32;
 477         unsigned int mantissa0:19;
 478         unsigned int quiet_nan:1;
 479         unsigned int exponent:11;
 480         unsigned int negative:1;
 481 #endif
 482 #endif
 483    } ieee_nan;
 484 };
 485
 486 /*
 487  * Predicates for testing for extreme IEEE fp values. Used
 488  * by the bytecode evaluator and the Prelude.
 489  *
 490  */
 491
 492 /* In case you don't suppport IEEE, you'll just get dummy defs.. */
 493 #ifdef IEEE_FLOATING_POINT
 494
 495 StgInt
 496 isDoubleNaN(StgDouble d)
 497 {
 498   union stg_ieee754_dbl u;
 499
 500   u.d = d;
 501
 502   return (
 503     u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&  /* Is the exponent all ones? */
 504     (u.ieee.mantissa0 != 0 || u.ieee.mantissa1 != 0)
 505         /* and the mantissa non-zero? */
 506     );
 507 }
 508
 509 StgInt
 510 isDoubleInfinite(StgDouble d)
 511 {
 512     union stg_ieee754_dbl u;
 513
 514     u.d = d;
 515
 516     /* Inf iff exponent is all ones, mantissa all zeros */
 517     return (
 518         u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&
 519         u.ieee.mantissa0 == 0                   &&
 520         u.ieee.mantissa1 == 0
 521       );
 522 }
 523
 524 StgInt
 525 isDoubleDenormalized(StgDouble d)
 526 {
 527     union stg_ieee754_dbl u;
 528
 529     u.d = d;
 530
 531     /* A (single/double/quad) precision floating point number
 532        is denormalised iff:
 533         - exponent is zero
 534         - mantissa is non-zero.
 535         - (don't care about setting of sign bit.)
 536
 537     */
 538     return (
 539         u.ieee.exponent  == 0 &&
 540         (u.ieee.mantissa0 != 0 ||
 541          u.ieee.mantissa1 != 0)
 542       );
 543
 544 }
 545
 546 StgInt
 547 isDoubleNegativeZero(StgDouble d)
 548 {
 549     union stg_ieee754_dbl u;
 550
 551     u.d = d;
 552     /* sign (bit 63) set (only) => negative zero */
 553
 554     return (
 555         u.ieee.negative  == 1 &&
 556         u.ieee.exponent  == 0 &&
 557         u.ieee.mantissa0 == 0 &&
 558         u.ieee.mantissa1 == 0);
 559 }
 560
 561 /* Same tests, this time for StgFloats. */
 562
 563 /*
 564  To recap, here's the representation of a single precision
 565  IEEE floating point number:
 566
 567  sign         31           sign bit (0 == positive, 1 == negative)
 568  exponent     30-23        exponent (biased by 127)
 569  fraction     22-0         fraction (bits to right of binary point)
 570 */
 571
 572
 573 StgInt
 574 isFloatNaN(StgFloat f)
 575 {
 576     union stg_ieee754_flt u;
 577     u.f = f;
 578
 579    /* Floating point NaN iff exponent is all ones, mantissa is
 580       non-zero (but see below.) */
 581    return (
 582         u.ieee.exponent == 255 /* 2^8 - 1 */ &&
 583         u.ieee.mantissa != 0);
 584 }
 585
 586 StgInt
 587 isFloatInfinite(StgFloat f)
 588 {
 589     union stg_ieee754_flt u;
 590     u.f = f;
 591
 592     /* A float is Inf iff exponent is max (all ones),
 593        and mantissa is min(all zeros.) */
 594     return (
 595         u.ieee.exponent == 255 /* 2^8 - 1 */ &&
 596         u.ieee.mantissa == 0);
 597 }
 598
 599 StgInt
 600 isFloatDenormalized(StgFloat f)
 601 {
 602     union stg_ieee754_flt u;
 603     u.f = f;
 604
 605     /* A (single/double/quad) precision floating point number
 606        is denormalised iff:
 607         - exponent is zero
 608         - mantissa is non-zero.
 609         - (don't care about setting of sign bit.)
 610
 611     */
 612     return (
 613         u.ieee.exponent == 0 &&
 614         u.ieee.mantissa != 0);
 615 }
 616
 617 StgInt
 618 isFloatNegativeZero(StgFloat f)
 619 {
 620     union stg_ieee754_flt u;
 621     u.f = f;
 622
 623     /* sign (bit 31) set (only) => negative zero */
 624     return (
 625         u.ieee.negative      &&
 626         u.ieee.exponent == 0 &&
 627         u.ieee.mantissa == 0);
 628 }
 629
 630 #else /* ! IEEE_FLOATING_POINT */
 631
 632 /* Dummy definitions of predicates - they all return false */
 633 StgInt isDoubleNaN(d) StgDouble d; { return 0; }
 634 StgInt isDoubleInfinite(d) StgDouble d; { return 0; }
 635 StgInt isDoubleDenormalized(d) StgDouble d; { return 0; }
 636 StgInt isDoubleNegativeZero(d) StgDouble d; { return 0; }
 637 StgInt isFloatNaN(f) StgFloat f; { return 0; }
 638 StgInt isFloatInfinite(f) StgFloat f; { return 0; }
 639 StgInt isFloatDenormalized(f) StgFloat f; { return 0; }
 640 StgInt isFloatNegativeZero(f) StgFloat f; { return 0; }
 641
 642 #endif /* ! IEEE_FLOATING_POINT */