rts/StgPrimFloat.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) Lennart Augustsson
   4  * (c) The GHC Team, 1998-2000
   5  *
   6  * Miscellaneous support for floating-point primitives
   7  *
   8  * ---------------------------------------------------------------------------*/
   9
  10 #include "PosixSource.h"
  11 #include "Rts.h"
  12
  13 #include <math.h>
  14
  15 /*
  16  * Encoding and decoding Doubles.  Code based on the HBC code
  17  * (lib/fltcode.c).
  18  */
  19
  20 #ifdef _SHORT_LIMB
  21 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_INT
  22 #else
  23 #ifdef _LONG_LONG_LIMB
  24 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG_LONG
  25 #else
  26 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG
  27 #endif
  28 #endif
  29
  30 #if SIZEOF_LIMB_T == 4
  31 #define GMP_BASE 4294967296.0
  32 #elif SIZEOF_LIMB_T == 8
  33 #define GMP_BASE 18446744073709551616.0
  34 #else
  35 #error Cannot cope with SIZEOF_LIMB_T -- please add definition of GMP_BASE
  36 #endif
  37
  38 #define DNBIGIT  ((SIZEOF_DOUBLE+SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
  39 #define FNBIGIT  ((SIZEOF_FLOAT +SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
  40
  41 #if IEEE_FLOATING_POINT
  42 #define MY_DMINEXP  ((DBL_MIN_EXP) - (DBL_MANT_DIG) - 1)
  43 /* DMINEXP is defined in values.h on Linux (for example) */
  44 #define DHIGHBIT 0x00100000
  45 #define DMSBIT   0x80000000
  46
  47 #define MY_FMINEXP  ((FLT_MIN_EXP) - (FLT_MANT_DIG) - 1)
  48 #define FHIGHBIT 0x00800000
  49 #define FMSBIT   0x80000000
  50 #endif
  51
  52 #if defined(WORDS_BIGENDIAN) || defined(FLOAT_WORDS_BIGENDIAN)
  53 #define L 1
  54 #define H 0
  55 #else
  56 #define L 0
  57 #define H 1
  58 #endif
  59
  60 #define __abs(a)                (( (a) >= 0 ) ? (a) : (-(a)))
  61
  62 StgDouble
  63 __encodeDouble (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
  64 {
  65     StgDouble r;
  66     const mp_limb_t *const arr = (const mp_limb_t *)ba;
  67     I_ i;
  68
  69     /* Convert MP_INT to a double; knows a lot about internal rep! */
  70     for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
  71         r = (r * GMP_BASE) + arr[i];
  72
  73     /* Now raise to the exponent */
  74     if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
  75         r = ldexp(r, e);
  76
  77     /* sign is encoded in the size */
  78     if (size < 0)
  79         r = -r;
  80
  81     return r;
  82 }
  83
  84 StgDouble
  85 __2Int_encodeDouble (I_ j_high, I_ j_low, I_ e)
  86 {
  87   StgDouble r;
  88
  89   /* assuming 32 bit ints */
  90   ASSERT(sizeof(int          ) == 4            );
  91
  92   r = (StgDouble)((unsigned int)j_high);
  93   r *= 4294967296.0; /* exp2f(32); */
  94   r += (StgDouble)((unsigned int)j_low);
  95
  96   /* Now raise to the exponent */
  97   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
  98     r = ldexp(r, e);
  99
 100   /* sign is encoded in the size */
 101   if (j_high < 0)
 102     r = -r;
 103
 104   return r;
 105 }
 106
 107 /* Special version for words */
 108 StgDouble
 109 __word_encodeDouble (W_ j, I_ e)
 110 {
 111   StgDouble r;
 112
 113   r = (StgDouble)j;
 114
 115   /* Now raise to the exponent */
 116   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 117     r = ldexp(r, e);
 118
 119   return r;
 120 }
 121
 122 /* Special version for small Integers */
 123 StgDouble
 124 __int_encodeDouble (I_ j, I_ e)
 125 {
 126   StgDouble r;
 127
 128   r = (StgDouble)__abs(j);
 129
 130   /* Now raise to the exponent */
 131   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 132     r = ldexp(r, e);
 133
 134   /* sign is encoded in the size */
 135   if (j < 0)
 136     r = -r;
 137
 138   return r;
 139 }
 140
 141 StgFloat
 142 __encodeFloat (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
 143 {
 144     StgFloat r;
 145     const mp_limb_t *arr = (const mp_limb_t *)ba;
 146     I_ i;
 147
 148     /* Convert MP_INT to a float; knows a lot about internal rep! */
 149     for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
 150         r = (r * GMP_BASE) + arr[i];
 151
 152     /* Now raise to the exponent */
 153     if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 154         r = ldexp(r, e);
 155
 156     /* sign is encoded in the size */
 157     if (size < 0)
 158         r = -r;
 159
 160     return r;
 161 }
 162
 163 /* Special version for small Integers */
 164 StgFloat
 165 __int_encodeFloat (I_ j, I_ e)
 166 {
 167   StgFloat r;
 168
 169   r = (StgFloat)__abs(j);
 170
 171   /* Now raise to the exponent */
 172   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 173     r = ldexp(r, e);
 174
 175   /* sign is encoded in the size */
 176   if (j < 0)
 177     r = -r;
 178
 179   return r;
 180 }
 181
 182 /* Special version for small positive Integers */
 183 StgFloat
 184 __word_encodeFloat (W_ j, I_ e)
 185 {
 186   StgFloat r;
 187
 188   r = (StgFloat)j;
 189
 190   /* Now raise to the exponent */
 191   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 192     r = ldexp(r, e);
 193
 194   return r;
 195 }
 196
 197 /* This only supports IEEE floating point */
 198
 199 void
 200 __decodeDouble (MP_INT *man, I_ *exp, StgDouble dbl)
 201 {
 202     /* Do some bit fiddling on IEEE */
 203     unsigned int low, high;             /* assuming 32 bit ints */
 204     int sign, iexp;
 205     union { double d; unsigned int i[2]; } u;   /* assuming 32 bit ints, 64 bit double */
 206
 207     ASSERT(sizeof(unsigned int ) == 4            );
 208     ASSERT(sizeof(dbl          ) == SIZEOF_DOUBLE);
 209     ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
 210     ASSERT(DNBIGIT*SIZEOF_LIMB_T >= SIZEOF_DOUBLE);
 211
 212     u.d = dbl;      /* grab chunks of the double */
 213     low = u.i[L];
 214     high = u.i[H];
 215
 216     /* we know the MP_INT* passed in has size zero, so we realloc
 217         no matter what.
 218     */
 219     man->_mp_alloc = DNBIGIT;
 220
 221     if (low == 0 && (high & ~DMSBIT) == 0) {
 222         man->_mp_size = 0;
 223         *exp = 0L;
 224     } else {
 225         man->_mp_size = DNBIGIT;
 226         iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
 227         sign = high;
 228
 229         high &= DHIGHBIT-1;
 230         if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
 231             high |= DHIGHBIT;
 232         else {
 233             iexp++;
 234             /* A denorm, normalize the mantissa */
 235             while (! (high & DHIGHBIT)) {
 236                 high <<= 1;
 237                 if (low & DMSBIT)
 238                     high++;
 239                 low <<= 1;
 240                 iexp--;
 241             }
 242         }
 243         *exp = (I_) iexp;
 244 #if DNBIGIT == 2
 245         man->_mp_d[0] = (mp_limb_t)low;
 246         man->_mp_d[1] = (mp_limb_t)high;
 247 #else
 248 #if DNBIGIT == 1
 249         man->_mp_d[0] = ((mp_limb_t)high) << 32 | (mp_limb_t)low;
 250 #else
 251 #error Cannot cope with DNBIGIT
 252 #endif
 253 #endif
 254         if (sign < 0)
 255             man->_mp_size = -man->_mp_size;
 256     }
 257 }
 258
 259 void
 260 __decodeDouble_2Int (I_ *man_sign, W_ *man_high, W_ *man_low, I_ *exp, StgDouble dbl)
 261 {
 262     /* Do some bit fiddling on IEEE */
 263     unsigned int low, high;             /* assuming 32 bit ints */
 264     int sign, iexp;
 265     union { double d; unsigned int i[2]; } u;   /* assuming 32 bit ints, 64 bit double */
 266
 267     ASSERT(sizeof(unsigned int ) == 4            );
 268     ASSERT(sizeof(dbl          ) == 8            );
 269     ASSERT(sizeof(dbl          ) == SIZEOF_DOUBLE);
 270
 271     u.d = dbl;      /* grab chunks of the double */
 272     low = u.i[L];
 273     high = u.i[H];
 274
 275     if (low == 0 && (high & ~DMSBIT) == 0) {
 276         *man_low = 0;
 277         *man_high = 0;
 278         *exp = 0L;
 279     } else {
 280         iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
 281         sign = high;
 282
 283         high &= DHIGHBIT-1;
 284         if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
 285             high |= DHIGHBIT;
 286         else {
 287             iexp++;
 288             /* A denorm, normalize the mantissa */
 289             while (! (high & DHIGHBIT)) {
 290                 high <<= 1;
 291                 if (low & DMSBIT)
 292                     high++;
 293                 low <<= 1;
 294                 iexp--;
 295             }
 296         }
 297         *exp = (I_) iexp;
 298         *man_low = low;
 299         *man_high = high;
 300         *man_sign = (sign < 0) ? -1 : 1;
 301     }
 302 }
 303
 304 void
 305 __decodeFloat (MP_INT *man, I_ *exp, StgFloat flt)
 306 {
 307     /* Do some bit fiddling on IEEE */
 308     int high, sign;                 /* assuming 32 bit ints */
 309     union { float f; int i; } u;    /* assuming 32 bit float and int */
 310
 311     ASSERT(sizeof(int          ) == 4            );
 312     ASSERT(sizeof(flt          ) == SIZEOF_FLOAT );
 313     ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
 314     ASSERT(FNBIGIT*SIZEOF_LIMB_T >= SIZEOF_FLOAT );
 315
 316     u.f = flt;      /* grab the float */
 317     high = u.i;
 318
 319     /* we know the MP_INT* passed in has size zero, so we realloc
 320         no matter what.
 321     */
 322     man->_mp_alloc = FNBIGIT;
 323
 324     if ((high & ~FMSBIT) == 0) {
 325         man->_mp_size = 0;
 326         *exp = 0;
 327     } else {
 328         man->_mp_size = FNBIGIT;
 329         *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
 330         sign = high;
 331
 332         high &= FHIGHBIT-1;
 333         if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
 334             high |= FHIGHBIT;
 335         else {
 336             (*exp)++;
 337             /* A denorm, normalize the mantissa */
 338             while (! (high & FHIGHBIT)) {
 339                 high <<= 1;
 340                 (*exp)--;
 341             }
 342         }
 343 #if FNBIGIT == 1
 344         man->_mp_d[0] = (mp_limb_t)high;
 345 #else
 346 #error Cannot cope with FNBIGIT
 347 #endif
 348         if (sign < 0)
 349             man->_mp_size = -man->_mp_size;
 350     }
 351 }
 352
 353 /* Convenient union types for checking the layout of IEEE 754 types -
 354    based on defs in GNU libc <ieee754.h>
 355 */
 356
 357 void
 358 __decodeFloat_Int (I_ *man, I_ *exp, StgFloat flt)
 359 {
 360     /* Do some bit fiddling on IEEE */
 361     int high, sign;                 /* assuming 32 bit ints */
 362     union { float f; int i; } u;    /* assuming 32 bit float and int */
 363
 364     ASSERT(sizeof(int          ) == 4            );
 365     ASSERT(sizeof(flt          ) == 4            );
 366     ASSERT(sizeof(flt          ) == SIZEOF_FLOAT );
 367
 368     u.f = flt;      /* grab the float */
 369     high = u.i;
 370
 371     if ((high & ~FMSBIT) == 0) {
 372         *man = 0;
 373         *exp = 0;
 374     } else {
 375         *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
 376         sign = high;
 377
 378         high &= FHIGHBIT-1;
 379         if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
 380             high |= FHIGHBIT;
 381         else {
 382             (*exp)++;
 383             /* A denorm, normalize the mantissa */
 384             while (! (high & FHIGHBIT)) {
 385                 high <<= 1;
 386                 (*exp)--;
 387             }
 388         }
 389         *man = high;
 390         if (sign < 0)
 391             *man = - *man;
 392     }
 393 }
 394
 395 union stg_ieee754_flt
 396 {
 397    float f;
 398    struct {
 399
 400 #if WORDS_BIGENDIAN
 401         unsigned int negative:1;
 402         unsigned int exponent:8;
 403         unsigned int mantissa:23;
 404 #else
 405         unsigned int mantissa:23;
 406         unsigned int exponent:8;
 407         unsigned int negative:1;
 408 #endif
 409    } ieee;
 410    struct {
 411
 412 #if WORDS_BIGENDIAN
 413         unsigned int negative:1;
 414         unsigned int exponent:8;
 415         unsigned int quiet_nan:1;
 416         unsigned int mantissa:22;
 417 #else
 418         unsigned int mantissa:22;
 419         unsigned int quiet_nan:1;
 420         unsigned int exponent:8;
 421         unsigned int negative:1;
 422 #endif
 423    } ieee_nan;
 424 };
 425
 426 /*
 427
 428  To recap, here's the representation of a double precision
 429  IEEE floating point number:
 430
 431  sign         63           sign bit (0==positive, 1==negative)
 432  exponent     62-52        exponent (biased by 1023)
 433  fraction     51-0         fraction (bits to right of binary point)
 434 */
 435
 436 union stg_ieee754_dbl
 437 {
 438    double d;
 439    struct {
 440
 441 #if WORDS_BIGENDIAN
 442         unsigned int negative:1;
 443         unsigned int exponent:11;
 444         unsigned int mantissa0:20;
 445         unsigned int mantissa1:32;
 446 #else
 447 #if FLOAT_WORDS_BIGENDIAN
 448         unsigned int mantissa0:20;
 449         unsigned int exponent:11;
 450         unsigned int negative:1;
 451         unsigned int mantissa1:32;
 452 #else
 453         unsigned int mantissa1:32;
 454         unsigned int mantissa0:20;
 455         unsigned int exponent:11;
 456         unsigned int negative:1;
 457 #endif
 458 #endif
 459    } ieee;
 460     /* This format makes it easier to see if a NaN is a signalling NaN.  */
 461    struct {
 462
 463 #if WORDS_BIGENDIAN
 464         unsigned int negative:1;
 465         unsigned int exponent:11;
 466         unsigned int quiet_nan:1;
 467         unsigned int mantissa0:19;
 468         unsigned int mantissa1:32;
 469 #else
 470 #if FLOAT_WORDS_BIGENDIAN
 471         unsigned int mantissa0:19;
 472         unsigned int quiet_nan:1;
 473         unsigned int exponent:11;
 474         unsigned int negative:1;
 475         unsigned int mantissa1:32;
 476 #else
 477         unsigned int mantissa1:32;
 478         unsigned int mantissa0:19;
 479         unsigned int quiet_nan:1;
 480         unsigned int exponent:11;
 481         unsigned int negative:1;
 482 #endif
 483 #endif
 484    } ieee_nan;
 485 };
 486
 487 /*
 488  * Predicates for testing for extreme IEEE fp values. Used
 489  * by the bytecode evaluator and the Prelude.
 490  *
 491  */
 492
 493 /* In case you don't suppport IEEE, you'll just get dummy defs.. */
 494 #ifdef IEEE_FLOATING_POINT
 495
 496 StgInt
 497 isDoubleNaN(StgDouble d)
 498 {
 499   union stg_ieee754_dbl u;
 500
 501   u.d = d;
 502
 503   return (
 504     u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&  /* Is the exponent all ones? */
 505     (u.ieee.mantissa0 != 0 || u.ieee.mantissa1 != 0)
 506         /* and the mantissa non-zero? */
 507     );
 508 }
 509
 510 StgInt
 511 isDoubleInfinite(StgDouble d)
 512 {
 513     union stg_ieee754_dbl u;
 514
 515     u.d = d;
 516
 517     /* Inf iff exponent is all ones, mantissa all zeros */
 518     return (
 519         u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&
 520         u.ieee.mantissa0 == 0                   &&
 521         u.ieee.mantissa1 == 0
 522       );
 523 }
 524
 525 StgInt
 526 isDoubleDenormalized(StgDouble d)
 527 {
 528     union stg_ieee754_dbl u;
 529
 530     u.d = d;
 531
 532     /* A (single/double/quad) precision floating point number
 533        is denormalised iff:
 534         - exponent is zero
 535         - mantissa is non-zero.
 536         - (don't care about setting of sign bit.)
 537
 538     */
 539     return (
 540         u.ieee.exponent  == 0 &&
 541         (u.ieee.mantissa0 != 0 ||
 542          u.ieee.mantissa1 != 0)
 543       );
 544
 545 }
 546
 547 StgInt
 548 isDoubleNegativeZero(StgDouble d)
 549 {
 550     union stg_ieee754_dbl u;
 551
 552     u.d = d;
 553     /* sign (bit 63) set (only) => negative zero */
 554
 555     return (
 556         u.ieee.negative  == 1 &&
 557         u.ieee.exponent  == 0 &&
 558         u.ieee.mantissa0 == 0 &&
 559         u.ieee.mantissa1 == 0);
 560 }
 561
 562 /* Same tests, this time for StgFloats. */
 563
 564 /*
 565  To recap, here's the representation of a single precision
 566  IEEE floating point number:
 567
 568  sign         31           sign bit (0 == positive, 1 == negative)
 569  exponent     30-23        exponent (biased by 127)
 570  fraction     22-0         fraction (bits to right of binary point)
 571 */
 572
 573
 574 StgInt
 575 isFloatNaN(StgFloat f)
 576 {
 577     union stg_ieee754_flt u;
 578     u.f = f;
 579
 580    /* Floating point NaN iff exponent is all ones, mantissa is
 581       non-zero (but see below.) */
 582    return (
 583         u.ieee.exponent == 255 /* 2^8 - 1 */ &&
 584         u.ieee.mantissa != 0);
 585 }
 586
 587 StgInt
 588 isFloatInfinite(StgFloat f)
 589 {
 590     union stg_ieee754_flt u;
 591     u.f = f;
 592
 593     /* A float is Inf iff exponent is max (all ones),
 594        and mantissa is min(all zeros.) */
 595     return (
 596         u.ieee.exponent == 255 /* 2^8 - 1 */ &&
 597         u.ieee.mantissa == 0);
 598 }
 599
 600 StgInt
 601 isFloatDenormalized(StgFloat f)
 602 {
 603     union stg_ieee754_flt u;
 604     u.f = f;
 605
 606     /* A (single/double/quad) precision floating point number
 607        is denormalised iff:
 608         - exponent is zero
 609         - mantissa is non-zero.
 610         - (don't care about setting of sign bit.)
 611
 612     */
 613     return (
 614         u.ieee.exponent == 0 &&
 615         u.ieee.mantissa != 0);
 616 }
 617
 618 StgInt
 619 isFloatNegativeZero(StgFloat f)
 620 {
 621     union stg_ieee754_flt u;
 622     u.f = f;
 623
 624     /* sign (bit 31) set (only) => negative zero */
 625     return (
 626         u.ieee.negative      &&
 627         u.ieee.exponent == 0 &&
 628         u.ieee.mantissa == 0);
 629 }
 630
 631 #else /* ! IEEE_FLOATING_POINT */
 632
 633 /* Dummy definitions of predicates - they all return false */
 634 StgInt isDoubleNaN(d) StgDouble d; { return 0; }
 635 StgInt isDoubleInfinite(d) StgDouble d; { return 0; }
 636 StgInt isDoubleDenormalized(d) StgDouble d; { return 0; }
 637 StgInt isDoubleNegativeZero(d) StgDouble d; { return 0; }
 638 StgInt isFloatNaN(f) StgFloat f; { return 0; }
 639 StgInt isFloatInfinite(f) StgFloat f; { return 0; }
 640 StgInt isFloatDenormalized(f) StgFloat f; { return 0; }
 641 StgInt isFloatNegativeZero(f) StgFloat f; { return 0; }
 642
 643 #endif /* ! IEEE_FLOATING_POINT */