rts/StgPrimFloat.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2000
   4  *
   5  * Miscellaneous support for floating-point primitives
   6  *
   7  * ---------------------------------------------------------------------------*/
   8
   9 #include "PosixSource.h"
  10 #include "Rts.h"
  11
  12 #include <math.h>
  13
  14 /*
  15  * Encoding and decoding Doubles.  Code based on the HBC code
  16  * (lib/fltcode.c).
  17  */
  18
  19 #ifdef _SHORT_LIMB
  20 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_INT
  21 #else
  22 #ifdef _LONG_LONG_LIMB
  23 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG_LONG
  24 #else
  25 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG
  26 #endif
  27 #endif
  28
  29 #if SIZEOF_LIMB_T == 4
  30 #define GMP_BASE 4294967296.0
  31 #elif SIZEOF_LIMB_T == 8
  32 #define GMP_BASE 18446744073709551616.0
  33 #else
  34 #error Cannot cope with SIZEOF_LIMB_T -- please add definition of GMP_BASE
  35 #endif
  36
  37 #define DNBIGIT  ((SIZEOF_DOUBLE+SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
  38 #define FNBIGIT  ((SIZEOF_FLOAT +SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
  39
  40 #if IEEE_FLOATING_POINT
  41 #define MY_DMINEXP  ((DBL_MIN_EXP) - (DBL_MANT_DIG) - 1)
  42 /* DMINEXP is defined in values.h on Linux (for example) */
  43 #define DHIGHBIT 0x00100000
  44 #define DMSBIT   0x80000000
  45
  46 #define MY_FMINEXP  ((FLT_MIN_EXP) - (FLT_MANT_DIG) - 1)
  47 #define FHIGHBIT 0x00800000
  48 #define FMSBIT   0x80000000
  49 #endif
  50
  51 #if defined(WORDS_BIGENDIAN) || defined(FLOAT_WORDS_BIGENDIAN)
  52 #define L 1
  53 #define H 0
  54 #else
  55 #define L 0
  56 #define H 1
  57 #endif
  58
  59 #define __abs(a)                (( (a) >= 0 ) ? (a) : (-(a)))
  60
  61 StgDouble
  62 __encodeDouble (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
  63 {
  64     StgDouble r;
  65     const mp_limb_t *const arr = (const mp_limb_t *)ba;
  66     I_ i;
  67
  68     /* Convert MP_INT to a double; knows a lot about internal rep! */
  69     for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
  70         r = (r * GMP_BASE) + arr[i];
  71
  72     /* Now raise to the exponent */
  73     if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
  74         r = ldexp(r, e);
  75
  76     /* sign is encoded in the size */
  77     if (size < 0)
  78         r = -r;
  79
  80     return r;
  81 }
  82
  83 StgDouble
  84 __2Int_encodeDouble (I_ j_high, I_ j_low, I_ e)
  85 {
  86   StgDouble r;
  87
  88   /* assuming 32 bit ints */
  89   ASSERT(sizeof(int          ) == 4            );
  90
  91   r = (StgDouble)((unsigned int)j_high);
  92   r *= exp2f(32);
  93   r += (StgDouble)((unsigned int)j_low);
  94
  95   /* Now raise to the exponent */
  96   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
  97     r = ldexp(r, e);
  98
  99   /* sign is encoded in the size */
 100   if (j_high < 0)
 101     r = -r;
 102
 103   return r;
 104 }
 105
 106 /* Special version for small Integers */
 107 StgDouble
 108 __int_encodeDouble (I_ j, I_ e)
 109 {
 110   StgDouble r;
 111
 112   r = (StgDouble)__abs(j);
 113
 114   /* Now raise to the exponent */
 115   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 116     r = ldexp(r, e);
 117
 118   /* sign is encoded in the size */
 119   if (j < 0)
 120     r = -r;
 121
 122   return r;
 123 }
 124
 125 StgFloat
 126 __encodeFloat (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
 127 {
 128     StgFloat r;
 129     const mp_limb_t *arr = (const mp_limb_t *)ba;
 130     I_ i;
 131
 132     /* Convert MP_INT to a float; knows a lot about internal rep! */
 133     for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
 134         r = (r * GMP_BASE) + arr[i];
 135
 136     /* Now raise to the exponent */
 137     if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 138         r = ldexp(r, e);
 139
 140     /* sign is encoded in the size */
 141     if (size < 0)
 142         r = -r;
 143
 144     return r;
 145 }
 146
 147 /* Special version for small Integers */
 148 StgFloat
 149 __int_encodeFloat (I_ j, I_ e)
 150 {
 151   StgFloat r;
 152
 153   r = (StgFloat)__abs(j);
 154
 155   /* Now raise to the exponent */
 156   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 157     r = ldexp(r, e);
 158
 159   /* sign is encoded in the size */
 160   if (j < 0)
 161     r = -r;
 162
 163   return r;
 164 }
 165
 166 /* This only supports IEEE floating point */
 167
 168 void
 169 __decodeDouble (MP_INT *man, I_ *exp, StgDouble dbl)
 170 {
 171     /* Do some bit fiddling on IEEE */
 172     unsigned int low, high;             /* assuming 32 bit ints */
 173     int sign, iexp;
 174     union { double d; unsigned int i[2]; } u;   /* assuming 32 bit ints, 64 bit double */
 175
 176     ASSERT(sizeof(unsigned int ) == 4            );
 177     ASSERT(sizeof(dbl          ) == SIZEOF_DOUBLE);
 178     ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
 179     ASSERT(DNBIGIT*SIZEOF_LIMB_T >= SIZEOF_DOUBLE);
 180
 181     u.d = dbl;      /* grab chunks of the double */
 182     low = u.i[L];
 183     high = u.i[H];
 184
 185     /* we know the MP_INT* passed in has size zero, so we realloc
 186         no matter what.
 187     */
 188     man->_mp_alloc = DNBIGIT;
 189
 190     if (low == 0 && (high & ~DMSBIT) == 0) {
 191         man->_mp_size = 0;
 192         *exp = 0L;
 193     } else {
 194         man->_mp_size = DNBIGIT;
 195         iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
 196         sign = high;
 197
 198         high &= DHIGHBIT-1;
 199         if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
 200             high |= DHIGHBIT;
 201         else {
 202             iexp++;
 203             /* A denorm, normalize the mantissa */
 204             while (! (high & DHIGHBIT)) {
 205                 high <<= 1;
 206                 if (low & DMSBIT)
 207                     high++;
 208                 low <<= 1;
 209                 iexp--;
 210             }
 211         }
 212         *exp = (I_) iexp;
 213 #if DNBIGIT == 2
 214         man->_mp_d[0] = (mp_limb_t)low;
 215         man->_mp_d[1] = (mp_limb_t)high;
 216 #else
 217 #if DNBIGIT == 1
 218         man->_mp_d[0] = ((mp_limb_t)high) << 32 | (mp_limb_t)low;
 219 #else
 220 #error Cannot cope with DNBIGIT
 221 #endif
 222 #endif
 223         if (sign < 0)
 224             man->_mp_size = -man->_mp_size;
 225     }
 226 }
 227
 228 void
 229 __decodeDouble_2Int (I_ *man_high, I_ *man_low, I_ *exp, StgDouble dbl)
 230 {
 231     /* Do some bit fiddling on IEEE */
 232     unsigned int low, high;             /* assuming 32 bit ints */
 233     int sign, iexp;
 234     union { double d; unsigned int i[2]; } u;   /* assuming 32 bit ints, 64 bit double */
 235
 236     ASSERT(sizeof(unsigned int ) == 4            );
 237     ASSERT(sizeof(dbl          ) == 8            );
 238     ASSERT(sizeof(dbl          ) == SIZEOF_DOUBLE);
 239
 240     u.d = dbl;      /* grab chunks of the double */
 241     low = u.i[L];
 242     high = u.i[H];
 243
 244     if (low == 0 && (high & ~DMSBIT) == 0) {
 245         *man_low = 0;
 246         *man_high = 0;
 247         *exp = 0L;
 248     } else {
 249         iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
 250         sign = high;
 251
 252         high &= DHIGHBIT-1;
 253         if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
 254             high |= DHIGHBIT;
 255         else {
 256             iexp++;
 257             /* A denorm, normalize the mantissa */
 258             while (! (high & DHIGHBIT)) {
 259                 high <<= 1;
 260                 if (low & DMSBIT)
 261                     high++;
 262                 low <<= 1;
 263                 iexp--;
 264             }
 265         }
 266         *exp = (I_) iexp;
 267         *man_low = low;
 268         *man_high = high;
 269         if (sign < 0) {
 270             *man_high = - *man_high;
 271         }
 272     }
 273 }
 274
 275 void
 276 __decodeFloat (MP_INT *man, I_ *exp, StgFloat flt)
 277 {
 278     /* Do some bit fiddling on IEEE */
 279     int high, sign;                 /* assuming 32 bit ints */
 280     union { float f; int i; } u;    /* assuming 32 bit float and int */
 281
 282     ASSERT(sizeof(int          ) == 4            );
 283     ASSERT(sizeof(flt          ) == SIZEOF_FLOAT );
 284     ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
 285     ASSERT(FNBIGIT*SIZEOF_LIMB_T >= SIZEOF_FLOAT );
 286
 287     u.f = flt;      /* grab the float */
 288     high = u.i;
 289
 290     /* we know the MP_INT* passed in has size zero, so we realloc
 291         no matter what.
 292     */
 293     man->_mp_alloc = FNBIGIT;
 294
 295     if ((high & ~FMSBIT) == 0) {
 296         man->_mp_size = 0;
 297         *exp = 0;
 298     } else {
 299         man->_mp_size = FNBIGIT;
 300         *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
 301         sign = high;
 302
 303         high &= FHIGHBIT-1;
 304         if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
 305             high |= FHIGHBIT;
 306         else {
 307             (*exp)++;
 308             /* A denorm, normalize the mantissa */
 309             while (! (high & FHIGHBIT)) {
 310                 high <<= 1;
 311                 (*exp)--;
 312             }
 313         }
 314 #if FNBIGIT == 1
 315         man->_mp_d[0] = (mp_limb_t)high;
 316 #else
 317 #error Cannot cope with FNBIGIT
 318 #endif
 319         if (sign < 0)
 320             man->_mp_size = -man->_mp_size;
 321     }
 322 }
 323
 324 /* Convenient union types for checking the layout of IEEE 754 types -
 325    based on defs in GNU libc <ieee754.h>
 326 */
 327
 328 void
 329 __decodeFloat_Int (I_ *man, I_ *exp, StgFloat flt)
 330 {
 331     /* Do some bit fiddling on IEEE */
 332     int high, sign;                 /* assuming 32 bit ints */
 333     union { float f; int i; } u;    /* assuming 32 bit float and int */
 334
 335     ASSERT(sizeof(int          ) == 4            );
 336     ASSERT(sizeof(flt          ) == 4            );
 337     ASSERT(sizeof(flt          ) == SIZEOF_FLOAT );
 338
 339     u.f = flt;      /* grab the float */
 340     high = u.i;
 341
 342     if ((high & ~FMSBIT) == 0) {
 343         *man = 0;
 344         *exp = 0;
 345     } else {
 346         *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
 347         sign = high;
 348
 349         high &= FHIGHBIT-1;
 350         if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
 351             high |= FHIGHBIT;
 352         else {
 353             (*exp)++;
 354             /* A denorm, normalize the mantissa */
 355             while (! (high & FHIGHBIT)) {
 356                 high <<= 1;
 357                 (*exp)--;
 358             }
 359         }
 360         *man = high;
 361         if (sign < 0)
 362             *man = - *man;
 363     }
 364 }
 365
 366 union stg_ieee754_flt
 367 {
 368    float f;
 369    struct {
 370
 371 #if WORDS_BIGENDIAN
 372         unsigned int negative:1;
 373         unsigned int exponent:8;
 374         unsigned int mantissa:23;
 375 #else
 376         unsigned int mantissa:23;
 377         unsigned int exponent:8;
 378         unsigned int negative:1;
 379 #endif
 380    } ieee;
 381    struct {
 382
 383 #if WORDS_BIGENDIAN
 384         unsigned int negative:1;
 385         unsigned int exponent:8;
 386         unsigned int quiet_nan:1;
 387         unsigned int mantissa:22;
 388 #else
 389         unsigned int mantissa:22;
 390         unsigned int quiet_nan:1;
 391         unsigned int exponent:8;
 392         unsigned int negative:1;
 393 #endif
 394    } ieee_nan;
 395 };
 396
 397 /*
 398
 399  To recap, here's the representation of a double precision
 400  IEEE floating point number:
 401
 402  sign         63           sign bit (0==positive, 1==negative)
 403  exponent     62-52        exponent (biased by 1023)
 404  fraction     51-0         fraction (bits to right of binary point)
 405 */
 406
 407 union stg_ieee754_dbl
 408 {
 409    double d;
 410    struct {
 411
 412 #if WORDS_BIGENDIAN
 413         unsigned int negative:1;
 414         unsigned int exponent:11;
 415         unsigned int mantissa0:20;
 416         unsigned int mantissa1:32;
 417 #else
 418 #if FLOAT_WORDS_BIGENDIAN
 419         unsigned int mantissa0:20;
 420         unsigned int exponent:11;
 421         unsigned int negative:1;
 422         unsigned int mantissa1:32;
 423 #else
 424         unsigned int mantissa1:32;
 425         unsigned int mantissa0:20;
 426         unsigned int exponent:11;
 427         unsigned int negative:1;
 428 #endif
 429 #endif
 430    } ieee;
 431     /* This format makes it easier to see if a NaN is a signalling NaN.  */
 432    struct {
 433
 434 #if WORDS_BIGENDIAN
 435         unsigned int negative:1;
 436         unsigned int exponent:11;
 437         unsigned int quiet_nan:1;
 438         unsigned int mantissa0:19;
 439         unsigned int mantissa1:32;
 440 #else
 441 #if FLOAT_WORDS_BIGENDIAN
 442         unsigned int mantissa0:19;
 443         unsigned int quiet_nan:1;
 444         unsigned int exponent:11;
 445         unsigned int negative:1;
 446         unsigned int mantissa1:32;
 447 #else
 448         unsigned int mantissa1:32;
 449         unsigned int mantissa0:19;
 450         unsigned int quiet_nan:1;
 451         unsigned int exponent:11;
 452         unsigned int negative:1;
 453 #endif
 454 #endif
 455    } ieee_nan;
 456 };
 457
 458 /*
 459  * Predicates for testing for extreme IEEE fp values. Used
 460  * by the bytecode evaluator and the Prelude.
 461  *
 462  */
 463
 464 /* In case you don't suppport IEEE, you'll just get dummy defs.. */
 465 #ifdef IEEE_FLOATING_POINT
 466
 467 StgInt
 468 isDoubleNaN(StgDouble d)
 469 {
 470   union stg_ieee754_dbl u;
 471
 472   u.d = d;
 473
 474   return (
 475     u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&  /* Is the exponent all ones? */
 476     (u.ieee.mantissa0 != 0 || u.ieee.mantissa1 != 0)
 477         /* and the mantissa non-zero? */
 478     );
 479 }
 480
 481 StgInt
 482 isDoubleInfinite(StgDouble d)
 483 {
 484     union stg_ieee754_dbl u;
 485
 486     u.d = d;
 487
 488     /* Inf iff exponent is all ones, mantissa all zeros */
 489     return (
 490         u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&
 491         u.ieee.mantissa0 == 0                   &&
 492         u.ieee.mantissa1 == 0
 493       );
 494 }
 495
 496 StgInt
 497 isDoubleDenormalized(StgDouble d)
 498 {
 499     union stg_ieee754_dbl u;
 500
 501     u.d = d;
 502
 503     /* A (single/double/quad) precision floating point number
 504        is denormalised iff:
 505         - exponent is zero
 506         - mantissa is non-zero.
 507         - (don't care about setting of sign bit.)
 508
 509     */
 510     return (
 511         u.ieee.exponent  == 0 &&
 512         (u.ieee.mantissa0 != 0 ||
 513          u.ieee.mantissa1 != 0)
 514       );
 515
 516 }
 517
 518 StgInt
 519 isDoubleNegativeZero(StgDouble d)
 520 {
 521     union stg_ieee754_dbl u;
 522
 523     u.d = d;
 524     /* sign (bit 63) set (only) => negative zero */
 525
 526     return (
 527         u.ieee.negative  == 1 &&
 528         u.ieee.exponent  == 0 &&
 529         u.ieee.mantissa0 == 0 &&
 530         u.ieee.mantissa1 == 0);
 531 }
 532
 533 /* Same tests, this time for StgFloats. */
 534
 535 /*
 536  To recap, here's the representation of a single precision
 537  IEEE floating point number:
 538
 539  sign         31           sign bit (0 == positive, 1 == negative)
 540  exponent     30-23        exponent (biased by 127)
 541  fraction     22-0         fraction (bits to right of binary point)
 542 */
 543
 544
 545 StgInt
 546 isFloatNaN(StgFloat f)
 547 {
 548     union stg_ieee754_flt u;
 549     u.f = f;
 550
 551    /* Floating point NaN iff exponent is all ones, mantissa is
 552       non-zero (but see below.) */
 553    return (
 554         u.ieee.exponent == 255 /* 2^8 - 1 */ &&
 555         u.ieee.mantissa != 0);
 556 }
 557
 558 StgInt
 559 isFloatInfinite(StgFloat f)
 560 {
 561     union stg_ieee754_flt u;
 562     u.f = f;
 563
 564     /* A float is Inf iff exponent is max (all ones),
 565        and mantissa is min(all zeros.) */
 566     return (
 567         u.ieee.exponent == 255 /* 2^8 - 1 */ &&
 568         u.ieee.mantissa == 0);
 569 }
 570
 571 StgInt
 572 isFloatDenormalized(StgFloat f)
 573 {
 574     union stg_ieee754_flt u;
 575     u.f = f;
 576
 577     /* A (single/double/quad) precision floating point number
 578        is denormalised iff:
 579         - exponent is zero
 580         - mantissa is non-zero.
 581         - (don't care about setting of sign bit.)
 582
 583     */
 584     return (
 585         u.ieee.exponent == 0 &&
 586         u.ieee.mantissa != 0);
 587 }
 588
 589 StgInt
 590 isFloatNegativeZero(StgFloat f)
 591 {
 592     union stg_ieee754_flt u;
 593     u.f = f;
 594
 595     /* sign (bit 31) set (only) => negative zero */
 596     return (
 597         u.ieee.negative      &&
 598         u.ieee.exponent == 0 &&
 599         u.ieee.mantissa == 0);
 600 }
 601
 602 #else /* ! IEEE_FLOATING_POINT */
 603
 604 /* Dummy definitions of predicates - they all return false */
 605 StgInt isDoubleNaN(d) StgDouble d; { return 0; }
 606 StgInt isDoubleInfinite(d) StgDouble d; { return 0; }
 607 StgInt isDoubleDenormalized(d) StgDouble d; { return 0; }
 608 StgInt isDoubleNegativeZero(d) StgDouble d; { return 0; }
 609 StgInt isFloatNaN(f) StgFloat f; { return 0; }
 610 StgInt isFloatInfinite(f) StgFloat f; { return 0; }
 611 StgInt isFloatDenormalized(f) StgFloat f; { return 0; }
 612 StgInt isFloatNegativeZero(f) StgFloat f; { return 0; }
 613
 614 #endif /* ! IEEE_FLOATING_POINT */