rts/StgPrimFloat.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) Lennart Augustsson
   4  * (c) The GHC Team, 1998-2000
   5  *
   6  * Miscellaneous support for floating-point primitives
   7  *
   8  * ---------------------------------------------------------------------------*/
   9
  10 #include "PosixSource.h"
  11 #include "Rts.h"
  12
  13 #include <math.h>
  14
  15 /*
  16  * Encoding and decoding Doubles.  Code based on the HBC code
  17  * (lib/fltcode.c).
  18  */
  19
  20 #ifdef _SHORT_LIMB
  21 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_INT
  22 #else
  23 #ifdef _LONG_LONG_LIMB
  24 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG_LONG
  25 #else
  26 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG
  27 #endif
  28 #endif
  29
  30 #if SIZEOF_LIMB_T == 4
  31 #define GMP_BASE 4294967296.0
  32 #elif SIZEOF_LIMB_T == 8
  33 #define GMP_BASE 18446744073709551616.0
  34 #else
  35 #error Cannot cope with SIZEOF_LIMB_T -- please add definition of GMP_BASE
  36 #endif
  37
  38 #define DNBIGIT  ((SIZEOF_DOUBLE+SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
  39 #define FNBIGIT  ((SIZEOF_FLOAT +SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
  40
  41 #if IEEE_FLOATING_POINT
  42 #define MY_DMINEXP  ((DBL_MIN_EXP) - (DBL_MANT_DIG) - 1)
  43 /* DMINEXP is defined in values.h on Linux (for example) */
  44 #define DHIGHBIT 0x00100000
  45 #define DMSBIT   0x80000000
  46
  47 #define MY_FMINEXP  ((FLT_MIN_EXP) - (FLT_MANT_DIG) - 1)
  48 #define FHIGHBIT 0x00800000
  49 #define FMSBIT   0x80000000
  50 #endif
  51
  52 #if defined(WORDS_BIGENDIAN) || defined(FLOAT_WORDS_BIGENDIAN)
  53 #define L 1
  54 #define H 0
  55 #else
  56 #define L 0
  57 #define H 1
  58 #endif
  59
  60 #define __abs(a)                (( (a) >= 0 ) ? (a) : (-(a)))
  61
  62 StgDouble
  63 __encodeDouble (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
  64 {
  65     StgDouble r;
  66     const mp_limb_t *const arr = (const mp_limb_t *)ba;
  67     I_ i;
  68
  69     /* Convert MP_INT to a double; knows a lot about internal rep! */
  70     for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
  71         r = (r * GMP_BASE) + arr[i];
  72
  73     /* Now raise to the exponent */
  74     if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
  75         r = ldexp(r, e);
  76
  77     /* sign is encoded in the size */
  78     if (size < 0)
  79         r = -r;
  80
  81     return r;
  82 }
  83
  84 StgDouble
  85 __2Int_encodeDouble (I_ j_high, I_ j_low, I_ e)
  86 {
  87   StgDouble r;
  88
  89   /* assuming 32 bit ints */
  90   ASSERT(sizeof(int          ) == 4            );
  91
  92   r = (StgDouble)((unsigned int)j_high);
  93   r *= 4294967296.0; /* exp2f(32); */
  94   r += (StgDouble)((unsigned int)j_low);
  95
  96   /* Now raise to the exponent */
  97   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
  98     r = ldexp(r, e);
  99
 100   /* sign is encoded in the size */
 101   if (j_high < 0)
 102     r = -r;
 103
 104   return r;
 105 }
 106
 107 /* Special version for words */
 108 StgDouble
 109 __word_encodeDouble (W_ j, I_ e)
 110 {
 111   StgDouble r;
 112
 113   r = (StgDouble)j;
 114
 115   /* Now raise to the exponent */
 116   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 117     r = ldexp(r, e);
 118
 119   return r;
 120 }
 121
 122 /* Special version for small Integers */
 123 StgDouble
 124 __int_encodeDouble (I_ j, I_ e)
 125 {
 126   StgDouble r;
 127
 128   r = (StgDouble)__abs(j);
 129
 130   /* Now raise to the exponent */
 131   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 132     r = ldexp(r, e);
 133
 134   /* sign is encoded in the size */
 135   if (j < 0)
 136     r = -r;
 137
 138   return r;
 139 }
 140
 141 StgFloat
 142 __encodeFloat (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
 143 {
 144     StgFloat r;
 145     const mp_limb_t *arr = (const mp_limb_t *)ba;
 146     I_ i;
 147
 148     /* Convert MP_INT to a float; knows a lot about internal rep! */
 149     for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
 150         r = (r * GMP_BASE) + arr[i];
 151
 152     /* Now raise to the exponent */
 153     if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 154         r = ldexp(r, e);
 155
 156     /* sign is encoded in the size */
 157     if (size < 0)
 158         r = -r;
 159
 160     return r;
 161 }
 162
 163 /* Special version for small Integers */
 164 StgFloat
 165 __int_encodeFloat (I_ j, I_ e)
 166 {
 167   StgFloat r;
 168
 169   r = (StgFloat)__abs(j);
 170
 171   /* Now raise to the exponent */
 172   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 173     r = ldexp(r, e);
 174
 175   /* sign is encoded in the size */
 176   if (j < 0)
 177     r = -r;
 178
 179   return r;
 180 }
 181
 182 /* Special version for small positive Integers */
 183 StgFloat
 184 __word_encodeFloat (W_ j, I_ e)
 185 {
 186   StgFloat r;
 187
 188   r = (StgFloat)j;
 189
 190   /* Now raise to the exponent */
 191   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 192     r = ldexp(r, e);
 193
 194   return r;
 195 }
 196
 197 /* This only supports IEEE floating point */
 198
 199 void
 200 __decodeDouble (MP_INT *man, I_ *exp, StgDouble dbl)
 201 {
 202     /* Do some bit fiddling on IEEE */
 203     unsigned int low, high;             /* assuming 32 bit ints */
 204     int sign, iexp;
 205     union { double d; unsigned int i[2]; } u;   /* assuming 32 bit ints, 64 bit double */
 206
 207     ASSERT(sizeof(unsigned int ) == 4            );
 208     ASSERT(sizeof(dbl          ) == SIZEOF_DOUBLE);
 209     ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
 210     ASSERT(DNBIGIT*SIZEOF_LIMB_T >= SIZEOF_DOUBLE);
 211
 212     u.d = dbl;      /* grab chunks of the double */
 213     low = u.i[L];
 214     high = u.i[H];
 215
 216     /* we know the MP_INT* passed in has size zero, so we realloc
 217         no matter what.
 218     */
 219     man->_mp_alloc = DNBIGIT;
 220
 221     if (low == 0 && (high & ~DMSBIT) == 0) {
 222         man->_mp_size = 0;
 223         *exp = 0L;
 224     } else {
 225         man->_mp_size = DNBIGIT;
 226         iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
 227         sign = high;
 228
 229         high &= DHIGHBIT-1;
 230         if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
 231             high |= DHIGHBIT;
 232         else {
 233             iexp++;
 234             /* A denorm, normalize the mantissa */
 235             while (! (high & DHIGHBIT)) {
 236                 high <<= 1;
 237                 if (low & DMSBIT)
 238                     high++;
 239                 low <<= 1;
 240                 iexp--;
 241             }
 242         }
 243         *exp = (I_) iexp;
 244 #if DNBIGIT == 2
 245         man->_mp_d[0] = (mp_limb_t)low;
 246         man->_mp_d[1] = (mp_limb_t)high;
 247 #else
 248 #if DNBIGIT == 1
 249         man->_mp_d[0] = ((mp_limb_t)high) << 32 | (mp_limb_t)low;
 250 #else
 251 #error Cannot cope with DNBIGIT
 252 #endif
 253 #endif
 254         if (sign < 0)
 255             man->_mp_size = -man->_mp_size;
 256     }
 257 }
 258
 259 void
 260 __decodeDouble_2Int (I_ *man_sign, W_ *man_high, W_ *man_low, I_ *exp, StgDouble dbl)
 261 {
 262     /* Do some bit fiddling on IEEE */
 263     unsigned int low, high;             /* assuming 32 bit ints */
 264     int sign, iexp;
 265     union { double d; unsigned int i[2]; } u;   /* assuming 32 bit ints, 64 bit double */
 266
 267     ASSERT(sizeof(unsigned int ) == 4            );
 268     ASSERT(sizeof(dbl          ) == 8            );
 269     ASSERT(sizeof(dbl          ) == SIZEOF_DOUBLE);
 270
 271     u.d = dbl;      /* grab chunks of the double */
 272     low = u.i[L];
 273     high = u.i[H];
 274
 275     if (low == 0 && (high & ~DMSBIT) == 0) {
 276         *man_low = 0;
 277         *man_high = 0;
 278         *exp = 0L;
 279     } else {
 280         iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
 281         sign = high;
 282
 283         high &= DHIGHBIT-1;
 284         if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
 285             high |= DHIGHBIT;
 286         else {
 287             iexp++;
 288             /* A denorm, normalize the mantissa */
 289             while (! (high & DHIGHBIT)) {
 290                 high <<= 1;
 291                 if (low & DMSBIT)
 292                     high++;
 293                 low <<= 1;
 294                 iexp--;
 295             }
 296         }
 297         *exp = (I_) iexp;
 298         *man_low = low;
 299         *man_high = high;
 300         *man_sign = (sign < 0) ? -1 : 1;
 301     }
 302 }
 303
 304 /* Convenient union types for checking the layout of IEEE 754 types -
 305    based on defs in GNU libc <ieee754.h>
 306 */
 307
 308 void
 309 __decodeFloat_Int (I_ *man, I_ *exp, StgFloat flt)
 310 {
 311     /* Do some bit fiddling on IEEE */
 312     int high, sign;                 /* assuming 32 bit ints */
 313     union { float f; int i; } u;    /* assuming 32 bit float and int */
 314
 315     ASSERT(sizeof(int          ) == 4            );
 316     ASSERT(sizeof(flt          ) == 4            );
 317     ASSERT(sizeof(flt          ) == SIZEOF_FLOAT );
 318
 319     u.f = flt;      /* grab the float */
 320     high = u.i;
 321
 322     if ((high & ~FMSBIT) == 0) {
 323         *man = 0;
 324         *exp = 0;
 325     } else {
 326         *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
 327         sign = high;
 328
 329         high &= FHIGHBIT-1;
 330         if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
 331             high |= FHIGHBIT;
 332         else {
 333             (*exp)++;
 334             /* A denorm, normalize the mantissa */
 335             while (! (high & FHIGHBIT)) {
 336                 high <<= 1;
 337                 (*exp)--;
 338             }
 339         }
 340         *man = high;
 341         if (sign < 0)
 342             *man = - *man;
 343     }
 344 }
 345
 346 union stg_ieee754_flt
 347 {
 348    float f;
 349    struct {
 350
 351 #if WORDS_BIGENDIAN
 352         unsigned int negative:1;
 353         unsigned int exponent:8;
 354         unsigned int mantissa:23;
 355 #else
 356         unsigned int mantissa:23;
 357         unsigned int exponent:8;
 358         unsigned int negative:1;
 359 #endif
 360    } ieee;
 361    struct {
 362
 363 #if WORDS_BIGENDIAN
 364         unsigned int negative:1;
 365         unsigned int exponent:8;
 366         unsigned int quiet_nan:1;
 367         unsigned int mantissa:22;
 368 #else
 369         unsigned int mantissa:22;
 370         unsigned int quiet_nan:1;
 371         unsigned int exponent:8;
 372         unsigned int negative:1;
 373 #endif
 374    } ieee_nan;
 375 };
 376
 377 /*
 378
 379  To recap, here's the representation of a double precision
 380  IEEE floating point number:
 381
 382  sign         63           sign bit (0==positive, 1==negative)
 383  exponent     62-52        exponent (biased by 1023)
 384  fraction     51-0         fraction (bits to right of binary point)
 385 */
 386
 387 union stg_ieee754_dbl
 388 {
 389    double d;
 390    struct {
 391
 392 #if WORDS_BIGENDIAN
 393         unsigned int negative:1;
 394         unsigned int exponent:11;
 395         unsigned int mantissa0:20;
 396         unsigned int mantissa1:32;
 397 #else
 398 #if FLOAT_WORDS_BIGENDIAN
 399         unsigned int mantissa0:20;
 400         unsigned int exponent:11;
 401         unsigned int negative:1;
 402         unsigned int mantissa1:32;
 403 #else
 404         unsigned int mantissa1:32;
 405         unsigned int mantissa0:20;
 406         unsigned int exponent:11;
 407         unsigned int negative:1;
 408 #endif
 409 #endif
 410    } ieee;
 411     /* This format makes it easier to see if a NaN is a signalling NaN.  */
 412    struct {
 413
 414 #if WORDS_BIGENDIAN
 415         unsigned int negative:1;
 416         unsigned int exponent:11;
 417         unsigned int quiet_nan:1;
 418         unsigned int mantissa0:19;
 419         unsigned int mantissa1:32;
 420 #else
 421 #if FLOAT_WORDS_BIGENDIAN
 422         unsigned int mantissa0:19;
 423         unsigned int quiet_nan:1;
 424         unsigned int exponent:11;
 425         unsigned int negative:1;
 426         unsigned int mantissa1:32;
 427 #else
 428         unsigned int mantissa1:32;
 429         unsigned int mantissa0:19;
 430         unsigned int quiet_nan:1;
 431         unsigned int exponent:11;
 432         unsigned int negative:1;
 433 #endif
 434 #endif
 435    } ieee_nan;
 436 };
 437
 438 /*
 439  * Predicates for testing for extreme IEEE fp values. Used
 440  * by the bytecode evaluator and the Prelude.
 441  *
 442  */
 443
 444 /* In case you don't suppport IEEE, you'll just get dummy defs.. */
 445 #ifdef IEEE_FLOATING_POINT
 446
 447 StgInt
 448 isDoubleNaN(StgDouble d)
 449 {
 450   union stg_ieee754_dbl u;
 451
 452   u.d = d;
 453
 454   return (
 455     u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&  /* Is the exponent all ones? */
 456     (u.ieee.mantissa0 != 0 || u.ieee.mantissa1 != 0)
 457         /* and the mantissa non-zero? */
 458     );
 459 }
 460
 461 StgInt
 462 isDoubleInfinite(StgDouble d)
 463 {
 464     union stg_ieee754_dbl u;
 465
 466     u.d = d;
 467
 468     /* Inf iff exponent is all ones, mantissa all zeros */
 469     return (
 470         u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&
 471         u.ieee.mantissa0 == 0                   &&
 472         u.ieee.mantissa1 == 0
 473       );
 474 }
 475
 476 StgInt
 477 isDoubleDenormalized(StgDouble d)
 478 {
 479     union stg_ieee754_dbl u;
 480
 481     u.d = d;
 482
 483     /* A (single/double/quad) precision floating point number
 484        is denormalised iff:
 485         - exponent is zero
 486         - mantissa is non-zero.
 487         - (don't care about setting of sign bit.)
 488
 489     */
 490     return (
 491         u.ieee.exponent  == 0 &&
 492         (u.ieee.mantissa0 != 0 ||
 493          u.ieee.mantissa1 != 0)
 494       );
 495
 496 }
 497
 498 StgInt
 499 isDoubleNegativeZero(StgDouble d)
 500 {
 501     union stg_ieee754_dbl u;
 502
 503     u.d = d;
 504     /* sign (bit 63) set (only) => negative zero */
 505
 506     return (
 507         u.ieee.negative  == 1 &&
 508         u.ieee.exponent  == 0 &&
 509         u.ieee.mantissa0 == 0 &&
 510         u.ieee.mantissa1 == 0);
 511 }
 512
 513 /* Same tests, this time for StgFloats. */
 514
 515 /*
 516  To recap, here's the representation of a single precision
 517  IEEE floating point number:
 518
 519  sign         31           sign bit (0 == positive, 1 == negative)
 520  exponent     30-23        exponent (biased by 127)
 521  fraction     22-0         fraction (bits to right of binary point)
 522 */
 523
 524
 525 StgInt
 526 isFloatNaN(StgFloat f)
 527 {
 528     union stg_ieee754_flt u;
 529     u.f = f;
 530
 531    /* Floating point NaN iff exponent is all ones, mantissa is
 532       non-zero (but see below.) */
 533    return (
 534         u.ieee.exponent == 255 /* 2^8 - 1 */ &&
 535         u.ieee.mantissa != 0);
 536 }
 537
 538 StgInt
 539 isFloatInfinite(StgFloat f)
 540 {
 541     union stg_ieee754_flt u;
 542     u.f = f;
 543
 544     /* A float is Inf iff exponent is max (all ones),
 545        and mantissa is min(all zeros.) */
 546     return (
 547         u.ieee.exponent == 255 /* 2^8 - 1 */ &&
 548         u.ieee.mantissa == 0);
 549 }
 550
 551 StgInt
 552 isFloatDenormalized(StgFloat f)
 553 {
 554     union stg_ieee754_flt u;
 555     u.f = f;
 556
 557     /* A (single/double/quad) precision floating point number
 558        is denormalised iff:
 559         - exponent is zero
 560         - mantissa is non-zero.
 561         - (don't care about setting of sign bit.)
 562
 563     */
 564     return (
 565         u.ieee.exponent == 0 &&
 566         u.ieee.mantissa != 0);
 567 }
 568
 569 StgInt
 570 isFloatNegativeZero(StgFloat f)
 571 {
 572     union stg_ieee754_flt u;
 573     u.f = f;
 574
 575     /* sign (bit 31) set (only) => negative zero */
 576     return (
 577         u.ieee.negative      &&
 578         u.ieee.exponent == 0 &&
 579         u.ieee.mantissa == 0);
 580 }
 581
 582 #else /* ! IEEE_FLOATING_POINT */
 583
 584 /* Dummy definitions of predicates - they all return false */
 585 StgInt isDoubleNaN(d) StgDouble d; { return 0; }
 586 StgInt isDoubleInfinite(d) StgDouble d; { return 0; }
 587 StgInt isDoubleDenormalized(d) StgDouble d; { return 0; }
 588 StgInt isDoubleNegativeZero(d) StgDouble d; { return 0; }
 589 StgInt isFloatNaN(f) StgFloat f; { return 0; }
 590 StgInt isFloatInfinite(f) StgFloat f; { return 0; }
 591 StgInt isFloatDenormalized(f) StgFloat f; { return 0; }
 592 StgInt isFloatNegativeZero(f) StgFloat f; { return 0; }
 593
 594 #endif /* ! IEEE_FLOATING_POINT */