rts/StgPrimFloat.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2000
   4  *
   5  * Miscellaneous support for floating-point primitives
   6  *
   7  * ---------------------------------------------------------------------------*/
   8
   9 #include "PosixSource.h"
  10 #include "Rts.h"
  11
  12 #include <math.h>
  13
  14 /*
  15  * Encoding and decoding Doubles.  Code based on the HBC code
  16  * (lib/fltcode.c).
  17  */
  18
  19 #ifdef _SHORT_LIMB
  20 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_INT
  21 #else
  22 #ifdef _LONG_LONG_LIMB
  23 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG_LONG
  24 #else
  25 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG
  26 #endif
  27 #endif
  28
  29 #if SIZEOF_LIMB_T == 4
  30 #define GMP_BASE 4294967296.0
  31 #elif SIZEOF_LIMB_T == 8
  32 #define GMP_BASE 18446744073709551616.0
  33 #else
  34 #error Cannot cope with SIZEOF_LIMB_T -- please add definition of GMP_BASE
  35 #endif
  36
  37 #define DNBIGIT  ((SIZEOF_DOUBLE+SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
  38 #define FNBIGIT  ((SIZEOF_FLOAT +SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
  39
  40 #if IEEE_FLOATING_POINT
  41 #define MY_DMINEXP  ((DBL_MIN_EXP) - (DBL_MANT_DIG) - 1)
  42 /* DMINEXP is defined in values.h on Linux (for example) */
  43 #define DHIGHBIT 0x00100000
  44 #define DMSBIT   0x80000000
  45
  46 #define MY_FMINEXP  ((FLT_MIN_EXP) - (FLT_MANT_DIG) - 1)
  47 #define FHIGHBIT 0x00800000
  48 #define FMSBIT   0x80000000
  49 #endif
  50
  51 #ifdef WORDS_BIGENDIAN
  52 #define L 1
  53 #define H 0
  54 #else
  55 #define L 0
  56 #define H 1
  57 #endif
  58
  59 #define __abs(a)                (( (a) >= 0 ) ? (a) : (-(a)))
  60
  61 StgDouble
  62 __encodeDouble (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
  63 {
  64     StgDouble r;
  65     const mp_limb_t *const arr = (const mp_limb_t *)ba;
  66     I_ i;
  67
  68     /* Convert MP_INT to a double; knows a lot about internal rep! */
  69     for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
  70         r = (r * GMP_BASE) + arr[i];
  71
  72     /* Now raise to the exponent */
  73     if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
  74         r = ldexp(r, e);
  75
  76     /* sign is encoded in the size */
  77     if (size < 0)
  78         r = -r;
  79
  80     return r;
  81 }
  82
  83 /* Special version for small Integers */
  84 StgDouble
  85 __int_encodeDouble (I_ j, I_ e)
  86 {
  87   StgDouble r;
  88
  89   r = (StgDouble)__abs(j);
  90
  91   /* Now raise to the exponent */
  92   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
  93     r = ldexp(r, e);
  94
  95   /* sign is encoded in the size */
  96   if (j < 0)
  97     r = -r;
  98
  99   return r;
 100 }
 101
 102 StgFloat
 103 __encodeFloat (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
 104 {
 105     StgFloat r;
 106     const mp_limb_t *arr = (const mp_limb_t *)ba;
 107     I_ i;
 108
 109     /* Convert MP_INT to a float; knows a lot about internal rep! */
 110     for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
 111         r = (r * GMP_BASE) + arr[i];
 112
 113     /* Now raise to the exponent */
 114     if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 115         r = ldexp(r, e);
 116
 117     /* sign is encoded in the size */
 118     if (size < 0)
 119         r = -r;
 120
 121     return r;
 122 }
 123
 124 /* Special version for small Integers */
 125 StgFloat
 126 __int_encodeFloat (I_ j, I_ e)
 127 {
 128   StgFloat r;
 129
 130   r = (StgFloat)__abs(j);
 131
 132   /* Now raise to the exponent */
 133   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 134     r = ldexp(r, e);
 135
 136   /* sign is encoded in the size */
 137   if (j < 0)
 138     r = -r;
 139
 140   return r;
 141 }
 142
 143 /* This only supports IEEE floating point */
 144
 145 void
 146 __decodeDouble (MP_INT *man, I_ *exp, StgDouble dbl)
 147 {
 148     /* Do some bit fiddling on IEEE */
 149     unsigned int low, high;             /* assuming 32 bit ints */
 150     int sign, iexp;
 151     union { double d; unsigned int i[2]; } u;   /* assuming 32 bit ints, 64 bit double */
 152
 153     ASSERT(sizeof(unsigned int ) == 4            );
 154     ASSERT(sizeof(dbl          ) == SIZEOF_DOUBLE);
 155     ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
 156     ASSERT(DNBIGIT*SIZEOF_LIMB_T >= SIZEOF_DOUBLE);
 157
 158     u.d = dbl;      /* grab chunks of the double */
 159     low = u.i[L];
 160     high = u.i[H];
 161
 162     /* we know the MP_INT* passed in has size zero, so we realloc
 163         no matter what.
 164     */
 165     man->_mp_alloc = DNBIGIT;
 166
 167     if (low == 0 && (high & ~DMSBIT) == 0) {
 168         man->_mp_size = 0;
 169         *exp = 0L;
 170     } else {
 171         man->_mp_size = DNBIGIT;
 172         iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
 173         sign = high;
 174
 175         high &= DHIGHBIT-1;
 176         if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
 177             high |= DHIGHBIT;
 178         else {
 179             iexp++;
 180             /* A denorm, normalize the mantissa */
 181             while (! (high & DHIGHBIT)) {
 182                 high <<= 1;
 183                 if (low & DMSBIT)
 184                     high++;
 185                 low <<= 1;
 186                 iexp--;
 187             }
 188         }
 189         *exp = (I_) iexp;
 190 #if DNBIGIT == 2
 191         man->_mp_d[0] = (mp_limb_t)low;
 192         man->_mp_d[1] = (mp_limb_t)high;
 193 #else
 194 #if DNBIGIT == 1
 195         man->_mp_d[0] = ((mp_limb_t)high) << 32 | (mp_limb_t)low;
 196 #else
 197 #error Cannot cope with DNBIGIT
 198 #endif
 199 #endif
 200         if (sign < 0)
 201             man->_mp_size = -man->_mp_size;
 202     }
 203 }
 204
 205 void
 206 __decodeFloat (MP_INT *man, I_ *exp, StgFloat flt)
 207 {
 208     /* Do some bit fiddling on IEEE */
 209     int high, sign;                 /* assuming 32 bit ints */
 210     union { float f; int i; } u;    /* assuming 32 bit float and int */
 211
 212     ASSERT(sizeof(int          ) == 4            );
 213     ASSERT(sizeof(flt          ) == SIZEOF_FLOAT );
 214     ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
 215     ASSERT(FNBIGIT*SIZEOF_LIMB_T >= SIZEOF_FLOAT );
 216
 217     u.f = flt;      /* grab the float */
 218     high = u.i;
 219
 220     /* we know the MP_INT* passed in has size zero, so we realloc
 221         no matter what.
 222     */
 223     man->_mp_alloc = FNBIGIT;
 224
 225     if ((high & ~FMSBIT) == 0) {
 226         man->_mp_size = 0;
 227         *exp = 0;
 228     } else {
 229         man->_mp_size = FNBIGIT;
 230         *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
 231         sign = high;
 232
 233         high &= FHIGHBIT-1;
 234         if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
 235             high |= FHIGHBIT;
 236         else {
 237             (*exp)++;
 238             /* A denorm, normalize the mantissa */
 239             while (! (high & FHIGHBIT)) {
 240                 high <<= 1;
 241                 (*exp)--;
 242             }
 243         }
 244 #if FNBIGIT == 1
 245         man->_mp_d[0] = (mp_limb_t)high;
 246 #else
 247 #error Cannot cope with FNBIGIT
 248 #endif
 249         if (sign < 0)
 250             man->_mp_size = -man->_mp_size;
 251     }
 252 }
 253
 254 /* Convenient union types for checking the layout of IEEE 754 types -
 255    based on defs in GNU libc <ieee754.h>
 256 */
 257
 258 union stg_ieee754_flt
 259 {
 260    float f;
 261    struct {
 262
 263 #if WORDS_BIGENDIAN
 264         unsigned int negative:1;
 265         unsigned int exponent:8;
 266         unsigned int mantissa:23;
 267 #else
 268         unsigned int mantissa:23;
 269         unsigned int exponent:8;
 270         unsigned int negative:1;
 271 #endif
 272    } ieee;
 273    struct {
 274
 275 #if WORDS_BIGENDIAN
 276         unsigned int negative:1;
 277         unsigned int exponent:8;
 278         unsigned int quiet_nan:1;
 279         unsigned int mantissa:22;
 280 #else
 281         unsigned int mantissa:22;
 282         unsigned int quiet_nan:1;
 283         unsigned int exponent:8;
 284         unsigned int negative:1;
 285 #endif
 286    } ieee_nan;
 287 };
 288
 289 /*
 290
 291  To recap, here's the representation of a double precision
 292  IEEE floating point number:
 293
 294  sign         63           sign bit (0==positive, 1==negative)
 295  exponent     62-52        exponent (biased by 1023)
 296  fraction     51-0         fraction (bits to right of binary point)
 297 */
 298
 299 union stg_ieee754_dbl
 300 {
 301    double d;
 302    struct {
 303
 304 #if WORDS_BIGENDIAN
 305         unsigned int negative:1;
 306         unsigned int exponent:11;
 307         unsigned int mantissa0:20;
 308         unsigned int mantissa1:32;
 309 #else
 310         unsigned int mantissa1:32;
 311         unsigned int mantissa0:20;
 312         unsigned int exponent:11;
 313         unsigned int negative:1;
 314 #endif
 315    } ieee;
 316     /* This format makes it easier to see if a NaN is a signalling NaN.  */
 317    struct {
 318
 319 #if WORDS_BIGENDIAN
 320         unsigned int negative:1;
 321         unsigned int exponent:11;
 322         unsigned int quiet_nan:1;
 323         unsigned int mantissa0:19;
 324         unsigned int mantissa1:32;
 325 #else
 326         unsigned int mantissa1:32;
 327         unsigned int mantissa0:19;
 328         unsigned int quiet_nan:1;
 329         unsigned int exponent:11;
 330         unsigned int negative:1;
 331 #endif
 332    } ieee_nan;
 333 };
 334
 335 /*
 336  * Predicates for testing for extreme IEEE fp values. Used
 337  * by the bytecode evaluator and the Prelude.
 338  *
 339  */
 340
 341 /* In case you don't suppport IEEE, you'll just get dummy defs.. */
 342 #ifdef IEEE_FLOATING_POINT
 343
 344 StgInt
 345 isDoubleNaN(StgDouble d)
 346 {
 347   union stg_ieee754_dbl u;
 348
 349   u.d = d;
 350
 351   return (
 352     u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&  /* Is the exponent all ones? */
 353     (u.ieee.mantissa0 != 0 || u.ieee.mantissa1 != 0)
 354         /* and the mantissa non-zero? */
 355     );
 356 }
 357
 358 StgInt
 359 isDoubleInfinite(StgDouble d)
 360 {
 361     union stg_ieee754_dbl u;
 362
 363     u.d = d;
 364
 365     /* Inf iff exponent is all ones, mantissa all zeros */
 366     return (
 367         u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&
 368         u.ieee.mantissa0 == 0                   &&
 369         u.ieee.mantissa1 == 0
 370       );
 371 }
 372
 373 StgInt
 374 isDoubleDenormalized(StgDouble d)
 375 {
 376     union stg_ieee754_dbl u;
 377
 378     u.d = d;
 379
 380     /* A (single/double/quad) precision floating point number
 381        is denormalised iff:
 382         - exponent is zero
 383         - mantissa is non-zero.
 384         - (don't care about setting of sign bit.)
 385
 386     */
 387     return (
 388         u.ieee.exponent  == 0 &&
 389         (u.ieee.mantissa0 != 0 ||
 390          u.ieee.mantissa1 != 0)
 391       );
 392
 393 }
 394
 395 StgInt
 396 isDoubleNegativeZero(StgDouble d)
 397 {
 398     union stg_ieee754_dbl u;
 399
 400     u.d = d;
 401     /* sign (bit 63) set (only) => negative zero */
 402
 403     return (
 404         u.ieee.negative  == 1 &&
 405         u.ieee.exponent  == 0 &&
 406         u.ieee.mantissa0 == 0 &&
 407         u.ieee.mantissa1 == 0);
 408 }
 409
 410 /* Same tests, this time for StgFloats. */
 411
 412 /*
 413  To recap, here's the representation of a single precision
 414  IEEE floating point number:
 415
 416  sign         31           sign bit (0 == positive, 1 == negative)
 417  exponent     30-23        exponent (biased by 127)
 418  fraction     22-0         fraction (bits to right of binary point)
 419 */
 420
 421
 422 StgInt
 423 isFloatNaN(StgFloat f)
 424 {
 425     union stg_ieee754_flt u;
 426     u.f = f;
 427
 428    /* Floating point NaN iff exponent is all ones, mantissa is
 429       non-zero (but see below.) */
 430    return (
 431         u.ieee.exponent == 255 /* 2^8 - 1 */ &&
 432         u.ieee.mantissa != 0);
 433 }
 434
 435 StgInt
 436 isFloatInfinite(StgFloat f)
 437 {
 438     union stg_ieee754_flt u;
 439     u.f = f;
 440
 441     /* A float is Inf iff exponent is max (all ones),
 442        and mantissa is min(all zeros.) */
 443     return (
 444         u.ieee.exponent == 255 /* 2^8 - 1 */ &&
 445         u.ieee.mantissa == 0);
 446 }
 447
 448 StgInt
 449 isFloatDenormalized(StgFloat f)
 450 {
 451     union stg_ieee754_flt u;
 452     u.f = f;
 453
 454     /* A (single/double/quad) precision floating point number
 455        is denormalised iff:
 456         - exponent is zero
 457         - mantissa is non-zero.
 458         - (don't care about setting of sign bit.)
 459
 460     */
 461     return (
 462         u.ieee.exponent == 0 &&
 463         u.ieee.mantissa != 0);
 464 }
 465
 466 StgInt
 467 isFloatNegativeZero(StgFloat f)
 468 {
 469     union stg_ieee754_flt u;
 470     u.f = f;
 471
 472     /* sign (bit 31) set (only) => negative zero */
 473     return (
 474         u.ieee.negative      &&
 475         u.ieee.exponent == 0 &&
 476         u.ieee.mantissa == 0);
 477 }
 478
 479 #else /* ! IEEE_FLOATING_POINT */
 480
 481 /* Dummy definitions of predicates - they all return false */
 482 StgInt isDoubleNaN(d) StgDouble d; { return 0; }
 483 StgInt isDoubleInfinite(d) StgDouble d; { return 0; }
 484 StgInt isDoubleDenormalized(d) StgDouble d; { return 0; }
 485 StgInt isDoubleNegativeZero(d) StgDouble d; { return 0; }
 486 StgInt isFloatNaN(f) StgFloat f; { return 0; }
 487 StgInt isFloatInfinite(f) StgFloat f; { return 0; }
 488 StgInt isFloatDenormalized(f) StgFloat f; { return 0; }
 489 StgInt isFloatNegativeZero(f) StgFloat f; { return 0; }
 490
 491 #endif /* ! IEEE_FLOATING_POINT */