rts/StgPrimFloat.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2000
   4  *
   5  * Miscellaneous support for floating-point primitives
   6  *
   7  * ---------------------------------------------------------------------------*/
   8
#include "PosixSource.h"
#include "Rts.h"

#include <limits.h>
#include <math.h>
  13
  14 /*
  15  * Encoding and decoding Doubles.  Code based on the HBC code
  16  * (lib/fltcode.c).
  17  */
  18
/*
 * Size in bytes of one multi-precision limb, chosen by the _SHORT_LIMB /
 * _LONG_LONG_LIMB selectors (unsigned long when neither is defined).
 */
#ifdef _SHORT_LIMB
#define SIZEOF_LIMB_T SIZEOF_UNSIGNED_INT
#else
#ifdef _LONG_LONG_LIMB
#define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG_LONG
#else
#define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG
#endif
#endif

/* GMP_BASE is the radix of one limb as a double: 2^32 or 2^64. */
#if SIZEOF_LIMB_T == 4
#define GMP_BASE 4294967296.0
#elif SIZEOF_LIMB_T == 8
#define GMP_BASE 18446744073709551616.0
#else
#error Cannot cope with SIZEOF_LIMB_T -- please add definition of GMP_BASE
#endif

/* Number of limbs needed to hold the bytes of a double / a float
   (byte size divided by limb size, rounded up). */
#define DNBIGIT  ((SIZEOF_DOUBLE+SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
#define FNBIGIT  ((SIZEOF_FLOAT +SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)

/*
 * Constants used by the decode routines below.
 *   MY_?MINEXP  offset added to the raw exponent field to obtain the
 *               exponent of the integer mantissa
 *   ?HIGHBIT    position of the hidden (implicit leading) mantissa bit
 *               within the word that holds the exponent
 *   ?MSBIT      top bit of a 32-bit word (sign bit of the high word)
 * NOTE(review): these are only defined when IEEE_FLOATING_POINT is true,
 * while the decode routines use them unconditionally.
 */
#if IEEE_FLOATING_POINT
#define MY_DMINEXP  ((DBL_MIN_EXP) - (DBL_MANT_DIG) - 1)
/* DMINEXP is defined in values.h on Linux (for example) */
#define DHIGHBIT 0x00100000
#define DMSBIT   0x80000000

#define MY_FMINEXP  ((FLT_MIN_EXP) - (FLT_MANT_DIG) - 1)
#define FHIGHBIT 0x00800000
#define FMSBIT   0x80000000
#endif

/* Indices of the low (L) and high (H) 32-bit halves of a double when it is
   viewed as an array of two unsigned ints. */
#if defined(WORDS_BIGENDIAN) || defined(FLOAT_WORDS_BIGENDIAN)
#define L 1
#define H 0
#else
#define L 0
#define H 1
#endif

/* Absolute value.  The argument is evaluated more than once, so it must
   not have side effects. */
#define __abs(a)                (( (a) >= 0 ) ? (a) : (-(a)))
  60
  61 StgDouble
  62 __encodeDouble (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
  63 {
  64     StgDouble r;
  65     const mp_limb_t *const arr = (const mp_limb_t *)ba;
  66     I_ i;
  67
  68     /* Convert MP_INT to a double; knows a lot about internal rep! */
  69     for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
  70         r = (r * GMP_BASE) + arr[i];
  71
  72     /* Now raise to the exponent */
  73     if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
  74         r = ldexp(r, e);
  75
  76     /* sign is encoded in the size */
  77     if (size < 0)
  78         r = -r;
  79
  80     return r;
  81 }
  82
  83 /* Special version for small Integers */
  84 StgDouble
  85 __int_encodeDouble (I_ j, I_ e)
  86 {
  87   StgDouble r;
  88
  89   r = (StgDouble)__abs(j);
  90
  91   /* Now raise to the exponent */
  92   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
  93     r = ldexp(r, e);
  94
  95   /* sign is encoded in the size */
  96   if (j < 0)
  97     r = -r;
  98
  99   return r;
 100 }
 101
 102 StgFloat
 103 __encodeFloat (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
 104 {
 105     StgFloat r;
 106     const mp_limb_t *arr = (const mp_limb_t *)ba;
 107     I_ i;
 108
 109     /* Convert MP_INT to a float; knows a lot about internal rep! */
 110     for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
 111         r = (r * GMP_BASE) + arr[i];
 112
 113     /* Now raise to the exponent */
 114     if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 115         r = ldexp(r, e);
 116
 117     /* sign is encoded in the size */
 118     if (size < 0)
 119         r = -r;
 120
 121     return r;
 122 }
 123
 124 /* Special version for small Integers */
 125 StgFloat
 126 __int_encodeFloat (I_ j, I_ e)
 127 {
 128   StgFloat r;
 129
 130   r = (StgFloat)__abs(j);
 131
 132   /* Now raise to the exponent */
 133   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 134     r = ldexp(r, e);
 135
 136   /* sign is encoded in the size */
 137   if (j < 0)
 138     r = -r;
 139
 140   return r;
 141 }
 142
 143 /* This only supports IEEE floating point */
 144
 145 void
 146 __decodeDouble (MP_INT *man, I_ *exp, StgDouble dbl)
 147 {
 148     /* Do some bit fiddling on IEEE */
 149     unsigned int low, high;             /* assuming 32 bit ints */
 150     int sign, iexp;
 151     union { double d; unsigned int i[2]; } u;   /* assuming 32 bit ints, 64 bit double */
 152
 153     ASSERT(sizeof(unsigned int ) == 4            );
 154     ASSERT(sizeof(dbl          ) == SIZEOF_DOUBLE);
 155     ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
 156     ASSERT(DNBIGIT*SIZEOF_LIMB_T >= SIZEOF_DOUBLE);
 157
 158     u.d = dbl;      /* grab chunks of the double */
 159     low = u.i[L];
 160     high = u.i[H];
 161
 162     /* we know the MP_INT* passed in has size zero, so we realloc
 163         no matter what.
 164     */
 165     man->_mp_alloc = DNBIGIT;
 166
 167     if (low == 0 && (high & ~DMSBIT) == 0) {
 168         man->_mp_size = 0;
 169         *exp = 0L;
 170     } else {
 171         man->_mp_size = DNBIGIT;
 172         iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
 173         sign = high;
 174
 175         high &= DHIGHBIT-1;
 176         if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
 177             high |= DHIGHBIT;
 178         else {
 179             iexp++;
 180             /* A denorm, normalize the mantissa */
 181             while (! (high & DHIGHBIT)) {
 182                 high <<= 1;
 183                 if (low & DMSBIT)
 184                     high++;
 185                 low <<= 1;
 186                 iexp--;
 187             }
 188         }
 189         *exp = (I_) iexp;
 190 #if DNBIGIT == 2
 191         man->_mp_d[0] = (mp_limb_t)low;
 192         man->_mp_d[1] = (mp_limb_t)high;
 193 #else
 194 #if DNBIGIT == 1
 195         man->_mp_d[0] = ((mp_limb_t)high) << 32 | (mp_limb_t)low;
 196 #else
 197 #error Cannot cope with DNBIGIT
 198 #endif
 199 #endif
 200         if (sign < 0)
 201             man->_mp_size = -man->_mp_size;
 202     }
 203 }
 204
 205 void
 206 __decodeFloat (MP_INT *man, I_ *exp, StgFloat flt)
 207 {
 208     /* Do some bit fiddling on IEEE */
 209     int high, sign;                 /* assuming 32 bit ints */
 210     union { float f; int i; } u;    /* assuming 32 bit float and int */
 211
 212     ASSERT(sizeof(int          ) == 4            );
 213     ASSERT(sizeof(flt          ) == SIZEOF_FLOAT );
 214     ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
 215     ASSERT(FNBIGIT*SIZEOF_LIMB_T >= SIZEOF_FLOAT );
 216
 217     u.f = flt;      /* grab the float */
 218     high = u.i;
 219
 220     /* we know the MP_INT* passed in has size zero, so we realloc
 221         no matter what.
 222     */
 223     man->_mp_alloc = FNBIGIT;
 224
 225     if ((high & ~FMSBIT) == 0) {
 226         man->_mp_size = 0;
 227         *exp = 0;
 228     } else {
 229         man->_mp_size = FNBIGIT;
 230         *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
 231         sign = high;
 232
 233         high &= FHIGHBIT-1;
 234         if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
 235             high |= FHIGHBIT;
 236         else {
 237             (*exp)++;
 238             /* A denorm, normalize the mantissa */
 239             while (! (high & FHIGHBIT)) {
 240                 high <<= 1;
 241                 (*exp)--;
 242             }
 243         }
 244 #if FNBIGIT == 1
 245         man->_mp_d[0] = (mp_limb_t)high;
 246 #else
 247 #error Cannot cope with FNBIGIT
 248 #endif
 249         if (sign < 0)
 250             man->_mp_size = -man->_mp_size;
 251     }
 252 }
 253
 254 /* Convenient union types for checking the layout of IEEE 754 types -
 255    based on defs in GNU libc <ieee754.h>
 256 */
 257
/*
 * Overlay of a float with two bit-field views of the same 32 bits.
 * The field order is reversed under WORDS_BIGENDIAN.
 * NOTE(review): this relies on the compiler allocating bit-fields in the
 * order glibc's <ieee754.h> assumes for the target byte order - confirm
 * for any new compiler/target.
 */
union stg_ieee754_flt
{
   float f;     /* the value itself */

   /* Plain view: sign, 8-bit exponent, 23-bit mantissa. */
   struct {

#if WORDS_BIGENDIAN
        unsigned int negative:1;
        unsigned int exponent:8;
        unsigned int mantissa:23;
#else
        unsigned int mantissa:23;
        unsigned int exponent:8;
        unsigned int negative:1;
#endif
   } ieee;

   /* NaN view: same layout, with the top mantissa bit split out as
      quiet_nan. */
   struct {

#if WORDS_BIGENDIAN
        unsigned int negative:1;
        unsigned int exponent:8;
        unsigned int quiet_nan:1;
        unsigned int mantissa:22;
#else
        unsigned int mantissa:22;
        unsigned int quiet_nan:1;
        unsigned int exponent:8;
        unsigned int negative:1;
#endif
   } ieee_nan;
};
 288
 289 /*
 290
 291  To recap, here's the representation of a double precision
 292  IEEE floating point number:
 293
 294  sign         63           sign bit (0==positive, 1==negative)
 295  exponent     62-52        exponent (biased by 1023)
 296  fraction     51-0         fraction (bits to right of binary point)
 297 */
 298
/*
 * Overlay of a double with two bit-field views of the same 64 bits.
 * The 52-bit mantissa is split into mantissa0 (upper 20 bits, sharing a
 * word with the sign and exponent) and mantissa1 (lower 32 bits).
 * Three layouts are provided: WORDS_BIGENDIAN, FLOAT_WORDS_BIGENDIAN
 * (only the order of the two 32-bit words is swapped), and the default.
 * NOTE(review): this relies on the compiler allocating bit-fields in the
 * order glibc's <ieee754.h> assumes for the target byte order - confirm
 * for any new compiler/target.
 */
union stg_ieee754_dbl
{
   double d;    /* the value itself */

   /* Plain view: sign, 11-bit exponent, 20+32-bit mantissa. */
   struct {

#if WORDS_BIGENDIAN
        unsigned int negative:1;
        unsigned int exponent:11;
        unsigned int mantissa0:20;
        unsigned int mantissa1:32;
#else
#if FLOAT_WORDS_BIGENDIAN
        unsigned int mantissa0:20;
        unsigned int exponent:11;
        unsigned int negative:1;
        unsigned int mantissa1:32;
#else
        unsigned int mantissa1:32;
        unsigned int mantissa0:20;
        unsigned int exponent:11;
        unsigned int negative:1;
#endif
#endif
   } ieee;
    /* This format makes it easier to see if a NaN is a signalling NaN.  */
   /* Same layout as above, with the top mantissa bit split out as
      quiet_nan. */
   struct {

#if WORDS_BIGENDIAN
        unsigned int negative:1;
        unsigned int exponent:11;
        unsigned int quiet_nan:1;
        unsigned int mantissa0:19;
        unsigned int mantissa1:32;
#else
#if FLOAT_WORDS_BIGENDIAN
        unsigned int mantissa0:19;
        unsigned int quiet_nan:1;
        unsigned int exponent:11;
        unsigned int negative:1;
        unsigned int mantissa1:32;
#else
        unsigned int mantissa1:32;
        unsigned int mantissa0:19;
        unsigned int quiet_nan:1;
        unsigned int exponent:11;
        unsigned int negative:1;
#endif
#endif
   } ieee_nan;
};
 349
 350 /*
 351  * Predicates for testing for extreme IEEE fp values. Used
 352  * by the bytecode evaluator and the Prelude.
 353  *
 354  */
 355
/* In case you don't support IEEE, you'll just get dummy defs.. */
 357 #ifdef IEEE_FLOATING_POINT
 358
 359 StgInt
 360 isDoubleNaN(StgDouble d)
 361 {
 362   union stg_ieee754_dbl u;
 363
 364   u.d = d;
 365
 366   return (
 367     u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&  /* Is the exponent all ones? */
 368     (u.ieee.mantissa0 != 0 || u.ieee.mantissa1 != 0)
 369         /* and the mantissa non-zero? */
 370     );
 371 }
 372
 373 StgInt
 374 isDoubleInfinite(StgDouble d)
 375 {
 376     union stg_ieee754_dbl u;
 377
 378     u.d = d;
 379
 380     /* Inf iff exponent is all ones, mantissa all zeros */
 381     return (
 382         u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&
 383         u.ieee.mantissa0 == 0                   &&
 384         u.ieee.mantissa1 == 0
 385       );
 386 }
 387
 388 StgInt
 389 isDoubleDenormalized(StgDouble d)
 390 {
 391     union stg_ieee754_dbl u;
 392
 393     u.d = d;
 394
 395     /* A (single/double/quad) precision floating point number
 396        is denormalised iff:
 397         - exponent is zero
 398         - mantissa is non-zero.
 399         - (don't care about setting of sign bit.)
 400
 401     */
 402     return (
 403         u.ieee.exponent  == 0 &&
 404         (u.ieee.mantissa0 != 0 ||
 405          u.ieee.mantissa1 != 0)
 406       );
 407
 408 }
 409
 410 StgInt
 411 isDoubleNegativeZero(StgDouble d)
 412 {
 413     union stg_ieee754_dbl u;
 414
 415     u.d = d;
 416     /* sign (bit 63) set (only) => negative zero */
 417
 418     return (
 419         u.ieee.negative  == 1 &&
 420         u.ieee.exponent  == 0 &&
 421         u.ieee.mantissa0 == 0 &&
 422         u.ieee.mantissa1 == 0);
 423 }
 424
 425 /* Same tests, this time for StgFloats. */
 426
 427 /*
 428  To recap, here's the representation of a single precision
 429  IEEE floating point number:
 430
 431  sign         31           sign bit (0 == positive, 1 == negative)
 432  exponent     30-23        exponent (biased by 127)
 433  fraction     22-0         fraction (bits to right of binary point)
 434 */
 435
 436
 437 StgInt
 438 isFloatNaN(StgFloat f)
 439 {
 440     union stg_ieee754_flt u;
 441     u.f = f;
 442
 443    /* Floating point NaN iff exponent is all ones, mantissa is
 444       non-zero (but see below.) */
 445    return (
 446         u.ieee.exponent == 255 /* 2^8 - 1 */ &&
 447         u.ieee.mantissa != 0);
 448 }
 449
 450 StgInt
 451 isFloatInfinite(StgFloat f)
 452 {
 453     union stg_ieee754_flt u;
 454     u.f = f;
 455
 456     /* A float is Inf iff exponent is max (all ones),
 457        and mantissa is min(all zeros.) */
 458     return (
 459         u.ieee.exponent == 255 /* 2^8 - 1 */ &&
 460         u.ieee.mantissa == 0);
 461 }
 462
 463 StgInt
 464 isFloatDenormalized(StgFloat f)
 465 {
 466     union stg_ieee754_flt u;
 467     u.f = f;
 468
 469     /* A (single/double/quad) precision floating point number
 470        is denormalised iff:
 471         - exponent is zero
 472         - mantissa is non-zero.
 473         - (don't care about setting of sign bit.)
 474
 475     */
 476     return (
 477         u.ieee.exponent == 0 &&
 478         u.ieee.mantissa != 0);
 479 }
 480
 481 StgInt
 482 isFloatNegativeZero(StgFloat f)
 483 {
 484     union stg_ieee754_flt u;
 485     u.f = f;
 486
 487     /* sign (bit 31) set (only) => negative zero */
 488     return (
 489         u.ieee.negative      &&
 490         u.ieee.exponent == 0 &&
 491         u.ieee.mantissa == 0);
 492 }
 493
 494 #else /* ! IEEE_FLOATING_POINT */
 495
 496 /* Dummy definitions of predicates - they all return false */
 497 StgInt isDoubleNaN(d) StgDouble d; { return 0; }
 498 StgInt isDoubleInfinite(d) StgDouble d; { return 0; }
 499 StgInt isDoubleDenormalized(d) StgDouble d; { return 0; }
 500 StgInt isDoubleNegativeZero(d) StgDouble d; { return 0; }
 501 StgInt isFloatNaN(f) StgFloat f; { return 0; }
 502 StgInt isFloatInfinite(f) StgFloat f; { return 0; }
 503 StgInt isFloatDenormalized(f) StgFloat f; { return 0; }
 504 StgInt isFloatNegativeZero(f) StgFloat f; { return 0; }
 505
 506 #endif /* ! IEEE_FLOATING_POINT */