ghc/rts/StgPrimFloat.c

   1 /* -----------------------------------------------------------------------------
   2  * $Id: StgPrimFloat.c,v 1.7 2001/07/23 23:29:47 ken Exp $
   3  *
   4  * (c) The GHC Team, 1998-2000
   5  *
   6  * Miscellaneous support for floating-point primitives
   7  *
   8  * ---------------------------------------------------------------------------*/
   9
  10 #include "Rts.h"
  11
  12 /*
  13  * Encoding and decoding Doubles.  Code based on the HBC code
  14  * (lib/fltcode.c).
  15  */
  16
  17 #ifdef _SHORT_LIMB
  18 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_INT
  19 #else
  20 #ifdef _LONG_LONG_LIMB
  21 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG_LONG
  22 #else
  23 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG
  24 #endif
  25 #endif
  26
  27 #if SIZEOF_LIMB_T == 4
  28 #define GMP_BASE 4294967296.0
  29 #elif SIZEOF_LIMB_T == 8
  30 #define GMP_BASE 18446744073709551616.0
  31 #else
  32 #error Cannot cope with SIZEOF_LIMB_T -- please add definition of GMP_BASE
  33 #endif
  34
  35 #define DNBIGIT  ((SIZEOF_DOUBLE+SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
  36 #define FNBIGIT  ((SIZEOF_FLOAT +SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
  37
  38 #if IEEE_FLOATING_POINT
  39 #define MY_DMINEXP  ((DBL_MIN_EXP) - (DBL_MANT_DIG) - 1)
  40 /* DMINEXP is defined in values.h on Linux (for example) */
  41 #define DHIGHBIT 0x00100000
  42 #define DMSBIT   0x80000000
  43
  44 #define MY_FMINEXP  ((FLT_MIN_EXP) - (FLT_MANT_DIG) - 1)
  45 #define FHIGHBIT 0x00800000
  46 #define FMSBIT   0x80000000
  47 #endif
  48
  49 #ifdef WORDS_BIGENDIAN
  50 #define L 1
  51 #define H 0
  52 #else
  53 #define L 0
  54 #define H 1
  55 #endif
  56
  57 #define __abs(a)                (( (a) >= 0 ) ? (a) : (-(a)))
  58
  59 StgDouble
  60 __encodeDouble (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
  61 {
  62     StgDouble r;
  63     const mp_limb_t *const arr = (const mp_limb_t *)ba;
  64     I_ i;
  65
  66     /* Convert MP_INT to a double; knows a lot about internal rep! */
  67     for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
  68         r = (r * GMP_BASE) + arr[i];
  69
  70     /* Now raise to the exponent */
  71     if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
  72         r = ldexp(r, e);
  73
  74     /* sign is encoded in the size */
  75     if (size < 0)
  76         r = -r;
  77
  78     return r;
  79 }
  80
  81 /* Special version for small Integers */
  82 StgDouble
  83 __int_encodeDouble (I_ j, I_ e)
  84 {
  85   StgDouble r;
  86
  87   r = (StgDouble)__abs(j);
  88
  89   /* Now raise to the exponent */
  90   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
  91     r = ldexp(r, e);
  92
  93   /* sign is encoded in the size */
  94   if (j < 0)
  95     r = -r;
  96
  97   return r;
  98 }
  99
 100 StgFloat
 101 __encodeFloat (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
 102 {
 103     StgFloat r;
 104     const mp_limb_t *arr = (const mp_limb_t *)ba;
 105     I_ i;
 106
 107     /* Convert MP_INT to a float; knows a lot about internal rep! */
 108     for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
 109         r = (r * GMP_BASE) + arr[i];
 110
 111     /* Now raise to the exponent */
 112     if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 113         r = ldexp(r, e);
 114
 115     /* sign is encoded in the size */
 116     if (size < 0)
 117         r = -r;
 118
 119     return r;
 120 }
 121
 122 /* Special version for small Integers */
 123 StgFloat
 124 __int_encodeFloat (I_ j, I_ e)
 125 {
 126   StgFloat r;
 127
 128   r = (StgFloat)__abs(j);
 129
 130   /* Now raise to the exponent */
 131   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 132     r = ldexp(r, e);
 133
 134   /* sign is encoded in the size */
 135   if (j < 0)
 136     r = -r;
 137
 138   return r;
 139 }
 140
 141 /* This only supports IEEE floating point */
 142
 143 void
 144 __decodeDouble (MP_INT *man, I_ *exp, StgDouble dbl)
 145 {
 146     /* Do some bit fiddling on IEEE */
 147     unsigned int low, high;             /* assuming 32 bit ints */
 148     int sign, iexp;
 149     union { double d; unsigned int i[2]; } u;   /* assuming 32 bit ints, 64 bit double */
 150
 151     ASSERT(sizeof(unsigned int ) == 4            );
 152     ASSERT(sizeof(dbl          ) == SIZEOF_DOUBLE);
 153     ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
 154     ASSERT(DNBIGIT*SIZEOF_LIMB_T >= SIZEOF_DOUBLE);
 155
 156     u.d = dbl;      /* grab chunks of the double */
 157     low = u.i[L];
 158     high = u.i[H];
 159
 160     /* we know the MP_INT* passed in has size zero, so we realloc
 161         no matter what.
 162     */
 163     man->_mp_alloc = DNBIGIT;
 164
 165     if (low == 0 && (high & ~DMSBIT) == 0) {
 166         man->_mp_size = 0;
 167         *exp = 0L;
 168     } else {
 169         man->_mp_size = DNBIGIT;
 170         iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
 171         sign = high;
 172
 173         high &= DHIGHBIT-1;
 174         if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
 175             high |= DHIGHBIT;
 176         else {
 177             iexp++;
 178             /* A denorm, normalize the mantissa */
 179             while (! (high & DHIGHBIT)) {
 180                 high <<= 1;
 181                 if (low & DMSBIT)
 182                     high++;
 183                 low <<= 1;
 184                 iexp--;
 185             }
 186         }
 187         *exp = (I_) iexp;
 188 #if DNBIGIT == 2
 189         man->_mp_d[0] = (mp_limb_t)low;
 190         man->_mp_d[1] = (mp_limb_t)high;
 191 #else
 192 #if DNBIGIT == 1
 193         man->_mp_d[0] = ((mp_limb_t)high) << 32 | (mp_limb_t)low;
 194 #else
 195 #error Cannot cope with DNBIGIT
 196 #endif
 197 #endif
 198         if (sign < 0)
 199             man->_mp_size = -man->_mp_size;
 200     }
 201 }
 202
 203 void
 204 __decodeFloat (MP_INT *man, I_ *exp, StgFloat flt)
 205 {
 206     /* Do some bit fiddling on IEEE */
 207     int high, sign;                 /* assuming 32 bit ints */
 208     union { float f; int i; } u;    /* assuming 32 bit float and int */
 209
 210     ASSERT(sizeof(int          ) == 4            );
 211     ASSERT(sizeof(flt          ) == SIZEOF_FLOAT );
 212     ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
 213     ASSERT(FNBIGIT*SIZEOF_LIMB_T >= SIZEOF_FLOAT );
 214
 215     u.f = flt;      /* grab the float */
 216     high = u.i;
 217
 218     /* we know the MP_INT* passed in has size zero, so we realloc
 219         no matter what.
 220     */
 221     man->_mp_alloc = FNBIGIT;
 222
 223     if ((high & ~FMSBIT) == 0) {
 224         man->_mp_size = 0;
 225         *exp = 0;
 226     } else {
 227         man->_mp_size = FNBIGIT;
 228         *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
 229         sign = high;
 230
 231         high &= FHIGHBIT-1;
 232         if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
 233             high |= FHIGHBIT;
 234         else {
 235             (*exp)++;
 236             /* A denorm, normalize the mantissa */
 237             while (! (high & FHIGHBIT)) {
 238                 high <<= 1;
 239                 (*exp)--;
 240             }
 241         }
 242 #if FNBIGIT == 1
 243         man->_mp_d[0] = (mp_limb_t)high;
 244 #else
 245 #error Cannot cope with FNBIGIT
 246 #endif
 247         if (sign < 0)
 248             man->_mp_size = -man->_mp_size;
 249     }
 250 }
 251
 252 /* Convenient union types for checking the layout of IEEE 754 types -
 253    based on defs in GNU libc <ieee754.h>
 254 */
 255
 256 union stg_ieee754_flt
 257 {
 258    float f;
 259    struct {
 260
 261 #if WORDS_BIGENDIAN
 262         unsigned int negative:1;
 263         unsigned int exponent:8;
 264         unsigned int mantissa:23;
 265 #else
 266         unsigned int mantissa:23;
 267         unsigned int exponent:8;
 268         unsigned int negative:1;
 269 #endif
 270    } ieee;
 271    struct {
 272
 273 #if WORDS_BIGENDIAN
 274         unsigned int negative:1;
 275         unsigned int exponent:8;
 276         unsigned int quiet_nan:1;
 277         unsigned int mantissa:22;
 278 #else
 279         unsigned int mantissa:22;
 280         unsigned int quiet_nan:1;
 281         unsigned int exponent:8;
 282         unsigned int negative:1;
 283 #endif
 284    } ieee_nan;
 285 };
 286
 287 /*
 288
 289  To recap, here's the representation of a double precision
 290  IEEE floating point number:
 291
 292  sign         63           sign bit (0==positive, 1==negative)
 293  exponent     62-52        exponent (biased by 1023)
 294  fraction     51-0         fraction (bits to right of binary point)
 295 */
 296
 297 union stg_ieee754_dbl
 298 {
 299    double d;
 300    struct {
 301
 302 #if WORDS_BIGENDIAN
 303         unsigned int negative:1;
 304         unsigned int exponent:11;
 305         unsigned int mantissa0:20;
 306         unsigned int mantissa1:32;
 307 #else
 308         unsigned int mantissa1:32;
 309         unsigned int mantissa0:20;
 310         unsigned int exponent:11;
 311         unsigned int negative:1;
 312 #endif
 313    } ieee;
 314     /* This format makes it easier to see if a NaN is a signalling NaN.  */
 315    struct {
 316
 317 #if WORDS_BIGENDIAN
 318         unsigned int negative:1;
 319         unsigned int exponent:11;
 320         unsigned int quiet_nan:1;
 321         unsigned int mantissa0:19;
 322         unsigned int mantissa1:32;
 323 #else
 324         unsigned int mantissa1:32;
 325         unsigned int mantissa0:19;
 326         unsigned int quiet_nan:1;
 327         unsigned int exponent:11;
 328         unsigned int negative:1;
 329 #endif
 330    } ieee_nan;
 331 };
 332
 333 /*
 334  * Predicates for testing for extreme IEEE fp values. Used
 335  * by the bytecode evaluator and the Prelude.
 336  *
 337  */
 338
 339 /* In case you don't suppport IEEE, you'll just get dummy defs.. */
 340 #ifdef IEEE_FLOATING_POINT
 341
 342 StgInt
 343 isDoubleNaN(StgDouble d)
 344 {
 345   union stg_ieee754_dbl u;
 346
 347   u.d = d;
 348
 349   return (
 350     u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&  /* Is the exponent all ones? */
 351     (u.ieee.mantissa0 != 0 || u.ieee.mantissa1 != 0)
 352         /* and the mantissa non-zero? */
 353     );
 354 }
 355
 356 StgInt
 357 isDoubleInfinite(StgDouble d)
 358 {
 359     union stg_ieee754_dbl u;
 360
 361     u.d = d;
 362
 363     /* Inf iff exponent is all ones, mantissa all zeros */
 364     return (
 365         u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&
 366         u.ieee.mantissa0 == 0                   &&
 367         u.ieee.mantissa1 == 0
 368       );
 369 }
 370
 371 StgInt
 372 isDoubleDenormalized(StgDouble d)
 373 {
 374     union stg_ieee754_dbl u;
 375
 376     u.d = d;
 377
 378     /* A (single/double/quad) precision floating point number
 379        is denormalised iff:
 380         - exponent is zero
 381         - mantissa is non-zero.
 382         - (don't care about setting of sign bit.)
 383
 384     */
 385     return (
 386         u.ieee.exponent  == 0 &&
 387         (u.ieee.mantissa0 != 0 ||
 388          u.ieee.mantissa1 != 0)
 389       );
 390
 391 }
 392
 393 StgInt
 394 isDoubleNegativeZero(StgDouble d)
 395 {
 396     union stg_ieee754_dbl u;
 397
 398     u.d = d;
 399     /* sign (bit 63) set (only) => negative zero */
 400
 401     return (
 402         u.ieee.negative  == 1 &&
 403         u.ieee.exponent  == 0 &&
 404         u.ieee.mantissa0 == 0 &&
 405         u.ieee.mantissa1 == 0);
 406 }
 407
 408 /* Same tests, this time for StgFloats. */
 409
 410 /*
 411  To recap, here's the representation of a single precision
 412  IEEE floating point number:
 413
 414  sign         31           sign bit (0 == positive, 1 == negative)
 415  exponent     30-23        exponent (biased by 127)
 416  fraction     22-0         fraction (bits to right of binary point)
 417 */
 418
 419
 420 StgInt
 421 isFloatNaN(StgFloat f)
 422 {
 423     union stg_ieee754_flt u;
 424     u.f = f;
 425
 426    /* Floating point NaN iff exponent is all ones, mantissa is
 427       non-zero (but see below.) */
 428    return (
 429         u.ieee.exponent == 255 /* 2^8 - 1 */ &&
 430         u.ieee.mantissa != 0);
 431 }
 432
 433 StgInt
 434 isFloatInfinite(StgFloat f)
 435 {
 436     union stg_ieee754_flt u;
 437     u.f = f;
 438
 439     /* A float is Inf iff exponent is max (all ones),
 440        and mantissa is min(all zeros.) */
 441     return (
 442         u.ieee.exponent == 255 /* 2^8 - 1 */ &&
 443         u.ieee.mantissa == 0);
 444 }
 445
 446 StgInt
 447 isFloatDenormalized(StgFloat f)
 448 {
 449     union stg_ieee754_flt u;
 450     u.f = f;
 451
 452     /* A (single/double/quad) precision floating point number
 453        is denormalised iff:
 454         - exponent is zero
 455         - mantissa is non-zero.
 456         - (don't care about setting of sign bit.)
 457
 458     */
 459     return (
 460         u.ieee.exponent == 0 &&
 461         u.ieee.mantissa != 0);
 462 }
 463
 464 StgInt
 465 isFloatNegativeZero(StgFloat f)
 466 {
 467     union stg_ieee754_flt u;
 468     u.f = f;
 469
 470     /* sign (bit 31) set (only) => negative zero */
 471     return (
 472         u.ieee.negative      &&
 473         u.ieee.exponent == 0 &&
 474         u.ieee.mantissa == 0);
 475 }
 476
 477 #else /* ! IEEE_FLOATING_POINT */
 478
 479 /* Dummy definitions of predicates - they all return false */
 480 StgInt isDoubleNaN(d) StgDouble d; { return 0; }
 481 StgInt isDoubleInfinite(d) StgDouble d; { return 0; }
 482 StgInt isDoubleDenormalized(d) StgDouble d; { return 0; }
 483 StgInt isDoubleNegativeZero(d) StgDouble d; { return 0; }
 484 StgInt isFloatNaN(f) StgFloat f; { return 0; }
 485 StgInt isFloatInfinite(f) StgFloat f; { return 0; }
 486 StgInt isFloatDenormalized(f) StgFloat f; { return 0; }
 487 StgInt isFloatNegativeZero(f) StgFloat f; { return 0; }
 488
 489 #endif /* ! IEEE_FLOATING_POINT */