ghc/rts/StgPrimFloat.c

   1 /* -----------------------------------------------------------------------------
   2  * $Id: StgPrimFloat.c,v 1.9 2002/07/17 09:21:51 simonmar Exp $
   3  *
   4  * (c) The GHC Team, 1998-2000
   5  *
   6  * Miscellaneous support for floating-point primitives
   7  *
   8  * ---------------------------------------------------------------------------*/
   9
  10 #include "PosixSource.h"
  11 #include "Rts.h"
  12
  13 #include <math.h>
  14
  15 /*
  16  * Encoding and decoding Doubles.  Code based on the HBC code
  17  * (lib/fltcode.c).
  18  */
  19
  20 #ifdef _SHORT_LIMB
  21 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_INT
  22 #else
  23 #ifdef _LONG_LONG_LIMB
  24 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG_LONG
  25 #else
  26 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG
  27 #endif
  28 #endif
  29
  30 #if SIZEOF_LIMB_T == 4
  31 #define GMP_BASE 4294967296.0
  32 #elif SIZEOF_LIMB_T == 8
  33 #define GMP_BASE 18446744073709551616.0
  34 #else
  35 #error Cannot cope with SIZEOF_LIMB_T -- please add definition of GMP_BASE
  36 #endif
  37
  38 #define DNBIGIT  ((SIZEOF_DOUBLE+SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
  39 #define FNBIGIT  ((SIZEOF_FLOAT +SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
  40
  41 #if IEEE_FLOATING_POINT
  42 #define MY_DMINEXP  ((DBL_MIN_EXP) - (DBL_MANT_DIG) - 1)
  43 /* DMINEXP is defined in values.h on Linux (for example) */
  44 #define DHIGHBIT 0x00100000
  45 #define DMSBIT   0x80000000
  46
  47 #define MY_FMINEXP  ((FLT_MIN_EXP) - (FLT_MANT_DIG) - 1)
  48 #define FHIGHBIT 0x00800000
  49 #define FMSBIT   0x80000000
  50 #endif
  51
  52 #ifdef WORDS_BIGENDIAN
  53 #define L 1
  54 #define H 0
  55 #else
  56 #define L 0
  57 #define H 1
  58 #endif
  59
  60 #define __abs(a)                (( (a) >= 0 ) ? (a) : (-(a)))
  61
  62 StgDouble
  63 __encodeDouble (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
  64 {
  65     StgDouble r;
  66     const mp_limb_t *const arr = (const mp_limb_t *)ba;
  67     I_ i;
  68
  69     /* Convert MP_INT to a double; knows a lot about internal rep! */
  70     for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
  71         r = (r * GMP_BASE) + arr[i];
  72
  73     /* Now raise to the exponent */
  74     if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
  75         r = ldexp(r, e);
  76
  77     /* sign is encoded in the size */
  78     if (size < 0)
  79         r = -r;
  80
  81     return r;
  82 }
  83
  84 /* Special version for small Integers */
  85 StgDouble
  86 __int_encodeDouble (I_ j, I_ e)
  87 {
  88   StgDouble r;
  89
  90   r = (StgDouble)__abs(j);
  91
  92   /* Now raise to the exponent */
  93   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
  94     r = ldexp(r, e);
  95
  96   /* sign is encoded in the size */
  97   if (j < 0)
  98     r = -r;
  99
 100   return r;
 101 }
 102
 103 StgFloat
 104 __encodeFloat (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
 105 {
 106     StgFloat r;
 107     const mp_limb_t *arr = (const mp_limb_t *)ba;
 108     I_ i;
 109
 110     /* Convert MP_INT to a float; knows a lot about internal rep! */
 111     for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
 112         r = (r * GMP_BASE) + arr[i];
 113
 114     /* Now raise to the exponent */
 115     if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 116         r = ldexp(r, e);
 117
 118     /* sign is encoded in the size */
 119     if (size < 0)
 120         r = -r;
 121
 122     return r;
 123 }
 124
 125 /* Special version for small Integers */
 126 StgFloat
 127 __int_encodeFloat (I_ j, I_ e)
 128 {
 129   StgFloat r;
 130
 131   r = (StgFloat)__abs(j);
 132
 133   /* Now raise to the exponent */
 134   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 135     r = ldexp(r, e);
 136
 137   /* sign is encoded in the size */
 138   if (j < 0)
 139     r = -r;
 140
 141   return r;
 142 }
 143
 144 /* This only supports IEEE floating point */
 145
 146 void
 147 __decodeDouble (MP_INT *man, I_ *exp, StgDouble dbl)
 148 {
 149     /* Do some bit fiddling on IEEE */
 150     unsigned int low, high;             /* assuming 32 bit ints */
 151     int sign, iexp;
 152     union { double d; unsigned int i[2]; } u;   /* assuming 32 bit ints, 64 bit double */
 153
 154     ASSERT(sizeof(unsigned int ) == 4            );
 155     ASSERT(sizeof(dbl          ) == SIZEOF_DOUBLE);
 156     ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
 157     ASSERT(DNBIGIT*SIZEOF_LIMB_T >= SIZEOF_DOUBLE);
 158
 159     u.d = dbl;      /* grab chunks of the double */
 160     low = u.i[L];
 161     high = u.i[H];
 162
 163     /* we know the MP_INT* passed in has size zero, so we realloc
 164         no matter what.
 165     */
 166     man->_mp_alloc = DNBIGIT;
 167
 168     if (low == 0 && (high & ~DMSBIT) == 0) {
 169         man->_mp_size = 0;
 170         *exp = 0L;
 171     } else {
 172         man->_mp_size = DNBIGIT;
 173         iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
 174         sign = high;
 175
 176         high &= DHIGHBIT-1;
 177         if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
 178             high |= DHIGHBIT;
 179         else {
 180             iexp++;
 181             /* A denorm, normalize the mantissa */
 182             while (! (high & DHIGHBIT)) {
 183                 high <<= 1;
 184                 if (low & DMSBIT)
 185                     high++;
 186                 low <<= 1;
 187                 iexp--;
 188             }
 189         }
 190         *exp = (I_) iexp;
 191 #if DNBIGIT == 2
 192         man->_mp_d[0] = (mp_limb_t)low;
 193         man->_mp_d[1] = (mp_limb_t)high;
 194 #else
 195 #if DNBIGIT == 1
 196         man->_mp_d[0] = ((mp_limb_t)high) << 32 | (mp_limb_t)low;
 197 #else
 198 #error Cannot cope with DNBIGIT
 199 #endif
 200 #endif
 201         if (sign < 0)
 202             man->_mp_size = -man->_mp_size;
 203     }
 204 }
 205
 206 void
 207 __decodeFloat (MP_INT *man, I_ *exp, StgFloat flt)
 208 {
 209     /* Do some bit fiddling on IEEE */
 210     int high, sign;                 /* assuming 32 bit ints */
 211     union { float f; int i; } u;    /* assuming 32 bit float and int */
 212
 213     ASSERT(sizeof(int          ) == 4            );
 214     ASSERT(sizeof(flt          ) == SIZEOF_FLOAT );
 215     ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
 216     ASSERT(FNBIGIT*SIZEOF_LIMB_T >= SIZEOF_FLOAT );
 217
 218     u.f = flt;      /* grab the float */
 219     high = u.i;
 220
 221     /* we know the MP_INT* passed in has size zero, so we realloc
 222         no matter what.
 223     */
 224     man->_mp_alloc = FNBIGIT;
 225
 226     if ((high & ~FMSBIT) == 0) {
 227         man->_mp_size = 0;
 228         *exp = 0;
 229     } else {
 230         man->_mp_size = FNBIGIT;
 231         *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
 232         sign = high;
 233
 234         high &= FHIGHBIT-1;
 235         if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
 236             high |= FHIGHBIT;
 237         else {
 238             (*exp)++;
 239             /* A denorm, normalize the mantissa */
 240             while (! (high & FHIGHBIT)) {
 241                 high <<= 1;
 242                 (*exp)--;
 243             }
 244         }
 245 #if FNBIGIT == 1
 246         man->_mp_d[0] = (mp_limb_t)high;
 247 #else
 248 #error Cannot cope with FNBIGIT
 249 #endif
 250         if (sign < 0)
 251             man->_mp_size = -man->_mp_size;
 252     }
 253 }
 254
 255 /* Convenient union types for checking the layout of IEEE 754 types -
 256    based on defs in GNU libc <ieee754.h>
 257 */
 258
 259 union stg_ieee754_flt
 260 {
 261    float f;
 262    struct {
 263
 264 #if WORDS_BIGENDIAN
 265         unsigned int negative:1;
 266         unsigned int exponent:8;
 267         unsigned int mantissa:23;
 268 #else
 269         unsigned int mantissa:23;
 270         unsigned int exponent:8;
 271         unsigned int negative:1;
 272 #endif
 273    } ieee;
 274    struct {
 275
 276 #if WORDS_BIGENDIAN
 277         unsigned int negative:1;
 278         unsigned int exponent:8;
 279         unsigned int quiet_nan:1;
 280         unsigned int mantissa:22;
 281 #else
 282         unsigned int mantissa:22;
 283         unsigned int quiet_nan:1;
 284         unsigned int exponent:8;
 285         unsigned int negative:1;
 286 #endif
 287    } ieee_nan;
 288 };
 289
 290 /*
 291
 292  To recap, here's the representation of a double precision
 293  IEEE floating point number:
 294
 295  sign         63           sign bit (0==positive, 1==negative)
 296  exponent     62-52        exponent (biased by 1023)
 297  fraction     51-0         fraction (bits to right of binary point)
 298 */
 299
 300 union stg_ieee754_dbl
 301 {
 302    double d;
 303    struct {
 304
 305 #if WORDS_BIGENDIAN
 306         unsigned int negative:1;
 307         unsigned int exponent:11;
 308         unsigned int mantissa0:20;
 309         unsigned int mantissa1:32;
 310 #else
 311         unsigned int mantissa1:32;
 312         unsigned int mantissa0:20;
 313         unsigned int exponent:11;
 314         unsigned int negative:1;
 315 #endif
 316    } ieee;
 317     /* This format makes it easier to see if a NaN is a signalling NaN.  */
 318    struct {
 319
 320 #if WORDS_BIGENDIAN
 321         unsigned int negative:1;
 322         unsigned int exponent:11;
 323         unsigned int quiet_nan:1;
 324         unsigned int mantissa0:19;
 325         unsigned int mantissa1:32;
 326 #else
 327         unsigned int mantissa1:32;
 328         unsigned int mantissa0:19;
 329         unsigned int quiet_nan:1;
 330         unsigned int exponent:11;
 331         unsigned int negative:1;
 332 #endif
 333    } ieee_nan;
 334 };
 335
 336 /*
 337  * Predicates for testing for extreme IEEE fp values. Used
 338  * by the bytecode evaluator and the Prelude.
 339  *
 340  */
 341
 342 /* In case you don't suppport IEEE, you'll just get dummy defs.. */
 343 #ifdef IEEE_FLOATING_POINT
 344
 345 StgInt
 346 isDoubleNaN(StgDouble d)
 347 {
 348   union stg_ieee754_dbl u;
 349
 350   u.d = d;
 351
 352   return (
 353     u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&  /* Is the exponent all ones? */
 354     (u.ieee.mantissa0 != 0 || u.ieee.mantissa1 != 0)
 355         /* and the mantissa non-zero? */
 356     );
 357 }
 358
 359 StgInt
 360 isDoubleInfinite(StgDouble d)
 361 {
 362     union stg_ieee754_dbl u;
 363
 364     u.d = d;
 365
 366     /* Inf iff exponent is all ones, mantissa all zeros */
 367     return (
 368         u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&
 369         u.ieee.mantissa0 == 0                   &&
 370         u.ieee.mantissa1 == 0
 371       );
 372 }
 373
 374 StgInt
 375 isDoubleDenormalized(StgDouble d)
 376 {
 377     union stg_ieee754_dbl u;
 378
 379     u.d = d;
 380
 381     /* A (single/double/quad) precision floating point number
 382        is denormalised iff:
 383         - exponent is zero
 384         - mantissa is non-zero.
 385         - (don't care about setting of sign bit.)
 386
 387     */
 388     return (
 389         u.ieee.exponent  == 0 &&
 390         (u.ieee.mantissa0 != 0 ||
 391          u.ieee.mantissa1 != 0)
 392       );
 393
 394 }
 395
 396 StgInt
 397 isDoubleNegativeZero(StgDouble d)
 398 {
 399     union stg_ieee754_dbl u;
 400
 401     u.d = d;
 402     /* sign (bit 63) set (only) => negative zero */
 403
 404     return (
 405         u.ieee.negative  == 1 &&
 406         u.ieee.exponent  == 0 &&
 407         u.ieee.mantissa0 == 0 &&
 408         u.ieee.mantissa1 == 0);
 409 }
 410
 411 /* Same tests, this time for StgFloats. */
 412
 413 /*
 414  To recap, here's the representation of a single precision
 415  IEEE floating point number:
 416
 417  sign         31           sign bit (0 == positive, 1 == negative)
 418  exponent     30-23        exponent (biased by 127)
 419  fraction     22-0         fraction (bits to right of binary point)
 420 */
 421
 422
 423 StgInt
 424 isFloatNaN(StgFloat f)
 425 {
 426     union stg_ieee754_flt u;
 427     u.f = f;
 428
 429    /* Floating point NaN iff exponent is all ones, mantissa is
 430       non-zero (but see below.) */
 431    return (
 432         u.ieee.exponent == 255 /* 2^8 - 1 */ &&
 433         u.ieee.mantissa != 0);
 434 }
 435
 436 StgInt
 437 isFloatInfinite(StgFloat f)
 438 {
 439     union stg_ieee754_flt u;
 440     u.f = f;
 441
 442     /* A float is Inf iff exponent is max (all ones),
 443        and mantissa is min(all zeros.) */
 444     return (
 445         u.ieee.exponent == 255 /* 2^8 - 1 */ &&
 446         u.ieee.mantissa == 0);
 447 }
 448
 449 StgInt
 450 isFloatDenormalized(StgFloat f)
 451 {
 452     union stg_ieee754_flt u;
 453     u.f = f;
 454
 455     /* A (single/double/quad) precision floating point number
 456        is denormalised iff:
 457         - exponent is zero
 458         - mantissa is non-zero.
 459         - (don't care about setting of sign bit.)
 460
 461     */
 462     return (
 463         u.ieee.exponent == 0 &&
 464         u.ieee.mantissa != 0);
 465 }
 466
 467 StgInt
 468 isFloatNegativeZero(StgFloat f)
 469 {
 470     union stg_ieee754_flt u;
 471     u.f = f;
 472
 473     /* sign (bit 31) set (only) => negative zero */
 474     return (
 475         u.ieee.negative      &&
 476         u.ieee.exponent == 0 &&
 477         u.ieee.mantissa == 0);
 478 }
 479
 480 #else /* ! IEEE_FLOATING_POINT */
 481
 482 /* Dummy definitions of predicates - they all return false */
 483 StgInt isDoubleNaN(d) StgDouble d; { return 0; }
 484 StgInt isDoubleInfinite(d) StgDouble d; { return 0; }
 485 StgInt isDoubleDenormalized(d) StgDouble d; { return 0; }
 486 StgInt isDoubleNegativeZero(d) StgDouble d; { return 0; }
 487 StgInt isFloatNaN(f) StgFloat f; { return 0; }
 488 StgInt isFloatInfinite(f) StgFloat f; { return 0; }
 489 StgInt isFloatDenormalized(f) StgFloat f; { return 0; }
 490 StgInt isFloatNegativeZero(f) StgFloat f; { return 0; }
 491
 492 #endif /* ! IEEE_FLOATING_POINT */