ghc/rts/StgPrimFloat.c

   1 /* -----------------------------------------------------------------------------
   2  * $Id: StgPrimFloat.c,v 1.8 2001/08/14 13:40:09 sewardj Exp $
   3  *
   4  * (c) The GHC Team, 1998-2000
   5  *
   6  * Miscellaneous support for floating-point primitives
   7  *
   8  * ---------------------------------------------------------------------------*/
   9
  10 #include "PosixSource.h"
  11 #include "Rts.h"
  12
  13 /*
  14  * Encoding and decoding Doubles.  Code based on the HBC code
  15  * (lib/fltcode.c).
  16  */
  17
  18 #ifdef _SHORT_LIMB
  19 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_INT
  20 #else
  21 #ifdef _LONG_LONG_LIMB
  22 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG_LONG
  23 #else
  24 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG
  25 #endif
  26 #endif
  27
  28 #if SIZEOF_LIMB_T == 4
  29 #define GMP_BASE 4294967296.0
  30 #elif SIZEOF_LIMB_T == 8
  31 #define GMP_BASE 18446744073709551616.0
  32 #else
  33 #error Cannot cope with SIZEOF_LIMB_T -- please add definition of GMP_BASE
  34 #endif
  35
  36 #define DNBIGIT  ((SIZEOF_DOUBLE+SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
  37 #define FNBIGIT  ((SIZEOF_FLOAT +SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
  38
  39 #if IEEE_FLOATING_POINT
  40 #define MY_DMINEXP  ((DBL_MIN_EXP) - (DBL_MANT_DIG) - 1)
  41 /* DMINEXP is defined in values.h on Linux (for example) */
  42 #define DHIGHBIT 0x00100000
  43 #define DMSBIT   0x80000000
  44
  45 #define MY_FMINEXP  ((FLT_MIN_EXP) - (FLT_MANT_DIG) - 1)
  46 #define FHIGHBIT 0x00800000
  47 #define FMSBIT   0x80000000
  48 #endif
  49
  50 #ifdef WORDS_BIGENDIAN
  51 #define L 1
  52 #define H 0
  53 #else
  54 #define L 0
  55 #define H 1
  56 #endif
  57
  58 #define __abs(a)                (( (a) >= 0 ) ? (a) : (-(a)))
  59
  60 StgDouble
  61 __encodeDouble (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
  62 {
  63     StgDouble r;
  64     const mp_limb_t *const arr = (const mp_limb_t *)ba;
  65     I_ i;
  66
  67     /* Convert MP_INT to a double; knows a lot about internal rep! */
  68     for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
  69         r = (r * GMP_BASE) + arr[i];
  70
  71     /* Now raise to the exponent */
  72     if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
  73         r = ldexp(r, e);
  74
  75     /* sign is encoded in the size */
  76     if (size < 0)
  77         r = -r;
  78
  79     return r;
  80 }
  81
  82 /* Special version for small Integers */
  83 StgDouble
  84 __int_encodeDouble (I_ j, I_ e)
  85 {
  86   StgDouble r;
  87
  88   r = (StgDouble)__abs(j);
  89
  90   /* Now raise to the exponent */
  91   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
  92     r = ldexp(r, e);
  93
  94   /* sign is encoded in the size */
  95   if (j < 0)
  96     r = -r;
  97
  98   return r;
  99 }
 100
 101 StgFloat
 102 __encodeFloat (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
 103 {
 104     StgFloat r;
 105     const mp_limb_t *arr = (const mp_limb_t *)ba;
 106     I_ i;
 107
 108     /* Convert MP_INT to a float; knows a lot about internal rep! */
 109     for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
 110         r = (r * GMP_BASE) + arr[i];
 111
 112     /* Now raise to the exponent */
 113     if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 114         r = ldexp(r, e);
 115
 116     /* sign is encoded in the size */
 117     if (size < 0)
 118         r = -r;
 119
 120     return r;
 121 }
 122
 123 /* Special version for small Integers */
 124 StgFloat
 125 __int_encodeFloat (I_ j, I_ e)
 126 {
 127   StgFloat r;
 128
 129   r = (StgFloat)__abs(j);
 130
 131   /* Now raise to the exponent */
 132   if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
 133     r = ldexp(r, e);
 134
 135   /* sign is encoded in the size */
 136   if (j < 0)
 137     r = -r;
 138
 139   return r;
 140 }
 141
 142 /* This only supports IEEE floating point */
 143
 144 void
 145 __decodeDouble (MP_INT *man, I_ *exp, StgDouble dbl)
 146 {
 147     /* Do some bit fiddling on IEEE */
 148     unsigned int low, high;             /* assuming 32 bit ints */
 149     int sign, iexp;
 150     union { double d; unsigned int i[2]; } u;   /* assuming 32 bit ints, 64 bit double */
 151
 152     ASSERT(sizeof(unsigned int ) == 4            );
 153     ASSERT(sizeof(dbl          ) == SIZEOF_DOUBLE);
 154     ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
 155     ASSERT(DNBIGIT*SIZEOF_LIMB_T >= SIZEOF_DOUBLE);
 156
 157     u.d = dbl;      /* grab chunks of the double */
 158     low = u.i[L];
 159     high = u.i[H];
 160
 161     /* we know the MP_INT* passed in has size zero, so we realloc
 162         no matter what.
 163     */
 164     man->_mp_alloc = DNBIGIT;
 165
 166     if (low == 0 && (high & ~DMSBIT) == 0) {
 167         man->_mp_size = 0;
 168         *exp = 0L;
 169     } else {
 170         man->_mp_size = DNBIGIT;
 171         iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
 172         sign = high;
 173
 174         high &= DHIGHBIT-1;
 175         if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
 176             high |= DHIGHBIT;
 177         else {
 178             iexp++;
 179             /* A denorm, normalize the mantissa */
 180             while (! (high & DHIGHBIT)) {
 181                 high <<= 1;
 182                 if (low & DMSBIT)
 183                     high++;
 184                 low <<= 1;
 185                 iexp--;
 186             }
 187         }
 188         *exp = (I_) iexp;
 189 #if DNBIGIT == 2
 190         man->_mp_d[0] = (mp_limb_t)low;
 191         man->_mp_d[1] = (mp_limb_t)high;
 192 #else
 193 #if DNBIGIT == 1
 194         man->_mp_d[0] = ((mp_limb_t)high) << 32 | (mp_limb_t)low;
 195 #else
 196 #error Cannot cope with DNBIGIT
 197 #endif
 198 #endif
 199         if (sign < 0)
 200             man->_mp_size = -man->_mp_size;
 201     }
 202 }
 203
 204 void
 205 __decodeFloat (MP_INT *man, I_ *exp, StgFloat flt)
 206 {
 207     /* Do some bit fiddling on IEEE */
 208     int high, sign;                 /* assuming 32 bit ints */
 209     union { float f; int i; } u;    /* assuming 32 bit float and int */
 210
 211     ASSERT(sizeof(int          ) == 4            );
 212     ASSERT(sizeof(flt          ) == SIZEOF_FLOAT );
 213     ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
 214     ASSERT(FNBIGIT*SIZEOF_LIMB_T >= SIZEOF_FLOAT );
 215
 216     u.f = flt;      /* grab the float */
 217     high = u.i;
 218
 219     /* we know the MP_INT* passed in has size zero, so we realloc
 220         no matter what.
 221     */
 222     man->_mp_alloc = FNBIGIT;
 223
 224     if ((high & ~FMSBIT) == 0) {
 225         man->_mp_size = 0;
 226         *exp = 0;
 227     } else {
 228         man->_mp_size = FNBIGIT;
 229         *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
 230         sign = high;
 231
 232         high &= FHIGHBIT-1;
 233         if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
 234             high |= FHIGHBIT;
 235         else {
 236             (*exp)++;
 237             /* A denorm, normalize the mantissa */
 238             while (! (high & FHIGHBIT)) {
 239                 high <<= 1;
 240                 (*exp)--;
 241             }
 242         }
 243 #if FNBIGIT == 1
 244         man->_mp_d[0] = (mp_limb_t)high;
 245 #else
 246 #error Cannot cope with FNBIGIT
 247 #endif
 248         if (sign < 0)
 249             man->_mp_size = -man->_mp_size;
 250     }
 251 }
 252
 253 /* Convenient union types for checking the layout of IEEE 754 types -
 254    based on defs in GNU libc <ieee754.h>
 255 */
 256
 257 union stg_ieee754_flt
 258 {
 259    float f;
 260    struct {
 261
 262 #if WORDS_BIGENDIAN
 263         unsigned int negative:1;
 264         unsigned int exponent:8;
 265         unsigned int mantissa:23;
 266 #else
 267         unsigned int mantissa:23;
 268         unsigned int exponent:8;
 269         unsigned int negative:1;
 270 #endif
 271    } ieee;
 272    struct {
 273
 274 #if WORDS_BIGENDIAN
 275         unsigned int negative:1;
 276         unsigned int exponent:8;
 277         unsigned int quiet_nan:1;
 278         unsigned int mantissa:22;
 279 #else
 280         unsigned int mantissa:22;
 281         unsigned int quiet_nan:1;
 282         unsigned int exponent:8;
 283         unsigned int negative:1;
 284 #endif
 285    } ieee_nan;
 286 };
 287
 288 /*
 289
 290  To recap, here's the representation of a double precision
 291  IEEE floating point number:
 292
 293  sign         63           sign bit (0==positive, 1==negative)
 294  exponent     62-52        exponent (biased by 1023)
 295  fraction     51-0         fraction (bits to right of binary point)
 296 */
 297
 298 union stg_ieee754_dbl
 299 {
 300    double d;
 301    struct {
 302
 303 #if WORDS_BIGENDIAN
 304         unsigned int negative:1;
 305         unsigned int exponent:11;
 306         unsigned int mantissa0:20;
 307         unsigned int mantissa1:32;
 308 #else
 309         unsigned int mantissa1:32;
 310         unsigned int mantissa0:20;
 311         unsigned int exponent:11;
 312         unsigned int negative:1;
 313 #endif
 314    } ieee;
 315     /* This format makes it easier to see if a NaN is a signalling NaN.  */
 316    struct {
 317
 318 #if WORDS_BIGENDIAN
 319         unsigned int negative:1;
 320         unsigned int exponent:11;
 321         unsigned int quiet_nan:1;
 322         unsigned int mantissa0:19;
 323         unsigned int mantissa1:32;
 324 #else
 325         unsigned int mantissa1:32;
 326         unsigned int mantissa0:19;
 327         unsigned int quiet_nan:1;
 328         unsigned int exponent:11;
 329         unsigned int negative:1;
 330 #endif
 331    } ieee_nan;
 332 };
 333
 334 /*
 335  * Predicates for testing for extreme IEEE fp values. Used
 336  * by the bytecode evaluator and the Prelude.
 337  *
 338  */
 339
/* In case you don't support IEEE, you'll just get dummy defs. */
 341 #ifdef IEEE_FLOATING_POINT
 342
 343 StgInt
 344 isDoubleNaN(StgDouble d)
 345 {
 346   union stg_ieee754_dbl u;
 347
 348   u.d = d;
 349
 350   return (
 351     u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&  /* Is the exponent all ones? */
 352     (u.ieee.mantissa0 != 0 || u.ieee.mantissa1 != 0)
 353         /* and the mantissa non-zero? */
 354     );
 355 }
 356
 357 StgInt
 358 isDoubleInfinite(StgDouble d)
 359 {
 360     union stg_ieee754_dbl u;
 361
 362     u.d = d;
 363
 364     /* Inf iff exponent is all ones, mantissa all zeros */
 365     return (
 366         u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&
 367         u.ieee.mantissa0 == 0                   &&
 368         u.ieee.mantissa1 == 0
 369       );
 370 }
 371
 372 StgInt
 373 isDoubleDenormalized(StgDouble d)
 374 {
 375     union stg_ieee754_dbl u;
 376
 377     u.d = d;
 378
 379     /* A (single/double/quad) precision floating point number
 380        is denormalised iff:
 381         - exponent is zero
 382         - mantissa is non-zero.
 383         - (don't care about setting of sign bit.)
 384
 385     */
 386     return (
 387         u.ieee.exponent  == 0 &&
 388         (u.ieee.mantissa0 != 0 ||
 389          u.ieee.mantissa1 != 0)
 390       );
 391
 392 }
 393
 394 StgInt
 395 isDoubleNegativeZero(StgDouble d)
 396 {
 397     union stg_ieee754_dbl u;
 398
 399     u.d = d;
 400     /* sign (bit 63) set (only) => negative zero */
 401
 402     return (
 403         u.ieee.negative  == 1 &&
 404         u.ieee.exponent  == 0 &&
 405         u.ieee.mantissa0 == 0 &&
 406         u.ieee.mantissa1 == 0);
 407 }
 408
 409 /* Same tests, this time for StgFloats. */
 410
 411 /*
 412  To recap, here's the representation of a single precision
 413  IEEE floating point number:
 414
 415  sign         31           sign bit (0 == positive, 1 == negative)
 416  exponent     30-23        exponent (biased by 127)
 417  fraction     22-0         fraction (bits to right of binary point)
 418 */
 419
 420
 421 StgInt
 422 isFloatNaN(StgFloat f)
 423 {
 424     union stg_ieee754_flt u;
 425     u.f = f;
 426
 427    /* Floating point NaN iff exponent is all ones, mantissa is
 428       non-zero (but see below.) */
 429    return (
 430         u.ieee.exponent == 255 /* 2^8 - 1 */ &&
 431         u.ieee.mantissa != 0);
 432 }
 433
 434 StgInt
 435 isFloatInfinite(StgFloat f)
 436 {
 437     union stg_ieee754_flt u;
 438     u.f = f;
 439
 440     /* A float is Inf iff exponent is max (all ones),
 441        and mantissa is min(all zeros.) */
 442     return (
 443         u.ieee.exponent == 255 /* 2^8 - 1 */ &&
 444         u.ieee.mantissa == 0);
 445 }
 446
 447 StgInt
 448 isFloatDenormalized(StgFloat f)
 449 {
 450     union stg_ieee754_flt u;
 451     u.f = f;
 452
 453     /* A (single/double/quad) precision floating point number
 454        is denormalised iff:
 455         - exponent is zero
 456         - mantissa is non-zero.
 457         - (don't care about setting of sign bit.)
 458
 459     */
 460     return (
 461         u.ieee.exponent == 0 &&
 462         u.ieee.mantissa != 0);
 463 }
 464
 465 StgInt
 466 isFloatNegativeZero(StgFloat f)
 467 {
 468     union stg_ieee754_flt u;
 469     u.f = f;
 470
 471     /* sign (bit 31) set (only) => negative zero */
 472     return (
 473         u.ieee.negative      &&
 474         u.ieee.exponent == 0 &&
 475         u.ieee.mantissa == 0);
 476 }
 477
 478 #else /* ! IEEE_FLOATING_POINT */
 479
 480 /* Dummy definitions of predicates - they all return false */
 481 StgInt isDoubleNaN(d) StgDouble d; { return 0; }
 482 StgInt isDoubleInfinite(d) StgDouble d; { return 0; }
 483 StgInt isDoubleDenormalized(d) StgDouble d; { return 0; }
 484 StgInt isDoubleNegativeZero(d) StgDouble d; { return 0; }
 485 StgInt isFloatNaN(f) StgFloat f; { return 0; }
 486 StgInt isFloatInfinite(f) StgFloat f; { return 0; }
 487 StgInt isFloatDenormalized(f) StgFloat f; { return 0; }
 488 StgInt isFloatNegativeZero(f) StgFloat f; { return 0; }
 489
 490 #endif /* ! IEEE_FLOATING_POINT */