[project @ 2000-08-18 15:44:28 by simonmar]

[ghc-hetmet.git] / ghc / rts / gmp / mpn / generic / sqrtrem.c
diff --git a/ghc/rts/gmp/mpn/generic/sqrtrem.c b/ghc/rts/gmp/mpn/generic/sqrtrem.c

index 539480d..ee3b514 100644 (file)
--- a/ghc/rts/gmp/mpn/generic/sqrtrem.c
+++ b/ghc/rts/gmp/mpn/generic/sqrtrem.c
@@ -12,21 +12,22 @@
     the function is 0 if OP is a perfect square, and *any* non-zero number
     otherwise.
  
-Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc.
+Copyright (C) 1993, 1994, 1996, 1997, 1998, 1999, 2000 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
  The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Library General Public License as published by
-the Free Software Foundation; either version 2 of the License, or (at your
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
  option) any later version.
  
  The GNU MP Library is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  License for more details.
  
-You should have received a copy of the GNU Library General Public License
+You should have received a copy of the GNU Lesser General Public License
  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  MA 02111-1307, USA. */
@@ -35,6 +36,7 @@ MA 02111-1307, USA. */
     doesn't help to use CHAR_BIT from limits.h, as the real problem is
     the static arrays.  */
  
+#include <stdio.h> /* for NULL */
  #include "gmp.h"
  #include "gmp-impl.h"
  #include "longlong.h"
@@ -59,7 +61,7 @@ MA 02111-1307, USA. */
  /* Define this macro for IEEE P854 machines with a fast sqrt instruction.  */
  #if defined __GNUC__ && ! defined __SOFT_FLOAT
  
-#if defined __sparc__
+#if defined (__sparc__) && BITS_PER_MP_LIMB == 32
  #define SQRT(a) \
    ({                                                                   \
      double __sqrt_res;                                                 \
@@ -68,7 +70,7 @@ MA 02111-1307, USA. */
    })
  #endif
  
-#if defined __HAVE_68881__
+#if defined (__HAVE_68881__)
  #define SQRT(a) \
    ({                                                                   \
      double __sqrt_res;                                                 \
@@ -77,7 +79,7 @@ MA 02111-1307, USA. */
    })
  #endif
  
-#if defined __hppa
+#if defined (__hppa) && BITS_PER_MP_LIMB == 32
  #define SQRT(a) \
    ({                                                                   \
      double __sqrt_res;                                                 \
@@ -86,7 +88,7 @@ MA 02111-1307, USA. */
    })
  #endif
  
-#if defined _ARCH_PWR2
+#if defined (_ARCH_PWR2) && BITS_PER_MP_LIMB == 32
  #define SQRT(a) \
    ({                                                                   \
      double __sqrt_res;                                                 \
@@ -95,6 +97,17 @@ MA 02111-1307, USA. */
    })
  #endif
  
+#if 0
+#if defined (__i386__) || defined (__i486__)
+#define SQRT(a) \
+  ({                                                                   \
+    double __sqrt_res;                                                 \
+    asm ("fsqrt" : "=t" (__sqrt_res) : "0" (a));                       \
+    __sqrt_res;                                                                \
+  })
+#endif
+#endif
+
  #endif
  
  #ifndef SQRT
@@ -112,7 +125,7 @@ MA 02111-1307, USA. */
     square root of numbers with the same initial digits and an even
     difference in the total number of digits.  Consider the square root
     of 1, 10, 100, 1000, ...)  */
-static unsigned char even_approx_tab[256] =
+static const unsigned char even_approx_tab[256] =
  {
    0x6a, 0x6a, 0x6b, 0x6c, 0x6c, 0x6d, 0x6e, 0x6e,
    0x6f, 0x70, 0x71, 0x71, 0x72, 0x73, 0x73, 0x74,
@@ -150,7 +163,7 @@ static unsigned char even_approx_tab[256] =
  
  /* Table to be used for operands with an odd total number of bits.
     (Further comments before previous table.)  */
-static unsigned char odd_approx_tab[256] =
+static const unsigned char odd_approx_tab[256] =
  {
    0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03,
    0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07,
@@ -272,9 +285,7 @@ mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
  /* Is there a fast sqrt instruction defined for this machine?  */
  #ifdef SQRT
    {
-    initial_approx = SQRT (t_high0 * 2.0
-                          * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1))
-                          + t_high1);
+    initial_approx = SQRT (t_high0 * MP_BASE_AS_DOUBLE + t_high1);
      /* If t_high0,,t_high1 is big, the result in INITIAL_APPROX might have
         become incorrect due to overflow in the conversion from double to
         mp_limb_t above.  It will typically be zero in that case, but might be
@@ -293,14 +304,14 @@ mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
  
    if ((cnt & 1) == 0)
      {
-      /* The most sign bit of t_high0 is set.  */
+      /* The most significant bit of t_high0 is set.  */
        initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 1);
        initial_approx &= 0xff;
        initial_approx = even_approx_tab[initial_approx];
      }
    else
      {
-      /* The most significant bit of T_HIGH0 is unset,
+      /* The most significant bit of t_high0 is unset,
          the second most significant is set.  */
        initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 2);
        initial_approx &= 0xff;
@@ -310,7 +321,7 @@ mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
    initial_approx <<= BITS_PER_MP_LIMB - 8 - 1;
  
    /* Perform small precision Newtonian iterations to get a full word
-     approximation.  For small operands, these iteration will make the
+     approximation.  For small operands, these iterations will do the
       entire job.  */
    if (t_high0 == ~(mp_limb_t)0)
      initial_approx = t_high0;
@@ -328,7 +339,7 @@ mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
  
        /* Now get a full word by one (or for > 36 bit machines) several
          iterations.  */
-      for (i = 16; i < BITS_PER_MP_LIMB; i <<= 1)
+      for (i = 18; i < BITS_PER_MP_LIMB; i <<= 1)
         {
           mp_limb_t ignored_remainder;
  
@@ -343,7 +354,7 @@ mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
    rp[0] = initial_approx;
    rsize = 1;
  
-#ifdef DEBUG
+#ifdef SQRT_DEBUG
           printf ("\n\nT = ");
           mpn_dump (tp, tsize);
  #endif
@@ -373,7 +384,7 @@ mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
          time is spent here.  */
  
        /* It is possible to do a great optimization here.  The successive
-        divisors in the mpn_divmod call below has more and more leading
+        divisors in the mpn_divmod call below have more and more leading
          words equal to its predecessor.  Therefore the beginning of
          each division will repeat the same work as did the last
          division.  If we could guarantee that the leading words of two
@@ -392,7 +403,7 @@ mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
        while (--i >= 0)
         {
           mp_limb_t cy;
-#ifdef DEBUG
+#ifdef SQRT_DEBUG
           mp_limb_t old_least_sign_r = rp[0];
           mp_size_t old_rsize = rsize;
  
@@ -408,7 +419,7 @@ mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
           cy = mpn_divmod (xp, ttp, tsize, rp, rsize);
           xsize = tsize - rsize;
  
-#ifdef DEBUG
+#ifdef SQRT_DEBUG
           printf ("X =%d ", cy);
           mpn_dump (xp, xsize);
  #endif
@@ -435,7 +446,7 @@ mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
           mpn_rshift (rp, xp, xsize, 1);
           rp[xsize - 1] |= ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1));
           rsize = xsize;
-#ifdef DEBUG
+#ifdef SQRT_DEBUG
           if (old_least_sign_r != rp[rsize - old_rsize])
             printf (">>>>>>>> %d: %0*lX, %0*lX <<<<<<<<\n",
                     i, 2 * BYTES_PER_MP_LIMB, old_least_sign_r,
@@ -444,7 +455,7 @@ mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
         }
      }
  
-#ifdef DEBUG
+#ifdef SQRT_DEBUG
    printf ("(final) R = ");
    mpn_dump (rp, rsize);
  #endif
@@ -470,12 +481,12 @@ mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
        /* These operations can't overflow.  */
        cy_limb  = mpn_sub_n (tp, tp, rp, rsize);
        cy_limb += mpn_sub_n (tp, tp, rp, rsize);
-      mpn_sub_1 (tp + rsize, tp + rsize, tsize - rsize, cy_limb);
-      mpn_add_1 (tp, tp, tsize, (mp_limb_t) 1);
+      mpn_decr_u (tp + rsize, cy_limb);
+      mpn_incr_u (tp, (mp_limb_t) 1);
  
-      mpn_sub_1 (rp, rp, rsize, (mp_limb_t) 1);
+      mpn_decr_u (rp, (mp_limb_t) 1);
  
-#ifdef DEBUG
+#ifdef SQRT_DEBUG
        printf ("(adjusted) R = ");
        mpn_dump (rp, rsize);
  #endif