the function is 0 if OP is a perfect square, and *any* non-zero number
otherwise.
-Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc.
+Copyright (C) 1993, 1994, 1996, 1997, 1998, 1999, 2000 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Library General Public License as published by
-the Free Software Foundation; either version 2 of the License, or (at your
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
-You should have received a copy of the GNU Library General Public License
+You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
doesn't help to use CHAR_BIT from limits.h, as the real problem is
the static arrays. */
+#include <stdio.h> /* for NULL */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
/* Define this macro for IEEE P854 machines with a fast sqrt instruction. */
#if defined __GNUC__ && ! defined __SOFT_FLOAT
-#if defined __sparc__
+#if defined (__sparc__) && BITS_PER_MP_LIMB == 32
#define SQRT(a) \
({ \
double __sqrt_res; \
})
#endif
-#if defined __HAVE_68881__
+#if defined (__HAVE_68881__)
#define SQRT(a) \
({ \
double __sqrt_res; \
})
#endif
-#if defined __hppa
+#if defined (__hppa) && BITS_PER_MP_LIMB == 32
#define SQRT(a) \
({ \
double __sqrt_res; \
})
#endif
-#if defined _ARCH_PWR2
+#if defined (_ARCH_PWR2) && BITS_PER_MP_LIMB == 32
#define SQRT(a) \
({ \
double __sqrt_res; \
})
#endif
+#if 0
+#if defined (__i386__) || defined (__i486__)
+#define SQRT(a) \
+ ({ \
+ double __sqrt_res; \
+ asm ("fsqrt" : "=t" (__sqrt_res) : "0" (a)); \
+ __sqrt_res; \
+ })
+#endif
+#endif
+
#endif
#ifndef SQRT
square root of numbers with the same initial digits and an even
difference in the total number of digits. Consider the square root
of 1, 10, 100, 1000, ...) */
-static unsigned char even_approx_tab[256] =
+static const unsigned char even_approx_tab[256] =
{
0x6a, 0x6a, 0x6b, 0x6c, 0x6c, 0x6d, 0x6e, 0x6e,
0x6f, 0x70, 0x71, 0x71, 0x72, 0x73, 0x73, 0x74,
/* Table to be used for operands with an odd total number of bits.
(Further comments before previous table.) */
-static unsigned char odd_approx_tab[256] =
+static const unsigned char odd_approx_tab[256] =
{
0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03,
0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07,
/* Is there a fast sqrt instruction defined for this machine? */
#ifdef SQRT
{
- initial_approx = SQRT (t_high0 * 2.0
- * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1))
- + t_high1);
+ initial_approx = SQRT (t_high0 * MP_BASE_AS_DOUBLE + t_high1);
/* If t_high0,,t_high1 is big, the result in INITIAL_APPROX might have
become incorrect due to overflow in the conversion from double to
mp_limb_t above. It will typically be zero in that case, but might be
if ((cnt & 1) == 0)
{
- /* The most sign bit of t_high0 is set. */
+ /* The most significant bit of t_high0 is set. */
initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 1);
initial_approx &= 0xff;
initial_approx = even_approx_tab[initial_approx];
}
else
{
- /* The most significant bit of T_HIGH0 is unset,
+ /* The most significant bit of t_high0 is unset,
the second most significant is set. */
initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 2);
initial_approx &= 0xff;
initial_approx <<= BITS_PER_MP_LIMB - 8 - 1;
/* Perform small precision Newtonian iterations to get a full word
- approximation. For small operands, these iteration will make the
+ approximation. For small operands, these iterations will do the
entire job. */
if (t_high0 == ~(mp_limb_t)0)
initial_approx = t_high0;
/* Now get a full word by one (or for > 36 bit machines) several
iterations. */
- for (i = 16; i < BITS_PER_MP_LIMB; i <<= 1)
+ for (i = 18; i < BITS_PER_MP_LIMB; i <<= 1)
{
mp_limb_t ignored_remainder;
rp[0] = initial_approx;
rsize = 1;
-#ifdef DEBUG
+#ifdef SQRT_DEBUG
printf ("\n\nT = ");
mpn_dump (tp, tsize);
#endif
time is spent here. */
/* It is possible to do a great optimization here. The successive
- divisors in the mpn_divmod call below has more and more leading
+ divisors in the mpn_divmod call below have more and more leading
words equal to its predecessor. Therefore the beginning of
each division will repeat the same work as did the last
division. If we could guarantee that the leading words of two
while (--i >= 0)
{
mp_limb_t cy;
-#ifdef DEBUG
+#ifdef SQRT_DEBUG
mp_limb_t old_least_sign_r = rp[0];
mp_size_t old_rsize = rsize;
cy = mpn_divmod (xp, ttp, tsize, rp, rsize);
xsize = tsize - rsize;
-#ifdef DEBUG
+#ifdef SQRT_DEBUG
printf ("X =%d ", cy);
mpn_dump (xp, xsize);
#endif
mpn_rshift (rp, xp, xsize, 1);
rp[xsize - 1] |= ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1));
rsize = xsize;
-#ifdef DEBUG
+#ifdef SQRT_DEBUG
if (old_least_sign_r != rp[rsize - old_rsize])
printf (">>>>>>>> %d: %0*lX, %0*lX <<<<<<<<\n",
i, 2 * BYTES_PER_MP_LIMB, old_least_sign_r,
}
}
-#ifdef DEBUG
+#ifdef SQRT_DEBUG
printf ("(final) R = ");
mpn_dump (rp, rsize);
#endif
/* These operations can't overflow. */
cy_limb = mpn_sub_n (tp, tp, rp, rsize);
cy_limb += mpn_sub_n (tp, tp, rp, rsize);
- mpn_sub_1 (tp + rsize, tp + rsize, tsize - rsize, cy_limb);
- mpn_add_1 (tp, tp, tsize, (mp_limb_t) 1);
+ mpn_decr_u (tp + rsize, cy_limb);
+ mpn_incr_u (tp, (mp_limb_t) 1);
- mpn_sub_1 (rp, rp, rsize, (mp_limb_t) 1);
+ mpn_decr_u (rp, (mp_limb_t) 1);
-#ifdef DEBUG
+#ifdef SQRT_DEBUG
printf ("(adjusted) R = ");
mpn_dump (rp, rsize);
#endif