ghc/rts/gmp/mpn/generic/sqrtrem.c

   1 /* mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
   2
   3    Write the square root of {OP_PTR, OP_SIZE} at ROOT_PTR.
   4    Write the remainder at REM_PTR, if REM_PTR != NULL.
   5    Return the size of the remainder.
   6    (The size of the root is always half of the size of the operand.)
   7
   8    OP_PTR and ROOT_PTR may not point to the same object.
   9    OP_PTR and REM_PTR may point to the same object.
  10
  11    If REM_PTR is NULL, only the root is computed and the return value of
  12    the function is 0 if OP is a perfect square, and *any* non-zero number
  13    otherwise.
  14
  15 Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc.
  16
  17 This file is part of the GNU MP Library.
  18
  19 The GNU MP Library is free software; you can redistribute it and/or modify
  20 it under the terms of the GNU Library General Public License as published by
  21 the Free Software Foundation; either version 2 of the License, or (at your
  22 option) any later version.
  23
  24 The GNU MP Library is distributed in the hope that it will be useful, but
  25 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  26 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
  27 License for more details.
  28
  29 You should have received a copy of the GNU Library General Public License
  30 along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
  31 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  32 MA 02111-1307, USA. */
  33
  34 /* This code is just correct if "unsigned char" has at least 8 bits.  It
  35    doesn't help to use CHAR_BIT from limits.h, as the real problem is
  36    the static arrays.  */
  37
  38 #include "gmp.h"
  39 #include "gmp-impl.h"
  40 #include "longlong.h"
  41
  42 /* Square root algorithm:
  43
  44    1. Shift OP (the input) to the left an even number of bits s.t. there
  45       are an even number of words and either (or both) of the most
  46       significant bits are set.  This way, sqrt(OP) has exactly half as
  47       many words as OP, and has its most significant bit set.
  48
  49    2. Get a 9-bit approximation to sqrt(OP) using the pre-computed tables.
  50       This approximation is used for the first single-precision
  51       iterations of Newton's method, yielding a full-word approximation
  52       to sqrt(OP).
  53
  54    3. Perform multiple-precision Newton iteration until we have the
  55       exact result.  Only about half of the input operand is used in
  56       this calculation, as the square root is perfectly determinable
  57       from just the higher half of a number.  */
  58 \f
  59 /* Define this macro for IEEE P854 machines with a fast sqrt instruction.  */
  60 #if defined __GNUC__ && ! defined __SOFT_FLOAT
  61
  62 #if defined __sparc__
  63 #define SQRT(a) \
  64   ({                                                                    \
  65     double __sqrt_res;                                                  \
  66     asm ("fsqrtd %1,%0" : "=f" (__sqrt_res) : "f" (a));                 \
  67     __sqrt_res;                                                         \
  68   })
  69 #endif
  70
  71 #if defined __HAVE_68881__
  72 #define SQRT(a) \
  73   ({                                                                    \
  74     double __sqrt_res;                                                  \
  75     asm ("fsqrtx %1,%0" : "=f" (__sqrt_res) : "f" (a));                 \
  76     __sqrt_res;                                                         \
  77   })
  78 #endif
  79
  80 #if defined __hppa
  81 #define SQRT(a) \
  82   ({                                                                    \
  83     double __sqrt_res;                                                  \
  84     asm ("fsqrt,dbl %1,%0" : "=fx" (__sqrt_res) : "fx" (a));            \
  85     __sqrt_res;                                                         \
  86   })
  87 #endif
  88
  89 #if defined _ARCH_PWR2
  90 #define SQRT(a) \
  91   ({                                                                    \
  92     double __sqrt_res;                                                  \
  93     asm ("fsqrt %0,%1" : "=f" (__sqrt_res) : "f" (a));                  \
  94     __sqrt_res;                                                         \
  95   })
  96 #endif
  97
  98 #endif
  99
 100 #ifndef SQRT
 101
 102 /* Tables for initial approximation of the square root.  These are
 103    indexed with bits 1-8 of the operand for which the square root is
 104    calculated, where bit 0 is the most significant non-zero bit.  I.e.
 105    the most significant one-bit is not used, since that per definition
 106    is one.  Likewise, the tables don't return the highest bit of the
 107    result.  That bit must be inserted by or:ing the returned value with
 108    0x100.  This way, we get a 9-bit approximation from 8-bit tables!  */
 109
 110 /* Table to be used for operands with an even total number of bits.
 111    (Exactly as in the decimal system there are similarities between the
 112    square root of numbers with the same initial digits and an even
 113    difference in the total number of digits.  Consider the square root
 114    of 1, 10, 100, 1000, ...)  */
 115 static unsigned char even_approx_tab[256] =
 116 {
 117   0x6a, 0x6a, 0x6b, 0x6c, 0x6c, 0x6d, 0x6e, 0x6e,
 118   0x6f, 0x70, 0x71, 0x71, 0x72, 0x73, 0x73, 0x74,
 119   0x75, 0x75, 0x76, 0x77, 0x77, 0x78, 0x79, 0x79,
 120   0x7a, 0x7b, 0x7b, 0x7c, 0x7d, 0x7d, 0x7e, 0x7f,
 121   0x80, 0x80, 0x81, 0x81, 0x82, 0x83, 0x83, 0x84,
 122   0x85, 0x85, 0x86, 0x87, 0x87, 0x88, 0x89, 0x89,
 123   0x8a, 0x8b, 0x8b, 0x8c, 0x8d, 0x8d, 0x8e, 0x8f,
 124   0x8f, 0x90, 0x90, 0x91, 0x92, 0x92, 0x93, 0x94,
 125   0x94, 0x95, 0x96, 0x96, 0x97, 0x97, 0x98, 0x99,
 126   0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9c, 0x9d, 0x9e,
 127   0x9e, 0x9f, 0xa0, 0xa0, 0xa1, 0xa1, 0xa2, 0xa3,
 128   0xa3, 0xa4, 0xa4, 0xa5, 0xa6, 0xa6, 0xa7, 0xa7,
 129   0xa8, 0xa9, 0xa9, 0xaa, 0xaa, 0xab, 0xac, 0xac,
 130   0xad, 0xad, 0xae, 0xaf, 0xaf, 0xb0, 0xb0, 0xb1,
 131   0xb2, 0xb2, 0xb3, 0xb3, 0xb4, 0xb5, 0xb5, 0xb6,
 132   0xb6, 0xb7, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xba,
 133   0xbb, 0xbb, 0xbc, 0xbd, 0xbd, 0xbe, 0xbe, 0xbf,
 134   0xc0, 0xc0, 0xc1, 0xc1, 0xc2, 0xc2, 0xc3, 0xc3,
 135   0xc4, 0xc5, 0xc5, 0xc6, 0xc6, 0xc7, 0xc7, 0xc8,
 136   0xc9, 0xc9, 0xca, 0xca, 0xcb, 0xcb, 0xcc, 0xcc,
 137   0xcd, 0xce, 0xce, 0xcf, 0xcf, 0xd0, 0xd0, 0xd1,
 138   0xd1, 0xd2, 0xd3, 0xd3, 0xd4, 0xd4, 0xd5, 0xd5,
 139   0xd6, 0xd6, 0xd7, 0xd7, 0xd8, 0xd9, 0xd9, 0xda,
 140   0xda, 0xdb, 0xdb, 0xdc, 0xdc, 0xdd, 0xdd, 0xde,
 141   0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe1, 0xe2, 0xe2,
 142   0xe3, 0xe3, 0xe4, 0xe4, 0xe5, 0xe5, 0xe6, 0xe6,
 143   0xe7, 0xe7, 0xe8, 0xe8, 0xe9, 0xea, 0xea, 0xeb,
 144   0xeb, 0xec, 0xec, 0xed, 0xed, 0xee, 0xee, 0xef,
 145   0xef, 0xf0, 0xf0, 0xf1, 0xf1, 0xf2, 0xf2, 0xf3,
 146   0xf3, 0xf4, 0xf4, 0xf5, 0xf5, 0xf6, 0xf6, 0xf7,
 147   0xf7, 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb,
 148   0xfb, 0xfc, 0xfc, 0xfd, 0xfd, 0xfe, 0xfe, 0xff,
 149 };
 150
 151 /* Table to be used for operands with an odd total number of bits.
 152    (Further comments before previous table.)  */
 153 static unsigned char odd_approx_tab[256] =
 154 {
 155   0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03,
 156   0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07,
 157   0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b,
 158   0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f,
 159   0x0f, 0x10, 0x10, 0x10, 0x11, 0x11, 0x12, 0x12,
 160   0x13, 0x13, 0x14, 0x14, 0x15, 0x15, 0x16, 0x16,
 161   0x16, 0x17, 0x17, 0x18, 0x18, 0x19, 0x19, 0x1a,
 162   0x1a, 0x1b, 0x1b, 0x1b, 0x1c, 0x1c, 0x1d, 0x1d,
 163   0x1e, 0x1e, 0x1f, 0x1f, 0x20, 0x20, 0x20, 0x21,
 164   0x21, 0x22, 0x22, 0x23, 0x23, 0x23, 0x24, 0x24,
 165   0x25, 0x25, 0x26, 0x26, 0x27, 0x27, 0x27, 0x28,
 166   0x28, 0x29, 0x29, 0x2a, 0x2a, 0x2a, 0x2b, 0x2b,
 167   0x2c, 0x2c, 0x2d, 0x2d, 0x2d, 0x2e, 0x2e, 0x2f,
 168   0x2f, 0x30, 0x30, 0x30, 0x31, 0x31, 0x32, 0x32,
 169   0x32, 0x33, 0x33, 0x34, 0x34, 0x35, 0x35, 0x35,
 170   0x36, 0x36, 0x37, 0x37, 0x37, 0x38, 0x38, 0x39,
 171   0x39, 0x39, 0x3a, 0x3a, 0x3b, 0x3b, 0x3b, 0x3c,
 172   0x3c, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e, 0x3f, 0x3f,
 173   0x40, 0x40, 0x40, 0x41, 0x41, 0x41, 0x42, 0x42,
 174   0x43, 0x43, 0x43, 0x44, 0x44, 0x45, 0x45, 0x45,
 175   0x46, 0x46, 0x47, 0x47, 0x47, 0x48, 0x48, 0x49,
 176   0x49, 0x49, 0x4a, 0x4a, 0x4b, 0x4b, 0x4b, 0x4c,
 177   0x4c, 0x4c, 0x4d, 0x4d, 0x4e, 0x4e, 0x4e, 0x4f,
 178   0x4f, 0x50, 0x50, 0x50, 0x51, 0x51, 0x51, 0x52,
 179   0x52, 0x53, 0x53, 0x53, 0x54, 0x54, 0x54, 0x55,
 180   0x55, 0x56, 0x56, 0x56, 0x57, 0x57, 0x57, 0x58,
 181   0x58, 0x59, 0x59, 0x59, 0x5a, 0x5a, 0x5a, 0x5b,
 182   0x5b, 0x5b, 0x5c, 0x5c, 0x5d, 0x5d, 0x5d, 0x5e,
 183   0x5e, 0x5e, 0x5f, 0x5f, 0x60, 0x60, 0x60, 0x61,
 184   0x61, 0x61, 0x62, 0x62, 0x62, 0x63, 0x63, 0x63,
 185   0x64, 0x64, 0x65, 0x65, 0x65, 0x66, 0x66, 0x66,
 186   0x67, 0x67, 0x67, 0x68, 0x68, 0x68, 0x69, 0x69,
 187 };
 188 #endif
 189
 190 \f
 191 mp_size_t
 192 #if __STDC__
 193 mpn_sqrtrem (mp_ptr root_ptr, mp_ptr rem_ptr, mp_srcptr op_ptr, mp_size_t op_size)
 194 #else
 195 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
 196      mp_ptr root_ptr;
 197      mp_ptr rem_ptr;
 198      mp_srcptr op_ptr;
 199      mp_size_t op_size;
 200 #endif
 201 {
 202   /* R (root result) */
 203   mp_ptr rp;                    /* Pointer to least significant word */
 204   mp_size_t rsize;              /* The size in words */
 205
 206   /* T (OP shifted to the left a.k.a. normalized) */
 207   mp_ptr tp;                    /* Pointer to least significant word */
 208   mp_size_t tsize;              /* The size in words */
 209   mp_ptr t_end_ptr;             /* Pointer right beyond most sign. word */
 210   mp_limb_t t_high0, t_high1;   /* The two most significant words */
 211
 212   /* TT (temporary for numerator/remainder) */
 213   mp_ptr ttp;                   /* Pointer to least significant word */
 214
 215   /* X (temporary for quotient in main loop) */
 216   mp_ptr xp;                    /* Pointer to least significant word */
 217   mp_size_t xsize;              /* The size in words */
 218
 219   unsigned cnt;
 220   mp_limb_t initial_approx;     /* Initially made approximation */
 221   mp_size_t tsizes[BITS_PER_MP_LIMB];   /* Successive calculation precisions */
 222   mp_size_t tmp;
 223   mp_size_t i;
 224
 225   mp_limb_t cy_limb;
 226   TMP_DECL (marker);
 227
 228   /* If OP is zero, both results are zero.  */
 229   if (op_size == 0)
 230     return 0;
 231
 232   count_leading_zeros (cnt, op_ptr[op_size - 1]);
 233   tsize = op_size;
 234   if ((tsize & 1) != 0)
 235     {
 236       cnt += BITS_PER_MP_LIMB;
 237       tsize++;
 238     }
 239
 240   rsize = tsize / 2;
 241   rp = root_ptr;
 242
 243   TMP_MARK (marker);
 244
 245   /* Shift OP an even number of bits into T, such that either the most or
 246      the second most significant bit is set, and such that the number of
 247      words in T becomes even.  This way, the number of words in R=sqrt(OP)
 248      is exactly half as many as in OP, and the most significant bit of R
 249      is set.
 250
 251      Also, the initial approximation is simplified by this up-shifted OP.
 252
 253      Finally, the Newtonian iteration which is the main part of this
 254      program performs division by R.  The fast division routine expects
 255      the divisor to be "normalized" in exactly the sense of having the
 256      most significant bit set.  */
 257
 258   tp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
 259
 260   if ((cnt & ~1) % BITS_PER_MP_LIMB != 0)
 261     t_high0 = mpn_lshift (tp + cnt / BITS_PER_MP_LIMB, op_ptr, op_size,
 262                           (cnt & ~1) % BITS_PER_MP_LIMB);
 263   else
 264     MPN_COPY (tp + cnt / BITS_PER_MP_LIMB, op_ptr, op_size);
 265
 266   if (cnt >= BITS_PER_MP_LIMB)
 267     tp[0] = 0;
 268
 269   t_high0 = tp[tsize - 1];
 270   t_high1 = tp[tsize - 2];      /* Never stray.  TSIZE is >= 2.  */
 271
 272 /* Is there a fast sqrt instruction defined for this machine?  */
 273 #ifdef SQRT
 274   {
 275     initial_approx = SQRT (t_high0 * 2.0
 276                            * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1))
 277                            + t_high1);
 278     /* If t_high0,,t_high1 is big, the result in INITIAL_APPROX might have
 279        become incorrect due to overflow in the conversion from double to
 280        mp_limb_t above.  It will typically be zero in that case, but might be
 281        a small number on some machines.  The most significant bit of
 282        INITIAL_APPROX should be set, so that bit is a good overflow
 283        indication.  */
 284     if ((mp_limb_signed_t) initial_approx >= 0)
 285       initial_approx = ~(mp_limb_t)0;
 286   }
 287 #else
 288   /* Get a 9 bit approximation from the tables.  The tables expect to
 289      be indexed with the 8 high bits right below the highest bit.
 290      Also, the highest result bit is not returned by the tables, and
 291      must be or:ed into the result.  The scheme gives 9 bits of start
 292      approximation with just 256-entry 8 bit tables.  */
 293
 294   if ((cnt & 1) == 0)
 295     {
 296       /* The most sign bit of t_high0 is set.  */
 297       initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 1);
 298       initial_approx &= 0xff;
 299       initial_approx = even_approx_tab[initial_approx];
 300     }
 301   else
 302     {
 303       /* The most significant bit of T_HIGH0 is unset,
 304          the second most significant is set.  */
 305       initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 2);
 306       initial_approx &= 0xff;
 307       initial_approx = odd_approx_tab[initial_approx];
 308     }
 309   initial_approx |= 0x100;
 310   initial_approx <<= BITS_PER_MP_LIMB - 8 - 1;
 311
 312   /* Perform small precision Newtonian iterations to get a full word
 313      approximation.  For small operands, these iteration will make the
 314      entire job.  */
 315   if (t_high0 == ~(mp_limb_t)0)
 316     initial_approx = t_high0;
 317   else
 318     {
 319       mp_limb_t quot;
 320
 321       if (t_high0 >= initial_approx)
 322         initial_approx = t_high0 + 1;
 323
 324       /* First get about 18 bits with pure C arithmetics.  */
 325       quot = t_high0 / (initial_approx >> BITS_PER_MP_LIMB/2) << BITS_PER_MP_LIMB/2;
 326       initial_approx = (initial_approx + quot) / 2;
 327       initial_approx |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
 328
 329       /* Now get a full word by one (or for > 36 bit machines) several
 330          iterations.  */
 331       for (i = 16; i < BITS_PER_MP_LIMB; i <<= 1)
 332         {
 333           mp_limb_t ignored_remainder;
 334
 335           udiv_qrnnd (quot, ignored_remainder,
 336                       t_high0, t_high1, initial_approx);
 337           initial_approx = (initial_approx + quot) / 2;
 338           initial_approx |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
 339         }
 340     }
 341 #endif
 342
 343   rp[0] = initial_approx;
 344   rsize = 1;
 345
 346 #ifdef DEBUG
 347           printf ("\n\nT = ");
 348           mpn_dump (tp, tsize);
 349 #endif
 350
 351   if (tsize > 2)
 352     {
 353       /* Determine the successive precisions to use in the iteration.  We
 354          minimize the precisions, beginning with the highest (i.e. last
 355          iteration) to the lowest (i.e. first iteration).  */
 356
 357       xp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
 358       ttp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
 359
 360       t_end_ptr = tp + tsize;
 361
 362       tmp = tsize / 2;
 363       for (i = 0;; i++)
 364         {
 365           tsize = (tmp + 1) / 2;
 366           if (tmp == tsize)
 367             break;
 368           tsizes[i] = tsize + tmp;
 369           tmp = tsize;
 370         }
 371
 372       /* Main Newton iteration loop.  For big arguments, most of the
 373          time is spent here.  */
 374
 375       /* It is possible to do a great optimization here.  The successive
 376          divisors in the mpn_divmod call below has more and more leading
 377          words equal to its predecessor.  Therefore the beginning of
 378          each division will repeat the same work as did the last
 379          division.  If we could guarantee that the leading words of two
 380          consecutive divisors are the same (i.e. in this case, a later
 381          divisor has just more digits at the end) it would be a simple
 382          matter of just using the old remainder of the last division in
 383          a subsequent division, to take care of this optimization.  This
 384          idea would surely make a difference even for small arguments.  */
 385
 386       /* Loop invariants:
 387
 388          R <= shiftdown_to_same_size(floor(sqrt(OP))) < R + 1.
 389          X - 1 < shiftdown_to_same_size(floor(sqrt(OP))) <= X.
 390          R <= shiftdown_to_same_size(X).  */
 391
 392       while (--i >= 0)
 393         {
 394           mp_limb_t cy;
 395 #ifdef DEBUG
 396           mp_limb_t old_least_sign_r = rp[0];
 397           mp_size_t old_rsize = rsize;
 398
 399           printf ("R = ");
 400           mpn_dump (rp, rsize);
 401 #endif
 402           tsize = tsizes[i];
 403
 404           /* Need to copy the numerator into temporary space, as
 405              mpn_divmod overwrites its numerator argument with the
 406              remainder (which we currently ignore).  */
 407           MPN_COPY (ttp, t_end_ptr - tsize, tsize);
 408           cy = mpn_divmod (xp, ttp, tsize, rp, rsize);
 409           xsize = tsize - rsize;
 410
 411 #ifdef DEBUG
 412           printf ("X =%d ", cy);
 413           mpn_dump (xp, xsize);
 414 #endif
 415
 416           /* Add X and R with the most significant limbs aligned,
 417              temporarily ignoring at least one limb at the low end of X.  */
 418           tmp = xsize - rsize;
 419           cy += mpn_add_n (xp + tmp, rp, xp + tmp, rsize);
 420
 421           /* If T begins with more than 2 x BITS_PER_MP_LIMB of ones, we get
 422              intermediate roots that'd need an extra bit.  We don't want to
 423              handle that since it would make the subsequent divisor
 424              non-normalized, so round such roots down to be only ones in the
 425              current precision.  */
 426           if (cy == 2)
 427             {
 428               mp_size_t j;
 429               for (j = xsize; j >= 0; j--)
 430                 xp[j] = ~(mp_limb_t)0;
 431             }
 432
 433           /* Divide X by 2 and put the result in R.  This is the new
 434              approximation.  Shift in the carry from the addition.  */
 435           mpn_rshift (rp, xp, xsize, 1);
 436           rp[xsize - 1] |= ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1));
 437           rsize = xsize;
 438 #ifdef DEBUG
 439           if (old_least_sign_r != rp[rsize - old_rsize])
 440             printf (">>>>>>>> %d: %0*lX, %0*lX <<<<<<<<\n",
 441                     i, 2 * BYTES_PER_MP_LIMB, old_least_sign_r,
 442                     2 * BYTES_PER_MP_LIMB, rp[rsize - old_rsize]);
 443 #endif
 444         }
 445     }
 446
 447 #ifdef DEBUG
 448   printf ("(final) R = ");
 449   mpn_dump (rp, rsize);
 450 #endif
 451
 452   /* We computed the square root of OP * 2**(2*floor(cnt/2)).
 453      This has resulted in R being 2**floor(cnt/2) to large.
 454      Shift it down here to fix that.  */
 455   if (cnt / 2 != 0)
 456     {
 457       mpn_rshift (rp, rp, rsize, cnt/2);
 458       rsize -= rp[rsize - 1] == 0;
 459     }
 460
 461   /* Calculate the remainder.  */
 462   mpn_mul_n (tp, rp, rp, rsize);
 463   tsize = rsize + rsize;
 464   tsize -= tp[tsize - 1] == 0;
 465   if (op_size < tsize
 466       || (op_size == tsize && mpn_cmp (op_ptr, tp, op_size) < 0))
 467     {
 468       /* R is too large.  Decrement it.  */
 469
 470       /* These operations can't overflow.  */
 471       cy_limb  = mpn_sub_n (tp, tp, rp, rsize);
 472       cy_limb += mpn_sub_n (tp, tp, rp, rsize);
 473       mpn_sub_1 (tp + rsize, tp + rsize, tsize - rsize, cy_limb);
 474       mpn_add_1 (tp, tp, tsize, (mp_limb_t) 1);
 475
 476       mpn_sub_1 (rp, rp, rsize, (mp_limb_t) 1);
 477
 478 #ifdef DEBUG
 479       printf ("(adjusted) R = ");
 480       mpn_dump (rp, rsize);
 481 #endif
 482     }
 483
 484   if (rem_ptr != NULL)
 485     {
 486       cy_limb = mpn_sub (rem_ptr, op_ptr, op_size, tp, tsize);
 487       MPN_NORMALIZE (rem_ptr, op_size);
 488       TMP_FREE (marker);
 489       return op_size;
 490     }
 491   else
 492     {
 493       int res;
 494       res = op_size != tsize || mpn_cmp (op_ptr, tp, op_size);
 495       TMP_FREE (marker);
 496       return res;
 497     }
 498 }