rts/gmp/mpn/generic/tdiv_qr.c

   1 /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
   2    write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
   3    qxn is non-zero, generate that many fraction limbs and append them after the
   4    other quotient limbs, and update the remainder accordningly.  The input
   5    operands are unaffected.
   6
   7    Preconditions:
   8    1. The most significant limb of of the divisor must be non-zero.
   9    2. No argument overlap is permitted.  (??? relax this ???)
  10    3. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
  11
  12    The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
  13    complexity of multiplication.
  14
  15 Copyright (C) 1997, 2000 Free Software Foundation, Inc.
  16
  17 This file is part of the GNU MP Library.
  18
  19 The GNU MP Library is free software; you can redistribute it and/or modify
  20 it under the terms of the GNU Lesser General Public License as published by
  21 the Free Software Foundation; either version 2.1 of the License, or (at your
  22 option) any later version.
  23
  24 The GNU MP Library is distributed in the hope that it will be useful, but
  25 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  26 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  27 License for more details.
  28
  29 You should have received a copy of the GNU Lesser General Public License
  30 along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
  31 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  32 MA 02111-1307, USA. */
  33
  34 #include "gmp.h"
  35 #include "gmp-impl.h"
  36 #include "longlong.h"
  37
  38 #ifndef BZ_THRESHOLD
  39 #define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
  40 #endif
  41
  42 /* Extract the middle limb from ((h,,l) << cnt) */
  43 #define SHL(h,l,cnt) \
  44   ((h << cnt) | ((l >> 1) >> ((~cnt) & (BITS_PER_MP_LIMB - 1))))
  45
  46 void
  47 #if __STDC__
  48 mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
  49              mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
  50 #else
  51 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn)
  52      mp_ptr qp;
  53      mp_ptr rp;
  54      mp_size_t qxn;
  55      mp_srcptr np;
  56      mp_size_t nn;
  57      mp_srcptr dp;
  58      mp_size_t dn;
  59 #endif
  60 {
  61   /* FIXME:
  62      1. qxn
  63      2. pass allocated storage in additional parameter?
  64   */
  65   if (qxn != 0)
  66     abort ();
  67
  68   switch (dn)
  69     {
  70     case 0:
  71       DIVIDE_BY_ZERO;
  72
  73     case 1:
  74       {
  75         rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]);
  76         return;
  77       }
  78
  79     case 2:
  80       {
  81         int cnt;
  82         mp_ptr n2p, d2p;
  83         mp_limb_t qhl, cy;
  84         TMP_DECL (marker);
  85         TMP_MARK (marker);
  86         count_leading_zeros (cnt, dp[dn - 1]);
  87         if (cnt != 0)
  88           {
  89             d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
  90             mpn_lshift (d2p, dp, dn, cnt);
  91             n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
  92             cy = mpn_lshift (n2p, np, nn, cnt);
  93             n2p[nn] = cy;
  94             qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
  95             if (cy == 0)
  96               qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
  97           }
  98         else
  99           {
 100             d2p = (mp_ptr) dp;
 101             n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB);
 102             MPN_COPY (n2p, np, nn);
 103             qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
 104             qp[nn - 2] = qhl;   /* always store nn-dn+1 quotient limbs */
 105           }
 106
 107         if (cnt != 0)
 108           mpn_rshift (rp, n2p, dn, cnt);
 109         else
 110           MPN_COPY (rp, n2p, dn);
 111         TMP_FREE (marker);
 112         return;
 113       }
 114
 115     default:
 116       {
 117         int adjust;
 118         TMP_DECL (marker);
 119         TMP_MARK (marker);
 120         adjust = np[nn - 1] >= dp[dn - 1];      /* conservative tests for quotient size */
 121         if (nn + adjust >= 2 * dn)
 122           {
 123             mp_ptr n2p, d2p;
 124             mp_limb_t cy;
 125             int cnt;
 126             count_leading_zeros (cnt, dp[dn - 1]);
 127
 128             qp[nn - dn] = 0;                    /* zero high quotient limb */
 129             if (cnt != 0)                       /* normalize divisor if needed */
 130               {
 131                 d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
 132                 mpn_lshift (d2p, dp, dn, cnt);
 133                 n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
 134                 cy = mpn_lshift (n2p, np, nn, cnt);
 135                 n2p[nn] = cy;
 136                 nn += adjust;
 137               }
 138             else
 139               {
 140                 d2p = (mp_ptr) dp;
 141                 n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
 142                 MPN_COPY (n2p, np, nn);
 143                 n2p[nn] = 0;
 144                 nn += adjust;
 145               }
 146
 147             if (dn == 2)
 148               mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
 149             else if (dn < BZ_THRESHOLD)
 150               mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
 151             else
 152               {
 153                 /* Perform 2*dn / dn limb divisions as long as the limbs
 154                    in np last.  */
 155                 mp_ptr q2p = qp + nn - 2 * dn;
 156                 n2p += nn - 2 * dn;
 157                 mpn_bz_divrem_n (q2p, n2p, d2p, dn);
 158                 nn -= dn;
 159                 while (nn >= 2 * dn)
 160                   {
 161                     mp_limb_t c;
 162                     q2p -= dn;  n2p -= dn;
 163                     c = mpn_bz_divrem_n (q2p, n2p, d2p, dn);
 164                     ASSERT_ALWAYS (c == 0);
 165                     nn -= dn;
 166                   }
 167
 168                 if (nn != dn)
 169                   {
 170                     n2p -= nn - dn;
 171                     /* In theory, we could fall out to the cute code below
 172                        since we now have exactly the situation that code
 173                        is designed to handle.  We botch this badly and call
 174                        the basic mpn_sb_divrem_mn!  */
 175                     if (dn == 2)
 176                       mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
 177                     else
 178                       mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
 179                   }
 180               }
 181
 182
 183             if (cnt != 0)
 184               mpn_rshift (rp, n2p, dn, cnt);
 185             else
 186               MPN_COPY (rp, n2p, dn);
 187             TMP_FREE (marker);
 188             return;
 189           }
 190
 191         /* When we come here, the numerator/partial remainder is less
 192            than twice the size of the denominator.  */
 193
 194           {
 195             /* Problem:
 196
 197                Divide a numerator N with nn limbs by a denominator D with dn
 198                limbs forming a quotient of nn-dn+1 limbs.  When qn is small
 199                compared to dn, conventional division algorithms perform poorly.
 200                We want an algorithm that has an expected running time that is
 201                dependent only on qn.  It is assumed that the most significant
 202                limb of the numerator is smaller than the most significant limb
 203                of the denominator.
 204
 205                Algorithm (very informally stated):
 206
 207                1) Divide the 2 x qn most significant limbs from the numerator
 208                   by the qn most significant limbs from the denominator.  Call
 209                   the result qest.  This is either the correct quotient, but
 210                   might be 1 or 2 too large.  Compute the remainder from the
 211                   division.  (This step is implemented by a mpn_divrem call.)
 212
 213                2) Is the most significant limb from the remainder < p, where p
 214                   is the product of the most significant limb from the quotient
 215                   and the next(d).  (Next(d) denotes the next ignored limb from
 216                   the denominator.)  If it is, decrement qest, and adjust the
 217                   remainder accordingly.
 218
 219                3) Is the remainder >= qest?  If it is, qest is the desired
 220                   quotient.  The algorithm terminates.
 221
 222                4) Subtract qest x next(d) from the remainder.  If there is
 223                   borrow out, decrement qest, and adjust the remainder
 224                   accordingly.
 225
 226                5) Skip one word from the denominator (i.e., let next(d) denote
 227                   the next less significant limb.  */
 228
 229             mp_size_t qn;
 230             mp_ptr n2p, d2p;
 231             mp_ptr tp;
 232             mp_limb_t cy;
 233             mp_size_t in, rn;
 234             mp_limb_t quotient_too_large;
 235             int cnt;
 236
 237             qn = nn - dn;
 238             qp[qn] = 0;                         /* zero high quotient limb */
 239             qn += adjust;                       /* qn cannot become bigger */
 240
 241             if (qn == 0)
 242               {
 243                 MPN_COPY (rp, np, dn);
 244                 TMP_FREE (marker);
 245                 return;
 246               }
 247
 248             in = dn - qn;               /* (at least partially) ignored # of limbs in ops */
 249             /* Normalize denominator by shifting it to the left such that its
 250                most significant bit is set.  Then shift the numerator the same
 251                amount, to mathematically preserve quotient.  */
 252             count_leading_zeros (cnt, dp[dn - 1]);
 253             if (cnt != 0)
 254               {
 255                 d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB);
 256
 257                 mpn_lshift (d2p, dp + in, qn, cnt);
 258                 d2p[0] |= dp[in - 1] >> (BITS_PER_MP_LIMB - cnt);
 259
 260                 n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
 261                 cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
 262                 if (adjust)
 263                   {
 264                     n2p[2 * qn] = cy;
 265                     n2p++;
 266                   }
 267                 else
 268                   {
 269                     n2p[0] |= np[nn - 2 * qn - 1] >> (BITS_PER_MP_LIMB - cnt);
 270                   }
 271               }
 272             else
 273               {
 274                 d2p = (mp_ptr) dp + in;
 275
 276                 n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
 277                 MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
 278                 if (adjust)
 279                   {
 280                     n2p[2 * qn] = 0;
 281                     n2p++;
 282                   }
 283               }
 284
 285             /* Get an approximate quotient using the extracted operands.  */
 286             if (qn == 1)
 287               {
 288                 mp_limb_t q0, r0;
 289                 mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0;
 290                 /* Due to a gcc 2.7.2.3 reload pass bug, we have to use some
 291                    temps here.  This doesn't hurt code quality on any machines
 292                    so we do it unconditionally.  */
 293                 gcc272bug_n1 = n2p[1];
 294                 gcc272bug_n0 = n2p[0];
 295                 gcc272bug_d0 = d2p[0];
 296                 udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0, gcc272bug_d0);
 297                 n2p[0] = r0;
 298                 qp[0] = q0;
 299               }
 300             else if (qn == 2)
 301               mpn_divrem_2 (qp, 0L, n2p, 4L, d2p);
 302             else if (qn < BZ_THRESHOLD)
 303               mpn_sb_divrem_mn (qp, n2p, qn * 2, d2p, qn);
 304             else
 305               mpn_bz_divrem_n (qp, n2p, d2p, qn);
 306
 307             rn = qn;
 308             /* Multiply the first ignored divisor limb by the most significant
 309                quotient limb.  If that product is > the partial remainder's
 310                most significant limb, we know the quotient is too large.  This
 311                test quickly catches most cases where the quotient is too large;
 312                it catches all cases where the quotient is 2 too large.  */
 313             {
 314               mp_limb_t dl, x;
 315               mp_limb_t h, l;
 316
 317               if (in - 2 < 0)
 318                 dl = 0;
 319               else
 320                 dl = dp[in - 2];
 321
 322               x = SHL (dp[in - 1], dl, cnt);
 323               umul_ppmm (h, l, x, qp[qn - 1]);
 324
 325               if (n2p[qn - 1] < h)
 326                 {
 327                   mp_limb_t cy;
 328
 329                   mpn_decr_u (qp, (mp_limb_t) 1);
 330                   cy = mpn_add_n (n2p, n2p, d2p, qn);
 331                   if (cy)
 332                     {
 333                       /* The partial remainder is safely large.  */
 334                       n2p[qn] = cy;
 335                       ++rn;
 336                     }
 337                 }
 338             }
 339
 340             quotient_too_large = 0;
 341             if (cnt != 0)
 342               {
 343                 mp_limb_t cy1, cy2;
 344
 345                 /* Append partially used numerator limb to partial remainder.  */
 346                 cy1 = mpn_lshift (n2p, n2p, rn, BITS_PER_MP_LIMB - cnt);
 347                 n2p[0] |= np[in - 1] & (~(mp_limb_t) 0 >> cnt);
 348
 349                 /* Update partial remainder with partially used divisor limb.  */
 350                 cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (~(mp_limb_t) 0 >> cnt));
 351                 if (qn != rn)
 352                   {
 353                     if (n2p[qn] < cy2)
 354                       abort ();
 355                     n2p[qn] -= cy2;
 356                   }
 357                 else
 358                   {
 359                     n2p[qn] = cy1 - cy2;
 360
 361                     quotient_too_large = (cy1 < cy2);
 362                     ++rn;
 363                   }
 364                 --in;
 365               }
 366             /* True: partial remainder now is neutral, i.e., it is not shifted up.  */
 367
 368             tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
 369
 370             if (in < qn)
 371               {
 372                 if (in == 0)
 373                   {
 374                     MPN_COPY (rp, n2p, rn);
 375                     if (rn != dn)
 376                       abort ();
 377                     goto foo;
 378                   }
 379                 mpn_mul (tp, qp, qn, dp, in);
 380               }
 381             else
 382               mpn_mul (tp, dp, in, qp, qn);
 383
 384             cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
 385             MPN_COPY (rp + in, n2p, dn - in);
 386             quotient_too_large |= cy;
 387             cy = mpn_sub_n (rp, np, tp, in);
 388             cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
 389             quotient_too_large |= cy;
 390           foo:
 391             if (quotient_too_large)
 392               {
 393                 mpn_decr_u (qp, (mp_limb_t) 1);
 394                 mpn_add_n (rp, rp, dp, dn);
 395               }
 396           }
 397         TMP_FREE (marker);
 398         return;
 399       }
 400     }
 401 }