1 /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
2 write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp. If
3 qxn is non-zero, generate that many fraction limbs and append them after the
4 other quotient limbs, and update the remainder accordningly. The input
5 operands are unaffected.
8 1. The most significant limb of of the divisor must be non-zero.
9 2. No argument overlap is permitted. (??? relax this ???)
10 3. nn >= dn, even if qxn is non-zero. (??? relax this ???)
12 The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
13 complexity of multiplication.
15 Copyright (C) 1997, 2000 Free Software Foundation, Inc.
17 This file is part of the GNU MP Library.
19 The GNU MP Library is free software; you can redistribute it and/or modify
20 it under the terms of the GNU Lesser General Public License as published by
21 the Free Software Foundation; either version 2.1 of the License, or (at your
22 option) any later version.
24 The GNU MP Library is distributed in the hope that it will be useful, but
25 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
27 License for more details.
29 You should have received a copy of the GNU Lesser General Public License
30 along with the GNU MP Library; see the file COPYING.LIB. If not, write to
31 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
32 MA 02111-1307, USA. */
39 #define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
42 /* Extract the middle limb from ((h,,l) << cnt) */
43 #define SHL(h,l,cnt) \
44 ((h << cnt) | ((l >> 1) >> ((~cnt) & (BITS_PER_MP_LIMB - 1))))
48 mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
49 mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
51 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn)
63 2. pass allocated storage in additional parameter?
75 rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]);
86 count_leading_zeros (cnt, dp[dn - 1]);
89 d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
90 mpn_lshift (d2p, dp, dn, cnt);
91 n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
92 cy = mpn_lshift (n2p, np, nn, cnt);
94 qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
96 qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
101 n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB);
102 MPN_COPY (n2p, np, nn);
103 qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
104 qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
108 mpn_rshift (rp, n2p, dn, cnt);
110 MPN_COPY (rp, n2p, dn);
120 adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */
121 if (nn + adjust >= 2 * dn)
126 count_leading_zeros (cnt, dp[dn - 1]);
128 qp[nn - dn] = 0; /* zero high quotient limb */
129 if (cnt != 0) /* normalize divisor if needed */
131 d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
132 mpn_lshift (d2p, dp, dn, cnt);
133 n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
134 cy = mpn_lshift (n2p, np, nn, cnt);
141 n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
142 MPN_COPY (n2p, np, nn);
148 mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
149 else if (dn < BZ_THRESHOLD)
150 mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
153 /* Perform 2*dn / dn limb divisions as long as the limbs
155 mp_ptr q2p = qp + nn - 2 * dn;
157 mpn_bz_divrem_n (q2p, n2p, d2p, dn);
162 q2p -= dn; n2p -= dn;
163 c = mpn_bz_divrem_n (q2p, n2p, d2p, dn);
164 ASSERT_ALWAYS (c == 0);
171 /* In theory, we could fall out to the cute code below
172 since we now have exactly the situation that code
173 is designed to handle. We botch this badly and call
174 the basic mpn_sb_divrem_mn! */
176 mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
178 mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
184 mpn_rshift (rp, n2p, dn, cnt);
186 MPN_COPY (rp, n2p, dn);
191 /* When we come here, the numerator/partial remainder is less
192 than twice the size of the denominator. */
197 Divide a numerator N with nn limbs by a denominator D with dn
198 limbs forming a quotient of nn-dn+1 limbs. When qn is small
199 compared to dn, conventional division algorithms perform poorly.
200 We want an algorithm that has an expected running time that is
201 dependent only on qn. It is assumed that the most significant
202 limb of the numerator is smaller than the most significant limb
205 Algorithm (very informally stated):
207 1) Divide the 2 x qn most significant limbs from the numerator
208 by the qn most significant limbs from the denominator. Call
209 the result qest. This is either the correct quotient, but
210 might be 1 or 2 too large. Compute the remainder from the
211 division. (This step is implemented by a mpn_divrem call.)
213 2) Is the most significant limb from the remainder < p, where p
214 is the product of the most significant limb from the quotient
215 and the next(d). (Next(d) denotes the next ignored limb from
216 the denominator.) If it is, decrement qest, and adjust the
217 remainder accordingly.
219 3) Is the remainder >= qest? If it is, qest is the desired
220 quotient. The algorithm terminates.
222 4) Subtract qest x next(d) from the remainder. If there is
223 borrow out, decrement qest, and adjust the remainder
226 5) Skip one word from the denominator (i.e., let next(d) denote
227 the next less significant limb. */
234 mp_limb_t quotient_too_large;
238 qp[qn] = 0; /* zero high quotient limb */
239 qn += adjust; /* qn cannot become bigger */
243 MPN_COPY (rp, np, dn);
248 in = dn - qn; /* (at least partially) ignored # of limbs in ops */
249 /* Normalize denominator by shifting it to the left such that its
250 most significant bit is set. Then shift the numerator the same
251 amount, to mathematically preserve quotient. */
252 count_leading_zeros (cnt, dp[dn - 1]);
255 d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB);
257 mpn_lshift (d2p, dp + in, qn, cnt);
258 d2p[0] |= dp[in - 1] >> (BITS_PER_MP_LIMB - cnt);
260 n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
261 cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
269 n2p[0] |= np[nn - 2 * qn - 1] >> (BITS_PER_MP_LIMB - cnt);
274 d2p = (mp_ptr) dp + in;
276 n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
277 MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
285 /* Get an approximate quotient using the extracted operands. */
289 mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0;
290 /* Due to a gcc 2.7.2.3 reload pass bug, we have to use some
291 temps here. This doesn't hurt code quality on any machines
292 so we do it unconditionally. */
293 gcc272bug_n1 = n2p[1];
294 gcc272bug_n0 = n2p[0];
295 gcc272bug_d0 = d2p[0];
296 udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0, gcc272bug_d0);
301 mpn_divrem_2 (qp, 0L, n2p, 4L, d2p);
302 else if (qn < BZ_THRESHOLD)
303 mpn_sb_divrem_mn (qp, n2p, qn * 2, d2p, qn);
305 mpn_bz_divrem_n (qp, n2p, d2p, qn);
308 /* Multiply the first ignored divisor limb by the most significant
309 quotient limb. If that product is > the partial remainder's
310 most significant limb, we know the quotient is too large. This
311 test quickly catches most cases where the quotient is too large;
312 it catches all cases where the quotient is 2 too large. */
322 x = SHL (dp[in - 1], dl, cnt);
323 umul_ppmm (h, l, x, qp[qn - 1]);
329 mpn_decr_u (qp, (mp_limb_t) 1);
330 cy = mpn_add_n (n2p, n2p, d2p, qn);
333 /* The partial remainder is safely large. */
340 quotient_too_large = 0;
345 /* Append partially used numerator limb to partial remainder. */
346 cy1 = mpn_lshift (n2p, n2p, rn, BITS_PER_MP_LIMB - cnt);
347 n2p[0] |= np[in - 1] & (~(mp_limb_t) 0 >> cnt);
349 /* Update partial remainder with partially used divisor limb. */
350 cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (~(mp_limb_t) 0 >> cnt));
361 quotient_too_large = (cy1 < cy2);
366 /* True: partial remainder now is neutral, i.e., it is not shifted up. */
368 tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
374 MPN_COPY (rp, n2p, rn);
379 mpn_mul (tp, qp, qn, dp, in);
382 mpn_mul (tp, dp, in, qp, qn);
384 cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
385 MPN_COPY (rp + in, n2p, dn - in);
386 quotient_too_large |= cy;
387 cy = mpn_sub_n (rp, np, tp, in);
388 cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
389 quotient_too_large |= cy;
391 if (quotient_too_large)
393 mpn_decr_u (qp, (mp_limb_t) 1);
394 mpn_add_n (rp, rp, dp, dn);