1 /* mpn_bz_divrem_n and auxiliary routines.
3 THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
4 INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
5 IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
FUTURE GNU MP RELEASE.
9 Copyright (C) 2000 Free Software Foundation, Inc.
10 Contributed by Paul Zimmermann.
12 This file is part of the GNU MP Library.
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of the GNU Lesser General Public License as published by
16 the Free Software Foundation; either version 2.1 of the License, or (at your
17 option) any later version.
19 The GNU MP Library is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
22 License for more details.
24 You should have received a copy of the GNU Lesser General Public License
25 along with the GNU MP Library; see the file COPYING.LIB. If not, write to
26 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 MA 02111-1307, USA. */
33 [1] Fast Recursive Division, by Christoph Burnikel and Joachim Ziegler,
34 Technical report MPI-I-98-1-022, October 1998.
35 http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz
/* Forward declarations of the file-local (static) helper routines. */
38 static mp_limb_t mpn_bz_div_3_halves_by_2 _PROTO ((mp_ptr, mp_ptr, mp_srcptr,
41 static mp_limb_t mpn_bz_divrem_aux _PROTO ((mp_ptr, mp_ptr, mp_srcptr,
44 /* mpn_bz_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
45 div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
46 i.e. n/2 >= KARATSUBA_MUL_THRESHOLD */
/* BZ_THRESHOLD - operand size below which the routines in this file call
   mpn_sb_divrem_mn instead of recursing.
   NOTE(review): the factor 7 is larger than the factor 2 that the
   argument above would give; presumably chosen by measurement -- confirm. */
48 #define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
/* unused_mpn_divrem - divrem variant taking an extra qxn argument; it
   widens the dividend by qxn limbs in a stack buffer and hands the result
   to mpn_divrem. */
53 unused_mpn_divrem (qp, qxn, np, nn, dp, dn)
61 /* This might be useful: */
/* Scratch dividend of nn + qxn limbs, taken from the stack with alloca. */
65 mp_ptr tp = alloca ((nn + qxn) * BYTES_PER_MP_LIMB);
/* NOTE(review): the destination tp + qxn - nn lies before tp whenever
   nn > qxn, and otherwise leaves the top nn limbs of tp unwritten;
   tp + qxn looks like the intended offset -- confirm before enabling. */
66 MPN_COPY (tp + qxn - nn, np, nn);
/* Divide the widened dividend (tp, nn + qxn) by (dp, dn), passing 0 as
   mpn_divrem's own qxn argument. */
68 c = mpn_divrem (qp, 0L, tp, nn + qxn, dp, dn);
69 /* Maybe copy proper part of tp to np? Documentation is unclear about
70 the returned np value when qxn != 0 */
76 /* mpn_bz_divrem_n - Implements algorithm of page 8 in [1]: divides (np,2n)
77 by (dp,n) and puts the quotient in (qp,n), the remainder in (np,n).
78 Returns most significant limb of the quotient, which is 0 or 1.
79 Requires that the most significant bit of the divisor is set. */
/* Parameters:
     qp  receives the n quotient limbs.
     np  2n-limb dividend; overwritten, its low n limbs end up holding the
         remainder.
     dp  n-limb divisor, read only (mp_srcptr).
     n   size of the divisor in limbs. */
83 mpn_bz_divrem_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)
85 mpn_bz_divrem_n (qp, np, dp, n)
/* If the high n limbs of the dividend are not smaller than the divisor,
   subtract the divisor from them once. */
93 if (mpn_cmp (np + n, dp, n) >= 0)
96 mpn_sub_n (np + n, np + n, dp, n);
101 /* divide (2n - 2) most significant limbs from np by those (n - 1) from dp */
102 if (n < BZ_THRESHOLD)
103 qhl += mpn_sb_divrem_mn (qp + 1, np + 2, 2 * (n - 1), dp + 1, n - 1);
105 qhl += mpn_bz_divrem_n (qp + 1, np + 2, dp + 1, n - 1);
106 /* now (qp + 1, n - 1) contains the quotient of (np + 2, 2n - 2) by
107 (dp + 1, n - 1) and (np + 2, n - 1) contains the remainder */
/* Account for the divisor limb dp[0] that the division above ignored:
   subtract (qp + 1, n - 1) * dp[0] from (np + 1, n - 1) and take the
   returned borrow limb out of np[n].  A borrow out of np[n] means the
   quotient estimate was too large. */
108 if (mpn_sub_1 (np + n, np + n, 1,
109 mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0])))
111 /* quotient too large */
/* Decrement the (n - 1)-limb quotient (a borrow out of it is taken from
   qhl) and add the full divisor back at np + 1; no carry from that
   addition means one more correction is needed. */
112 qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
113 if (mpn_add_n (np + 1, np + 1, dp, n) == 0)
114 { /* still too large */
115 qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
116 mpn_add_n (np + 1, np + 1, dp, n); /* always carry here */
119 /* now divide (np, n + 1) by (dp, n) */
/* The value returned by mpn_sb_divrem_mn is added into (qp + 1, n - 1);
   a carry out of that addition is added to qhl. */
120 qhl += mpn_add_1 (qp + 1, qp + 1, n - 1,
121 mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
/* Scratch area of n limbs shared by the two calls below. */
129 tmp = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);
/* Two 3-halves-by-2 divisions: first on the part of np starting at
   np + n2 (quotient to qp + n2), then on the part starting at np
   (quotient to qp).  The second call's return value is added into the
   upper quotient part, and a carry out of that goes into qhl.
   NOTE(review): n2 is presumably n / 2 -- confirm against its definition. */
130 qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2, tmp);
131 qhl += mpn_add_1 (qp + n2, qp + n2, n2,
132 mpn_bz_div_3_halves_by_2 (qp, np, dp, n2, tmp));
138 /* Like mpn_bz_divrem_n, but without memory allocation. Also
139 assumes mpn_cmp (np + n, dp, n) < 0 */
/* Parameters qp, np, dp and n are used as in mpn_bz_divrem_n.  tmp is
   scratch space supplied by the caller; it is passed unchanged to the
   recursive call and to mpn_bz_div_3_halves_by_2.
   NOTE(review): tmp presumably needs room for n limbs, matching what
   mpn_bz_divrem_n allocates -- confirm. */
143 mpn_bz_divrem_aux (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr tmp)
145 mpn_bz_divrem_aux (qp, np, dp, n, tmp)
157 /* divide (2n - 2) most significant limbs from np by those (n - 1) from dp */
158 qhl = mpn_bz_divrem_aux (qp + 1, np + 2, dp + 1, n - 1, tmp);
159 /* now (qp + 1, n - 1) contains the quotient of (np + 2, 2n - 2) by
160 (dp + 1, n - 1) and (np + 2, n - 1) contains the remainder */
/* Account for the divisor limb dp[0] that the division above ignored:
   subtract (qp + 1, n - 1) * dp[0] from (np + 1, n - 1) and take the
   returned borrow limb out of np[n].  A borrow out of np[n] means the
   quotient estimate was too large. */
161 if (mpn_sub_1 (np + n, np + n, 1,
162 mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0])))
164 /* quotient too large */
/* Decrement the (n - 1)-limb quotient (a borrow out of it is taken from
   qhl) and add the full divisor back at np + 1; no carry from that
   addition means one more correction is needed. */
165 qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
166 if (mpn_add_n (np + 1, np + 1, dp, n) == 0)
167 { /* still too large */
168 qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
169 mpn_add_n (np + 1, np + 1, dp, n); /* always carry here */
172 /* now divide (np, n + 1) by (dp, n) */
/* The value returned by mpn_sb_divrem_mn is added into (qp + 1, n - 1);
   a carry out of that addition is added to qhl. */
173 qhl += mpn_add_1 (qp + 1, qp + 1, n - 1,
174 mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
/* Two 3-halves-by-2 divisions sharing tmp: first on the part of np
   starting at np + n2 (quotient to qp + n2), then on the part starting at
   np (quotient to qp).  The second call's return value is added into the
   upper quotient part, and a carry out of that goes into qhl.
   NOTE(review): n2 is presumably n / 2 -- confirm against its definition. */
179 qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2, tmp);
180 qhl += mpn_add_1 (qp + n2, qp + n2, n2,
181 mpn_bz_div_3_halves_by_2 (qp, np, dp, n2, tmp));
186 /* divides (np, 3n) by (dp, 2n) and puts the quotient in (qp, n),
187 the remainder in (np, 2n) */
/* tmp is caller-supplied scratch; it receives the 2n-limb product written
   by mpn_mul_n below and is also handed to mpn_bz_divrem_aux. */
191 mpn_bz_div_3_halves_by_2 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
194 mpn_bz_div_3_halves_by_2 (qp, np, dp, n, tmp)
202 mp_size_t twon = n + n;
/* Quotient estimate: divide the upper 2n limbs of np by the upper n limbs
   of dp, using mpn_sb_divrem_mn below BZ_THRESHOLD and mpn_bz_divrem_aux
   otherwise. */
205 if (n < BZ_THRESHOLD)
206 qhl = mpn_sb_divrem_mn (qp, np + n, twon, dp + n, n);
208 qhl = mpn_bz_divrem_aux (qp, np + n, dp + n, n, tmp);
209 /* q = (qp, n), c = (np + n, n) with the notations of [1] */
/* tmp = (qp, n) * (dp, n): the estimate times the low half of the divisor. */
210 mpn_mul_n (tmp, qp, dp, n);
/* NOTE(review): adds (dp, n) into the upper half of the product;
   presumably this is the contribution of a quotient high limb of 1 and
   applies only when qhl != 0 -- confirm against the guarding condition. */
212 mpn_add_n (tmp + n, tmp + n, dp, n);
/* Subtract the product from the low 2n limbs of np; a borrow means the
   quotient estimate was too large. */
213 if (mpn_sub_n (np, np, tmp, twon)) /* R = (np, 2n) */
/* Decrement the quotient (a borrow out of it is taken from qhl) and add
   the full 2n-limb divisor back; no carry from that addition means one
   more correction is needed. */
215 qhl -= mpn_sub_1 (qp, qp, n, 1L);
216 if (mpn_add_n (np, np, dp, twon) == 0)
217 { /* qp still too large */
218 qhl -= mpn_sub_1 (qp, qp, n, 1L);
219 mpn_add_n (np, np, dp, twon); /* always carry here */