1 /* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD.
3 Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation, Inc.
4 Contributed by Paul Zimmermann.
6 This file is part of the GNU MP Library.
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or (at your
11 option) any later version.
13 The GNU MP Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21 MA 02111-1307, USA. */
31 /* set c <- (a*b)/R^n mod m c has to have at least (2n) allocated limbs */
34 mpz_redc (mpz_ptr c, mpz_srcptr a, mpz_srcptr b, mpz_srcptr m, mp_limb_t Nprim)
36 mpz_redc (c, a, b, m, Nprim)
44 mp_ptr cp, mp = PTR (m);
45 mp_limb_t cy, cout = 0;
47 size_t j, n = ABSIZ (m);
49 ASSERT (ALLOC (c) >= 2 * n);
54 MPN_ZERO (cp + j, 2 * n - j);
55 for (j = 0; j < n; j++)
58 cy = mpn_addmul_1 (cp, mp, n, q);
59 cout += mpn_add_1 (cp + n, cp + n, n - j, cy);
65 cy = cout - mpn_sub_n (cp, cp + n, mp, n);
67 cy -= mpn_sub_n (cp, cp, mp, n);
70 MPN_COPY (cp, cp + n, n);
71 MPN_NORMALIZE (cp, n);
72 SIZ (c) = SIZ (c) < 0 ? -n : n;
75 /* average number of calls to redc for an exponent of n bits
76 with the sliding window algorithm of base 2^k: the optimal is
77 obtained for the value of k which minimizes 2^(k-1)+n/(k+1):
80 128 156* 159 171 200 261
81 256 309 307* 316 343 403
82 512 617 607* 610 632 688
83 1024 1231 1204 1195* 1207 1256
84 2048 2461 2399 2366 2360* 2396
85 4096 4918 4787 4707 4665* 4670
91 mpz_powm (mpz_ptr res, mpz_srcptr base, mpz_srcptr e, mpz_srcptr mod)
93 mpz_powm (res, base, e, mod)
99 #else /* BERKELEY_MP */
102 pow (mpz_srcptr base, mpz_srcptr e, mpz_srcptr mod, mpz_ptr res)
104 pow (base, e, mod, res)
110 #endif /* BERKELEY_MP */
112 mp_limb_t invm, *ep, c, mask;
114 mp_size_t n, i, K, j, l, k;
130 /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
131 depending on if MOD equals 1. */
132 SIZ(res) = (ABSIZ (mod) == 1 && (PTR(mod))[0] == 1) ? 0 : 1;
137 /* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD.
138 In REDC each modular multiplication costs about 2*n^2 limbs operations,
139 whereas using usual reduction it costs 3*K(n), where K(n) is the cost of a
140 multiplication using Karatsuba, and a division is assumed to cost 2*K(n),
141 for example using Burnikel-Ziegler's algorithm. This gives a theoretical
142 threshold of a*KARATSUBA_SQR_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~
144 /* For now, also disable REDC when MOD is even, as the inverse can't
147 #ifndef POWM_THRESHOLD
148 #define POWM_THRESHOLD ((8 * KARATSUBA_SQR_THRESHOLD) / 3)
151 use_redc = (n < POWM_THRESHOLD && PTR(mod)[0] % 2 != 0);
154 /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */
155 modlimb_invert (invm, PTR(mod)[0]);
159 /* determines optimal value of k */
160 l = ABSIZ (e) * BITS_PER_MP_LIMB; /* number of bits of exponent */
163 while (2 * l > K * (2 + k * (3 + k)))
169 g = (mpz_t *) (*_mp_allocate_func) (K / 2 * sizeof (mpz_t));
170 /* compute x*R^n where R=2^BITS_PER_MP_LIMB */
174 mpz_mul_2exp (g[0], base, n * BITS_PER_MP_LIMB);
175 mpz_mod (g[0], g[0], mod);
178 mpz_mod (g[0], base, mod);
180 /* compute xx^g for odd g < 2^k */
184 _mpz_realloc (xx, 2 * n);
185 mpz_redc (xx, g[0], g[0], mod, invm); /* xx = x^2*R^n */
189 mpz_mul (xx, g[0], g[0]);
190 mpz_mod (xx, xx, mod);
192 for (i = 1; i < K / 2; i++)
197 _mpz_realloc (g[i], 2 * n);
198 mpz_redc (g[i], g[i - 1], xx, mod, invm); /* g[i] = x^(2i+1)*R^n */
202 mpz_mul (g[i], g[i - 1], xx);
203 mpz_mod (g[i], g[i], mod);
207 /* now starts the real stuff */
208 mask = (mp_limb_t) ((1<<k) - 1);
210 i = ABSIZ (e) - 1; /* current index */
211 c = ep[i]; /* current limb */
212 count_leading_zeros (sh, c);
213 sh = BITS_PER_MP_LIMB - sh; /* significant bits in ep[i] */
214 sh -= k; /* index of lower bit of ep[i] to take into account */
216 { /* k-sh extra bits are needed */
220 c = (c << (-sh)) | (ep[i] >> (BITS_PER_MP_LIMB + sh));
221 sh += BITS_PER_MP_LIMB;
227 printf ("-1/m mod 2^%u = %lu\n", BITS_PER_MP_LIMB, invm);
236 mpz_set (xx, g[c >> 1]);
240 mpz_redc (xx, xx, xx, mod, invm);
243 mpz_mul (xx, xx, xx);
244 mpz_mod (xx, xx, mod);
249 printf ("x^"); mpz_out_str (0, 10, exp);
250 printf ("*2^%u mod m = ", n * BITS_PER_MP_LIMB); mpz_out_str (0, 10, xx);
254 while (i > 0 || sh > 0)
258 l = k; /* number of bits treated */
264 c = (c << (-sh)) | (ep[i] >> (BITS_PER_MP_LIMB + sh));
265 sh += BITS_PER_MP_LIMB;
269 l += sh; /* may be less bits than k here */
270 c = c & ((1<<l) - 1);
277 /* this while loop implements the sliding window improvement */
278 while ((c & (1 << (k - 1))) == 0 && (i > 0 || sh > 0))
280 if (use_redc) mpz_redc (xx, xx, xx, mod, invm);
283 mpz_mul (xx, xx, xx);
284 mpz_mod (xx, xx, mod);
289 c = (c<<1) + ((ep[i]>>sh) & 1);
294 sh = BITS_PER_MP_LIMB - 1;
295 c = (c<<1) + (ep[i]>>sh);
300 printf ("l=%u c=%lu\n", l, c);
301 mpz_mul_2exp (exp, exp, k);
302 mpz_add_ui (exp, exp, c);
305 /* now replace xx by xx^(2^k)*x^c */
314 /* c0 = c * 2^j, i.e. xx^(2^k)*x^c = (A^(2^(k - j))*c)^(2^j) */
317 if (use_redc) mpz_redc (xx, xx, xx, mod, invm);
320 mpz_mul (xx, xx, xx);
321 mpz_mod (xx, xx, mod);
324 mpz_redc (xx, xx, g[c >> 1], mod, invm);
327 mpz_mul (xx, xx, g[c >> 1]);
328 mpz_mod (xx, xx, mod);
332 j = l; /* case c=0 */
336 mpz_redc (xx, xx, xx, mod, invm);
339 mpz_mul (xx, xx, xx);
340 mpz_mod (xx, xx, mod);
344 printf ("x^"); mpz_out_str (0, 10, exp);
345 printf ("*2^%u mod m = ", n * BITS_PER_MP_LIMB); mpz_out_str (0, 10, xx);
350 /* now convert back xx to xx/R^n */
353 mpz_set_ui (g[0], 1);
354 mpz_redc (xx, xx, g[0], mod, invm);
355 if (mpz_cmp (xx, mod) >= 0)
356 mpz_sub (xx, xx, mod);
361 for (i = 0; i < K / 2; i++)
363 (*_mp_free_func) (g, K / 2 * sizeof (mpz_t));