/* Include file for internal GNU MP types and definitions.

   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.

Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
#include "gmp-mparam.h"
/* #include "longlong.h" */
/* When using gcc, make sure to use its builtin alloca.  */
#if ! defined (alloca) && defined (__GNUC__)
#define alloca __builtin_alloca
#define HAVE_ALLOCA
#endif
/* When using cc, do whatever necessary to allow use of alloca.  For many
   machines, this means including alloca.h.  IBM's compilers need a #pragma
   in "each module that needs to use alloca".  */
#if ! defined (alloca)
/* We need lots of variants for MIPS, to cover all versions and perversions
   of OSes for MIPS.  */
#if defined (__mips) || defined (MIPSEL) || defined (MIPSEB) \
 || defined (_MIPSEL) || defined (_MIPSEB) || defined (__sgi) \
 || defined (__alpha) || defined (__sparc) || defined (sparc)
#include <alloca.h>
#define HAVE_ALLOCA
#endif
#if defined (_IBMR2)
#pragma alloca
#define HAVE_ALLOCA
#endif
#if defined (__DECC)
#define alloca(x) __ALLOCA(x)
#define HAVE_ALLOCA
#endif
#endif
#if ! defined (HAVE_ALLOCA) || USE_STACK_ALLOC
#include "stack-alloc.h"
#else
#define TMP_DECL(m)
#define TMP_ALLOC(x) alloca(x)
#define TMP_MARK(m)
#define TMP_FREE(m)
#endif
/* Allocating various types.  */
#define TMP_ALLOC_TYPE(n,type)  ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_ALLOC_LIMBS(n)      TMP_ALLOC_TYPE(n,mp_limb_t)
#define TMP_ALLOC_MP_PTRS(n)    TMP_ALLOC_TYPE(n,mp_ptr)
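
/* Editor's note: the following usage sketch is not part of the original
   header; it just illustrates the intended TMP_DECL/TMP_MARK/TMP_ALLOC/
   TMP_FREE calling pattern.  example_tmp_usage is a hypothetical name, and
   the #if 0 keeps the sketch out of compilation.  */
#if 0
static void
example_tmp_usage (mp_ptr dst, mp_srcptr src, mp_size_t n)
{
  mp_ptr     tp;
  mp_size_t  i;
  TMP_DECL (marker);            /* declare the allocation marker */

  TMP_MARK (marker);            /* record the current allocation point */
  tp = TMP_ALLOC_LIMBS (n);     /* temporary space for n limbs */
  for (i = 0; i < n; i++)       /* use it like any limb array */
    tp[i] = src[i];
  for (i = 0; i < n; i++)
    dst[i] = tp[i];
  TMP_FREE (marker);            /* release everything since TMP_MARK */
}
#endif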
#if ! defined (__GNUC__)   /* FIXME: Test for C++ compilers here,
                              __DECC understands __inline */
#define inline                  /* Empty */
#endif
#define ABS(x)       ((x) >= 0 ? (x) : -(x))
#define MIN(l,o)     ((l) < (o) ? (l) : (o))
#define MAX(h,i)     ((h) > (i) ? (h) : (i))
#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))
/* Field access macros.  */
#define SIZ(x)    ((x)->_mp_size)
#define ABSIZ(x)  ABS (SIZ (x))
#define PTR(x)    ((x)->_mp_d)
#define LIMBS(x)  ((x)->_mp_d)
#define EXP(x)    ((x)->_mp_exp)
#define PREC(x)   ((x)->_mp_prec)
#define ALLOC(x)  ((x)->_mp_alloc)
/* Extra casts because shorts are promoted to ints by "~" and "<<".  "-1"
   rather than "1" in SIGNED_TYPE_MIN avoids warnings from some compilers
   about arithmetic overflow.  */
#define UNSIGNED_TYPE_MAX(type)      ((type) ~ (type) 0)
#define UNSIGNED_TYPE_HIGHBIT(type)  ((type) ~ (UNSIGNED_TYPE_MAX(type) >> 1))
#define SIGNED_TYPE_MIN(type)        (((type) -1) << (8*sizeof(type)-1))
#define SIGNED_TYPE_MAX(type)        ((type) ~ SIGNED_TYPE_MIN(type))
#define SIGNED_TYPE_HIGHBIT(type)    SIGNED_TYPE_MIN(type)
#define MP_LIMB_T_MAX      UNSIGNED_TYPE_MAX (mp_limb_t)
#define MP_LIMB_T_HIGHBIT  UNSIGNED_TYPE_HIGHBIT (mp_limb_t)

#define MP_SIZE_T_MAX      SIGNED_TYPE_MAX (mp_size_t)

#ifndef ULONG_MAX
#define ULONG_MAX          UNSIGNED_TYPE_MAX (unsigned long)
#endif
#define ULONG_HIGHBIT      UNSIGNED_TYPE_HIGHBIT (unsigned long)
#define LONG_HIGHBIT       SIGNED_TYPE_HIGHBIT (long)
#ifndef LONG_MAX
#define LONG_MAX           SIGNED_TYPE_MAX (long)
#endif

#ifndef USHORT_MAX
#define USHORT_MAX         UNSIGNED_TYPE_MAX (unsigned short)
#endif
#define USHORT_HIGHBIT     UNSIGNED_TYPE_HIGHBIT (unsigned short)
#define SHORT_HIGHBIT      SIGNED_TYPE_HIGHBIT (short)
#ifndef SHORT_MAX
#define SHORT_MAX          SIGNED_TYPE_MAX (short)
#endif
/* Swap macros.  */

#define MP_LIMB_T_SWAP(x, y)                    \
  do {                                          \
    mp_limb_t __mp_limb_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_limb_t_swap__tmp;                \
  } while (0)
#define MP_SIZE_T_SWAP(x, y)                    \
  do {                                          \
    mp_size_t __mp_size_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_size_t_swap__tmp;                \
  } while (0)

#define MP_PTR_SWAP(x, y)                       \
  do {                                          \
    mp_ptr __mp_ptr_swap__tmp = (x);            \
    (x) = (y);                                  \
    (y) = __mp_ptr_swap__tmp;                   \
  } while (0)
#define MP_SRCPTR_SWAP(x, y)                    \
  do {                                          \
    mp_srcptr __mp_srcptr_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_srcptr_swap__tmp;                \
  } while (0)

#define MPN_PTR_SWAP(xp,xs, yp,ys)              \
  do {                                          \
    MP_PTR_SWAP (xp, yp);                       \
    MP_SIZE_T_SWAP (xs, ys);                    \
  } while (0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)           \
  do {                                          \
    MP_SRCPTR_SWAP (xp, yp);                    \
    MP_SIZE_T_SWAP (xs, ys);                    \
  } while (0)

#define MPZ_PTR_SWAP(x, y)                      \
  do {                                          \
    mpz_ptr __mpz_ptr_swap__tmp = (x);          \
    (x) = (y);                                  \
    (y) = __mpz_ptr_swap__tmp;                  \
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y)                   \
  do {                                          \
    mpz_srcptr __mpz_srcptr_swap__tmp = (x);    \
    (x) = (y);                                  \
    (y) = __mpz_srcptr_swap__tmp;               \
  } while (0)
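
/* Editor's note: an illustrative fragment, not from the original header.
   A typical use of the swap macros is commuting operands so the first is
   the larger; up,usize and vp,vsize are hypothetical variables.  */
#if 0
  if (usize < vsize)
    MPN_SRCPTR_SWAP (up,usize, vp,vsize);       /* now usize >= vsize */
#endif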
#if defined (__cplusplus)
extern "C" {
#endif
/* FIXME: These are purely internal, so do a search and replace to change
   them to __gmp forms, rather than using these macros.  */
#define _mp_allocate_func       __gmp_allocate_func
#define _mp_reallocate_func     __gmp_reallocate_func
#define _mp_free_func           __gmp_free_func
#define _mp_default_allocate    __gmp_default_allocate
#define _mp_default_reallocate  __gmp_default_reallocate
#define _mp_default_free        __gmp_default_free
extern void * (*_mp_allocate_func) _PROTO ((size_t));
extern void * (*_mp_reallocate_func) _PROTO ((void *, size_t, size_t));
extern void   (*_mp_free_func) _PROTO ((void *, size_t));

void *_mp_default_allocate _PROTO ((size_t));
void *_mp_default_reallocate _PROTO ((void *, size_t, size_t));
void _mp_default_free _PROTO ((void *, size_t));

#define _MP_ALLOCATE_FUNC_TYPE(n,type) \
  ((type *) (*_mp_allocate_func) ((n) * sizeof (type)))
#define _MP_ALLOCATE_FUNC_LIMBS(n)  _MP_ALLOCATE_FUNC_TYPE(n,mp_limb_t)

#define _MP_FREE_FUNC_TYPE(p,n,type) (*_mp_free_func) (p, (n) * sizeof (type))
#define _MP_FREE_FUNC_LIMBS(p,n)     _MP_FREE_FUNC_TYPE(p,n,mp_limb_t)
#if (__STDC__-0) || defined (__cplusplus)
#else
/* Pre-ANSI compilers don't know these keywords.  */
#define const   /* Empty */
#define signed  /* Empty */
#endif
#if defined (__GNUC__) && defined (__i386__)
#if 0   /* check that these actually improve things */
#define MPN_COPY_INCR(DST, SRC, N)                                      \
  __asm__ ("cld\n\trep\n\tmovsl" : :                                    \
           "D" (DST), "S" (SRC), "c" (N) :                              \
           "cx", "di", "si", "memory")
#define MPN_COPY_DECR(DST, SRC, N)                                      \
  __asm__ ("std\n\trep\n\tmovsl" : :                                    \
           "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :      \
           "cx", "di", "si", "memory")
#define MPN_NORMALIZE_NOT_ZERO(P, N)                                    \
  do {                                                                  \
    __asm__ ("std\n\trepe\n\tscasl" : "=c" (N) :                        \
             "a" (0), "D" ((P) + (N) - 1), "0" (N) :                    \
             "di", "memory");                                           \
    (N)++;                                                              \
  } while (0)
#endif
#endif
#if HAVE_NATIVE_mpn_copyi
#define mpn_copyi __MPN(copyi)
void mpn_copyi _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif
/* Remap names of internal mpn functions.  */
#define __clz_tab        __MPN(clz_tab)
#define mpn_udiv_w_sdiv  __MPN(udiv_w_sdiv)
#define mpn_reciprocal   __MPN(reciprocal)

#define mpn_sb_divrem_mn __MPN(sb_divrem_mn)
#define mpn_bz_divrem_n  __MPN(bz_divrem_n)
/* #define mpn_tdiv_q    __MPN(tdiv_q) */

#define mpn_kara_mul_n __MPN(kara_mul_n)
void mpn_kara_mul_n _PROTO((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_kara_sqr_n __MPN(kara_sqr_n)
void mpn_kara_sqr_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom3_mul_n __MPN(toom3_mul_n)
void mpn_toom3_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,mp_ptr));

#define mpn_toom3_sqr_n __MPN(toom3_sqr_n)
void mpn_toom3_sqr_n _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_fft_best_k __MPN(fft_best_k)
int mpn_fft_best_k _PROTO ((mp_size_t n, int sqr));

#define mpn_mul_fft __MPN(mul_fft)
void mpn_mul_fft _PROTO ((mp_ptr op, mp_size_t pl,
                          mp_srcptr n, mp_size_t nl,
                          mp_srcptr m, mp_size_t ml,
                          int k));

#define mpn_mul_fft_full __MPN(mul_fft_full)
void mpn_mul_fft_full _PROTO ((mp_ptr op,
                               mp_srcptr n, mp_size_t nl,
                               mp_srcptr m, mp_size_t ml));

#define mpn_fft_next_size __MPN(fft_next_size)
mp_size_t mpn_fft_next_size _PROTO ((mp_size_t pl, int k));

mp_limb_t mpn_sb_divrem_mn _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
mp_limb_t mpn_bz_divrem_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
/* void mpn_tdiv_q _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); */
/* Copy NLIMBS *limbs* from SRC to DST, NLIMBS==0 allowed.  */
#ifndef MPN_COPY_INCR
#if HAVE_NATIVE_mpn_copyi
#define MPN_COPY_INCR(DST, SRC, NLIMBS)  mpn_copyi (DST, SRC, NLIMBS)
#else
#define MPN_COPY_INCR(DST, SRC, NLIMBS)         \
  do {                                          \
    mp_size_t __i;                              \
    for (__i = 0; __i < (NLIMBS); __i++)        \
      (DST)[__i] = (SRC)[__i];                  \
  } while (0)
#endif
#endif
#if HAVE_NATIVE_mpn_copyd
#define mpn_copyd __MPN(copyd)
void mpn_copyd _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif
/* NLIMBS==0 allowed */
#ifndef MPN_COPY_DECR
#if HAVE_NATIVE_mpn_copyd
#define MPN_COPY_DECR(DST, SRC, NLIMBS)  mpn_copyd (DST, SRC, NLIMBS)
#else
#define MPN_COPY_DECR(DST, SRC, NLIMBS)         \
  do {                                          \
    mp_size_t __i;                              \
    for (__i = (NLIMBS) - 1; __i >= 0; __i--)   \
      (DST)[__i] = (SRC)[__i];                  \
  } while (0)
#endif
#endif
/* Define MPN_COPY for vector computers.  Since #pragma cannot be in a macro,
   rely on function inlining.  */
#if defined (_CRAY) || defined (__uxp__)
static inline void
_MPN_COPY (d, s, n) mp_ptr d; mp_srcptr s; mp_size_t n;
{
  int i;                        /* Faster for Cray with plain int */
#pragma _CRI ivdep              /* Cray PVP systems */
#pragma loop noalias d,s        /* Fujitsu VPP systems */
  for (i = 0; i < n; i++)
    d[i] = s[i];
}
#define MPN_COPY _MPN_COPY
#endif

#ifndef MPN_COPY
#define MPN_COPY MPN_COPY_INCR
#endif
/* Zero NLIMBS *limbs* AT DST.  */
#ifndef MPN_ZERO
#define MPN_ZERO(DST, NLIMBS)                   \
  do {                                          \
    mp_size_t __i;                              \
    for (__i = 0; __i < (NLIMBS); __i++)        \
      (DST)[__i] = 0;                           \
  } while (0)
#endif
#ifndef MPN_NORMALIZE
#define MPN_NORMALIZE(DST, NLIMBS)              \
  do {                                          \
    while (NLIMBS > 0)                          \
      {                                         \
        if ((DST)[(NLIMBS) - 1] != 0)           \
          break;                                \
        NLIMBS--;                               \
      }                                         \
  } while (0)
#endif

#ifndef MPN_NORMALIZE_NOT_ZERO
#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)     \
  do {                                          \
    while (1)                                   \
      {                                         \
        if ((DST)[(NLIMBS) - 1] != 0)           \
          break;                                \
        NLIMBS--;                               \
      }                                         \
  } while (0)
#endif
/* Strip least significant zero limbs from ptr,size by incrementing ptr and
   decrementing size.  The number in ptr,size must be non-zero, ie. size!=0
   and somewhere a non-zero limb.  */
#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size) \
  do {                                          \
    ASSERT ((size) != 0);                       \
    while ((ptr)[0] == 0)                       \
      {                                         \
        (ptr)++;                                \
        (size)--;                               \
        ASSERT (size >= 0);                     \
      }                                         \
  } while (0)
/* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a
   temporary variable; it will be automatically cleared out at function
   return.  We use __x here to make it possible to accept both mpz_ptr and
   mpz_t arguments.  */
#define MPZ_TMP_INIT(X, NLIMBS)                                         \
  do {                                                                  \
    mpz_ptr __x = (X);                                                  \
    __x->_mp_alloc = (NLIMBS);                                          \
    __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB);     \
  } while (0)
/* Realloc for an mpz_t WHAT if it has less than NEEDED limbs.  */
#define MPZ_REALLOC(what,needed)                \
  do {                                          \
    if ((needed) > ALLOC (what))                \
      _mpz_realloc (what, needed);              \
  } while (0)
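
/* Editor's note: an illustrative sketch, not from the original header,
   showing MPZ_TMP_INIT together with the TMP_* scheme as used inside mpz
   routines.  example_tmp_mpz is a hypothetical name; #if 0 keeps it out
   of compilation.  */
#if 0
static void
example_tmp_mpz (mpz_ptr r, mpz_srcptr a)
{
  mpz_t  t;
  TMP_DECL (marker);

  TMP_MARK (marker);
  MPZ_TMP_INIT (t, ABSIZ (a) + 1);      /* room for |a| plus a carry limb */
  mpz_add (t, a, a);                    /* t usable as a normal mpz_t */
  mpz_set (r, t);
  TMP_FREE (marker);                    /* t's limbs released here; no mpz_clear */
}
#endif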
/* If KARATSUBA_MUL_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef KARATSUBA_MUL_THRESHOLD
#define KARATSUBA_MUL_THRESHOLD 32
#endif

/* If TOOM3_MUL_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef TOOM3_MUL_THRESHOLD
#define TOOM3_MUL_THRESHOLD 256
#endif

#ifndef KARATSUBA_SQR_THRESHOLD
#define KARATSUBA_SQR_THRESHOLD (2*KARATSUBA_MUL_THRESHOLD)
#endif

#ifndef TOOM3_SQR_THRESHOLD
#define TOOM3_SQR_THRESHOLD (2*TOOM3_MUL_THRESHOLD)
#endif
/* First k to use for an FFT modF multiply.  A modF FFT is an order
   log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
   whereas k=4 is 1.33 which is faster than toom3 at 1.485.  */
#define FFT_FIRST_K  4

/* Threshold at which FFT should be used to do a modF NxN -> N multiply.  */
#ifndef FFT_MODF_MUL_THRESHOLD
#define FFT_MODF_MUL_THRESHOLD  (TOOM3_MUL_THRESHOLD * 3)
#endif
#ifndef FFT_MODF_SQR_THRESHOLD
#define FFT_MODF_SQR_THRESHOLD  (TOOM3_SQR_THRESHOLD * 3)
#endif

/* Threshold at which FFT should be used to do an NxN -> 2N multiply.  This
   will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
   NxN->2N multiply and not recursing into itself is an order
   log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
   is the first better than toom3.  */
#ifndef FFT_MUL_THRESHOLD
#define FFT_MUL_THRESHOLD  (FFT_MODF_MUL_THRESHOLD * 10)
#endif
#ifndef FFT_SQR_THRESHOLD
#define FFT_SQR_THRESHOLD  (FFT_MODF_SQR_THRESHOLD * 10)
#endif
/* Table of thresholds for successive modF FFT "k"s.  The first entry is
   where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
   etc.  See mpn_fft_best_k().  */
#ifndef FFT_MUL_TABLE
#define FFT_MUL_TABLE                           \
  { TOOM3_MUL_THRESHOLD * 4,   /* k=5 */        \
    TOOM3_MUL_THRESHOLD * 8,   /* k=6 */        \
    TOOM3_MUL_THRESHOLD * 16,  /* k=7 */        \
    TOOM3_MUL_THRESHOLD * 32,  /* k=8 */        \
    TOOM3_MUL_THRESHOLD * 96,  /* k=9 */        \
    TOOM3_MUL_THRESHOLD * 288, /* k=10 */       \
    0 }
#endif
#ifndef FFT_SQR_TABLE
#define FFT_SQR_TABLE                           \
  { TOOM3_SQR_THRESHOLD * 4,   /* k=5 */        \
    TOOM3_SQR_THRESHOLD * 8,   /* k=6 */        \
    TOOM3_SQR_THRESHOLD * 16,  /* k=7 */        \
    TOOM3_SQR_THRESHOLD * 32,  /* k=8 */        \
    TOOM3_SQR_THRESHOLD * 96,  /* k=9 */        \
    TOOM3_SQR_THRESHOLD * 288, /* k=10 */       \
    0 }
#endif

#ifndef FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS  static const
#endif

#define MPN_FFT_TABLE_SIZE 16
/* Return non-zero if xp,xsize and yp,ysize overlap.
   If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
   overlap.  If both these are false, there's an overlap.  */
#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
  ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
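
/* Editor's note: illustrative only, not from the original header.  The
   typical use is in assertions that operands don't overlap; dst, src and
   size are hypothetical variables.  */
#if 0
  ASSERT (! MPN_OVERLAP_P (dst, size, src, size));
#endif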
/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
   ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
   does it always.  Generally assertions are meant for development, but
   might help when looking for a problem later too.

   ASSERT_NOCARRY() uses ASSERT() to check the expression is zero, but if
   assertion checking is disabled, the expression is still evaluated.  This
   is meant for use with routines like mpn_add_n() where the return value
   represents a carry or whatever that shouldn't occur.  For example,
   ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
#if defined (__LINE__)
#define ASSERT_LINE  __LINE__
#else
#define ASSERT_LINE  -1
#endif

#if defined (__FILE__)
#define ASSERT_FILE  __FILE__
#else
#define ASSERT_FILE  ""
#endif

int __gmp_assert_fail _PROTO((const char *filename, int linenum,
                              const char *expr));

#if HAVE_STRINGIZE
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
#else
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
#endif

#if (__STDC__-0) || defined (__cplusplus)
#define CAST_TO_VOID  (void)
#else
#define CAST_TO_VOID
#endif

#define ASSERT_ALWAYS(expr)  ((expr) ? 0 : ASSERT_FAIL (expr))

#if WANT_ASSERT
#define ASSERT(expr)          ASSERT_ALWAYS (expr)
#define ASSERT_NOCARRY(expr)  ASSERT_ALWAYS ((expr) == 0)
#else
#define ASSERT(expr)          (CAST_TO_VOID 0)
#define ASSERT_NOCARRY(expr)  (expr)
#endif
#if HAVE_NATIVE_mpn_com_n
#define mpn_com_n __MPN(com_n)
void mpn_com_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#else
#define mpn_com_n(d,s,n)                        \
  do {                                          \
    mp_ptr     __d = (d);                       \
    mp_srcptr  __s = (s);                       \
    mp_size_t  __n = (n);                       \
    do                                          \
      *__d++ = ~ *__s++;                        \
    while (--__n);                              \
  } while (0)
#endif

#define MPN_LOGOPS_N_INLINE(d,s1,s2,n,dop,op,s2op)      \
  do {                                                  \
    mp_ptr     __d = (d);                               \
    mp_srcptr  __s1 = (s1);                             \
    mp_srcptr  __s2 = (s2);                             \
    mp_size_t  __n = (n);                               \
    do                                                  \
      *__d++ = dop (*__s1++ op s2op *__s2++);           \
    while (--__n);                                      \
  } while (0)
#if HAVE_NATIVE_mpn_and_n
#define mpn_and_n __MPN(and_n)
void mpn_and_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_and_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&, )
#endif

#if HAVE_NATIVE_mpn_andn_n
#define mpn_andn_n __MPN(andn_n)
void mpn_andn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_andn_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&,~)
#endif

#if HAVE_NATIVE_mpn_nand_n
#define mpn_nand_n __MPN(nand_n)
void mpn_nand_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_nand_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,&, )
#endif

#if HAVE_NATIVE_mpn_ior_n
#define mpn_ior_n __MPN(ior_n)
void mpn_ior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_ior_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|, )
#endif

#if HAVE_NATIVE_mpn_iorn_n
#define mpn_iorn_n __MPN(iorn_n)
void mpn_iorn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_iorn_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|,~)
#endif

#if HAVE_NATIVE_mpn_nior_n
#define mpn_nior_n __MPN(nior_n)
void mpn_nior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_nior_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,|, )
#endif

#if HAVE_NATIVE_mpn_xor_n
#define mpn_xor_n __MPN(xor_n)
void mpn_xor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_xor_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,^, )
#endif

#if HAVE_NATIVE_mpn_xnor_n
#define mpn_xnor_n __MPN(xnor_n)
void mpn_xnor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_xnor_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,^, )
#endif
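
/* Editor's note: an illustrative fragment, not from the original header.
   With the fallback definitions above, mpn_andn_n expands in place to
   d[i] = s1[i] & ~s2[i] for each of n >= 1 limbs, so clearing in xp the
   bits set in yp is just (xp, yp and n hypothetical):  */
#if 0
  mpn_andn_n (xp, xp, yp, n);   /* xp &= ~yp, limb by limb */
#endif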
/* Structure for conversion between internal binary format and
   strings in base 2..36.  */
struct bases
{
  /* Number of digits in the conversion base that always fits in an mp_limb_t.
     For example, for base 10 on a machine where a mp_limb_t has 32 bits this
     is 9, since 10**9 is the largest number that fits into a mp_limb_t.  */
  int chars_per_limb;

  /* log(2)/log(conversion_base) */
  double chars_per_bit_exactly;

  /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
     factors of base.  Exception: For 2, 4, 8, etc, big_base is log2(base),
     i.e. the number of bits used to represent each digit in the base.  */
  mp_limb_t big_base;

  /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
     fixed-point number.  Instead of dividing by big_base an application can
     choose to multiply by big_base_inverted.  */
  mp_limb_t big_base_inverted;
};
#define __mp_bases __MPN(mp_bases)
extern const struct bases __mp_bases[];
extern mp_size_t __gmp_default_fp_limb_precision;
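
/* Editor's note: an illustrative sketch, not from the original header, of
   how chars_per_limb and big_base relate for a base that isn't a power of
   2.  With 32-bit limbs and base 10 it yields chars_per_limb = 9 and
   big_base = 1000000000 (10**9).  example_base_values is a hypothetical
   name; #if 0 keeps it out of compilation.  */
#if 0
static void
example_base_values (int base, int *chars_per_limb, mp_limb_t *big_base)
{
  mp_limb_t  bb = 1;
  int        n = 0;

  /* largest power of base that still fits in one limb */
  while (bb <= MP_LIMB_T_MAX / base)
    {
      bb *= base;
      n++;
    }
  *chars_per_limb = n;
  *big_base = bb;
}
#endif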
#if defined (__i386__)
#define TARGET_REGISTER_STARVED 1
#else
#define TARGET_REGISTER_STARVED 0
#endif
/* Use a library function for invert_limb, if available.  */
#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
#define mpn_invert_limb __MPN(invert_limb)
mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t));
#define invert_limb(invxl,xl)  (invxl = __MPN(invert_limb) (xl))
#endif

#ifndef invert_limb
#define invert_limb(invxl,xl)                   \
  do {                                          \
    mp_limb_t dummy;                            \
    if (xl << 1 == 0)                           \
      invxl = ~(mp_limb_t) 0;                   \
    else                                        \
      udiv_qrnnd (invxl, dummy, -xl, 0, xl);    \
  } while (0)
#endif
/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
   limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
   If this would yield overflow, DI should be the largest possible number
   (i.e., only ones).  For correct operation, the most significant bit of D
   has to be set.  Put the quotient in Q and the remainder in R.  */
#define udiv_qrnnd_preinv(q, r, nh, nl, d, di)                          \
  do {                                                                  \
    mp_limb_t _q, _ql, _r;                                              \
    mp_limb_t _xh, _xl;                                                 \
    umul_ppmm (_q, _ql, (nh), (di));                                    \
    _q += (nh);  /* DI is 2**BITS_PER_MP_LIMB too small */              \
    umul_ppmm (_xh, _xl, _q, (d));                                      \
    sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl);                         \
    if (_xh != 0)                                                       \
      {                                                                 \
        sub_ddmmss (_xh, _r, _xh, _r, 0, (d));                          \
        _q += 1;                                                        \
        if (_xh != 0)                                                   \
          {                                                             \
            sub_ddmmss (_xh, _r, _xh, _r, 0, (d));                      \
            _q += 1;                                                    \
          }                                                             \
      }                                                                 \
    if (_r >= (d))                                                      \
      {                                                                 \
        _r -= (d);                                                      \
        _q += 1;                                                        \
      }                                                                 \
    (r) = _r;                                                           \
    (q) = _q;                                                           \
  } while (0)
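
/* Editor's note: an illustrative fragment, not from the original header.
   A typical remainder loop divides each step's two-limb (r, np[i]) by a
   normalized d, replacing per-step division by multiplication with the
   precomputed di.  np, nsize, qp, d and i are hypothetical variables, and
   umul_ppmm/sub_ddmmss come from longlong.h.  */
#if 0
  mp_limb_t  q, r, di;
  mp_size_t  i;

  invert_limb (di, d);          /* one udiv_qrnnd, done once */
  r = 0;
  for (i = nsize - 1; i >= 0; i--)
    {
      udiv_qrnnd_preinv (q, r, r, np[i], d, di);
      qp[i] = q;
    }
  /* r is now {np,nsize} mod d */
#endif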
/* Like udiv_qrnnd_preinv, but for any value D.  DNORM is D shifted left
   so that its most significant bit is set.  LGUP is ceil(log2(D)).  */
#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup)         \
  do {                                                                  \
    mp_limb_t _n2, _n10, _n1, _nadj, _q1;                               \
    mp_limb_t _xh, _xl;                                                 \
    _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup))) + ((nl) >> 1 >> ((lgup) - 1));\
    _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup));                         \
    _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1));          \
    _nadj = _n10 + (_n1 & (dnorm));                                     \
    umul_ppmm (_xh, _xl, di, _n2 - _n1);                                \
    add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj);                          \
    _q1 = ~(_n2 + _xh);                                                 \
    umul_ppmm (_xh, _xl, _q1, d);                                       \
    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                            \
    _xh -= (d);                                                         \
    (r) = _xl + ((d) & _xh);                                            \
    (q) = _xh - _q1;                                                    \
  } while (0)
/* Exactly like udiv_qrnnd_preinv, but branch-free.  It is not clear which
   version is faster.  */
#define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di)                     \
  do {                                                                  \
    mp_limb_t _n2, _n10, _n1, _nadj, _q1;                               \
    mp_limb_t _xh, _xl;                                                 \
    _n2 = (nh);                                                         \
    _n10 = (nl);                                                        \
    _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1));          \
    _nadj = _n10 + (_n1 & (d));                                         \
    umul_ppmm (_xh, _xl, di, _n2 - _n1);                                \
    add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj);                          \
    _q1 = ~(_n2 + _xh);                                                 \
    umul_ppmm (_xh, _xl, _q1, d);                                       \
    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                            \
    _xh -= (d);                                                         \
    (r) = _xl + ((d) & _xh);                                            \
    (q) = _xh - _q1;                                                    \
  } while (0)
/* modlimb_invert() sets "inv" to the multiplicative inverse of "n" modulo
   2^BITS_PER_MP_LIMB, ie. so that inv*n == 1 mod 2^BITS_PER_MP_LIMB.
   "n" must be odd (otherwise such an inverse doesn't exist).

   This is not to be confused with invert_limb(), which is completely
   different.

   The table lookup gives an inverse with the low 8 bits valid, and each
   multiply step doubles the number of bits.  See Jebelean's exact division
   paper, end of section 4 (reference in gmp.texi).  */

#define modlimb_invert_table  __gmp_modlimb_invert_table
extern const unsigned char  modlimb_invert_table[128];
#if BITS_PER_MP_LIMB <= 32
#define modlimb_invert(inv,n)                                   \
  do {                                                          \
    mp_limb_t  __n = (n);                                       \
    mp_limb_t  __inv;                                           \
    ASSERT ((__n & 1) == 1);                                    \
    __inv = modlimb_invert_table[(__n&0xFF)/2]; /*  8 */        \
    __inv = 2 * __inv - __inv * __inv * __n;    /* 16 */        \
    __inv = 2 * __inv - __inv * __inv * __n;    /* 32 */        \
    ASSERT (__inv * __n == 1);                                  \
    (inv) = __inv;                                              \
  } while (0)
#endif

#if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64
#define modlimb_invert(inv,n)                                   \
  do {                                                          \
    mp_limb_t  __n = (n);                                       \
    mp_limb_t  __inv;                                           \
    ASSERT ((__n & 1) == 1);                                    \
    __inv = modlimb_invert_table[(__n&0xFF)/2]; /*  8 */        \
    __inv = 2 * __inv - __inv * __inv * __n;    /* 16 */        \
    __inv = 2 * __inv - __inv * __inv * __n;    /* 32 */        \
    __inv = 2 * __inv - __inv * __inv * __n;    /* 64 */        \
    ASSERT (__inv * __n == 1);                                  \
    (inv) = __inv;                                              \
  } while (0)
#endif
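
/* Editor's note: an illustrative fragment, not from the original header.
   With 32-bit limbs, modlimb_invert on n=3 gives inv=0xAAAAAAAB, since
   3 * 0xAAAAAAAB = 0x200000001 == 1 mod 2^32.  */
#if 0
  mp_limb_t  inv;
  modlimb_invert (inv, CNST_LIMB (3));
  ASSERT_ALWAYS (inv * CNST_LIMB (3) == 1);  /* product mod 2^BITS_PER_MP_LIMB */
#endif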
/* The `mode' attribute was introduced in GCC 2.2, but we can only distinguish
   between GCC 2 releases from 2.5, since __GNUC_MINOR__ wasn't introduced
   until then.  */
#if (__GNUC__ - 0 > 2 || defined (__GNUC_MINOR__)) && ! defined (__APPLE_CC__)
/* Define stuff for longlong.h.  */
typedef unsigned int UQItype    __attribute__ ((mode (QI)));
typedef          int SItype     __attribute__ ((mode (SI)));
typedef unsigned int USItype    __attribute__ ((mode (SI)));
typedef          int DItype     __attribute__ ((mode (DI)));
typedef unsigned int UDItype    __attribute__ ((mode (DI)));
#else
typedef unsigned char UQItype;
typedef          long SItype;
typedef unsigned long USItype;
#if defined _LONGLONG || defined _LONG_LONG_LIMB
typedef long long int DItype;
typedef unsigned long long int UDItype;
#else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w.  */
typedef long int DItype;
typedef unsigned long int UDItype;
#endif
#endif
typedef mp_limb_t UWtype;
typedef unsigned int UHWtype;
#define W_TYPE_SIZE BITS_PER_MP_LIMB
/* Define ieee_double_extract and _GMP_IEEE_FLOATS.  */

#if (defined (__arm__) && (defined (__ARMWEL__) || defined (__linux__)))
/* Special case for little endian ARM since floats remain in big-endian.  */
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int manh:20;
      unsigned int exp:11;
      unsigned int sig:1;
      unsigned int manl:32;
    } s;
  double d;
};
#else
#if defined (_LITTLE_ENDIAN) || defined (__LITTLE_ENDIAN__) \
 || defined (__alpha) \
 || defined (__clipper__) \
 || defined (__cris) \
 || defined (__i386__) \
 || defined (__i860__) \
 || defined (__i960__) \
 || defined (MIPSEL) || defined (_MIPSEL) \
 || defined (__ns32000__) \
 || defined (__WINNT) || defined (_WIN32)
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int manl:32;
      unsigned int manh:20;
      unsigned int exp:11;
      unsigned int sig:1;
    } s;
  double d;
};
#else /* Need this as an #else since the tests aren't made exclusive.  */
#if defined (_BIG_ENDIAN) || defined (__BIG_ENDIAN__) \
 || defined (__a29k__) || defined (_AM29K) \
 || defined (__arm__) \
 || (defined (__convex__) && defined (_IEEE_FLOAT_)) \
 || defined (_CRAYMPP) \
 || defined (__i370__) || defined (__mvs__) \
 || defined (__mc68000__) || defined (__mc68020__) || defined (__m68k__)\
 || defined(mc68020) \
 || defined (__m88000__) \
 || defined (MIPSEB) || defined (_MIPSEB) \
 || defined (__hppa) || defined (__hppa__) \
 || defined (__pyr__) \
 || defined (__ibm032__) \
 || defined (_IBMR2) || defined (_ARCH_PPC) \
 || defined (__sh__) \
 || defined (__sparc) || defined (sparc) \
 || defined (__we32k__)
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int sig:1;
      unsigned int exp:11;
      unsigned int manh:20;
      unsigned int manl:32;
    } s;
  double d;
};
#endif
#endif
#endif
/* Using "(2.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)))" doesn't work on
   SunOS 4.1.4 native /usr/ucb/cc (K&R), it comes out as -4294967296.0,
   presumably due to treating the mp_limb_t constant as signed rather than
   unsigned.  */
#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 2)))
#if BITS_PER_MP_LIMB == 64
#define LIMBS_PER_DOUBLE 2
#else
#define LIMBS_PER_DOUBLE 3
#endif
double __gmp_scale2 _PROTO ((double, int));
int __gmp_extract_double _PROTO ((mp_ptr, double));

extern int __gmp_junk;
extern const int __gmp_0;
#define GMP_ERROR(code)   (gmp_errno |= (code), __gmp_junk = 10/__gmp_0)
#define DIVIDE_BY_ZERO    GMP_ERROR(GMP_ERROR_DIVISION_BY_ZERO)
#define SQRT_OF_NEGATIVE  GMP_ERROR(GMP_ERROR_SQRT_OF_NEGATIVE)
#if defined _LONG_LONG_LIMB
#if defined (__STDC__)
#define CNST_LIMB(C) C##LL
#else
#define CNST_LIMB(C) C/**/LL
#endif
#else /* not _LONG_LONG_LIMB */
#if defined (__STDC__)
#define CNST_LIMB(C) C##L
#else
#define CNST_LIMB(C) C/**/L
#endif
#endif /* _LONG_LONG_LIMB */
/*** Stuff used by mpn/generic/prefsqr.c and mpn/generic/next_prime.c ***/
#if BITS_PER_MP_LIMB == 32
#define PP 0xC0CFD797L          /* 3 x 5 x 7 x 11 x 13 x ... x 29 */
#define PP_INVERTED 0x53E5645CL
#define PP_MAXPRIME 29
#define PP_MASK 0x208A28A8L
#endif

#if BITS_PER_MP_LIMB == 64
#define PP CNST_LIMB(0xE221F97C30E94E1D)        /* 3 x 5 x 7 x 11 x 13 x ... x 53 */
#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
#define PP_MAXPRIME 53
#define PP_MASK CNST_LIMB(0x208A20A08A28A8)
#endif
/* BIT1 means a result value in bit 1 (second least significant bit), with a
   zero bit representing +1 and a one bit representing -1.  Bits other than
   bit 1 are garbage.

   JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
   and their speed is important.  Expressions are used rather than
   conditionals to accumulate sign changes, which effectively means XORs
   instead of conditional JUMPs.  */
/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_S0(a) \
  (((a) == 1) | ((a) == -1))

/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_U0(a) \
  ((a) == 1)

/* (a/0), with a an mpz_t; is 1 if a=+/-1, 0 otherwise
   An mpz_t always has at least one limb of allocated space, so the fetch of
   the low limb is valid.  */
#define JACOBI_Z0(a) \
  (((SIZ(a) == 1) | (SIZ(a) == -1)) & (PTR(a)[0] == 1))
/* Convert a bit1 to +1 or -1.  */
#define JACOBI_BIT1_TO_PN(result_bit1) \
  (1 - ((result_bit1) & 2))

/* (2/b), with b unsigned and odd;
   is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
   hence obtained from (b>>1)^b */
#define JACOBI_TWO_U_BIT1(b) \
  (ASSERT (b & 1), (((b) >> 1) ^ (b)))

/* (2/b)^twos, with b unsigned and odd */
#define JACOBI_TWOS_U_BIT1(twos, b) \
  (((twos) << 1) & JACOBI_TWO_U_BIT1 (b))

/* (2/b)^twos, with b unsigned and odd */
#define JACOBI_TWOS_U(twos, b) \
  (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))

/* (a/b) effect due to sign of a: signed/unsigned, b odd;
   is (-1)^((b-1)/2) if a<0, or +1 if a>=0 */
#define JACOBI_ASGN_SU_BIT1(a, b) \
  ((((a) < 0) << 1) & (b))

/* (a/b) effect due to sign of b: signed/mpz;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_SZ_BIT1(a, b) \
  ((((a) < 0) & (SIZ(b) < 0)) << 1)

/* (a/b) effect due to sign of b: mpz/signed */
#define JACOBI_BSGN_ZS_BIT1(a, b) \
  JACOBI_BSGN_SZ_BIT1(b, a)
/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd.
   Is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4 or -1 if
   both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
   because this is used in a couple of places with only bit 1 of a or b
   valid.  */
#define JACOBI_RECIP_UU_BIT1(a, b) \
  ((a) & (b))
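
/* Editor's note: a worked instance of the bit 1 convention, not from the
   original header.  For b = 7 (7 mod 8 = 7, so (2/7) = +1):
   JACOBI_TWO_U_BIT1(7) = (7>>1)^7 = 3^7 = 4, bit 1 is 0, and
   JACOBI_BIT1_TO_PN(4) = 1 - (4 & 2) = 1.  For b = 3 (3 mod 8 = 3, so
   (2/3) = -1): (3>>1)^3 = 1^3 = 2, bit 1 is 1, giving 1 - 2 = -1.  */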
/* For testing and debugging.  */
#define MPZ_CHECK_FORMAT(z)                                     \
  (ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0),    \
   ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)))
#define MPZ_PROVOKE_REALLOC(z)                                  \
  do { ALLOC(z) = ABSIZ(z); } while (0)
#if TUNE_PROGRAM_BUILD
/* Some extras wanted when recompiling some .c files for use by the tune
   program.  Not part of a normal build.  */

extern mp_size_t mul_threshold[];
extern mp_size_t fft_modf_mul_threshold;
extern mp_size_t sqr_threshold[];
extern mp_size_t fft_modf_sqr_threshold;
extern mp_size_t bz_threshold[];
extern mp_size_t fib_threshold[];
extern mp_size_t powm_threshold[];
extern mp_size_t gcd_accel_threshold[];
extern mp_size_t gcdext_threshold[];
#undef KARATSUBA_MUL_THRESHOLD
#undef TOOM3_MUL_THRESHOLD
#undef FFT_MUL_TABLE
#undef FFT_MUL_THRESHOLD
#undef FFT_MODF_MUL_THRESHOLD
#undef KARATSUBA_SQR_THRESHOLD
#undef TOOM3_SQR_THRESHOLD
#undef FFT_SQR_TABLE
#undef FFT_SQR_THRESHOLD
#undef FFT_MODF_SQR_THRESHOLD
#undef BZ_THRESHOLD
#undef FIB_THRESHOLD
#undef POWM_THRESHOLD
#undef GCD_ACCEL_THRESHOLD
#undef GCDEXT_THRESHOLD
#define KARATSUBA_MUL_THRESHOLD  mul_threshold[0]
#define TOOM3_MUL_THRESHOLD      mul_threshold[1]
#define FFT_MUL_TABLE            0
#define FFT_MUL_THRESHOLD        mul_threshold[2]
#define FFT_MODF_MUL_THRESHOLD   fft_modf_mul_threshold
#define KARATSUBA_SQR_THRESHOLD  sqr_threshold[0]
#define TOOM3_SQR_THRESHOLD      sqr_threshold[1]
#define FFT_SQR_TABLE            0
#define FFT_SQR_THRESHOLD        sqr_threshold[2]
#define FFT_MODF_SQR_THRESHOLD   fft_modf_sqr_threshold
#define BZ_THRESHOLD             bz_threshold[0]
#define FIB_THRESHOLD            fib_threshold[0]
#define POWM_THRESHOLD           powm_threshold[0]
#define GCD_ACCEL_THRESHOLD      gcd_accel_threshold[0]
#define GCDEXT_THRESHOLD         gcdext_threshold[0]

#define TOOM3_MUL_THRESHOLD_LIMIT  700

#undef  FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS
extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];

#endif /* TUNE_PROGRAM_BUILD */
#if defined (__cplusplus)
}
#endif