/* Include file for internal GNU MP types and definitions.

   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.

Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
#include "gmp-mparam.h"
/* #include "longlong.h" */
/* When using gcc, make sure to use its builtin alloca.  */
#if ! defined (alloca) && defined (__GNUC__)
#define alloca __builtin_alloca
#define HAVE_ALLOCA
#endif
/* When using cc, do whatever necessary to allow use of alloca.  For many
   machines, this means including alloca.h.  IBM's compilers need a #pragma
   in "each module that needs to use alloca".  */
#if ! defined (alloca)
/* We need lots of variants for MIPS, to cover all versions and perversions
   of toolchains for MIPS.  */
#if defined (__mips) || defined (MIPSEL) || defined (MIPSEB) \
 || defined (_MIPSEL) || defined (_MIPSEB) || defined (__sgi) \
 || defined (__alpha) || defined (__sparc) || defined (sparc)
#include <alloca.h>
#define HAVE_ALLOCA
#endif
#if defined (__DECC)
#define alloca(x) __ALLOCA(x)
#define HAVE_ALLOCA
#endif
#endif

#if ! defined (HAVE_ALLOCA) || USE_STACK_ALLOC
#include "stack-alloc.h"
#else
#define TMP_DECL(m)
#define TMP_ALLOC(x) alloca(x)
#define TMP_MARK(m)
#define TMP_FREE(m)
#endif
/* Allocating various types.  */
#define TMP_ALLOC_TYPE(n,type)  ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_ALLOC_LIMBS(n)      TMP_ALLOC_TYPE(n,mp_limb_t)
#define TMP_ALLOC_MP_PTRS(n)    TMP_ALLOC_TYPE(n,mp_ptr)
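
/* Illustrative sketch only (a hypothetical helper, not part of GMP), kept
   under #if 0: a routine typically brackets its TMP_ALLOC calls with the
   TMP_DECL/TMP_MARK/TMP_FREE protocol, so the scratch space is reclaimed
   on exit whichever allocation scheme is in use.  */
#if 0
static void
example_scratch_user (mp_srcptr up, mp_size_t n)
{
  mp_ptr tp;
  TMP_DECL (marker);
  TMP_MARK (marker);
  tp = TMP_ALLOC_LIMBS (n);     /* n limbs of scratch */
  MPN_COPY (tp, up, n);         /* use the scratch space */
  TMP_FREE (marker);            /* releases tp */
}
#endif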
#if ! defined (__GNUC__)	/* FIXME: Test for C++ compilers here,
				   __DECC understands __inline */
#define inline			/* Empty */
#endif
#define ABS(x)    ((x) >= 0 ? (x) : -(x))
#define MIN(l,o)  ((l) < (o) ? (l) : (o))
#define MAX(h,i)  ((h) > (i) ? (h) : (i))
#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))
/* Field access macros.  */
#define SIZ(x)    ((x)->_mp_size)
#define ABSIZ(x)  ABS (SIZ (x))
#define PTR(x)    ((x)->_mp_d)
#define LIMBS(x)  ((x)->_mp_d)
#define EXP(x)    ((x)->_mp_exp)
#define PREC(x)   ((x)->_mp_prec)
#define ALLOC(x)  ((x)->_mp_alloc)
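
/* For instance (illustrative): for an mpz_t z holding -6 on a 32-bit limb
   machine, SIZ(z) is -1, ABSIZ(z) is 1, and PTR(z)[0] is 6.  */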
/* Extra casts because shorts are promoted to ints by "~" and "<<".  "-1"
   rather than "1" in SIGNED_TYPE_MIN avoids warnings from some compilers
   about arithmetic overflow.  */
#define UNSIGNED_TYPE_MAX(type)      ((type) ~ (type) 0)
#define UNSIGNED_TYPE_HIGHBIT(type)  ((type) ~ (UNSIGNED_TYPE_MAX(type) >> 1))
#define SIGNED_TYPE_MIN(type)        (((type) -1) << (8*sizeof(type)-1))
#define SIGNED_TYPE_MAX(type)        ((type) ~ SIGNED_TYPE_MIN(type))
#define SIGNED_TYPE_HIGHBIT(type)    SIGNED_TYPE_MIN(type)
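
/* Worked example (illustrative): with a 16-bit "short",
   UNSIGNED_TYPE_MAX (unsigned short) is 0xFFFF,
   UNSIGNED_TYPE_HIGHBIT (unsigned short) is 0x8000,
   SIGNED_TYPE_MIN (short) is -0x8000, and SIGNED_TYPE_MAX (short) 0x7FFF.  */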
#define MP_LIMB_T_MAX      UNSIGNED_TYPE_MAX (mp_limb_t)
#define MP_LIMB_T_HIGHBIT  UNSIGNED_TYPE_HIGHBIT (mp_limb_t)

#define MP_SIZE_T_MAX      SIGNED_TYPE_MAX (mp_size_t)

#ifndef ULONG_MAX
#define ULONG_MAX          UNSIGNED_TYPE_MAX (unsigned long)
#endif
#define ULONG_HIGHBIT      UNSIGNED_TYPE_HIGHBIT (unsigned long)
#define LONG_HIGHBIT       SIGNED_TYPE_HIGHBIT (long)
#ifndef LONG_MAX
#define LONG_MAX           SIGNED_TYPE_MAX (long)
#endif

#ifndef USHORT_MAX
#define USHORT_MAX         UNSIGNED_TYPE_MAX (unsigned short)
#endif
#define USHORT_HIGHBIT     UNSIGNED_TYPE_HIGHBIT (unsigned short)
#define SHORT_HIGHBIT      SIGNED_TYPE_HIGHBIT (short)
#ifndef SHORT_MAX
#define SHORT_MAX          SIGNED_TYPE_MAX (short)
#endif
/* Swap macros.  */
#define MP_LIMB_T_SWAP(x, y)                            \
  do {                                                  \
    mp_limb_t __mp_limb_t_swap__tmp = (x);              \
    (x) = (y);                                          \
    (y) = __mp_limb_t_swap__tmp;                        \
  } while (0)
#define MP_SIZE_T_SWAP(x, y)                            \
  do {                                                  \
    mp_size_t __mp_size_t_swap__tmp = (x);              \
    (x) = (y);                                          \
    (y) = __mp_size_t_swap__tmp;                        \
  } while (0)
#define MP_PTR_SWAP(x, y)                               \
  do {                                                  \
    mp_ptr __mp_ptr_swap__tmp = (x);                    \
    (x) = (y);                                          \
    (y) = __mp_ptr_swap__tmp;                           \
  } while (0)
#define MP_SRCPTR_SWAP(x, y)                            \
  do {                                                  \
    mp_srcptr __mp_srcptr_swap__tmp = (x);              \
    (x) = (y);                                          \
    (y) = __mp_srcptr_swap__tmp;                        \
  } while (0)
#define MPN_PTR_SWAP(xp,xs, yp,ys)                      \
  do {                                                  \
    MP_PTR_SWAP (xp, yp);                               \
    MP_SIZE_T_SWAP (xs, ys);                            \
  } while (0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)                   \
  do {                                                  \
    MP_SRCPTR_SWAP (xp, yp);                            \
    MP_SIZE_T_SWAP (xs, ys);                            \
  } while (0)
#define MPZ_PTR_SWAP(x, y)                              \
  do {                                                  \
    mpz_ptr __mpz_ptr_swap__tmp = (x);                  \
    (x) = (y);                                          \
    (y) = __mpz_ptr_swap__tmp;                          \
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y)                           \
  do {                                                  \
    mpz_srcptr __mpz_srcptr_swap__tmp = (x);            \
    (x) = (y);                                          \
    (y) = __mpz_srcptr_swap__tmp;                       \
  } while (0)
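
/* Illustrative sketch only (hypothetical code, not from GMP), kept under
   #if 0: the swap macros let iterative algorithms ping-pong between two
   buffers without copying limbs.  */
#if 0
static void
example_pingpong (mp_ptr ap, mp_ptr bp, mp_size_t n, int steps)
{
  int i;
  for (i = 0; i < steps; i++)
    {
      /* ... compute from ap into bp ... */
      MP_PTR_SWAP (ap, bp);     /* output becomes next input */
    }
}
#endif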
#if defined (__cplusplus)
extern "C" {
#endif
/* FIXME: These are purely internal, so do a search and replace to change
   them to __gmp forms, rather than using these macros.  */
#define _mp_allocate_func      __gmp_allocate_func
#define _mp_reallocate_func    __gmp_reallocate_func
#define _mp_free_func          __gmp_free_func
#define _mp_default_allocate   __gmp_default_allocate
#define _mp_default_reallocate __gmp_default_reallocate
#define _mp_default_free       __gmp_default_free
extern void * (*_mp_allocate_func) _PROTO ((size_t));
extern void * (*_mp_reallocate_func) _PROTO ((void *, size_t, size_t));
extern void   (*_mp_free_func) _PROTO ((void *, size_t));

void *_mp_default_allocate _PROTO ((size_t));
void *_mp_default_reallocate _PROTO ((void *, size_t, size_t));
void _mp_default_free _PROTO ((void *, size_t));

#define _MP_ALLOCATE_FUNC_TYPE(n,type) \
  ((type *) (*_mp_allocate_func) ((n) * sizeof (type)))
#define _MP_ALLOCATE_FUNC_LIMBS(n)   _MP_ALLOCATE_FUNC_TYPE(n,mp_limb_t)

#define _MP_FREE_FUNC_TYPE(p,n,type) (*_mp_free_func) (p, (n) * sizeof (type))
#define _MP_FREE_FUNC_LIMBS(p,n)     _MP_FREE_FUNC_TYPE(p,n,mp_limb_t)
#if (__STDC__-0) || defined (__cplusplus)

#else

#define const			/* Empty */
#define signed			/* Empty */

#endif
#if defined (__GNUC__) && defined (__i386__)
#if 0	/* check that these actually improve things */
#define MPN_COPY_INCR(DST, SRC, N)                                      \
  __asm__ ("cld\n\trep\n\tmovsl" : :                                    \
           "D" (DST), "S" (SRC), "c" (N) :                              \
           "cx", "di", "si", "memory")
#define MPN_COPY_DECR(DST, SRC, N)                                      \
  __asm__ ("std\n\trep\n\tmovsl" : :                                    \
           "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :      \
           "cx", "di", "si", "memory")
#define MPN_NORMALIZE_NOT_ZERO(P, N)                                    \
  do {                                                                  \
    __asm__ ("std\n\trepe\n\tscasl" : "=c" (N) :                        \
             "a" (0), "D" ((P) + (N) - 1), "0" (N) :                    \
             "cx", "di");                                               \
    (N)++;                                                              \
  } while (0)
#endif
#endif
#if HAVE_NATIVE_mpn_copyi
#define mpn_copyi __MPN(copyi)
void mpn_copyi _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif
/* Remap names of internal mpn functions.  */
#define __clz_tab       __MPN(clz_tab)
#define mpn_udiv_w_sdiv __MPN(udiv_w_sdiv)
#define mpn_reciprocal  __MPN(reciprocal)

#define mpn_sb_divrem_mn __MPN(sb_divrem_mn)
#define mpn_bz_divrem_n  __MPN(bz_divrem_n)
/* #define mpn_tdiv_q    __MPN(tdiv_q) */
#define mpn_kara_mul_n  __MPN(kara_mul_n)
void mpn_kara_mul_n _PROTO((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_kara_sqr_n  __MPN(kara_sqr_n)
void mpn_kara_sqr_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom3_mul_n  __MPN(toom3_mul_n)
void mpn_toom3_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom3_sqr_n  __MPN(toom3_sqr_n)
void mpn_toom3_sqr_n _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_fft_best_k  __MPN(fft_best_k)
int mpn_fft_best_k _PROTO ((mp_size_t n, int sqr));
#define mpn_mul_fft  __MPN(mul_fft)
void mpn_mul_fft _PROTO ((mp_ptr op, mp_size_t pl,
                          mp_srcptr n, mp_size_t nl,
                          mp_srcptr m, mp_size_t ml,
                          int k));
#define mpn_mul_fft_full  __MPN(mul_fft_full)
void mpn_mul_fft_full _PROTO ((mp_ptr op,
                               mp_srcptr n, mp_size_t nl,
                               mp_srcptr m, mp_size_t ml));

#define mpn_fft_next_size  __MPN(fft_next_size)
mp_size_t mpn_fft_next_size _PROTO ((mp_size_t pl, int k));

mp_limb_t mpn_sb_divrem_mn _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
mp_limb_t mpn_bz_divrem_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
/* void mpn_tdiv_q _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); */
/* Copy NLIMBS *limbs* from SRC to DST, NLIMBS==0 allowed.  */
#ifndef MPN_COPY_INCR
#if HAVE_NATIVE_mpn_copyi
#define MPN_COPY_INCR(DST, SRC, NLIMBS)   mpn_copyi (DST, SRC, NLIMBS)
#else
#define MPN_COPY_INCR(DST, SRC, NLIMBS)                 \
  do {                                                  \
    mp_size_t __i;                                      \
    for (__i = 0; __i < (NLIMBS); __i++)                \
      (DST)[__i] = (SRC)[__i];                          \
  } while (0)
#endif
#endif
#if HAVE_NATIVE_mpn_copyd
#define mpn_copyd __MPN(copyd)
void mpn_copyd _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif
/* NLIMBS==0 allowed */
#ifndef MPN_COPY_DECR
#if HAVE_NATIVE_mpn_copyd
#define MPN_COPY_DECR(DST, SRC, NLIMBS)   mpn_copyd (DST, SRC, NLIMBS)
#else
#define MPN_COPY_DECR(DST, SRC, NLIMBS)                 \
  do {                                                  \
    mp_size_t __i;                                      \
    for (__i = (NLIMBS) - 1; __i >= 0; __i--)           \
      (DST)[__i] = (SRC)[__i];                          \
  } while (0)
#endif
#endif
/* Define MPN_COPY for vector computers.  Since #pragma cannot be in a macro,
   rely on function inlining. */
#if defined (_CRAY) || defined (__uxp__)
static inline void
_MPN_COPY (d, s, n) mp_ptr d; mp_srcptr s; mp_size_t n;
{
  int i;                        /* Faster for Cray with plain int */
#pragma _CRI ivdep              /* Cray PVP systems */
#pragma loop noalias d,s        /* Fujitsu VPP systems */
  for (i = 0; i < n; i++)
    d[i] = s[i];
}
#define MPN_COPY _MPN_COPY
#endif

#ifndef MPN_COPY
#define MPN_COPY MPN_COPY_INCR
#endif
/* Zero NLIMBS *limbs* AT DST.  */
#ifndef MPN_ZERO
#define MPN_ZERO(DST, NLIMBS)                           \
  do {                                                  \
    mp_size_t __i;                                      \
    for (__i = 0; __i < (NLIMBS); __i++)                \
      (DST)[__i] = 0;                                   \
  } while (0)
#endif
#ifndef MPN_NORMALIZE
#define MPN_NORMALIZE(DST, NLIMBS)                              \
  do {                                                          \
    while ((NLIMBS) > 0 && (DST)[(NLIMBS) - 1] == 0)            \
      (NLIMBS)--;                                               \
  } while (0)
#endif
#ifndef MPN_NORMALIZE_NOT_ZERO
#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)                     \
  do {                                                          \
    while ((DST)[(NLIMBS) - 1] == 0)                            \
      (NLIMBS)--;                                               \
  } while (0)
#endif
/* Strip least significant zero limbs from ptr,size by incrementing ptr and
   decrementing size.  The number in ptr,size must be non-zero, ie. size!=0
   and somewhere a non-zero limb.  */
#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size)         \
  do {                                                  \
    ASSERT ((size) != 0);                               \
    while ((ptr)[0] == 0)                               \
      {                                                 \
        (ptr)++;                                        \
        (size)--;                                       \
        ASSERT (size >= 0);                             \
      }                                                 \
  } while (0)
/* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a
   temporary variable; it will be automatically cleared out at function
   return.  We use __x here to make it possible to accept both mpz_ptr and
   mpz_t arguments.  */
#define MPZ_TMP_INIT(X, NLIMBS)                                         \
  do {                                                                  \
    mpz_ptr __x = (X);                                                  \
    __x->_mp_alloc = (NLIMBS);                                          \
    __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB);     \
  } while (0)
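
/* Illustrative sketch only (hypothetical function, not from GMP), kept
   under #if 0: a temporary mpz_t sized for an intermediate product; no
   mpz_clear is needed since the limbs come from TMP_ALLOC.  */
#if 0
static void
example_tmp_mpz (mpz_ptr r, mpz_srcptr a, mp_size_t n)
{
  mpz_t t;
  TMP_DECL (marker);
  TMP_MARK (marker);
  MPZ_TMP_INIT (t, 2 * n + 1);  /* room for an n-by-n limb product */
  mpz_mul (t, a, a);
  mpz_add (r, r, t);
  TMP_FREE (marker);
}
#endif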
/* Realloc for an mpz_t WHAT if it has less than NEEDED limbs.  */
#define MPZ_REALLOC(what,needed)                        \
  do {                                                  \
    if ((needed) > ALLOC (what))                        \
      _mpz_realloc (what, needed);                      \
  } while (0)
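
/* For example (illustrative), a routine about to write wsize limbs into W
   would do "MPZ_REALLOC (w, wsize);" before storing through PTR(w).  */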
/* If KARATSUBA_MUL_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef KARATSUBA_MUL_THRESHOLD
#define KARATSUBA_MUL_THRESHOLD 32
#endif

/* If TOOM3_MUL_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef TOOM3_MUL_THRESHOLD
#define TOOM3_MUL_THRESHOLD 256
#endif

#ifndef KARATSUBA_SQR_THRESHOLD
#define KARATSUBA_SQR_THRESHOLD (2*KARATSUBA_MUL_THRESHOLD)
#endif

#ifndef TOOM3_SQR_THRESHOLD
#define TOOM3_SQR_THRESHOLD (2*TOOM3_MUL_THRESHOLD)
#endif
/* First k to use for an FFT modF multiply.  A modF FFT is an order
   log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
   whereas k=4 is 1.33 which is faster than toom3 at 1.485.  */
#define FFT_FIRST_K  4

/* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
#ifndef FFT_MODF_MUL_THRESHOLD
#define FFT_MODF_MUL_THRESHOLD   (TOOM3_MUL_THRESHOLD * 3)
#endif
#ifndef FFT_MODF_SQR_THRESHOLD
#define FFT_MODF_SQR_THRESHOLD   (TOOM3_SQR_THRESHOLD * 3)
#endif

/* Threshold at which FFT should be used to do an NxN -> 2N multiply.  This
   will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
   NxN->2N multiply and not recursing into itself is an order
   log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
   is the first better than toom3.  */
#ifndef FFT_MUL_THRESHOLD
#define FFT_MUL_THRESHOLD   (FFT_MODF_MUL_THRESHOLD * 10)
#endif
#ifndef FFT_SQR_THRESHOLD
#define FFT_SQR_THRESHOLD   (FFT_MODF_SQR_THRESHOLD * 10)
#endif
/* Table of thresholds for successive modF FFT "k"s.  The first entry is
   where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
   etc.  See mpn_fft_best_k(). */
#ifndef FFT_MUL_TABLE
#define FFT_MUL_TABLE                           \
  { TOOM3_MUL_THRESHOLD * 4,   /* k=5 */        \
    TOOM3_MUL_THRESHOLD * 8,   /* k=6 */        \
    TOOM3_MUL_THRESHOLD * 16,  /* k=7 */        \
    TOOM3_MUL_THRESHOLD * 32,  /* k=8 */        \
    TOOM3_MUL_THRESHOLD * 96,  /* k=9 */        \
    TOOM3_MUL_THRESHOLD * 288, /* k=10 */       \
    0 }
#endif
#ifndef FFT_SQR_TABLE
#define FFT_SQR_TABLE                           \
  { TOOM3_SQR_THRESHOLD * 4,   /* k=5 */        \
    TOOM3_SQR_THRESHOLD * 8,   /* k=6 */        \
    TOOM3_SQR_THRESHOLD * 16,  /* k=7 */        \
    TOOM3_SQR_THRESHOLD * 32,  /* k=8 */        \
    TOOM3_SQR_THRESHOLD * 96,  /* k=9 */        \
    TOOM3_SQR_THRESHOLD * 288, /* k=10 */       \
    0 }
#endif

#ifndef FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS   static const
#endif

#define MPN_FFT_TABLE_SIZE  16
/* Return non-zero if xp,xsize and yp,ysize overlap.
   If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
   overlap.  If both these are false, there's an overlap. */
#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
  ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
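
/* Worked example (illustrative): if xp == yp + 2 with xsize == ysize == 4,
   the regions share yp[2] and yp[3]; both tests hold, so the result is
   non-zero.  If instead xp == yp + 4 the blocks are merely adjacent,
   yp + ysize > xp fails, and the result is 0.  */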
/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
   ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
   does it always.  Generally assertions are meant for development, but
   might help when looking for a problem later too.

   ASSERT_NOCARRY() uses ASSERT() to check the expression is zero, but if
   assertion checking is disabled, the expression is still evaluated.  This
   is meant for use with routines like mpn_add_n() where the return value
   represents a carry or whatever that shouldn't occur.  For example,
   ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
#ifdef __LINE__
#define ASSERT_LINE  __LINE__
#else
#define ASSERT_LINE  -1
#endif

#ifdef __FILE__
#define ASSERT_FILE  __FILE__
#else
#define ASSERT_FILE  ""
#endif

int __gmp_assert_fail _PROTO((const char *filename, int linenum,
                              const char *expr));

#if HAVE_STRINGIZE
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
#else
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
#endif

#if HAVE_VOID
#define CAST_TO_VOID  (void)
#else
#define CAST_TO_VOID
#endif

#define ASSERT_ALWAYS(expr)  ((expr) ? 0 : ASSERT_FAIL (expr))

#if WANT_ASSERT
#define ASSERT(expr)           ASSERT_ALWAYS (expr)
#define ASSERT_NOCARRY(expr)   ASSERT_ALWAYS ((expr) == 0)
#else
#define ASSERT(expr)           (CAST_TO_VOID 0)
#define ASSERT_NOCARRY(expr)   (expr)
#endif
#if HAVE_NATIVE_mpn_com_n
#define mpn_com_n __MPN(com_n)
void mpn_com_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#else
#define mpn_com_n(d,s,n)                                \
  do {                                                  \
    mp_ptr     __d = (d);                               \
    mp_srcptr  __s = (s);                               \
    mp_size_t  __n = (n);                               \
    do                                                  \
      *__d++ = ~ *__s++;                                \
    while (--__n);                                      \
  } while (0)
#endif
#define MPN_LOGOPS_N_INLINE(d,s1,s2,n, dop, op, s2op)   \
  do {                                                  \
    mp_ptr     __d = (d);                               \
    mp_srcptr  __s1 = (s1);                             \
    mp_srcptr  __s2 = (s2);                             \
    mp_size_t  __n = (n);                               \
    do                                                  \
      *__d++ = dop (*__s1++ op s2op *__s2++);           \
    while (--__n);                                      \
  } while (0)
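
/* For instance, mpn_and_n below passes an empty dop, "&" for op and an
   empty s2op, so the inner statement expands (illustratively) to
   "*__d++ = (*__s1++ & *__s2++);", while mpn_nand_n passes "~" for dop,
   giving "*__d++ = ~ (*__s1++ & *__s2++);".  */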
#if HAVE_NATIVE_mpn_and_n
#define mpn_and_n __MPN(and_n)
void mpn_and_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_and_n(d,s1,s2,n)  MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&, )
#endif

#if HAVE_NATIVE_mpn_andn_n
#define mpn_andn_n __MPN(andn_n)
void mpn_andn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_andn_n(d,s1,s2,n)  MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&,~)
#endif

#if HAVE_NATIVE_mpn_nand_n
#define mpn_nand_n __MPN(nand_n)
void mpn_nand_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_nand_n(d,s1,s2,n)  MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,&, )
#endif

#if HAVE_NATIVE_mpn_ior_n
#define mpn_ior_n __MPN(ior_n)
void mpn_ior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_ior_n(d,s1,s2,n)  MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|, )
#endif

#if HAVE_NATIVE_mpn_iorn_n
#define mpn_iorn_n __MPN(iorn_n)
void mpn_iorn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_iorn_n(d,s1,s2,n)  MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|,~)
#endif

#if HAVE_NATIVE_mpn_nior_n
#define mpn_nior_n __MPN(nior_n)
void mpn_nior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_nior_n(d,s1,s2,n)  MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,|, )
#endif

#if HAVE_NATIVE_mpn_xor_n
#define mpn_xor_n __MPN(xor_n)
void mpn_xor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_xor_n(d,s1,s2,n)  MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,^, )
#endif

#if HAVE_NATIVE_mpn_xnor_n
#define mpn_xnor_n __MPN(xnor_n)
void mpn_xnor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_xnor_n(d,s1,s2,n)  MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,^, )
#endif
/* Structure for conversion between internal binary format and
   strings in base 2..36.  */
struct bases
{
  /* Number of digits in the conversion base that always fits in an mp_limb_t.
     For example, for base 10 on a machine where a mp_limb_t has 32 bits this
     is 9, since 10**9 is the largest number that fits into a mp_limb_t.  */
  int chars_per_limb;

  /* log(2)/log(conversion_base) */
  double chars_per_bit_exactly;

  /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
     factors of base.  Exception: For 2, 4, 8, etc, big_base is log2(base),
     i.e. the number of bits used to represent each digit in the base.  */
  mp_limb_t big_base;

  /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
     fixed-point number.  Instead of dividing by big_base an application can
     choose to multiply by big_base_inverted.  */
  mp_limb_t big_base_inverted;
};
#define __mp_bases __MPN(mp_bases)
extern const struct bases __mp_bases[];
extern mp_size_t __gmp_default_fp_limb_precision;
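
/* Worked example (illustrative): the __mp_bases entry for base 10 on a
   32-bit limb machine has chars_per_limb 9 and big_base 10**9 = 0x3B9ACA00,
   since 10**9 is the largest power of 10 fitting in a limb.  */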
#if defined (__i386__)
#define TARGET_REGISTER_STARVED 1
#else
#define TARGET_REGISTER_STARVED 0
#endif
/* Use a library function for invert_limb, if available. */
#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
#define mpn_invert_limb  __MPN(invert_limb)
mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t));
#define invert_limb(invxl,xl)  (invxl = __MPN(invert_limb) (xl))
#endif

#ifndef invert_limb
#define invert_limb(invxl,xl)                           \
  do {                                                  \
    mp_limb_t dummy;                                    \
    if (xl << 1 == 0)                                   \
      invxl = ~(mp_limb_t) 0;                           \
    else                                                \
      udiv_qrnnd (invxl, dummy, -xl, 0, xl);            \
  } while (0)
#endif
/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
   limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
   If this would yield overflow, DI should be the largest possible number
   (i.e., only ones).  For correct operation, the most significant bit of D
   has to be set.  Put the quotient in Q and the remainder in R.  */
#define udiv_qrnnd_preinv(q, r, nh, nl, d, di)                          \
  do {                                                                  \
    mp_limb_t _q, _ql, _r;                                              \
    mp_limb_t _xh, _xl;                                                 \
    umul_ppmm (_q, _ql, (nh), (di));                                    \
    _q += (nh);                 /* DI is 2**BITS_PER_MP_LIMB too small */\
    umul_ppmm (_xh, _xl, _q, (d));                                      \
    sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl);                         \
    if (_xh != 0)                                                       \
      {                                                                 \
        sub_ddmmss (_xh, _r, _xh, _r, 0, (d));                          \
        _q += 1;                                                        \
        if (_xh != 0)                                                   \
          {                                                             \
            sub_ddmmss (_xh, _r, _xh, _r, 0, (d));                      \
            _q += 1;                                                    \
          }                                                             \
      }                                                                 \
    if (_r >= (d))                                                      \
      {                                                                 \
        _r -= (d);                                                      \
        _q += 1;                                                        \
      }                                                                 \
    (r) = _r;                                                           \
    (q) = _q;                                                           \
  } while (0)
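
/* Worked example (illustrative): with 32-bit limbs and d = 0xC0000000,
   floor(2**64/d) = 0x155555555, so di = 0x155555555 - 2**32 = 0x55555555.  */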
/* Like udiv_qrnnd_preinv, but for any value D.  DNORM is D shifted left
   so that its most significant bit is set.  LGUP is ceil(log2(D)).  */
#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup)         \
  do {                                                                  \
    mp_limb_t _n2, _n10, _n1, _nadj, _q1;                               \
    mp_limb_t _xh, _xl;                                                 \
    _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup))) + ((nl) >> 1 >> ((lgup) - 1));\
    _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup));                         \
    _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1));          \
    _nadj = _n10 + (_n1 & (dnorm));                                     \
    umul_ppmm (_xh, _xl, di, _n2 - _n1);                                \
    add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj);                          \
    _q1 = ~(_n2 + _xh);                                                 \
    umul_ppmm (_xh, _xl, _q1, d);                                       \
    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                            \
    _xh -= (d);                                                         \
    (r) = _xl + ((d) & _xh);                                            \
    (q) = _xh - _q1;                                                    \
  } while (0)
/* Exactly like udiv_qrnnd_preinv, but branch-free.  It is not clear which
   version is faster.  */
#define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di)                     \
  do {                                                                  \
    mp_limb_t _n2, _n10, _n1, _nadj, _q1;                               \
    mp_limb_t _xh, _xl;                                                 \
    _n2 = (nh);                                                         \
    _n10 = (nl);                                                        \
    _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1));          \
    _nadj = _n10 + (_n1 & (d));                                         \
    umul_ppmm (_xh, _xl, di, _n2 - _n1);                                \
    add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj);                          \
    _q1 = ~(_n2 + _xh);                                                 \
    umul_ppmm (_xh, _xl, _q1, d);                                       \
    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                            \
    _xh -= (d);                                                         \
    (r) = _xl + ((d) & _xh);                                            \
    (q) = _xh - _q1;                                                    \
  } while (0)
/* modlimb_invert() sets "inv" to the multiplicative inverse of "n" modulo
   2^BITS_PER_MP_LIMB, ie. so that inv*n == 1 mod 2^BITS_PER_MP_LIMB.
   "n" must be odd (otherwise such an inverse doesn't exist).

   This is not to be confused with invert_limb(), which is completely
   different.

   The table lookup gives an inverse with the low 8 bits valid, and each
   multiply step doubles the number of bits.  See Jebelean's exact division
   paper, end of section 4 (reference in gmp.texi). */

#define modlimb_invert_table  __gmp_modlimb_invert_table
extern const unsigned char  modlimb_invert_table[128];
#if BITS_PER_MP_LIMB <= 32
#define modlimb_invert(inv,n)                                   \
  do {                                                          \
    mp_limb_t  __n = (n);                                       \
    mp_limb_t  __inv;                                           \
    ASSERT ((__n & 1) == 1);                                    \
    __inv = modlimb_invert_table[(__n&0xFF)/2]; /*  8 */        \
    __inv = 2 * __inv - __inv * __inv * __n;    /* 16 */        \
    __inv = 2 * __inv - __inv * __inv * __n;    /* 32 */        \
    ASSERT (__inv * __n == 1);                                  \
    (inv) = __inv;                                              \
  } while (0)
#endif
#if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64
#define modlimb_invert(inv,n)                                   \
  do {                                                          \
    mp_limb_t  __n = (n);                                       \
    mp_limb_t  __inv;                                           \
    ASSERT ((__n & 1) == 1);                                    \
    __inv = modlimb_invert_table[(__n&0xFF)/2]; /*  8 */        \
    __inv = 2 * __inv - __inv * __inv * __n;    /* 16 */        \
    __inv = 2 * __inv - __inv * __inv * __n;    /* 32 */        \
    __inv = 2 * __inv - __inv * __inv * __n;    /* 64 */        \
    ASSERT (__inv * __n == 1);                                  \
    (inv) = __inv;                                              \
  } while (0)
#endif
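
/* Worked example (illustrative): for n = 3 the table gives the 8-bit
   inverse 0xAB (3*0xAB = 0x201 == 1 mod 2**8); successive Newton steps
   give 0xAAAB mod 2**16 and 0xAAAAAAAB mod 2**32, and indeed
   3 * 0xAAAAAAAB = 0x200000001 == 1 mod 2**32.  */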
/* The `mode' attribute was introduced in GCC 2.2, but we can only distinguish
   between GCC 2 releases from 2.5, since __GNUC_MINOR__ wasn't introduced
   until then.  */
#if (__GNUC__ - 0 > 2 || defined (__GNUC_MINOR__)) && ! defined (__APPLE_CC__)
/* Define stuff for longlong.h.  */
typedef unsigned int UQItype    __attribute__ ((mode (QI)));
typedef          int SItype     __attribute__ ((mode (SI)));
typedef unsigned int USItype    __attribute__ ((mode (SI)));
typedef          int DItype     __attribute__ ((mode (DI)));
typedef unsigned int UDItype    __attribute__ ((mode (DI)));
#else
typedef unsigned char UQItype;
typedef          long SItype;
typedef unsigned long USItype;
#if defined _LONGLONG || defined _LONG_LONG_LIMB
typedef long long int DItype;
typedef unsigned long long int UDItype;
#else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w.  */
typedef long int DItype;
typedef unsigned long int UDItype;
#endif
#endif
typedef mp_limb_t UWtype;
typedef unsigned int UHWtype;
#define W_TYPE_SIZE BITS_PER_MP_LIMB
/* Define ieee_double_extract and _GMP_IEEE_FLOATS.  */

#if (defined (__arm__) && (defined (__ARMWEL__) || defined (__linux__)))
/* Special case for little endian ARM since floats remain in big-endian.  */
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int manh:20;
      unsigned int exp:11;
      unsigned int sig:1;
      unsigned int manl:32;
    } s;
  double d;
};
#else
#if defined (_LITTLE_ENDIAN) || defined (__LITTLE_ENDIAN__)             \
 || defined (__alpha)                                                   \
 || defined (__clipper__)                                               \
 || defined (__cris)                                                    \
 || defined (__i386__)                                                  \
 || defined (__i860__)                                                  \
 || defined (__i960__)                                                  \
 || defined (MIPSEL) || defined (_MIPSEL)                               \
 || defined (__ns32000__)                                               \
 || defined (__WINNT) || defined (_WIN32)
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int manl:32;
      unsigned int manh:20;
      unsigned int exp:11;
      unsigned int sig:1;
    } s;
  double d;
};
#else /* Need this as an #else since the tests aren't made exclusive.  */
#if defined (_BIG_ENDIAN) || defined (__BIG_ENDIAN__)                   \
 || defined (__a29k__) || defined (_AM29K)                              \
 || defined (__arm__)                                                   \
 || (defined (__convex__) && defined (_IEEE_FLOAT_))                    \
 || defined (_CRAYMPP)                                                  \
 || defined (__i370__) || defined (__mvs__)                             \
 || defined (__mc68000__) || defined (__mc68020__) || defined (__m68k__)\
 || defined(mc68020)                                                    \
 || defined (__m88000__)                                                \
 || defined (MIPSEB) || defined (_MIPSEB)                               \
 || defined (__hppa) || defined (__hppa__)                              \
 || defined (__pyr__)                                                   \
 || defined (__ibm032__)                                                \
 || defined (_IBMR2) || defined (_ARCH_PPC)                             \
 || defined (__sh__)                                                    \
 || defined (__sparc) || defined (sparc)                                \
 || defined (__we32k__)
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int sig:1;
      unsigned int exp:11;
      unsigned int manh:20;
      unsigned int manl:32;
    } s;
  double d;
};
#endif
#endif
#endif
/* Using "(2.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)))" doesn't work on
   SunOS 4.1.4 native /usr/ucb/cc (K&R), it comes out as -4294967296.0,
   presumably due to treating the mp_limb_t constant as signed rather than
   unsigned.  */
#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 2)))
#if BITS_PER_MP_LIMB == 64
#define LIMBS_PER_DOUBLE 2
#else
#define LIMBS_PER_DOUBLE 3
#endif
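
/* Worked example (illustrative): with 32-bit limbs MP_BASE_AS_DOUBLE is
   4.0 * 2**30 = 2**32 = 4294967296.0, i.e. the limb radix as a double.  */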
double __gmp_scale2 _PROTO ((double, int));
int __gmp_extract_double _PROTO ((mp_ptr, double));

extern int __gmp_junk;
extern const int __gmp_0;
#define GMP_ERROR(code)   (gmp_errno |= (code), __gmp_junk = 10/__gmp_0)
#define DIVIDE_BY_ZERO    GMP_ERROR(GMP_ERROR_DIVISION_BY_ZERO)
#define SQRT_OF_NEGATIVE  GMP_ERROR(GMP_ERROR_SQRT_OF_NEGATIVE)
#if defined _LONG_LONG_LIMB
#if defined (__STDC__)
#define CNST_LIMB(C) C##LL
#else
#define CNST_LIMB(C) C/**/LL
#endif
#else /* not _LONG_LONG_LIMB */
#if defined (__STDC__)
#define CNST_LIMB(C) C##L
#else
#define CNST_LIMB(C) C/**/L
#endif
#endif /* _LONG_LONG_LIMB */
/*** Stuff used by mpn/generic/prefsqr.c and mpn/generic/next_prime.c ***/
#if BITS_PER_MP_LIMB == 32
#define PP 0xC0CFD797L          /* 3 x 5 x 7 x 11 x 13 x ... x 29 */
#define PP_INVERTED 0x53E5645CL
#define PP_MAXPRIME 29
#define PP_MASK 0x208A28A8L
#endif

#if BITS_PER_MP_LIMB == 64
#define PP CNST_LIMB(0xE221F97C30E94E1D)        /* 3 x 5 x 7 x 11 x 13 x ... x 53 */
#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
#define PP_MAXPRIME 53
#define PP_MASK CNST_LIMB(0x208A20A08A28A8)
#endif
/* BIT1 means a result value in bit 1 (second least significant bit), with a
   zero bit representing +1 and a one bit representing -1.  Bits other than
   bit 1 are garbage.

   JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
   and their speed is important.  Expressions are used rather than
   conditionals to accumulate sign changes, which effectively means XORs
   instead of conditional JUMPs. */
/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_S0(a) \
  (((a) == 1) | ((a) == -1))

/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_U0(a) \
  ((a) == 1)

/* (a/0), with a an mpz_t; is 1 if a=+/-1, 0 otherwise
   An mpz_t always has at least one limb of allocated space, so the fetch of
   the low limb is valid. */
#define JACOBI_Z0(a) \
  (((SIZ(a) == 1) | (SIZ(a) == -1)) & (PTR(a)[0] == 1))
/* Convert a bit1 to +1 or -1. */
#define JACOBI_BIT1_TO_PN(result_bit1) \
  (1 - ((result_bit1) & 2))

/* (2/b), with b unsigned and odd;
   is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
   hence obtained from (b>>1)^b */
#define JACOBI_TWO_U_BIT1(b) \
  (ASSERT (b & 1), (((b) >> 1) ^ (b)))

/* (2/b)^twos, with b unsigned and odd */
#define JACOBI_TWOS_U_BIT1(twos, b) \
  (((twos) << 1) & JACOBI_TWO_U_BIT1 (b))

/* (2/b)^twos, with b unsigned and odd */
#define JACOBI_TWOS_U(twos, b) \
  (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
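
/* Worked example (illustrative): for b = 7, JACOBI_TWO_U_BIT1 gives
   (7>>1)^7 = 3^7 = 4, whose bit 1 is 0, i.e. (2/7) = +1, matching
   7 == 7 mod 8.  For b = 3 it gives 1^3 = 2, bit 1 set, i.e. (2/3) = -1.  */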
/* (a/b) effect due to sign of a: signed/unsigned, b odd;
   is (-1)^((b-1)/2) if a<0, or +1 if a>=0 */
#define JACOBI_ASGN_SU_BIT1(a, b) \
  ((((a) < 0) << 1) & (b))

/* (a/b) effect due to sign of b: signed/mpz;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_SZ_BIT1(a, b) \
  ((((a) < 0) & (SIZ(b) < 0)) << 1)

/* (a/b) effect due to sign of b: mpz/signed */
#define JACOBI_BSGN_ZS_BIT1(a, b) \
  JACOBI_BSGN_SZ_BIT1(b, a)
/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd.
   Is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4 or -1 if
   both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
   because this is used in a couple of places with only bit 1 of a or b
   valid. */
#define JACOBI_RECIP_UU_BIT1(a, b) \
  ((int) ((a) & (b)))
/* For testing and debugging.  */
#define MPZ_CHECK_FORMAT(z)                                     \
  (ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0),    \
   ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)))
#define MPZ_PROVOKE_REALLOC(z) \
  do { ALLOC(z) = ABSIZ(z); } while (0)
#if TUNE_PROGRAM_BUILD
/* Some extras wanted when recompiling some .c files for use by the tune
   program.  Not part of a normal build.  */

extern mp_size_t mul_threshold[];
extern mp_size_t fft_modf_mul_threshold;
extern mp_size_t sqr_threshold[];
extern mp_size_t fft_modf_sqr_threshold;
extern mp_size_t bz_threshold[];
extern mp_size_t fib_threshold[];
extern mp_size_t powm_threshold[];
extern mp_size_t gcd_accel_threshold[];
extern mp_size_t gcdext_threshold[];
#undef KARATSUBA_MUL_THRESHOLD
#undef TOOM3_MUL_THRESHOLD
#undef FFT_MUL_TABLE
#undef FFT_MUL_THRESHOLD
#undef FFT_MODF_MUL_THRESHOLD
#undef KARATSUBA_SQR_THRESHOLD
#undef TOOM3_SQR_THRESHOLD
#undef FFT_SQR_TABLE
#undef FFT_SQR_THRESHOLD
#undef FFT_MODF_SQR_THRESHOLD
#undef BZ_THRESHOLD
#undef FIB_THRESHOLD
#undef POWM_THRESHOLD
#undef GCD_ACCEL_THRESHOLD
#undef GCDEXT_THRESHOLD
#define KARATSUBA_MUL_THRESHOLD  mul_threshold[0]
#define TOOM3_MUL_THRESHOLD      mul_threshold[1]
#define FFT_MUL_TABLE            0
#define FFT_MUL_THRESHOLD        mul_threshold[2]
#define FFT_MODF_MUL_THRESHOLD   fft_modf_mul_threshold
#define KARATSUBA_SQR_THRESHOLD  sqr_threshold[0]
#define TOOM3_SQR_THRESHOLD      sqr_threshold[1]
#define FFT_SQR_TABLE            0
#define FFT_SQR_THRESHOLD        sqr_threshold[2]
#define FFT_MODF_SQR_THRESHOLD   fft_modf_sqr_threshold
#define BZ_THRESHOLD             bz_threshold[0]
#define FIB_THRESHOLD            fib_threshold[0]
#define POWM_THRESHOLD           powm_threshold[0]
#define GCD_ACCEL_THRESHOLD      gcd_accel_threshold[0]
#define GCDEXT_THRESHOLD         gcdext_threshold[0]
#define TOOM3_MUL_THRESHOLD_LIMIT  700

#undef  FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS
extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];

#endif /* TUNE_PROGRAM_BUILD */
#if defined (__cplusplus)
}
#endif