From a796e51208fb0e56d66f223c01ee6a4fbcbf8e98 Mon Sep 17 00:00:00 2001 From: simonm Date: Fri, 5 Jun 1998 14:37:50 +0000 Subject: [PATCH] [project @ 1998-06-05 14:37:50 by simonm] Import GMP 2.0.2 --- ghc/rts/gmp/mpn/cray/gmp-mparam.h | 27 +++ ghc/rts/gmp/mpn/generic/add_n.c | 62 +++++ ghc/rts/gmp/mpn/generic/addmul_1.c | 65 ++++++ ghc/rts/gmp/mpn/generic/bdivmod.c | 129 +++++++++++ ghc/rts/gmp/mpn/generic/cmp.c | 56 +++++ ghc/rts/gmp/mpn/generic/divrem.c | 245 ++++++++++++++++++++ ghc/rts/gmp/mpn/generic/divrem_1.c | 58 +++++ ghc/rts/gmp/mpn/generic/dump.c | 20 ++ ghc/rts/gmp/mpn/generic/gcd.c | 402 ++++++++++++++++++++++++++++++++ ghc/rts/gmp/mpn/generic/gcd_1.c | 73 ++++++ ghc/rts/gmp/mpn/generic/gcdext.c | 441 ++++++++++++++++++++++++++++++++++++ 11 files changed, 1578 insertions(+) create mode 100644 ghc/rts/gmp/mpn/cray/gmp-mparam.h create mode 100644 ghc/rts/gmp/mpn/generic/add_n.c create mode 100644 ghc/rts/gmp/mpn/generic/addmul_1.c create mode 100644 ghc/rts/gmp/mpn/generic/bdivmod.c create mode 100644 ghc/rts/gmp/mpn/generic/cmp.c create mode 100644 ghc/rts/gmp/mpn/generic/divrem.c create mode 100644 ghc/rts/gmp/mpn/generic/divrem_1.c create mode 100644 ghc/rts/gmp/mpn/generic/dump.c create mode 100644 ghc/rts/gmp/mpn/generic/gcd.c create mode 100644 ghc/rts/gmp/mpn/generic/gcd_1.c create mode 100644 ghc/rts/gmp/mpn/generic/gcdext.c diff --git a/ghc/rts/gmp/mpn/cray/gmp-mparam.h b/ghc/rts/gmp/mpn/cray/gmp-mparam.h new file mode 100644 index 0000000..349c812 --- /dev/null +++ b/ghc/rts/gmp/mpn/cray/gmp-mparam.h @@ -0,0 +1,27 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 +#define BITS_PER_LONGINT 64 +#define BITS_PER_INT 64 +#define BITS_PER_SHORTINT 32 +#define BITS_PER_CHAR 8 diff --git a/ghc/rts/gmp/mpn/generic/add_n.c b/ghc/rts/gmp/mpn/generic/add_n.c new file mode 100644 index 0000000..9d71df1 --- /dev/null +++ b/ghc/rts/gmp/mpn/generic/add_n.c @@ -0,0 +1,62 @@ +/* mpn_add_n -- Add two limb vectors of equal, non-zero length. + +Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +mp_limb_t +#if __STDC__ +mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size) +#else +mpn_add_n (res_ptr, s1_ptr, s2_ptr, size) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + register mp_srcptr s2_ptr; + mp_size_t size; +#endif +{ + register mp_limb_t x, y, cy; + register mp_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do + { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to one addend */ + cy = (y < cy); /* get out carry from that addition */ + y = x + y; /* add other addend */ + cy = (y < x) + cy; /* get out carry from that add, combine */ + res_ptr[j] = y; + } + while (++j != 0); + + return cy; +} diff --git a/ghc/rts/gmp/mpn/generic/addmul_1.c b/ghc/rts/gmp/mpn/generic/addmul_1.c new file mode 100644 index 0000000..3a5e214 --- /dev/null +++ b/ghc/rts/gmp/mpn/generic/addmul_1.c @@ -0,0 +1,65 @@ +/* mpn_addmul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR + by S2_LIMB, add the S1_SIZE least significant limbs of the product to the + limb vector pointed to by RES_PTR. Return the most significant limb of + the product, adjusted for carry-out from the addition. + +Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +mp_limb_t +mpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + mp_size_t s1_size; + register mp_limb_t s2_limb; +{ + register mp_limb_t cy_limb; + register mp_size_t j; + register mp_limb_t prod_high, prod_low; + register mp_limb_t x; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. */ + res_ptr -= j; + s1_ptr -= j; + + cy_limb = 0; + do + { + umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb) + prod_high; + + x = res_ptr[j]; + prod_low = x + prod_low; + cy_limb += (prod_low < x); + res_ptr[j] = prod_low; + } + while (++j != 0); + + return cy_limb; +} diff --git a/ghc/rts/gmp/mpn/generic/bdivmod.c b/ghc/rts/gmp/mpn/generic/bdivmod.c new file mode 100644 index 0000000..f095288 --- /dev/null +++ b/ghc/rts/gmp/mpn/generic/bdivmod.c @@ -0,0 +1,129 @@ +/* mpn/bdivmod.c: mpn_bdivmod for computing U/V mod 2^d. + +Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* q_high = mpn_bdivmod (qp, up, usize, vp, vsize, d). + + Puts the low d/BITS_PER_MP_LIMB limbs of Q = U / V mod 2^d at qp, and + returns the high d%BITS_PER_MP_LIMB bits of Q as the result. + + Also, U - Q * V mod 2^(usize*BITS_PER_MP_LIMB) is placed at up. Since the + low d/BITS_PER_MP_LIMB limbs of this difference are zero, the code allows + the limb vectors at qp to overwrite the low limbs at up, provided qp <= up. + + Preconditions: + 1. V is odd. + 2. usize * BITS_PER_MP_LIMB >= d. + 3. If Q and U overlap, qp <= up. + + Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu) + + Funding for this work has been partially provided by Conselho Nacional + de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant + 301314194-2, and was done while I was a visiting reseacher in the Instituto + de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS). + + References: + T. Jebelean, An algorithm for exact division, Journal of Symbolic + Computation, v. 15, 1993, pp. 169-180. + + K. Weber, The accelerated integer GCD algorithm, ACM Transactions on + Mathematical Software, v. 21 (March), 1995, pp. 111-122. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +mp_limb_t +#if __STDC__ +mpn_bdivmod (mp_ptr qp, mp_ptr up, mp_size_t usize, + mp_srcptr vp, mp_size_t vsize, unsigned long int d) +#else +mpn_bdivmod (qp, up, usize, vp, vsize, d) + mp_ptr qp; + mp_ptr up; + mp_size_t usize; + mp_srcptr vp; + mp_size_t vsize; + unsigned long int d; +#endif +{ + /* Cache for v_inv is used to make mpn_accelgcd faster. */ + static mp_limb_t previous_low_vlimb = 0; + static mp_limb_t v_inv; /* 1/V mod 2^BITS_PER_MP_LIMB. */ + + if (vp[0] != previous_low_vlimb) /* Cache miss; compute v_inv. */ + { + mp_limb_t v = previous_low_vlimb = vp[0]; + mp_limb_t make_zero = 1; + mp_limb_t two_i = 1; + v_inv = 0; + do + { + while ((two_i & make_zero) == 0) + two_i <<= 1, v <<= 1; + v_inv += two_i; + make_zero -= v; + } + while (make_zero); + } + + /* Need faster computation for some common cases in mpn_accelgcd. */ + if (usize == 2 && vsize == 2 && + (d == BITS_PER_MP_LIMB || d == 2*BITS_PER_MP_LIMB)) + { + mp_limb_t hi, lo; + mp_limb_t q = up[0] * v_inv; + umul_ppmm (hi, lo, q, vp[0]); + up[0] = 0, up[1] -= hi + q*vp[1], qp[0] = q; + if (d == 2*BITS_PER_MP_LIMB) + q = up[1] * v_inv, up[1] = 0, qp[1] = q; + return 0; + } + + /* Main loop. */ + while (d >= BITS_PER_MP_LIMB) + { + mp_limb_t q = up[0] * v_inv; + mp_limb_t b = mpn_submul_1 (up, vp, MIN (usize, vsize), q); + if (usize > vsize) + mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b); + d -= BITS_PER_MP_LIMB; + up += 1, usize -= 1; + *qp++ = q; + } + + if (d) + { + mp_limb_t b; + mp_limb_t q = (up[0] * v_inv) & (((mp_limb_t)1< vsize) + mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b); + return q; + } + + return 0; +} diff --git a/ghc/rts/gmp/mpn/generic/cmp.c b/ghc/rts/gmp/mpn/generic/cmp.c new file mode 100644 index 0000000..4e9c60d --- /dev/null +++ b/ghc/rts/gmp/mpn/generic/cmp.c @@ -0,0 +1,56 @@ +/* mpn_cmp -- Compare two low-level natural-number integers. 
+ +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +/* Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE. + There are no restrictions on the relative sizes of + the two arguments. + Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2. */ + +int +#if __STDC__ +mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size) +#else +mpn_cmp (op1_ptr, op2_ptr, size) + mp_srcptr op1_ptr; + mp_srcptr op2_ptr; + mp_size_t size; +#endif +{ + mp_size_t i; + mp_limb_t op1_word, op2_word; + + for (i = size - 1; i >= 0; i--) + { + op1_word = op1_ptr[i]; + op2_word = op2_ptr[i]; + if (op1_word != op2_word) + goto diff; + } + return 0; + diff: + /* This can *not* be simplified to + op2_word - op2_word + since that expression might give signed overflow. */ + return (op1_word > op2_word) ? 1 : -1; +} diff --git a/ghc/rts/gmp/mpn/generic/divrem.c b/ghc/rts/gmp/mpn/generic/divrem.c new file mode 100644 index 0000000..1fe865a --- /dev/null +++ b/ghc/rts/gmp/mpn/generic/divrem.c @@ -0,0 +1,245 @@ +/* mpn_divrem -- Divide natural numbers, producing both remainder and + quotient. 
+ +Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write + the NSIZE-DSIZE least significant quotient limbs at QP + and the DSIZE long remainder at NP. If QEXTRA_LIMBS is + non-zero, generate that many fraction bits and append them after the + other quotient limbs. + Return the most significant limb of the quotient, this is always 0 or 1. + + Preconditions: + 0. NSIZE >= DSIZE. + 1. The most significant bit of the divisor must be set. + 2. QP must either not overlap with the input operands at all, or + QP + DSIZE >= NP must hold true. (This means that it's + possible to put the quotient in the high part of NUM, right after the + remainder in NUM. + 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero. 
*/ + +mp_limb_t +#if __STDC__ +mpn_divrem (mp_ptr qp, mp_size_t qextra_limbs, + mp_ptr np, mp_size_t nsize, + mp_srcptr dp, mp_size_t dsize) +#else +mpn_divrem (qp, qextra_limbs, np, nsize, dp, dsize) + mp_ptr qp; + mp_size_t qextra_limbs; + mp_ptr np; + mp_size_t nsize; + mp_srcptr dp; + mp_size_t dsize; +#endif +{ + mp_limb_t most_significant_q_limb = 0; + + switch (dsize) + { + case 0: + /* We are asked to divide by zero, so go ahead and do it! (To make + the compiler not remove this statement, return the value.) */ + return 1 / dsize; + + case 1: + { + mp_size_t i; + mp_limb_t n1; + mp_limb_t d; + + d = dp[0]; + n1 = np[nsize - 1]; + + if (n1 >= d) + { + n1 -= d; + most_significant_q_limb = 1; + } + + qp += qextra_limbs; + for (i = nsize - 2; i >= 0; i--) + udiv_qrnnd (qp[i], n1, n1, np[i], d); + qp -= qextra_limbs; + + for (i = qextra_limbs - 1; i >= 0; i--) + udiv_qrnnd (qp[i], n1, n1, 0, d); + + np[0] = n1; + } + break; + + case 2: + { + mp_size_t i; + mp_limb_t n1, n0, n2; + mp_limb_t d1, d0; + + np += nsize - 2; + d1 = dp[1]; + d0 = dp[0]; + n1 = np[1]; + n0 = np[0]; + + if (n1 >= d1 && (n1 > d1 || n0 >= d0)) + { + sub_ddmmss (n1, n0, n1, n0, d1, d0); + most_significant_q_limb = 1; + } + + for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) + { + mp_limb_t q; + mp_limb_t r; + + if (i >= qextra_limbs) + np--; + else + np[0] = 0; + + if (n1 == d1) + { + /* Q should be either 111..111 or 111..110. Need special + treatment of this rare case as normal division would + give overflow. */ + q = ~(mp_limb_t) 0; + + r = n0 + d1; + if (r < d1) /* Carry in the addition? */ + { + add_ssaaaa (n1, n0, r - d0, np[0], 0, d0); + qp[i] = q; + continue; + } + n1 = d0 - (d0 != 0); + n0 = -d0; + } + else + { + udiv_qrnnd (q, r, n1, n0, d1); + umul_ppmm (n1, n0, d0, q); + } + + n2 = np[0]; + q_test: + if (n1 > r || (n1 == r && n0 > n2)) + { + /* The estimated Q was too large. 
*/ + q--; + + sub_ddmmss (n1, n0, n1, n0, 0, d0); + r += d1; + if (r >= d1) /* If not carry, test Q again. */ + goto q_test; + } + + qp[i] = q; + sub_ddmmss (n1, n0, r, n2, n1, n0); + } + np[1] = n1; + np[0] = n0; + } + break; + + default: + { + mp_size_t i; + mp_limb_t dX, d1, n0; + + np += nsize - dsize; + dX = dp[dsize - 1]; + d1 = dp[dsize - 2]; + n0 = np[dsize - 1]; + + if (n0 >= dX) + { + if (n0 > dX || mpn_cmp (np, dp, dsize - 1) >= 0) + { + mpn_sub_n (np, np, dp, dsize); + n0 = np[dsize - 1]; + most_significant_q_limb = 1; + } + } + + for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) + { + mp_limb_t q; + mp_limb_t n1, n2; + mp_limb_t cy_limb; + + if (i >= qextra_limbs) + { + np--; + n2 = np[dsize]; + } + else + { + n2 = np[dsize - 1]; + MPN_COPY_DECR (np + 1, np, dsize); + np[0] = 0; + } + + if (n0 == dX) + /* This might over-estimate q, but it's probably not worth + the extra code here to find out. */ + q = ~(mp_limb_t) 0; + else + { + mp_limb_t r; + + udiv_qrnnd (q, r, n0, np[dsize - 1], dX); + umul_ppmm (n1, n0, d1, q); + + while (n1 > r || (n1 == r && n0 > np[dsize - 2])) + { + q--; + r += dX; + if (r < dX) /* I.e. "carry in previous addition?" */ + break; + n1 -= n0 < d1; + n0 -= d1; + } + } + + /* Possible optimization: We already have (q * n0) and (1 * n1) + after the calculation of q. Taking advantage of that, we + could make this loop make two iterations less. */ + + cy_limb = mpn_submul_1 (np, dp, dsize, q); + + if (n2 != cy_limb) + { + mpn_add_n (np, np, dp, dsize); + q--; + } + + qp[i] = q; + n0 = np[dsize - 1]; + } + } + } + + return most_significant_q_limb; +} diff --git a/ghc/rts/gmp/mpn/generic/divrem_1.c b/ghc/rts/gmp/mpn/generic/divrem_1.c new file mode 100644 index 0000000..d213267 --- /dev/null +++ b/ghc/rts/gmp/mpn/generic/divrem_1.c @@ -0,0 +1,58 @@ +/* mpn_divrem_1(quot_ptr, qsize, dividend_ptr, dividend_size, divisor_limb) -- + Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. 
+ Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR. + Return the single-limb remainder. + There are no constraints on the value of the divisor. + + QUOT_PTR and DIVIDEND_PTR might point to the same limb. + +Copyright (C) 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +mp_limb_t +#if __STDC__ +mpn_divrem_1 (mp_ptr qp, mp_size_t qsize, + mp_srcptr dividend_ptr, mp_size_t dividend_size, + mp_limb_t divisor_limb) +#else +mpn_divrem_1 (qp, qsize, dividend_ptr, dividend_size, divisor_limb) + mp_ptr qp; + mp_size_t qsize; + mp_srcptr dividend_ptr; + mp_size_t dividend_size; + mp_limb_t divisor_limb; +#endif +{ + mp_limb_t rlimb; + long i; + + /* Develop integer part of quotient. 
*/ + rlimb = mpn_divmod_1 (qp + qsize, dividend_ptr, dividend_size, divisor_limb); + + if (qsize != 0) + { + for (i = qsize - 1; i >= 0; i--) + udiv_qrnnd (qp[i], rlimb, rlimb, 0, divisor_limb); + } + return rlimb; +} diff --git a/ghc/rts/gmp/mpn/generic/dump.c b/ghc/rts/gmp/mpn/generic/dump.c new file mode 100644 index 0000000..a5831c4 --- /dev/null +++ b/ghc/rts/gmp/mpn/generic/dump.c @@ -0,0 +1,20 @@ +#include +#include "gmp.h" +#include "gmp-impl.h" + +void +mpn_dump (ptr, size) + mp_srcptr ptr; + mp_size_t size; +{ + if (size == 0) + printf ("0\n"); + { + while (size) + { + size--; + printf ("%0*lX", (int) (2 * BYTES_PER_MP_LIMB), ptr[size]); + } + printf ("\n"); + } +} diff --git a/ghc/rts/gmp/mpn/generic/gcd.c b/ghc/rts/gmp/mpn/generic/gcd.c new file mode 100644 index 0000000..8c2bbf0 --- /dev/null +++ b/ghc/rts/gmp/mpn/generic/gcd.c @@ -0,0 +1,402 @@ +/* mpn/gcd.c: mpn_gcd for gcd of two odd integers. + +Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* Integer greatest common divisor of two unsigned integers, using + the accelerated algorithm (see reference below). + + mp_size_t mpn_gcd (vp, vsize, up, usize). 
+ + Preconditions [U = (up, usize) and V = (vp, vsize)]: + + 1. V is odd. + 2. numbits(U) >= numbits(V). + + Both U and V are destroyed by the operation. The result is left at vp, + and its size is returned. + + Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu) + + Funding for this work has been partially provided by Conselho Nacional + de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant + 301314194-2, and was done while I was a visiting reseacher in the Instituto + de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS). + + Refer to + K. Weber, The accelerated integer GCD algorithm, ACM Transactions on + Mathematical Software, v. 21 (March), 1995, pp. 111-122. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* If MIN (usize, vsize) > ACCEL_THRESHOLD, then the accelerated algorithm is + used, otherwise the binary algorithm is used. This may be adjusted for + different architectures. */ +#ifndef ACCEL_THRESHOLD +#define ACCEL_THRESHOLD 4 +#endif + +/* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated + algorithm reduces using the bmod operation. Otherwise, the k-ary reduction + is used. 0 <= BMOD_THRESHOLD < BITS_PER_MP_LIMB. */ +enum + { + BMOD_THRESHOLD = BITS_PER_MP_LIMB/2 + }; + +#define SIGN_BIT (~(~(mp_limb_t)0 >> 1)) + + +#define SWAP_LIMB(UL, VL) do{mp_limb_t __l=(UL);(UL)=(VL);(VL)=__l;}while(0) +#define SWAP_PTR(UP, VP) do{mp_ptr __p=(UP);(UP)=(VP);(VP)=__p;}while(0) +#define SWAP_SZ(US, VS) do{mp_size_t __s=(US);(US)=(VS);(VS)=__s;}while(0) +#define SWAP_MPN(UP, US, VP, VS) do{SWAP_PTR(UP,VP);SWAP_SZ(US,VS);}while(0) + +/* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2. + Both U and V must be odd. 
*/ +static __gmp_inline mp_size_t +#if __STDC__ +gcd_2 (mp_ptr vp, mp_srcptr up) +#else +gcd_2 (vp, up) + mp_ptr vp; + mp_srcptr up; +#endif +{ + mp_limb_t u0, u1, v0, v1; + mp_size_t vsize; + + u0 = up[0], u1 = up[1], v0 = vp[0], v1 = vp[1]; + + while (u1 != v1 && u0 != v0) + { + unsigned long int r; + if (u1 > v1) + { + u1 -= v1 + (u0 < v0), u0 -= v0; + count_trailing_zeros (r, u0); + u0 = u1 << (BITS_PER_MP_LIMB - r) | u0 >> r; + u1 >>= r; + } + else /* u1 < v1. */ + { + v1 -= u1 + (v0 < u0), v0 -= u0; + count_trailing_zeros (r, v0); + v0 = v1 << (BITS_PER_MP_LIMB - r) | v0 >> r; + v1 >>= r; + } + } + + vp[0] = v0, vp[1] = v1, vsize = 1 + (v1 != 0); + + /* If U == V == GCD, done. Otherwise, compute GCD (V, |U - V|). */ + if (u1 == v1 && u0 == v0) + return vsize; + + v0 = (u0 == v0) ? (u1 > v1) ? u1-v1 : v1-u1 : (u0 > v0) ? u0-v0 : v0-u0; + vp[0] = mpn_gcd_1 (vp, vsize, v0); + + return 1; +} + +/* The function find_a finds 0 < N < 2^BITS_PER_MP_LIMB such that there exists + 0 < |D| < 2^BITS_PER_MP_LIMB, and N == D * C mod 2^(2*BITS_PER_MP_LIMB). + In the reference article, D was computed along with N, but it is better to + compute D separately as D <-- N / C mod 2^(BITS_PER_MP_LIMB + 1), treating + the result as a twos' complement signed integer. + + Initialize N1 to C mod 2^(2*BITS_PER_MP_LIMB). According to the reference + article, N2 should be initialized to 2^(2*BITS_PER_MP_LIMB), but we use + 2^(2*BITS_PER_MP_LIMB) - N1 to start the calculations within double + precision. If N2 > N1 initially, the first iteration of the while loop + will swap them. In all other situations, N1 >= N2 is maintained. */ + +static __gmp_inline mp_limb_t +#if __STDC__ +find_a (mp_srcptr cp) +#else +find_a (cp) + mp_srcptr cp; +#endif +{ + unsigned long int leading_zero_bits = 0; + + mp_limb_t n1_l = cp[0]; /* N1 == n1_h * 2^BITS_PER_MP_LIMB + n1_l. */ + mp_limb_t n1_h = cp[1]; + + mp_limb_t n2_l = -n1_l; /* N2 == n2_h * 2^BITS_PER_MP_LIMB + n2_l. 
*/ + mp_limb_t n2_h = ~n1_h; + + /* Main loop. */ + while (n2_h) /* While N2 >= 2^BITS_PER_MP_LIMB. */ + { + /* N1 <-- N1 % N2. */ + if ((SIGN_BIT >> leading_zero_bits & n2_h) == 0) + { + unsigned long int i; + count_leading_zeros (i, n2_h); + i -= leading_zero_bits, leading_zero_bits += i; + n2_h = n2_h<>(BITS_PER_MP_LIMB - i), n2_l <<= i; + do + { + if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l)) + n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l; + n2_l = n2_l>>1 | n2_h<<(BITS_PER_MP_LIMB - 1), n2_h >>= 1; + i -= 1; + } + while (i); + } + if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l)) + n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l; + + SWAP_LIMB (n1_h, n2_h); + SWAP_LIMB (n1_l, n2_l); + } + + return n2_l; +} + +mp_size_t +#if __STDC__ +mpn_gcd (mp_ptr gp, mp_ptr vp, mp_size_t vsize, mp_ptr up, mp_size_t usize) +#else +mpn_gcd (gp, vp, vsize, up, usize) + mp_ptr gp; + mp_ptr vp; + mp_size_t vsize; + mp_ptr up; + mp_size_t usize; +#endif +{ + mp_ptr orig_vp = vp; + mp_size_t orig_vsize = vsize; + int binary_gcd_ctr; /* Number of times binary gcd will execute. */ + TMP_DECL (marker); + + TMP_MARK (marker); + + /* Use accelerated algorithm if vsize is over ACCEL_THRESHOLD. + Two EXTRA limbs for U and V are required for kary reduction. */ + if (vsize > ACCEL_THRESHOLD) + { + unsigned long int vbitsize, d; + mp_ptr orig_up = up; + mp_size_t orig_usize = usize; + mp_ptr anchor_up = (mp_ptr) TMP_ALLOC ((usize + 2) * BYTES_PER_MP_LIMB); + + MPN_COPY (anchor_up, orig_up, usize); + up = anchor_up; + + count_leading_zeros (d, up[usize-1]); + d = usize * BITS_PER_MP_LIMB - d; + count_leading_zeros (vbitsize, vp[vsize-1]); + vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize; + d = d - vbitsize + 1; + + /* Use bmod reduction to quickly discover whether V divides U. */ + up[usize++] = 0; /* Insert leading zero. */ + mpn_bdivmod (up, up, usize, vp, vsize, d); + + /* Now skip U/V mod 2^d and any low zero limbs. 
*/ + d /= BITS_PER_MP_LIMB, up += d, usize -= d; + while (usize != 0 && up[0] == 0) + up++, usize--; + + if (usize == 0) /* GCD == ORIG_V. */ + goto done; + + vp = (mp_ptr) TMP_ALLOC ((vsize + 2) * BYTES_PER_MP_LIMB); + MPN_COPY (vp, orig_vp, vsize); + + do /* Main loop. */ + { + if (up[usize-1] & SIGN_BIT) /* U < 0; take twos' compl. */ + { + mp_size_t i; + anchor_up[0] = -up[0]; + for (i = 1; i < usize; i++) + anchor_up[i] = ~up[i]; + up = anchor_up; + } + + MPN_NORMALIZE_NOT_ZERO (up, usize); + + if ((up[0] & 1) == 0) /* Result even; remove twos. */ + { + unsigned long int r; + count_trailing_zeros (r, up[0]); + mpn_rshift (anchor_up, up, usize, r); + usize -= (anchor_up[usize-1] == 0); + } + else if (anchor_up != up) + MPN_COPY (anchor_up, up, usize); + + SWAP_MPN (anchor_up, usize, vp, vsize); + up = anchor_up; + + if (vsize <= 2) /* Kary can't handle < 2 limbs and */ + break; /* isn't efficient for == 2 limbs. */ + + d = vbitsize; + count_leading_zeros (vbitsize, vp[vsize-1]); + vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize; + d = d - vbitsize + 1; + + if (d > BMOD_THRESHOLD) /* Bmod reduction. */ + { + up[usize++] = 0; + mpn_bdivmod (up, up, usize, vp, vsize, d); + d /= BITS_PER_MP_LIMB, up += d, usize -= d; + } + else /* Kary reduction. */ + { + mp_limb_t bp[2], cp[2]; + + /* C <-- V/U mod 2^(2*BITS_PER_MP_LIMB). */ + cp[0] = vp[0], cp[1] = vp[1]; + mpn_bdivmod (cp, cp, 2, up, 2, 2*BITS_PER_MP_LIMB); + + /* U <-- find_a (C) * U. */ + up[usize] = mpn_mul_1 (up, up, usize, find_a (cp)); + usize++; + + /* B <-- A/C == U/V mod 2^(BITS_PER_MP_LIMB + 1). + bp[0] <-- U/V mod 2^BITS_PER_MP_LIMB and + bp[1] <-- ( (U - bp[0] * V)/2^BITS_PER_MP_LIMB ) / V mod 2 */ + bp[0] = up[0], bp[1] = up[1]; + mpn_bdivmod (bp, bp, 2, vp, 2, BITS_PER_MP_LIMB); + bp[1] &= 1; /* Since V is odd, division is unnecessary. */ + + up[usize++] = 0; + if (bp[1]) /* B < 0: U <-- U + (-B) * V. 
*/ + { + mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0]); + mpn_add_1 (up + vsize, up + vsize, usize - vsize, c); + } + else /* B >= 0: U <-- U - B * V. */ + { + mp_limb_t b = mpn_submul_1 (up, vp, vsize, bp[0]); + mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b); + } + + up += 2, usize -= 2; /* At least two low limbs are zero. */ + } + + /* Must remove low zero limbs before complementing. */ + while (usize != 0 && up[0] == 0) + up++, usize--; + } + while (usize); + + /* Compute GCD (ORIG_V, GCD (ORIG_U, V)). Binary will execute twice. */ + up = orig_up, usize = orig_usize; + binary_gcd_ctr = 2; + } + else + binary_gcd_ctr = 1; + + /* Finish up with the binary algorithm. Executes once or twice. */ + for ( ; binary_gcd_ctr--; up = orig_vp, usize = orig_vsize) + { + if (usize > 2) /* First make U close to V in size. */ + { + unsigned long int vbitsize, d; + count_leading_zeros (d, up[usize-1]); + d = usize * BITS_PER_MP_LIMB - d; + count_leading_zeros (vbitsize, vp[vsize-1]); + vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize; + d = d - vbitsize - 1; + if (d != -(unsigned long int)1 && d > 2) + { + mpn_bdivmod (up, up, usize, vp, vsize, d); /* Result > 0. */ + d /= (unsigned long int)BITS_PER_MP_LIMB, up += d, usize -= d; + } + } + + /* Start binary GCD. */ + do + { + mp_size_t zeros; + + /* Make sure U is odd. */ + MPN_NORMALIZE (up, usize); + while (up[0] == 0) + up += 1, usize -= 1; + if ((up[0] & 1) == 0) + { + unsigned long int r; + count_trailing_zeros (r, up[0]); + mpn_rshift (up, up, usize, r); + usize -= (up[usize-1] == 0); + } + + /* Keep usize >= vsize. */ + if (usize < vsize) + SWAP_MPN (up, usize, vp, vsize); + + if (usize <= 2) /* Double precision. */ + { + if (vsize == 1) + vp[0] = mpn_gcd_1 (up, usize, vp[0]); + else + vsize = gcd_2 (vp, up); + break; /* Binary GCD done. */ + } + + /* Count number of low zero limbs of U - V. 
*/ + for (zeros = 0; up[zeros] == vp[zeros] && ++zeros != vsize; ) + continue; + + /* If U < V, swap U and V; in any case, subtract V from U. */ + if (zeros == vsize) /* Subtract done. */ + up += zeros, usize -= zeros; + else if (usize == vsize) + { + mp_size_t size = vsize; + do + size--; + while (up[size] == vp[size]); + if (up[size] < vp[size]) /* usize == vsize. */ + SWAP_PTR (up, vp); + up += zeros, usize = size + 1 - zeros; + mpn_sub_n (up, up, vp + zeros, usize); + } + else + { + mp_size_t size = vsize - zeros; + up += zeros, usize -= zeros; + if (mpn_sub_n (up, up, vp + zeros, size)) + { + while (up[size] == 0) /* Propagate borrow. */ + up[size++] = -(mp_limb_t)1; + up[size] -= 1; + } + } + } + while (usize); /* End binary GCD. */ + } + +done: + if (vp != gp) + MPN_COPY (gp, vp, vsize); + TMP_FREE (marker); + return vsize; +} diff --git a/ghc/rts/gmp/mpn/generic/gcd_1.c b/ghc/rts/gmp/mpn/generic/gcd_1.c new file mode 100644 index 0000000..ebcdfb5 --- /dev/null +++ b/ghc/rts/gmp/mpn/generic/gcd_1.c @@ -0,0 +1,73 @@ +/* mpn_gcd_1 -- + +Copyright (C) 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Does not work for U == 0 or V == 0.  It would be tough to make it work for
+   V == 0 since gcd(x,0) = x, and U does not generally fit in an mp_limb_t.  */
+
+mp_limb_t
+mpn_gcd_1 (up, size, vlimb)
+     mp_srcptr up;	/* operand U, SIZE limbs; must be nonzero (see above) */
+     mp_size_t size;	/* number of limbs in U, >= 1 */
+     mp_limb_t vlimb;	/* single-limb operand V; must be nonzero (see above) */
+{
+  mp_limb_t ulimb;
+  unsigned long int u_low_zero_bits, v_low_zero_bits;
+
+  if (size > 1)
+    {
+      ulimb = mpn_mod_1 (up, size, vlimb);	/* reduce U mod V to one limb */
+      if (ulimb == 0)
+	return vlimb;	/* V divides U, so gcd(U,V) == V */
+    }
+  else
+    ulimb = up[0];
+
+  /* Need to eliminate low zero bits.  */
+  count_trailing_zeros (u_low_zero_bits, ulimb);	/* ulimb != 0 here */
+  ulimb >>= u_low_zero_bits;
+
+  count_trailing_zeros (v_low_zero_bits, vlimb);	/* vlimb != 0 by contract */
+  vlimb >>= v_low_zero_bits;
+
+  while (ulimb != vlimb)	/* binary gcd: both operands are odd here */
+    {
+      if (ulimb > vlimb)
+	{
+	  ulimb -= vlimb;	/* odd - odd is even and nonzero... */
+	  do
+	    ulimb >>= 1;
+	  while ((ulimb & 1) == 0);	/* ...so strip its factors of 2 */
+	}
+      else /*  vlimb > ulimb.  */
+	{
+	  vlimb -= ulimb;
+	  do
+	    vlimb >>= 1;
+	  while ((vlimb & 1) == 0);
+	}
+    }
+
+  return ulimb << MIN (u_low_zero_bits, v_low_zero_bits);	/* restore shared 2s */
+}
diff --git a/ghc/rts/gmp/mpn/generic/gcdext.c b/ghc/rts/gmp/mpn/generic/gcdext.c
new file mode 100644
index 0000000..245e20a
--- /dev/null
+++ b/ghc/rts/gmp/mpn/generic/gcdext.c
@@ -0,0 +1,441 @@
+/* mpn_gcdext -- Extended Greatest Common Divisor.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef EXTEND
+#define EXTEND 1	/* EXTEND=1 compiles mpn_gcdext, EXTEND=0 plain mpn_gcd */
+#endif
+
+#if STAT
+int arr[BITS_PER_MP_LIMB];	/* debug histogram of matrix-entry bit sizes */
+#endif
+
+#define SGN(A) (((A) < 0) ? -1 : ((A) > 0))	/* sign of A: -1, 0 or +1 */
+
+/* Idea 1: After we have performed a full division, don't shift operands back,
+   but instead account for the extra factors-of-2 thus introduced.
+   Idea 2: Simple generalization to use divide-and-conquer would give us an
+   algorithm that runs faster than O(n^2).
+   Idea 3: The input numbers need less space as the computation progresses,
+   while the s0 and s1 variables need more space.  To save space, we
+   could make them share space, and have the latter variables grow
+   into the former.  */
+
+/* Precondition: U >= V.  */
+
+mp_size_t
+#if EXTEND
+#if __STDC__
+mpn_gcdext (mp_ptr gp, mp_ptr s0p,
+	    mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
+#else
+mpn_gcdext (gp, s0p, up, size, vp, vsize)
+     mp_ptr gp;		/* out: the gcd */
+     mp_ptr s0p;	/* out: cofactor of U -- presumably U*s0 == G mod V; confirm against callers */
+     mp_ptr up;		/* in: operand U (SIZE limbs); clobbered */
+     mp_size_t size;
+     mp_ptr vp;		/* in: operand V (VSIZE limbs); clobbered */
+     mp_size_t vsize;
+#endif
+#else
+#if __STDC__
+mpn_gcd (mp_ptr gp,
+	 mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
+#else
+mpn_gcd (gp, up, size, vp, vsize)
+     mp_ptr gp;
+     mp_ptr up;
+     mp_size_t size;
+     mp_ptr vp;
+     mp_size_t vsize;
+#endif
+#endif
+{
+  mp_limb_t uh, vh;		/* leading bits of U and V for Lehmer steps */
+  mp_limb_signed_t A, B, C, D;	/* 2x2 single-limb cofactor matrix */
+  int cnt;
+  mp_ptr tp, wp;	/* NOTE(review): allocated only under #if EXTEND below, yet used in both configurations -- EXTEND=0 build looks broken; confirm */
+#if RECORD
+  mp_limb_signed_t min = 0, max = 0;
+#endif
+#if EXTEND
+  mp_ptr s1p;
+  mp_ptr orig_s0p = s0p;
+  mp_size_t ssize, orig_size = size;
+  TMP_DECL (mark);	/* NOTE(review): declared only when EXTEND, but TMP_FREE below is unconditional; confirm EXTEND=0 compiles */
+
+  TMP_MARK (mark);
+
+  tp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);	/* +1 limb for carries */
+  wp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
+  s1p = (mp_ptr) TMP_ALLOC (size * BYTES_PER_MP_LIMB);
+
+  MPN_ZERO (s0p, size);
+  MPN_ZERO (s1p, size);
+
+  s0p[0] = 1;	/* cofactor pair starts at the identity (1, 0) */
+  s1p[0] = 0;
+  ssize = 1;
+#endif
+
+  if (size > vsize)	/* U longer than V: one full division equalizes sizes */
+    {
+      /* Normalize V (and shift up U the same amount).  */
+      count_leading_zeros (cnt, vp[vsize - 1]);
+      if (cnt != 0)
+	{
+	  mp_limb_t cy;
+	  mpn_lshift (vp, vp, vsize, cnt);
+	  cy = mpn_lshift (up, up, size, cnt);
+	  up[size] = cy;
+	  size += cy != 0;
+	}
+
+      mpn_divmod (up + vsize, up, size, vp, vsize);	/* U <- U mod V (quotient discarded) */
+#if EXTEND
+      /* This is really what it boils down to in this case... */
+      s0p[0] = 0;
+      s1p[0] = 1;
+#endif
+      size = vsize;
+      if (cnt != 0)
+	{
+	  mpn_rshift (up, up, size, cnt);	/* undo the normalization shift */
+	  mpn_rshift (vp, vp, size, cnt);
+	}
+      {
+	mp_ptr xp;	/* swap roles so U >= V again */
+	xp = up; up = vp; vp = xp;
+      }
+    }
+
+  for (;;)	/* main Lehmer loop; U and V shrink each iteration */
+    {
+      /* Figure out exact size of V.  */
+      vsize = size;
+      MPN_NORMALIZE (vp, vsize);
+      if (vsize <= 1)
+	break;	/* V fits in one limb: finish single-precision below */
+
+      /* Make UH be the most significant limb of U, and make VH be
+	 corresponding bits from V.  */
+      uh = up[size - 1];
+      vh = vp[size - 1];
+      count_leading_zeros (cnt, uh);
+      if (cnt != 0)
+	{
+	  uh = (uh << cnt) | (up[size - 2] >> (BITS_PER_MP_LIMB - cnt));
+	  vh = (vh << cnt) | (vp[size - 2] >> (BITS_PER_MP_LIMB - cnt));
+	}
+
+#if 0
+      /* For now, only handle BITS_PER_MP_LIMB-1 bits.  This makes
+	 room for sign bit.  */
+      uh >>= 1;
+      vh >>= 1;
+#endif
+      A = 1;	/* matrix starts as the identity */
+      B = 0;
+      C = 0;
+      D = 1;
+
+      for (;;)	/* accumulate quotients while they are provably exact */
+	{
+	  mp_limb_signed_t q, T;
+	  if (vh + C == 0 || vh + D == 0)
+	    break;
+
+	  q = (uh + A) / (vh + C);
+	  if (q != (uh + B) / (vh + D))
+	    break;	/* bounds disagree: leading bits no longer decide q */
+
+	  T = A - q * C;
+	  A = C;
+	  C = T;
+	  T = B - q * D;
+	  B = D;
+	  D = T;
+	  T = uh - q * vh;
+	  uh = vh;
+	  vh = T;
+	}
+
+#if RECORD
+      min = MIN (A, min); min = MIN (B, min);
+      min = MIN (C, min); min = MIN (D, min);
+      max = MAX (A, max); max = MAX (B, max);
+      max = MAX (C, max); max = MAX (D, max);
+#endif
+
+      if (B == 0)	/* no single-limb progress: fall back to full division */
+	{
+	  mp_limb_t qh;
+	  mp_size_t i;
+
+	  /* This is quite rare.  I.e., optimize something else!  */
+
+	  /* Normalize V (and shift up U the same amount).  */
+	  count_leading_zeros (cnt, vp[vsize - 1]);
+	  if (cnt != 0)
+	    {
+	      mp_limb_t cy;
+	      mpn_lshift (vp, vp, vsize, cnt);
+	      cy = mpn_lshift (up, up, size, cnt);
+	      up[size] = cy;
+	      size += cy != 0;
+	    }
+
+	  qh = mpn_divmod (up + vsize, up, size, vp, vsize);
+#if EXTEND
+	  MPN_COPY (tp, s0p, ssize);
+	  for (i = 0; i < size - vsize; i++)	/* t = s0 + q * s1, limb by limb */
+	    {
+	      mp_limb_t cy;
+	      cy = mpn_addmul_1 (tp + i, s1p, ssize, up[vsize + i]);
+	      if (cy != 0)
+		tp[ssize++] = cy;
+	    }
+	  if (qh != 0)
+	    {
+	      mp_limb_t cy;
+	      abort ();	/* high quotient limb not handled -- dead code guard */
+	      /* XXX since qh == 1, mpn_addmul_1 is overkill */
+	      cy = mpn_addmul_1 (tp + size - vsize, s1p, ssize, qh);
+	      if (cy != 0)
+		tp[ssize++] = cy;
+	    }
+#if 0
+	  MPN_COPY (s0p, s1p, ssize); /* should be old ssize, kind of */
+	  MPN_COPY (s1p, tp, ssize);
+#else
+	  {
+	    mp_ptr xp;	/* rotate (s0, s1, t) by pointer swap instead of copying */
+	    xp = s0p; s0p = s1p; s1p = xp;
+	    xp = s1p; s1p = tp; tp = xp;
+	  }
+#endif
+#endif
+	  size = vsize;
+	  if (cnt != 0)
+	    {
+	      mpn_rshift (up, up, size, cnt);
+	      mpn_rshift (vp, vp, size, cnt);
+	    }
+
+	  {
+	    mp_ptr xp;	/* keep the invariant U >= V */
+	    xp = up; up = vp; vp = xp;
+	  }
+	  MPN_NORMALIZE (up, size);
+	}
+      else	/* apply the accumulated 2x2 matrix to U and V */
+	{
+	  /* T = U*A + V*B
+	     W = U*C + V*D
+	     U = T
+	     V = W	   */
+
+	  if (SGN(A) == SGN(B))	/* should be different sign */
+	    abort ();
+	  if (SGN(C) == SGN(D))	/* should be different sign */
+	    abort ();
+#if STAT
+	  { mp_limb_t x;
+	    x = ABS (A) | ABS (B) | ABS (C) | ABS (D);
+	    count_leading_zeros (cnt, x);
+	    arr[BITS_PER_MP_LIMB - cnt]++; }
+#endif
+	  if (A == 0)	/* matrix row is (0, 1): T is just V */
+	    {
+	      if (B != 1) abort ();
+	      MPN_COPY (tp, vp, size);
+	    }
+	  else
+	    {
+	      if (A < 0)	/* signs alternate, so one term is subtracted */
+		{
+		  mpn_mul_1 (tp, vp, size, B);
+		  mpn_submul_1 (tp, up, size, -A);
+		}
+	      else
+		{
+		  mpn_mul_1 (tp, up, size, A);
+		  mpn_submul_1 (tp, vp, size, -B);
+		}
+	    }
+	  if (C < 0)
+	    {
+	      mpn_mul_1 (wp, vp, size, D);
+	      mpn_submul_1 (wp, up, size, -C);
+	    }
+	  else
+	    {
+	      mpn_mul_1 (wp, up, size, C);
+	      mpn_submul_1 (wp, vp, size, -D);
+	    }
+
+	  {
+	    mp_ptr xp;	/* U <- T, V <- W by swapping buffers */
+	    xp = tp; tp = up; up = xp;
+	    xp = wp; wp = vp; vp = xp;
+	  }
+
+#if EXTEND
+	  { mp_limb_t cy;	/* update cofactors with the same matrix */
+	    MPN_ZERO (tp, orig_size);
+	    if (A == 0)
+	      {
+		if (B != 1) abort ();
+		MPN_COPY (tp, s1p, ssize);
+	      }
+	    else
+	      {
+		if (A < 0)
+		  {
+		    cy = mpn_mul_1 (tp, s1p, ssize, B);
+		    cy += mpn_addmul_1 (tp, s0p, ssize, -A);
+		  }
+		else
+		  {
+		    cy = mpn_mul_1 (tp, s0p, ssize, A);
+		    cy += mpn_addmul_1 (tp, s1p, ssize, -B);
+		  }
+		if (cy != 0)
+		  tp[ssize++] = cy;
+	      }
+	    MPN_ZERO (wp, orig_size);
+	    if (C < 0)
+	      {
+		cy = mpn_mul_1 (wp, s1p, ssize, D);
+		cy += mpn_addmul_1 (wp, s0p, ssize, -C);
+	      }
+	    else
+	      {
+		cy = mpn_mul_1 (wp, s0p, ssize, C);
+		cy += mpn_addmul_1 (wp, s1p, ssize, -D);
+	      }
+	    if (cy != 0)
+	      wp[ssize++] = cy;
+	  }
+	  {
+	    mp_ptr xp;	/* s0 <- t, s1 <- w by swapping buffers */
+	    xp = tp; tp = s0p; s0p = xp;
+	    xp = wp; wp = s1p; s1p = xp;
+	  }
+#endif
+#if 0	/* Is it a win to remove multiple zeros here? */
+	  MPN_NORMALIZE (up, size);
+#else
+	  if (up[size - 1] == 0)
+	    size--;
+#endif
+	}
+    }
+
+#if RECORD
+  printf ("min: %ld\n", min);
+  printf ("max: %ld\n", max);
+#endif
+
+  if (vsize == 0)	/* V exhausted: U holds the gcd */
+    {
+      if (gp != up)
+	MPN_COPY (gp, up, size);
+#if EXTEND
+      if (orig_s0p != s0p)	/* buffer swaps may have moved s0 off the caller's area */
+	MPN_COPY (orig_s0p, s0p, ssize);
+#endif
+      TMP_FREE (mark);
+      return size;
+    }
+  else	/* V is a single limb: finish with limb-sized Euclid */
+    {
+      mp_limb_t vl, ul, t;
+#if EXTEND
+      mp_limb_t cy;
+      mp_size_t i;
+#endif
+      vl = vp[0];
+#if EXTEND
+      t = mpn_divmod_1 (wp, up, size, vl);	/* wp = U / vl, t = U mod vl */
+      MPN_COPY (tp, s0p, ssize);
+      for (i = 0; i < size; i++)	/* t = s0 + (U/vl) * s1 */
+	{
+	  cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]);
+	  if (cy != 0)
+	    tp[ssize++] = cy;
+	}
+#if 0
+      MPN_COPY (s0p, s1p, ssize);
+      MPN_COPY (s1p, tp, ssize);
+#else
+      {
+	mp_ptr xp;
+	xp = s0p; s0p = s1p; s1p = xp;
+	xp = s1p; s1p = tp; tp = xp;
+      }
+#endif
+#else
+      t = mpn_mod_1 (up, size, vl);
+#endif
+      ul = vl;
+      vl = t;
+      while (vl != 0)	/* plain Euclid on limbs, cofactors updated in step */
+	{
+	  mp_limb_t t;	/* shadows the outer t deliberately kept local here */
+#if EXTEND
+	  mp_limb_t q, cy;
+	  q = ul / vl;
+	  t = ul - q*vl;	/* remainder, avoiding a second division */
+
+	  MPN_COPY (tp, s0p, ssize);
+	  cy = mpn_addmul_1 (tp, s1p, ssize, q);
+	  if (cy != 0)
+	    tp[ssize++] = cy;
+#if 0
+	  MPN_COPY (s0p, s1p, ssize);
+	  MPN_COPY (s1p, tp, ssize);
+#else
+	  {
+	    mp_ptr xp;
+	    xp = s0p; s0p = s1p; s1p = xp;
+	    xp = s1p; s1p = tp; tp = xp;
+	  }
+#endif
+
+#else
+	  t = ul % vl;
+#endif
+	  ul = vl;
+	  vl = t;
+	}
+      gp[0] = ul;	/* single-limb gcd */
+#if EXTEND
+      if (orig_s0p != s0p)
+	MPN_COPY (orig_s0p, s0p, ssize);
+#endif
+      TMP_FREE (mark);
+      return 1;
+    }
+}
-- 
1.7.10.4