rts/gmp/mpn/generic/bz_divrem_n.c

   1 /* mpn_bz_divrem_n and auxilliary routines.
   2
   3    THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
   4    INTERFACES.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
   5    IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
   6    FUTURE GNU MP RELEASE.
   7
   8
   9 Copyright (C) 2000 Free Software Foundation, Inc.
  10 Contributed by Paul Zimmermann.
  11
  12 This file is part of the GNU MP Library.
  13
  14 The GNU MP Library is free software; you can redistribute it and/or modify
  15 it under the terms of the GNU Lesser General Public License as published by
  16 the Free Software Foundation; either version 2.1 of the License, or (at your
  17 option) any later version.
  18
  19 The GNU MP Library is distributed in the hope that it will be useful, but
  20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  21 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  22 License for more details.
  23
  24 You should have received a copy of the GNU Lesser General Public License
  25 along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
  26 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  27 MA 02111-1307, USA. */
  28
  29 #include "gmp.h"
  30 #include "gmp-impl.h"
  31
  32 /*
  33 [1] Fast Recursive Division, by Christoph Burnikel and Joachim Ziegler,
  34     Technical report MPI-I-98-1-022, october 1998.
  35     http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz
  36 */
  37
  38 static mp_limb_t mpn_bz_div_3_halves_by_2
  39   _PROTO ((mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n));
  40
  41
  42 /* mpn_bz_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
  43    div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
  44    i.e. n/2 >= KARATSUBA_MUL_THRESHOLD */
  45 #ifndef BZ_THRESHOLD
  46 #define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
  47 #endif
  48
  49 #if 0
  50 static
  51 unused_mpn_divrem (qp, qxn, np, nn, dp, dn)
  52      mp_ptr qp;
  53      mp_size_t qxn;
  54      mp_ptr np;
  55      mp_size_t nn;
  56      mp_srcptr dp;
  57      mp_size_t dn;
  58 {
  59   /* This might be useful: */
  60   if (qxn != 0)
  61     {
  62       mp_limb_t c;
  63       mp_ptr tp = alloca ((nn + qxn) * BYTES_PER_MP_LIMB);
  64       MPN_COPY (tp + qxn - nn, np, nn);
  65       MPN_ZERO (tp, qxn);
  66       c = mpn_divrem (qp, 0L, tp, nn + qxn, dp, dn);
  67       /* Maybe copy proper part of tp to np?  Documentation is unclear about
  68          the returned np value when qxn != 0 */
  69       return c;
  70     }
  71 }
  72 #endif
  73
  74
  75 /* mpn_bz_divrem_n - Implements algorithm of page 8 in [1]: divides (np,2n)
  76    by (dp,n) and puts the quotient in (qp,n), the remainder in (np,n).
  77    Returns most significant limb of the quotient, which is 0 or 1.
  78    Requires that the most significant bit of the divisor is set.  */
  79
  80 mp_limb_t
  81 #if __STDC__
  82 mpn_bz_divrem_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)
  83 #else
  84 mpn_bz_divrem_n (qp, np, dp, n)
  85      mp_ptr qp;
  86      mp_ptr np;
  87      mp_srcptr dp;
  88      mp_size_t n;
  89 #endif
  90 {
  91   mp_limb_t qhl, cc;
  92
  93   if (n % 2 != 0)
  94     {
  95       qhl = mpn_bz_divrem_n (qp + 1, np + 2, dp + 1, n - 1);
  96       cc = mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0]);
  97       cc = mpn_sub_1 (np + n, np + n, 1, cc);
  98       if (qhl) cc += mpn_sub_1 (np + n, np + n, 1, dp[0]);
  99       while (cc)
 100         {
 101           qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, (mp_limb_t) 1);
 102           cc -= mpn_add_n (np + 1, np + 1, dp, n);
 103         }
 104       qhl += mpn_add_1 (qp + 1, qp + 1, n - 1,
 105                         mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
 106     }
 107   else
 108     {
 109       mp_size_t n2 = n/2;
 110       qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2);
 111       qhl += mpn_add_1 (qp + n2, qp + n2, n2,
 112                         mpn_bz_div_3_halves_by_2 (qp, np, dp, n2));
 113     }
 114   return qhl;
 115 }
 116
 117
 118 /* divides (np, 3n) by (dp, 2n) and puts the quotient in (qp, n),
 119    the remainder in (np, 2n) */
 120
 121 static mp_limb_t
 122 #if __STDC__
 123 mpn_bz_div_3_halves_by_2 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)
 124 #else
 125 mpn_bz_div_3_halves_by_2 (qp, np, dp, n)
 126      mp_ptr qp;
 127      mp_ptr np;
 128      mp_srcptr dp;
 129      mp_size_t n;
 130 #endif
 131 {
 132   mp_size_t twon = n + n;
 133   mp_limb_t qhl, cc;
 134   mp_ptr tmp;
 135   TMP_DECL (marker);
 136
 137   TMP_MARK (marker);
 138   if (n < BZ_THRESHOLD)
 139     qhl = mpn_sb_divrem_mn (qp, np + n, twon, dp + n, n);
 140   else
 141     qhl = mpn_bz_divrem_n (qp, np + n, dp + n, n);
 142   tmp = (mp_ptr) TMP_ALLOC (twon * BYTES_PER_MP_LIMB);
 143   mpn_mul_n (tmp, qp, dp, n);
 144   cc = mpn_sub_n (np, np, tmp, twon);
 145   TMP_FREE (marker);
 146   if (qhl) cc += mpn_sub_n (np + n, np + n, dp, n);
 147   while (cc)
 148     {
 149       qhl -= mpn_sub_1 (qp, qp, n, (mp_limb_t) 1);
 150       cc -= mpn_add_n (np, np, dp, twon);
 151     }
 152   return qhl;
 153 }