ghc/rts/gmp/mpn/generic/bz_divrem_n.c

   1 /* mpn_bz_divrem_n and auxilliary routines.
   2
   3    THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
   4    INTERFACES.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
   5    IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
   6    FUTURE GNU MP RELEASE.
   7
   8
   9 Copyright (C) 2000 Free Software Foundation, Inc.
  10 Contributed by Paul Zimmermann.
  11
  12 This file is part of the GNU MP Library.
  13
  14 The GNU MP Library is free software; you can redistribute it and/or modify
  15 it under the terms of the GNU Lesser General Public License as published by
  16 the Free Software Foundation; either version 2.1 of the License, or (at your
  17 option) any later version.
  18
  19 The GNU MP Library is distributed in the hope that it will be useful, but
  20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  21 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  22 License for more details.
  23
  24 You should have received a copy of the GNU Lesser General Public License
  25 along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
  26 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  27 MA 02111-1307, USA. */
  28
  29 #include "gmp.h"
  30 #include "gmp-impl.h"
  31
  32 /*
  33 [1] Fast Recursive Division, by Christoph Burnikel and Joachim Ziegler,
  34     Technical report MPI-I-98-1-022, october 1998.
  35     http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz
  36 */
  37
  38 static mp_limb_t mpn_bz_div_3_halves_by_2 _PROTO ((mp_ptr, mp_ptr, mp_srcptr,
  39                                                    mp_size_t, mp_ptr));
  40
  41 static mp_limb_t mpn_bz_divrem_aux _PROTO ((mp_ptr, mp_ptr, mp_srcptr,
  42                                             mp_size_t, mp_ptr));
  43
  44 /* mpn_bz_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
  45    div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
  46    i.e. n/2 >= KARATSUBA_MUL_THRESHOLD */
  47 #ifndef BZ_THRESHOLD
  48 #define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
  49 #endif
  50
  51 #if 0
  52 static
  53 unused_mpn_divrem (qp, qxn, np, nn, dp, dn)
  54      mp_ptr qp;
  55      mp_size_t qxn;
  56      mp_ptr np;
  57      mp_size_t nn;
  58      mp_srcptr dp;
  59      mp_size_t dn;
  60 {
  61   /* This might be useful: */
  62   if (qxn != 0)
  63     {
  64       mp_limb_t c;
  65       mp_ptr tp = alloca ((nn + qxn) * BYTES_PER_MP_LIMB);
  66       MPN_COPY (tp + qxn - nn, np, nn);
  67       MPN_ZERO (tp, qxn);
  68       c = mpn_divrem (qp, 0L, tp, nn + qxn, dp, dn);
  69       /* Maybe copy proper part of tp to np?  Documentation is unclear about
  70          the returned np value when qxn != 0 */
  71       return c;
  72     }
  73 }
  74 #endif
  75
  76 /* mpn_bz_divrem_n - Implements algorithm of page 8 in [1]: divides (np,2n)
  77    by (dp,n) and puts the quotient in (qp,n), the remainder in (np,n).
  78    Returns most significant limb of the quotient, which is 0 or 1.
  79    Requires that the most significant bit of the divisor is set.  */
  80
  81 mp_limb_t
  82 #if __STDC__
  83 mpn_bz_divrem_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)
  84 #else
  85 mpn_bz_divrem_n (qp, np, dp, n)
  86      mp_ptr qp;
  87      mp_ptr np;
  88      mp_srcptr dp;
  89      mp_size_t n;
  90 #endif
  91 {
  92   mp_limb_t qhl = 0;
  93   if (mpn_cmp (np + n, dp, n) >= 0)
  94     {
  95       qhl = 1;
  96       mpn_sub_n (np + n, np + n, dp, n);
  97       abort ();
  98     }
  99   if (n % 2 != 0)
 100     {
 101       /* divide (2n - 2) most significant limbs from np by those (n - 1) from dp */
 102       if (n < BZ_THRESHOLD)
 103         qhl += mpn_sb_divrem_mn (qp + 1, np + 2, 2 * (n - 1), dp + 1, n - 1);
 104       else
 105         qhl += mpn_bz_divrem_n (qp + 1, np + 2, dp + 1, n - 1);
 106       /* now (qp + 1, n - 1) contains the quotient of (np + 2, 2n - 2) by
 107          (dp + 1, n - 1) and (np + 2, n - 1) contains the remainder */
 108       if (mpn_sub_1 (np + n, np + n, 1,
 109                      mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0])))
 110         {
 111           /* quotient too large */
 112           qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
 113           if (mpn_add_n (np + 1, np + 1, dp, n) == 0)
 114             { /* still too large */
 115               qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
 116               mpn_add_n (np + 1, np + 1, dp, n); /* always carry here */
 117             }
 118         }
 119       /* now divide (np, n + 1) by (dp, n) */
 120       qhl += mpn_add_1 (qp + 1, qp + 1, n - 1,
 121                         mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
 122     }
 123   else
 124     {
 125       mp_ptr tmp;
 126       mp_size_t n2 = n/2;
 127       TMP_DECL (marker);
 128       TMP_MARK (marker);
 129       tmp = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);
 130       qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2, tmp);
 131       qhl += mpn_add_1 (qp + n2, qp + n2, n2,
 132                        mpn_bz_div_3_halves_by_2 (qp, np, dp, n2, tmp));
 133       TMP_FREE (marker);
 134     }
 135   return qhl;
 136 }
 137
 138 /* Like mpn_bz_divrem_n, but without memory allocation.  Also
 139    assumes mpn_cmp (np + n, dp, n) < 0 */
 140
 141 static mp_limb_t
 142 #if __STDC__
 143 mpn_bz_divrem_aux (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr tmp)
 144 #else
 145 mpn_bz_divrem_aux (qp, np, dp, n, tmp)
 146      mp_ptr qp;
 147      mp_ptr np;
 148      mp_srcptr dp;
 149      mp_size_t n;
 150      mp_ptr tmp;
 151 #endif
 152 {
 153   mp_limb_t qhl;
 154
 155   if (n % 2 != 0)
 156     {
 157       /* divide (2n - 2) most significant limbs from np by those (n - 1) from dp */
 158       qhl = mpn_bz_divrem_aux (qp + 1, np + 2, dp + 1, n - 1, tmp);
 159       /* now (qp + 1, n - 1) contains the quotient of (np + 2, 2n - 2) by
 160          (dp + 1, n - 1) and (np + 2, n - 1) contains the remainder */
 161       if (mpn_sub_1 (np + n, np + n, 1,
 162                      mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0])))
 163         {
 164           /* quotient too large */
 165           qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
 166           if (mpn_add_n (np + 1, np + 1, dp, n) == 0)
 167             { /* still too large */
 168               qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
 169               mpn_add_n (np + 1, np + 1, dp, n); /* always carry here */
 170             }
 171         }
 172       /* now divide (np, n + 1) by (dp, n) */
 173       qhl += mpn_add_1 (qp + 1, qp + 1, n - 1,
 174                         mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
 175     }
 176   else
 177     {
 178       mp_size_t n2 = n/2;
 179       qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2, tmp);
 180       qhl += mpn_add_1 (qp + n2, qp + n2, n2,
 181                        mpn_bz_div_3_halves_by_2 (qp, np, dp, n2, tmp));
 182     }
 183   return qhl;
 184 }
 185
 186 /* divides (np, 3n) by (dp, 2n) and puts the quotient in (qp, n),
 187    the remainder in (np, 2n) */
 188
 189 static mp_limb_t
 190 #if __STDC__
 191 mpn_bz_div_3_halves_by_2 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
 192                           mp_ptr tmp)
 193 #else
 194 mpn_bz_div_3_halves_by_2 (qp, np, dp, n, tmp)
 195      mp_ptr qp;
 196      mp_ptr np;
 197      mp_srcptr dp;
 198      mp_size_t n;
 199      mp_ptr tmp;
 200 #endif
 201 {
 202   mp_size_t twon = n + n;
 203   mp_limb_t qhl;
 204
 205   if (n < BZ_THRESHOLD)
 206     qhl = mpn_sb_divrem_mn (qp, np + n, twon, dp + n, n);
 207   else
 208     qhl = mpn_bz_divrem_aux (qp, np + n, dp + n, n, tmp);
 209   /* q = (qp, n), c = (np + n, n) with the notations of [1] */
 210   mpn_mul_n (tmp, qp, dp, n);
 211   if (qhl != 0)
 212     mpn_add_n (tmp + n, tmp + n, dp, n);
 213   if (mpn_sub_n (np, np, tmp, twon))            /* R = (np, 2n) */
 214     {
 215       qhl -= mpn_sub_1 (qp, qp, n, 1L);
 216       if (mpn_add_n (np, np, dp, twon) == 0)
 217         { /* qp still too large */
 218           qhl -= mpn_sub_1 (qp, qp, n, 1L);
 219           mpn_add_n (np, np, dp, twon);         /* always carry here */
 220         }
 221     }
 222   return qhl;
 223 }