rts/gmp/mpn/generic/sb_divrem_mn.c

   1 /* mpn_sb_divrem_mn -- Divide natural numbers, producing both remainder and
   2    quotient.
   3
   4    THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
   5    INTERFACES.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
   6    IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
   7    FUTURE GNU MP RELEASE.
   8
   9
  10 Copyright (C) 1993, 1994, 1995, 1996, 2000 Free Software Foundation, Inc.
  11
  12 This file is part of the GNU MP Library.
  13
  14 The GNU MP Library is free software; you can redistribute it and/or modify
  15 it under the terms of the GNU Lesser General Public License as published by
  16 the Free Software Foundation; either version 2.1 of the License, or (at your
  17 option) any later version.
  18
  19 The GNU MP Library is distributed in the hope that it will be useful, but
  20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  21 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  22 License for more details.
  23
  24 You should have received a copy of the GNU Lesser General Public License
  25 along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
  26 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  27 MA 02111-1307, USA. */
  28
  29 #include "gmp.h"
  30 #include "gmp-impl.h"
  31 #include "longlong.h"
  32
  33 /* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
  34    the NSIZE-DSIZE least significant quotient limbs at QP
  35    and the DSIZE long remainder at NP.  If QEXTRA_LIMBS is
  36    non-zero, generate that many fraction bits and append them after the
  37    other quotient limbs.
  38    Return the most significant limb of the quotient, this is always 0 or 1.
  39
  40    Preconditions:
  41    0. NSIZE >= DSIZE.
  42    1. The most significant bit of the divisor must be set.
  43    2. QP must either not overlap with the input operands at all, or
  44       QP + DSIZE >= NP must hold true.  (This means that it's
  45       possible to put the quotient in the high part of NUM, right after the
  46       remainder in NUM.
  47    3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
  48    4. DSIZE >= 2.  */
  49
  50
  51 #define PREINVERT_VIABLE \
  52   (UDIV_TIME > 2 * UMUL_TIME + 6 /* && ! TARGET_REGISTER_STARVED */)
  53
  54 mp_limb_t
  55 #if __STDC__
  56 mpn_sb_divrem_mn (mp_ptr qp,
  57                mp_ptr np, mp_size_t nsize,
  58                mp_srcptr dp, mp_size_t dsize)
  59 #else
  60 mpn_sb_divrem_mn (qp, np, nsize, dp, dsize)
  61      mp_ptr qp;
  62      mp_ptr np;
  63      mp_size_t nsize;
  64      mp_srcptr dp;
  65      mp_size_t dsize;
  66 #endif
  67 {
  68   mp_limb_t most_significant_q_limb = 0;
  69   mp_size_t i;
  70   mp_limb_t dx, d1, n0;
  71   mp_limb_t dxinv;
  72   int have_preinv;
  73
  74   ASSERT_ALWAYS (dsize > 2);
  75
  76   np += nsize - dsize;
  77   dx = dp[dsize - 1];
  78   d1 = dp[dsize - 2];
  79   n0 = np[dsize - 1];
  80
  81   if (n0 >= dx)
  82     {
  83       if (n0 > dx || mpn_cmp (np, dp, dsize - 1) >= 0)
  84         {
  85           mpn_sub_n (np, np, dp, dsize);
  86           most_significant_q_limb = 1;
  87         }
  88     }
  89
  90   /* If multiplication is much faster than division, preinvert the
  91      most significant divisor limb before entering the loop.  */
  92   if (PREINVERT_VIABLE)
  93     {
  94       have_preinv = 0;
  95       if ((UDIV_TIME - (2 * UMUL_TIME + 6)) * (nsize - dsize) > UDIV_TIME)
  96         {
  97           invert_limb (dxinv, dx);
  98           have_preinv = 1;
  99         }
 100     }
 101
 102   for (i = nsize - dsize - 1; i >= 0; i--)
 103     {
 104       mp_limb_t q;
 105       mp_limb_t nx;
 106       mp_limb_t cy_limb;
 107
 108       nx = np[dsize - 1];
 109       np--;
 110
 111       if (nx == dx)
 112         {
 113           /* This might over-estimate q, but it's probably not worth
 114              the extra code here to find out.  */
 115           q = ~(mp_limb_t) 0;
 116
 117 #if 1
 118           cy_limb = mpn_submul_1 (np, dp, dsize, q);
 119 #else
 120           /* This should be faster on many machines */
 121           cy_limb = mpn_sub_n (np + 1, np + 1, dp, dsize);
 122           cy = mpn_add_n (np, np, dp, dsize);
 123           np[dsize] += cy;
 124 #endif
 125
 126           if (nx != cy_limb)
 127             {
 128               mpn_add_n (np, np, dp, dsize);
 129               q--;
 130             }
 131
 132           qp[i] = q;
 133         }
 134       else
 135         {
 136           mp_limb_t rx, r1, r0, p1, p0;
 137
 138           /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register
 139              usage when np[dsize-1] is used in an asm statement like
 140              umul_ppmm in udiv_qrnnd_preinv.  The symptom is seg faults due
 141              to registers being clobbered.  gcc 2.95 i386 doesn't have the
 142              problem. */
 143           {
 144             mp_limb_t  workaround = np[dsize - 1];
 145             if (PREINVERT_VIABLE && have_preinv)
 146               udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv);
 147             else
 148               udiv_qrnnd (q, r1, nx, workaround, dx);
 149           }
 150           umul_ppmm (p1, p0, d1, q);
 151
 152           r0 = np[dsize - 2];
 153           rx = 0;
 154           if (r1 < p1 || (r1 == p1 && r0 < p0))
 155             {
 156               p1 -= p0 < d1;
 157               p0 -= d1;
 158               q--;
 159               r1 += dx;
 160               rx = r1 < dx;
 161             }
 162
 163           p1 += r0 < p0;        /* cannot carry! */
 164           rx -= r1 < p1;        /* may become 11..1 if q is still too large */
 165           r1 -= p1;
 166           r0 -= p0;
 167
 168           cy_limb = mpn_submul_1 (np, dp, dsize - 2, q);
 169
 170           {
 171             mp_limb_t cy1, cy2;
 172             cy1 = r0 < cy_limb;
 173             r0 -= cy_limb;
 174             cy2 = r1 < cy1;
 175             r1 -= cy1;
 176             np[dsize - 1] = r1;
 177             np[dsize - 2] = r0;
 178             if (cy2 != rx)
 179               {
 180                 mpn_add_n (np, np, dp, dsize);
 181                 q--;
 182               }
 183           }
 184           qp[i] = q;
 185         }
 186     }
 187
 188   /* ______ ______ ______
 189     |__rx__|__r1__|__r0__|              partial remainder
 190             ______ ______
 191          - |__p1__|__p0__|              partial product to subtract
 192             ______ ______
 193          - |______|cylimb|
 194
 195      rx is -1, 0 or 1.  If rx=1, then q is correct (it should match
 196      carry out).  If rx=-1 then q is too large.  If rx=0, then q might
 197      be too large, but it is most likely correct.
 198   */
 199
 200   return most_significant_q_limb;
 201 }