--- /dev/null
+! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
+! add the result to a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+#include "sysdep.h"
+
+.text
+ .align 4
+ .global C_SYMBOL_NAME(__mpn_addmul_1)
+! __mpn_addmul_1 (res_ptr, s1_ptr, size, s2_limb)
+!
+! For each of the size limbs, multiplies s1_ptr[i] by s2_limb, adds the
+! running carry limb and res_ptr[i], and stores the low word back to
+! res_ptr[i]. The final carry limb is left in %o0.
+!
+! The loop is unrolled four times. Entry goes through a table of four
+! 16-byte stubs at LL, indexed by size mod 4. Each stub biases res_ptr and
+! s1_ptr so that the fixed offsets used inside the unrolled body address
+! the first limbs, then branches into the matching stage.
+!
+! Register use:
+! %o0 res_ptr (biased) %o1 s1_ptr (biased) %o2 limb count left
+! %o3 s2_limb %o4 current s1 limb
+! %g2 carry limb %g3 stub table base, then low product word
+! %g1 stub offset, then the res limb being updated
+! %g4 saved %o7 (PIC only)
+! NOTE(review): a stage is always entered before the count is tested, so
+! this presumably requires size >= 1 -- confirm against callers.
+C_SYMBOL_NAME(__mpn_addmul_1):
+ orcc %g0,%g0,%g2 ! carry limb = 0, carry flag cleared
+ ld [%o1+0],%o4 ! 1
+
+! %g1 = (size mod 4) * 16, the byte offset of the entry stub from LL.
+ sll %o2,4,%g1
+ and %g1,(4-1)<<4,%g1
+#if PIC
+! call stores the address of the call instruction itself in %o7, so the
+! distance to LL must be measured from the call (label 0), not from the
+! instruction after its delay slot.
+ mov %o7,%g4 ! Save return address register
+0: call 1f
+ add %o7,LL-0b,%g3
+1: mov %g4,%o7 ! Restore return address register
+#else
+ sethi %hi(LL),%g3
+ or %g3,%lo(LL),%g3
+#endif
+ jmp %g3+%g1
+ nop
+! Entry stubs, four instructions each. The instruction following each b
+! is in its delay slot and executes before the loop stage is reached.
+LL:
+LL00: add %o0,-4,%o0
+ b Loop00 /* 4, 8, 12, ... */
+ add %o1,-4,%o1
+ nop
+LL01: b Loop01 /* 1, 5, 9, ... */
+ nop
+ nop
+ nop
+LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */
+ b Loop10
+ add %o1,4,%o1
+ nop
+LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */
+ b Loop11
+ add %o1,-8,%o1
+ nop
+
+! Stage pattern: umul leaves the low product word in %g3 and the high
+! word in %y. The product is combined with the carry limb in %g2, the
+! high word plus carry flag becomes the new carry limb, and the res limb
+! is added last so that its carry flag is picked up by the addxcc of the
+! following stage.
+!
+! Stage 1 uses addcc rather than addxcc: the carry flag of stage 4 was
+! already folded into %g2 before the loop counter update clobbered it.
+1: addcc %g3,%g2,%g3 ! 1
+ ld [%o1+4],%o4 ! 2
+ rd %y,%g2 ! 1
+ addx %g0,%g2,%g2
+ ld [%o0+0],%g1 ! 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+0] ! 1
+Loop00: umul %o4,%o3,%g3 ! 2
+ ld [%o0+4],%g1 ! 2
+ addxcc %g3,%g2,%g3 ! 2
+ ld [%o1+8],%o4 ! 3
+ rd %y,%g2 ! 2
+ addx %g0,%g2,%g2
+ nop
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+4] ! 2
+Loop11: umul %o4,%o3,%g3 ! 3
+ addxcc %g3,%g2,%g3 ! 3
+ ld [%o1+12],%o4 ! 4
+ rd %y,%g2 ! 3
+ add %o1,16,%o1
+ addx %g0,%g2,%g2
+ ld [%o0+8],%g1 ! 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+8] ! 3
+Loop10: umul %o4,%o3,%g3 ! 4
+ addxcc %g3,%g2,%g3 ! 4
+ ld [%o1+0],%o4 ! 1
+ rd %y,%g2 ! 4
+ addx %g0,%g2,%g2
+ ld [%o0+12],%g1 ! 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+12] ! 4
+ add %o0,16,%o0
+! Fold the carry flag of the res addition into the carry limb now; the
+! addcc on the counter below overwrites the flags.
+ addx %g0,%g2,%g2
+! Count down by four limbs. The umul in the delay slot starts the next
+! product whether or not the branch is taken.
+Loop01: addcc %o2,-4,%o2
+ bg 1b
+ umul %o4,%o3,%g3 ! 1
+
+! Wind-down: finish the last limb and return the carry limb in %o0.
+ addcc %g3,%g2,%g3 ! 4
+ rd %y,%g2 ! 4
+ addx %g0,%g2,%g2
+ ld [%o0+0],%g1 ! 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+0] ! 4
+ addx %g0,%g2,%o0
+
+ retl
+ nop
+
+
+! umul, ld, addxcc, rd, st
+
+! umul, ld, addxcc, rd, ld, addcc, st, addx
+
--- /dev/null
+! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
+! store the product in a second limb vector.
+
+! Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+#include "sysdep.h"
+
+.text
+ .align 8
+ .global C_SYMBOL_NAME(__mpn_mul_1)
+! __mpn_mul_1 (res_ptr, s1_ptr, size, s2_limb)
+!
+! For each of the size limbs, multiplies s1_ptr[i] by s2_limb, adds the
+! running carry limb and stores the low word to res_ptr[i]. The final
+! carry limb is left in %o0.
+!
+! The loop is unrolled four times and entered through a table of stubs at
+! LL indexed by (size mod 4) * 16. Each stub biases the pointers for the
+! stage it enters and, in the delay slot of its branch, clears the carry
+! limb %g2 and the carry flag with orcc.
+!
+! Register use:
+! %o0 res_ptr (biased) %o1 s1_ptr (biased) %o2 limb count left
+! %o3 s2_limb %o4 current s1 limb
+! %g2 carry limb %g3 stub table base, then low product word
+! %g1 stub offset %g4 saved %o7 (PIC only)
+! NOTE(review): a stage is always entered before the count is tested, so
+! this presumably requires size >= 1 -- confirm against callers.
+C_SYMBOL_NAME(__mpn_mul_1):
+ sll %o2,4,%g1
+ and %g1,(4-1)<<4,%g1
+#if PIC
+! call stores the address of the call instruction itself in %o7, so the
+! distance to LL must be measured from the call (label 0), not from the
+! instruction after its delay slot.
+ mov %o7,%g4 ! Save return address register
+0: call 1f
+ add %o7,LL-0b,%g3
+1: mov %g4,%o7 ! Restore return address register
+#else
+ sethi %hi(LL),%g3
+ or %g3,%lo(LL),%g3
+#endif
+! The first source limb is fetched in the delay slot of the dispatch jump.
+ jmp %g3+%g1
+ ld [%o1+0],%o4 ! 1
+LL:
+LL00: add %o0,-4,%o0
+ add %o1,-4,%o1
+ b Loop00 /* 4, 8, 12, ... */
+ orcc %g0,%g0,%g2
+LL01: b Loop01 /* 1, 5, 9, ... */
+ orcc %g0,%g0,%g2
+ nop
+ nop
+LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */
+ add %o1,4,%o1
+ b Loop10
+ orcc %g0,%g0,%g2
+! This stub holds five instructions, so the size mod 4 == 3 entry point
+! (LL+48) is the nop below, which simply falls through into LL11.
+ nop
+LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */
+ add %o1,-8,%o1
+ b Loop11
+ orcc %g0,%g0,%g2
+
+! Stage pattern: umul leaves the low product word in %g3 and the high
+! word in %y. addxcc adds the carry limb plus the carry flag of the
+! previous stage, and the high word read from %y becomes the next carry
+! limb. The loads, stores, rd and umul in between leave the flags alone.
+!
+! Stage 1 uses addcc rather than addxcc: the carry flag of stage 4 was
+! already folded into %g2 before the loop counter update clobbered it.
+Loop: addcc %g3,%g2,%g3 ! 1
+ ld [%o1+4],%o4 ! 2
+ st %g3,[%o0+0] ! 1
+ rd %y,%g2 ! 1
+Loop00: umul %o4,%o3,%g3 ! 2
+ addxcc %g3,%g2,%g3 ! 2
+ ld [%o1+8],%o4 ! 3
+ st %g3,[%o0+4] ! 2
+ rd %y,%g2 ! 2
+Loop11: umul %o4,%o3,%g3 ! 3
+ addxcc %g3,%g2,%g3 ! 3
+ ld [%o1+12],%o4 ! 4
+ add %o1,16,%o1
+ st %g3,[%o0+8] ! 3
+ rd %y,%g2 ! 3
+Loop10: umul %o4,%o3,%g3 ! 4
+ addxcc %g3,%g2,%g3 ! 4
+ ld [%o1+0],%o4 ! 1
+ st %g3,[%o0+12] ! 4
+ add %o0,16,%o0
+ rd %y,%g2 ! 4
+! Fold the pending carry flag into the carry limb; the addcc on the
+! counter below overwrites the flags.
+ addx %g0,%g2,%g2
+! Count down by four limbs. The umul in the delay slot starts the next
+! product whether or not the branch is taken.
+Loop01: addcc %o2,-4,%o2
+ bg Loop
+ umul %o4,%o3,%g3 ! 1
+
+! Wind-down: store the last limb and return the carry limb in %o0.
+ addcc %g3,%g2,%g3 ! 4
+ st %g3,[%o0+0] ! 4
+ rd %y,%g2 ! 4
+
+ retl
+ addx %g0,%g2,%o0
--- /dev/null
+! SPARC v8 __mpn_submul_1 -- Multiply a limb vector with a limb and
+! subtract the result from a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+#include "sysdep.h"
+
+.text
+ .align 4
+ .global C_SYMBOL_NAME(__mpn_submul_1)
+! __mpn_submul_1 (res_ptr, s1_ptr, size, s2_limb)
+!
+! For each limb, forms s1_ptr[i] * s2_limb plus the running carry limb,
+! subtracts the low word from res_ptr[i] and stores the difference back.
+! The carry limb (high product word plus the carry and borrow flags) is
+! kept in %o0, which is where it is left on return.
+!
+! Both pointers are moved past the end of their vectors and indexed with
+! a negative byte offset in %o2 that counts up to zero, so a single addcc
+! both steps the index and provides the loop test.
+! NOTE(review): the loop body runs before the first test, so this
+! presumably requires size >= 1 -- confirm against callers.
+C_SYMBOL_NAME(__mpn_submul_1):
+ sub %g0,%o2,%o2 ! negate ...
+ sll %o2,2,%o2 ! ... and scale size
+ sub %o1,%o2,%o1 ! o1 is offset s1_ptr
+ sub %o0,%o2,%g1 ! g1 is offset res_ptr
+
+ mov 0,%o0 ! clear cy_limb
+
+! %o4 = source limb, %g2 = res limb, %o5 = low product, %g3 = high product.
+! The first addx picks up the carry of adding the old carry limb to the
+! low product; the second picks up the borrow of the subtraction.
+Loop: ld [%o1+%o2],%o4
+ ld [%g1+%o2],%g2
+ umul %o4,%o3,%o5
+ rd %y,%g3
+ addcc %o5,%o0,%o5
+ addx %g3,0,%o0
+ subcc %g2,%o5,%g2
+ addx %o0,0,%o0
+ st %g2,[%g1+%o2]
+
+! Step the negative index by one limb; it reaches zero after the last one.
+ addcc %o2,4,%o2
+ bne Loop
+ nop
+
+ retl
+ nop
--- /dev/null
+! SuperSPARC __udiv_qrnnd division support, used from longlong.h.
+! This is for SuperSPARC only, to compensate for its semi-functional
+! udiv instruction.
+
+! Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! rem_ptr i0
+! n1 i1
+! n0 i2
+! d i3
+
+#include "sysdep.h"
+#undef ret /* Kludge for glibc */
+
+ .text
+ .align 8
+LC0: .double 0r4294967296
+LC1: .double 0r2147483648
+
+ .align 4
+ .global C_SYMBOL_NAME(__udiv_qrnnd)
+! __udiv_qrnnd (rem_ptr, n1, n0, d)
+!
+! Divides the two-word value n1:n0 by d. A quotient estimate is computed
+! in double-precision floating point, then checked against the exact
+! integer product d * q and corrected by one in either direction. The
+! remainder is stored through rem_ptr and the quotient is placed in %i0,
+! which the caller sees as %o0 after restore.
+!
+! fitod converts a signed word, so every input whose top bit is set gets
+! 2^32 (LC0) added back after conversion. Values travel between the
+! integer and FP register files through the stack slot at [%fp-8].
+! NOTE(review): presumably requires n1 < d so that the quotient fits in
+! one word -- confirm against the longlong.h caller.
+C_SYMBOL_NAME(__udiv_qrnnd):
+ !#PROLOGUE# 0
+ save %sp,-104,%sp
+ !#PROLOGUE# 1
+! %f4 = (double) n1, %f8 = 2^32. rem_ptr is copied to %i5 in the delay
+! slot because %i0 is reused below.
+ st %i1,[%fp-8]
+ ld [%fp-8],%f10
+ sethi %hi(LC0),%o7
+ fitod %f10,%f4
+ ldd [%o7+%lo(LC0)],%f8
+ cmp %i1,0
+ bge L248
+ mov %i0,%i5
+ faddd %f4,%f8,%f4
+L248:
+! %f6 = n1 * 2^32, %f2 = (double) n0 (converted in the delay slot).
+ st %i2,[%fp-8]
+ ld [%fp-8],%f10
+ fmuld %f4,%f8,%f6
+ cmp %i2,0
+ bge L249
+ fitod %f10,%f2
+ faddd %f2,%f8,%f2
+L249:
+! %f2 = n1 * 2^32 + n0, %f4 = (double) d.
+ st %i3,[%fp-8]
+ faddd %f6,%f2,%f2
+ ld [%fp-8],%f10
+ cmp %i3,0
+ bge L250
+ fitod %f10,%f4
+ faddd %f4,%f8,%f4
+L250:
+! %f2 = quotient estimate. fdtoi produces a signed word, so an estimate of
+! 2^31 (LC1) or more has 2^31 subtracted before conversion (annulled delay
+! slot, executed only when the branch is taken) and the top bit is put
+! back with xor afterwards.
+ fdivd %f2,%f4,%f2
+ sethi %hi(LC1),%o7
+ ldd [%o7+%lo(LC1)],%f4
+ fcmped %f2,%f4
+ nop
+ fbge,a L251
+ fsubd %f2,%f4,%f2
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ b L252
+ ld [%fp-8],%i4
+L251:
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ ld [%fp-8],%i4
+ sethi %hi(-2147483648),%g2
+ xor %i4,%g2,%i4
+L252:
+! %i4 = q. Compute n1:n0 - d * q: low word in %o7, high word only as
+! condition codes. A zero high word branches to L253; the delay slot
+! compares the low word with d for the test made there.
+ umul %i3,%i4,%g3
+ rd %y,%i0
+ subcc %i2,%g3,%o7
+ subxcc %i1,%i0,%g0
+ be L253
+ cmp %o7,%i3
+
+! High word non-zero: return q - 1 and add d back to the remainder.
+ add %i4,-1,%i0
+ add %o7,%i3,%o7
+ st %o7,[%i5]
+ ret
+ restore
+L253:
+! Remainder below d: q is returned as is (moved in the delay slot).
+! Otherwise return q + 1 and take d off the remainder.
+ blu L246
+ mov %i4,%i0
+ add %i4,1,%i0
+ sub %o7,%i3,%o7
+L246:
+ st %o7,[%i5]
+ ret
+ restore
--- /dev/null
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifndef USG
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* Return the user CPU time reported by getrusage, in milliseconds. */
+unsigned long
+cputime ()
+{
+ struct rusage usage;
+ unsigned long msec;
+
+ getrusage (0, &usage);
+ msec = usage.ru_utime.tv_sec * 1000;
+ msec += usage.ru_utime.tv_usec / 1000;
+ return msec;
+}
+#else
+#include <time.h>
+
+#ifndef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC 1000000
+#endif
+
+#if CLOCKS_PER_SEC >= 10000
+#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000))
+#else
+#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC)
+#endif
+
+/* Return the value of clock () converted to milliseconds. */
+unsigned long
+cputime ()
+{
+ clock_t ticks = clock ();
+
+ return CLOCK_TO_MILLISEC (ticks);
+}
+#endif
+
+#define M * 1000000
+
+#ifndef CLOCK
+#if defined (__m88k__)
+#define CLOCK 20 M
+#elif defined (__i386__)
+#define CLOCK (16.666667 M)
+#elif defined (__m68k__)
+#define CLOCK (20 M)
+#elif defined (_IBMR2)
+#define CLOCK (25 M)
+#elif defined (__sparc__)
+#define CLOCK (20 M)
+#elif defined (__sun__)
+#define CLOCK (20 M)
+#elif defined (__mips)
+#define CLOCK (40 M)
+#elif defined (__hppa__)
+#define CLOCK (50 M)
+#elif defined (__alpha)
+#define CLOCK (133 M)
+#else
+#error "Don't know CLOCK of your machine"
+#endif
+#endif
+
+#ifndef OPS
+#define OPS 10000000
+#endif
+#ifndef SIZE
+#define SIZE 328
+#endif
+#ifndef TIMES
+#define TIMES OPS/SIZE
+#else
+#undef OPS
+#define OPS (SIZE*TIMES)
+#endif
+
+
+/* Reference limb-vector addition: res_ptr[i] = s1_ptr[i] + s2_ptr[i] with
+ carry propagation over SIZE limbs. Returns the carry out of the top limb.
+ The do/while form runs the body before testing, as in the original. */
+mp_limb_t
+#if __STDC__
+refmpn_add_n (mp_ptr res_ptr,
+ mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+#else
+refmpn_add_n (res_ptr, s1_ptr, s2_ptr, size)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ register mp_srcptr s2_ptr;
+ mp_size_t size;
+#endif
+{
+ register mp_limb_t a, b, sum, carry;
+ register mp_size_t k;
+
+ carry = 0;
+ k = 0;
+ do
+ {
+ /* Read both operands before the store so overlap is handled as before. */
+ b = s2_ptr[k];
+ a = s1_ptr[k];
+
+ /* Carry in: wrap-around shows up as the result being below the carry. */
+ b += carry;
+ carry = (b < carry);
+
+ /* Main addition: wrap-around shows up as the sum being below A. */
+ sum = a + b;
+ carry += (sum < a);
+
+ res_ptr[k] = sum;
+ }
+ while (++k != size);
+
+ return carry;
+}
+
+/* Endless benchmark and self-check driver for mpn_add_n.
+ Each round fills s1 and s2 with random limbs, times TIMES calls of the
+ reference routine and of mpn_add_n, and then (unless NOCHECK is defined)
+ compares one result of each, calling abort () on any difference. The
+ loop has no exit condition. */
+main (argc, argv)
+ int argc;
+ char **argv;
+{
+ mp_limb_t s1[SIZE];
+ mp_limb_t s2[SIZE];
+ mp_limb_t dx[SIZE+1];
+ mp_limb_t dy[SIZE+1];
+ int cyx, cyy;
+ int i;
+ long t0, t;
+ int test;
+ mp_size_t size;
+
+ for (test = 0; ; test++)
+ {
+#ifdef RANDOM
+ size = (random () % SIZE + 1);
+#else
+ size = SIZE;
+#endif
+
+ mpn_random2 (s1, size);
+ mpn_random2 (s2, size);
+
+ /* Guard limb just past each result; verified after the check run. */
+ dx[size] = 0x12345678;
+ dy[size] = 0x12345678;
+
+#ifdef PRINT
+ mpn_print (s1, size);
+ mpn_print (s2, size);
+#endif
+ /* Time the reference implementation. T is in milliseconds, so
+ t * CLOCK / (OPS * 1000.0) is cycles per limb operation. */
+ t0 = cputime();
+ for (i = 0; i < TIMES; i++)
+ cyx = refmpn_add_n (dx, s1, s2, size);
+ t = cputime() - t0;
+#if TIMES != 1
+ printf ("refmpn_add_n: %ldms (%.2f cycles/limb)\n",
+ t,
+ ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+#ifdef PRINT
+ printf ("%d ", cyx); mpn_print (dx, size);
+#endif
+
+ /* Time the routine under test; it writes to dx as well. */
+ t0 = cputime();
+ for (i = 0; i < TIMES; i++)
+ cyx = mpn_add_n (dx, s1, s2, size);
+ t = cputime() - t0;
+#if TIMES != 1
+ printf ("mpn_add_n: %ldms (%.2f cycles/limb)\n",
+ t,
+ ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+#ifdef PRINT
+ printf ("%d ", cyx); mpn_print (dx, size);
+#endif
+
+#ifndef NOCHECK
+ /* Put garbage in the destination. */
+ for (i = 0; i < size; i++)
+ {
+ dx[i] = 0x7654321;
+ dy[i] = 0x1234567;
+ }
+
+ /* Carry, result limbs and both guard limbs must all agree. */
+ cyx = refmpn_add_n (dx, s1, s2, size);
+ cyy = mpn_add_n (dy, s1, s2, size);
+ if (cyx != cyy || mpn_cmp (dx, dy, size) != 0
+ || dx[size] != 0x12345678 || dy[size] != 0x12345678)
+ {
+#ifndef PRINT
+ printf ("%d ", cyx); mpn_print (dx, size);
+ printf ("%d ", cyy); mpn_print (dy, size);
+#endif
+ abort();
+ }
+#endif
+ }
+}
+
+/* Print the SIZE limbs at P in hexadecimal, most significant limb first,
+ each zero-padded to two digits per byte, followed by a newline. With
+ SPACE defined the limbs are separated by single blanks. */
+mpn_print (mp_ptr p, mp_size_t size)
+{
+ mp_size_t idx = size;
+
+ while (idx-- > 0)
+ {
+ printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[idx]);
+#ifdef SPACE
+ if (idx != 0)
+ printf (" ");
+#endif
+ }
+ puts ("");
+}
--- /dev/null
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef USG
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* Return the user CPU time reported by getrusage, in milliseconds. */
+unsigned long
+cputime ()
+{
+ struct rusage usage;
+ unsigned long msec;
+
+ getrusage (0, &usage);
+ msec = usage.ru_utime.tv_sec * 1000;
+ msec += usage.ru_utime.tv_usec / 1000;
+ return msec;
+}
+#else
+#include <time.h>
+
+#ifndef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC 1000000
+#endif
+
+#if CLOCKS_PER_SEC >= 10000
+#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000))
+#else
+#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC)
+#endif
+
+/* Return the value of clock () converted to milliseconds. */
+unsigned long
+cputime ()
+{
+ clock_t ticks = clock ();
+
+ return CLOCK_TO_MILLISEC (ticks);
+}
+#endif
+
+#define M * 1000000
+
+#ifndef CLOCK
+#if defined (__m88k__)
+#define CLOCK 20 M
+#elif defined (__i386__)
+#define CLOCK (16.666667 M)
+#elif defined (__m68k__)
+#define CLOCK (20 M)
+#elif defined (_IBMR2)
+#define CLOCK (25 M)
+#elif defined (__sparc__)
+#define CLOCK (20 M)
+#elif defined (__sun__)
+#define CLOCK (20 M)
+#elif defined (__mips)
+#define CLOCK (40 M)
+#elif defined (__hppa__)
+#define CLOCK (50 M)
+#elif defined (__alpha)
+#define CLOCK (133 M)
+#else
+#error "Don't know CLOCK of your machine"
+#endif
+#endif
+
+#ifndef OPS
+#define OPS 20000000
+#endif
+#ifndef SIZE
+#define SIZE 496
+#endif
+#ifndef TIMES
+#define TIMES OPS/SIZE
+#else
+#undef OPS
+#define OPS (SIZE*TIMES)
+#endif
+
+/* Reference multiply-accumulate: adds s1_ptr[i] * s2_limb into res_ptr[i]
+ for S1_SIZE limbs and returns the carry limb left over at the top.
+ The do/while form runs the body before testing, as in the original. */
+mp_limb_t
+refmpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ mp_size_t s1_size;
+ register mp_limb_t s2_limb;
+{
+ register mp_limb_t carry;
+ register mp_size_t k;
+ register mp_limb_t hi, lo;
+ register mp_limb_t old;
+
+ carry = 0;
+ k = 0;
+ do
+ {
+ umul_ppmm (hi, lo, s1_ptr[k], s2_limb);
+
+ /* Add the incoming carry limb; a wrap bumps the high word. */
+ lo += carry;
+ carry = hi + (lo < carry);
+
+ /* Add the existing result limb; a wrap bumps the carry again. */
+ old = res_ptr[k];
+ lo += old;
+ carry += (lo < old);
+ res_ptr[k] = lo;
+ }
+ while (++k != s1_size);
+
+ return carry;
+}
+
+/* Endless benchmark and self-check driver for mpn_addmul_1.
+ Each round builds a random source vector, a random accumulator (kept in
+ dy between two guard limbs) and a multiplier limb, times the reference
+ routine and mpn_addmul_1, then runs each once from the same starting
+ accumulator and calls abort () if the results differ. The loop has no
+ exit condition. */
+main (argc, argv)
+ int argc;
+ char **argv;
+{
+ mp_limb_t s1[SIZE];
+ mp_limb_t dx[SIZE+2];
+ mp_limb_t dy[SIZE+2];
+ mp_limb_t cyx, cyy;
+ int i;
+ long t0, t;
+ int test;
+ mp_limb_t xlimb;
+ mp_size_t size;
+ double cyc;
+
+ for (test = 0; ; test++)
+ {
+#ifdef RANDOM
+ size = (random () % SIZE + 1);
+#else
+ size = SIZE;
+#endif
+
+ mpn_random2 (s1, size);
+ mpn_random2 (dy+1, size);
+
+ /* Roughly one round in 256 uses a zero multiplier. */
+ if (random () % 0x100 == 0)
+ xlimb = 0;
+ else
+ mpn_random2 (&xlimb, 1);
+
+ /* Guard limbs on both sides of the accumulator. */
+ dy[size+1] = 0x12345678;
+ dy[0] = 0x87654321;
+
+#if defined (PRINT) || defined (XPRINT)
+ printf ("xlimb=%*lX\n", (int) (2 * sizeof(mp_limb_t)), xlimb);
+#endif
+#ifdef PRINT
+ mpn_print (dy+1, size);
+ mpn_print (s1, size);
+#endif
+
+ /* Time the reference implementation on a copy of dy. T is in
+ milliseconds, so t * CLOCK / (OPS * 1000.0) is cycles per limb. */
+ MPN_COPY (dx, dy, size+2);
+ t0 = cputime();
+ for (i = 0; i < TIMES; i++)
+ cyx = refmpn_addmul_1 (dx+1, s1, size, xlimb);
+ t = cputime() - t0;
+#if TIMES != 1
+ cyc = ((double) t * CLOCK) / (OPS * 1000.0);
+ printf ("refmpn_addmul_1: %5ldms (%.2f cycles/limb) [%.2f Gb/s]\n",
+ t,
+ cyc,
+ CLOCK/cyc*BITS_PER_MP_LIMB*BITS_PER_MP_LIMB);
+#endif
+
+ /* Time the routine under test, again on a fresh copy of dy. */
+ MPN_COPY (dx, dy, size+2);
+ t0 = cputime();
+ for (i = 0; i < TIMES; i++)
+ cyy = mpn_addmul_1 (dx+1, s1, size, xlimb);
+ t = cputime() - t0;
+#if TIMES != 1
+ cyc = ((double) t * CLOCK) / (OPS * 1000.0);
+ printf ("mpn_addmul_1: %5ldms (%.2f cycles/limb) [%.2f Gb/s]\n",
+ t,
+ cyc,
+ CLOCK/cyc*BITS_PER_MP_LIMB*BITS_PER_MP_LIMB);
+#endif
+
+ /* Check run: both routines start from identical accumulators, the
+ reference in dx and the routine under test in dy. */
+ MPN_COPY (dx, dy, size+2);
+ cyx = refmpn_addmul_1 (dx+1, s1, size, xlimb);
+ cyy = mpn_addmul_1 (dy+1, s1, size, xlimb);
+
+#ifdef PRINT
+ printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
+ mpn_print (dx+1, size);
+ printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
+ mpn_print (dy+1, size);
+#endif
+
+#ifndef NOCHECK
+ /* The comparison spans the guard limbs, so checking the guards of dx
+ covers dy too. */
+ if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0
+ || dx[size+1] != 0x12345678 || dx[0] != 0x87654321)
+ {
+#ifndef PRINT
+ printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
+ mpn_print (dx+1, size);
+ printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
+ mpn_print (dy+1, size);
+#endif
+ abort();
+ }
+#endif
+ }
+}
+
+/* Print the SIZE limbs at P in hexadecimal, most significant limb first,
+ each zero-padded to two digits per byte, followed by a newline. With
+ SPACE defined the limbs are separated by single blanks. */
+mpn_print (mp_ptr p, mp_size_t size)
+{
+ mp_size_t idx = size;
+
+ while (idx-- > 0)
+ {
+ printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[idx]);
+#ifdef SPACE
+ if (idx != 0)
+ printf (" ");
+#endif
+ }
+ puts ("");
+}
--- /dev/null
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef USG
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* Return the user CPU time reported by getrusage, in milliseconds. */
+unsigned long
+cputime ()
+{
+ struct rusage usage;
+ unsigned long msec;
+
+ getrusage (0, &usage);
+ msec = usage.ru_utime.tv_sec * 1000;
+ msec += usage.ru_utime.tv_usec / 1000;
+ return msec;
+}
+#else
+#include <time.h>
+
+#ifndef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC 1000000
+#endif
+
+#if CLOCKS_PER_SEC >= 10000
+#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000))
+#else
+#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC)
+#endif
+
+/* Return the value of clock () converted to milliseconds. */
+unsigned long
+cputime ()
+{
+ clock_t ticks = clock ();
+
+ return CLOCK_TO_MILLISEC (ticks);
+}
+#endif
+
+#define M * 1000000
+
+#ifndef CLOCK
+#if defined (__m88k__)
+#define CLOCK 20 M
+#elif defined (__i386__)
+#define CLOCK (16.666667 M)
+#elif defined (__m68k__)
+#define CLOCK (20 M)
+#elif defined (_IBMR2)
+#define CLOCK (25 M)
+#elif defined (__sparc__)
+#define CLOCK (20 M)
+#elif defined (__sun__)
+#define CLOCK (20 M)
+#elif defined (__mips)
+#define CLOCK (40 M)
+#elif defined (__hppa__)
+#define CLOCK (50 M)
+#elif defined (__alpha)
+#define CLOCK (133 M)
+#else
+#error "Don't know CLOCK of your machine"
+#endif
+#endif
+
+#ifndef OPS
+#define OPS 20000000
+#endif
+#ifndef SIZE
+#define SIZE 1000
+#endif
+#ifndef TIMES
+#define TIMES OPS/SIZE
+#else
+#undef OPS
+#define OPS (SIZE*TIMES)
+#endif
+
+/* Endless self-check of mpn_divmod_1: divide a random number by a random
+ limb, rebuild quotient * divisor + remainder with mpn_mul_1 and
+ mpn_add_1, and call abort () if that does not reproduce the dividend.
+ The loop has no exit condition. */
+main ()
+{
+ mp_limb_t numer[SIZE];
+ mp_limb_t quot[SIZE];
+ mp_limb_t prod[SIZE];
+ mp_limb_t divisor, rem, top;
+ mp_size_t nlen, qlen, plen;
+ int test;
+
+ test = 0;
+ for (;;)
+ {
+#ifdef RANDOM
+ nlen = random () % SIZE + 1;
+#else
+ nlen = SIZE;
+#endif
+
+ mpn_random2 (numer, nlen);
+
+ mpn_random2 (&divisor, 1);
+ if (divisor == 0)
+ abort ();
+
+ rem = mpn_divmod_1 (quot, numer, nlen, divisor);
+
+ /* Drop a zero top limb from the quotient. */
+ qlen = nlen;
+ if (quot[nlen - 1] == 0)
+ qlen--;
+ plen = qlen;
+
+ if (qlen != 0)
+ {
+ /* prod = quot * divisor + rem; TOP collects both carries. */
+ top = mpn_mul_1 (prod, quot, qlen, divisor);
+ top += mpn_add_1 (prod, prod, plen, rem);
+ }
+ else
+ top = rem;
+
+ if (top != 0)
+ prod[plen++] = top;
+
+ if (! (nlen == plen && mpn_cmp (numer, prod, nlen) == 0))
+ abort ();
+
+ test++;
+ }
+}
--- /dev/null
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef USG
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* Return the user CPU time reported by getrusage, in milliseconds. */
+unsigned long
+cputime ()
+{
+ struct rusage usage;
+ unsigned long msec;
+
+ getrusage (0, &usage);
+ msec = usage.ru_utime.tv_sec * 1000;
+ msec += usage.ru_utime.tv_usec / 1000;
+ return msec;
+}
+#else
+#include <time.h>
+
+#ifndef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC 1000000
+#endif
+
+#if CLOCKS_PER_SEC >= 10000
+#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000))
+#else
+#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC)
+#endif
+
+/* Return the value of clock () converted to milliseconds. */
+unsigned long
+cputime ()
+{
+ clock_t ticks = clock ();
+
+ return CLOCK_TO_MILLISEC (ticks);
+}
+#endif
+
+#define M * 1000000
+
+#ifndef CLOCK
+#if defined (__m88k__)
+#define CLOCK 20 M
+#elif defined (__i386__)
+#define CLOCK (16.666667 M)
+#elif defined (__m68k__)
+#define CLOCK (20 M)
+#elif defined (_IBMR2)
+#define CLOCK (25 M)
+#elif defined (__sparc__)
+#define CLOCK (20 M)
+#elif defined (__sun__)
+#define CLOCK (20 M)
+#elif defined (__mips)
+#define CLOCK (40 M)
+#elif defined (__hppa__)
+#define CLOCK (50 M)
+#elif defined (__alpha)
+#define CLOCK (133 M)
+#else
+#error "Don't know CLOCK of your machine"
+#endif
+#endif
+
+#ifndef OPS
+#define OPS 20000000
+#endif
+#ifndef SIZE
+#define SIZE 100
+#endif
+#ifndef TIMES
+#define TIMES OPS/SIZE
+#else
+#undef OPS
+#define OPS (SIZE*TIMES)
+#endif
+
+/* Endless self-check of mpn_divrem: divide a random number by a random
+ normalized divisor, rebuild quotient * divisor + remainder with mpn_mul
+ and mpn_add, and call abort () if that does not reproduce the dividend.
+ The loop has no exit condition.
+
+ Buffer sizes: with RANDOM defined dsize can be as large as nsize, so
+ dptr needs 2 * SIZE limbs. The quotient can have nsize - dsize + 1
+ limbs, so mpn_mul may write nsize + 1 limbs to pptr, and the carry
+ stored at pptr[psize] can land one limb above that; pptr therefore
+ needs 2 * SIZE + 2 limbs. */
+main ()
+{
+ mp_limb_t nptr[2 * SIZE];
+ mp_limb_t dptr[2 * SIZE];
+ mp_limb_t qptr[2 * SIZE];
+ mp_limb_t pptr[2 * SIZE + 2];
+ mp_limb_t rptr[2 * SIZE];
+ mp_size_t nsize, dsize, qsize, rsize, psize;
+ int test;
+ mp_limb_t qlimb;
+
+ for (test = 0; ; test++)
+ {
+#ifdef RANDOM
+ nsize = random () % (2 * SIZE) + 1;
+ dsize = random () % nsize + 1;
+#else
+ nsize = 2 * SIZE;
+ dsize = SIZE;
+#endif
+
+ mpn_random2 (nptr, nsize);
+ mpn_random2 (dptr, dsize);
+ /* Force the top bit of the divisor on. */
+ dptr[dsize - 1] |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
+
+ /* The division works on a copy of the dividend in rptr, which is
+ used as the remainder afterwards. The return value is stored as
+ the extra top limb of the quotient. */
+ MPN_COPY (rptr, nptr, nsize);
+ qlimb = mpn_divrem (qptr, (mp_size_t) 0, rptr, nsize, dptr, dsize);
+ rsize = dsize;
+ qsize = nsize - dsize;
+ qptr[qsize] = qlimb;
+ qsize += qlimb;
+ if (qsize == 0 || qsize > 2 * SIZE)
+ {
+ continue; /* bogus */
+ }
+ else
+ {
+ mp_limb_t cy;
+ /* The longer operand is passed to mpn_mul first. */
+ if (qsize > dsize)
+ mpn_mul (pptr, qptr, qsize, dptr, dsize);
+ else
+ mpn_mul (pptr, dptr, dsize, qptr, qsize);
+ psize = qsize + dsize;
+ psize -= pptr[psize - 1] == 0;
+ cy = mpn_add (pptr, pptr, psize, rptr, rsize);
+ pptr[psize] = cy;
+ psize += cy;
+ }
+
+ if (nsize != psize || mpn_cmp (nptr, pptr, nsize) != 0)
+ abort ();
+ }
+}
--- /dev/null
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef USG
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* Return the user CPU time reported by getrusage, in milliseconds. */
+unsigned long
+cputime ()
+{
+ struct rusage usage;
+ unsigned long msec;
+
+ getrusage (0, &usage);
+ msec = usage.ru_utime.tv_sec * 1000;
+ msec += usage.ru_utime.tv_usec / 1000;
+ return msec;
+}
+#else
+#include <time.h>
+
+#ifndef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC 1000000
+#endif
+
+#if CLOCKS_PER_SEC >= 10000
+#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000))
+#else
+#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC)
+#endif
+
+/* Return the value of clock () converted to milliseconds. */
+unsigned long
+cputime ()
+{
+ clock_t ticks = clock ();
+
+ return CLOCK_TO_MILLISEC (ticks);
+}
+#endif
+
+#define M * 1000000
+
+#ifndef CLOCK
+#if defined (__m88k__)
+#define CLOCK 20 M
+#elif defined (__i386__)
+#define CLOCK (16.666667 M)
+#elif defined (__m68k__)
+#define CLOCK (20 M)
+#elif defined (_IBMR2)
+#define CLOCK (25 M)
+#elif defined (__sparc__)
+#define CLOCK (20 M)
+#elif defined (__sun__)
+#define CLOCK (20 M)
+#elif defined (__mips)
+#define CLOCK (40 M)
+#elif defined (__hppa__)
+#define CLOCK (50 M)
+#elif defined (__alpha)
+#define CLOCK (133 M)
+#else
+#error "Don't know CLOCK of your machine"
+#endif
+#endif
+
+#ifndef OPS
+#define OPS 10000000
+#endif
+#ifndef SIZE
+#define SIZE 496
+#endif
+#ifndef TIMES
+#define TIMES OPS/SIZE
+#else
+#undef OPS
+#define OPS (SIZE*TIMES)
+#endif
+
+/* Reference left shift: shifts the USIZE limbs at UP left by CNT bits,
+ stores the result at WP and returns the bits shifted out of the top
+ limb. Limbs are handled from the most significant end downwards; each
+ source limb is read before the result limb one position above it is
+ written. With DEBUG defined, USIZE == 0 or CNT == 0 calls abort (). */
+mp_limb_t
+refmpn_lshift (wp, up, usize, cnt)
+ register mp_ptr wp;
+ register mp_srcptr up;
+ mp_size_t usize;
+ register unsigned int cnt;
+{
+ register mp_limb_t upper, lower;
+ register unsigned lsh, rsh;
+ register mp_size_t k;
+ mp_limb_t shifted_out;
+
+#ifdef DEBUG
+ if (usize == 0 || cnt == 0)
+ abort ();
+#endif
+
+ lsh = cnt;
+#if 0
+ if (lsh == 0)
+ {
+ if (wp != up)
+ {
+ /* Copy from high end to low end, to allow specified input/output
+ overlapping. */
+ for (k = usize - 1; k >= 0; k--)
+ wp[k] = up[k];
+ }
+ return 0;
+ }
+#endif
+
+ rsh = BITS_PER_MP_LIMB - lsh;
+
+ upper = up[usize - 1];
+ shifted_out = upper >> rsh;
+
+ /* Result limb K combines source limb K with the top bits of limb K-1. */
+ for (k = usize - 1; k > 0; k--)
+ {
+ lower = up[k - 1];
+ wp[k] = (upper << lsh) | (lower >> rsh);
+ upper = lower;
+ }
+ /* The lowest result limb has nothing below it to merge in. */
+ wp[k] = upper << lsh;
+
+ return shifted_out;
+}
+
+#ifndef CNT
+#define CNT 4
+#endif
+
+/* Endless benchmark and self-check driver for mpn_lshift.
+ Each round fills s1 with random limbs, times TIMES calls of the
+ reference routine and of mpn_lshift (shift count CNT), then, unless
+ NOCHECK is defined, runs each once into garbage-filled buffers and calls
+ abort () if the results differ. The loop has no exit condition. */
+main (argc, argv)
+ int argc;
+ char **argv;
+{
+ mp_limb_t s1[SIZE];
+ mp_limb_t dx[SIZE+2];
+ mp_limb_t dy[SIZE+2];
+ mp_limb_t cyx, cyy;
+ int i;
+ long t0, t;
+ int test;
+ int cnt = CNT;
+ mp_size_t size;
+
+ for (test = 0; ; test++)
+ {
+#ifdef RANDOM
+ size = (random () % SIZE + 1);
+#else
+ size = SIZE;
+#endif
+ mpn_random2 (s1, size);
+
+ /* Guard limbs on both sides of each result area. */
+ dx[size+1] = 0x12345678;
+ dy[size+1] = 0x12345678;
+ dx[0] = 0x87654321;
+ dy[0] = 0x87654321;
+
+#ifdef PRINT
+ mpn_print (s1, size);
+#endif
+ /* Time the reference implementation. T is in milliseconds, so
+ t * CLOCK / (OPS * 1000.0) is cycles per limb. */
+ t0 = cputime();
+ for (i = 0; i < TIMES; i++)
+ cyx = refmpn_lshift (dx+1, s1, size, cnt);
+ t = cputime() - t0;
+#if TIMES != 1
+ printf ("refmpn_lshift: %5ldms (%.2f cycles/limb)\n",
+ t,
+ ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+#ifdef PRINT
+ printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); mpn_print (dx+1, size);
+#endif
+
+ /* Time the routine under test; it writes to dx as well. */
+ t0 = cputime();
+ for (i = 0; i < TIMES; i++)
+ cyy = mpn_lshift (dx+1, s1, size, cnt);
+ t = cputime() - t0;
+#if TIMES != 1
+ printf ("mpn_lshift: %5ldms (%.2f cycles/limb)\n",
+ t,
+ ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+#ifdef PRINT
+ printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); mpn_print (dx+1, size);
+#endif
+
+#ifndef NOCHECK
+ /* Put garbage in the destination. */
+ for (i = 1; i <= size; i++)
+ {
+ dx[i] = 0x7654321;
+ dy[i] = 0x1234567;
+ }
+
+ cyx = refmpn_lshift (dx+1, s1, size, cnt);
+ cyy = mpn_lshift (dy+1, s1, size, cnt);
+
+ /* The comparison spans the guard limbs, so checking the guards of dx
+ covers dy too. */
+ if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0
+ || dx[size+1] != 0x12345678 || dx[0] != 0x87654321)
+ {
+#ifndef PRINT
+ printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
+ mpn_print (dx+1, size);
+ printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
+ mpn_print (dy+1, size);
+#endif
+ abort();
+ }
+#endif
+ }
+}
+
+/* Print the SIZE limbs at P in hexadecimal, most significant limb first,
+ each zero-padded to two digits per byte, followed by a newline. With
+ SPACE defined the limbs are separated by single blanks. */
+mpn_print (mp_ptr p, mp_size_t size)
+{
+ mp_size_t idx = size;
+
+ while (idx-- > 0)
+ {
+ printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[idx]);
+#ifdef SPACE
+ if (idx != 0)
+ printf (" ");
+#endif
+ }
+ puts ("");
+}
--- /dev/null
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef USG
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* Return the user CPU time reported by getrusage, in milliseconds. */
+unsigned long
+cputime ()
+{
+ struct rusage usage;
+ unsigned long msec;
+
+ getrusage (0, &usage);
+ msec = usage.ru_utime.tv_sec * 1000;
+ msec += usage.ru_utime.tv_usec / 1000;
+ return msec;
+}
+#else
+#include <time.h>
+
+#ifndef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC 1000000
+#endif
+
+#if CLOCKS_PER_SEC >= 10000
+#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000))
+#else
+#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC)
+#endif
+
+/* Return the value of clock () converted to milliseconds. */
+unsigned long
+cputime ()
+{
+ clock_t ticks = clock ();
+
+ return CLOCK_TO_MILLISEC (ticks);
+}
+#endif
+
+#define M * 1000000
+
+#ifndef CLOCK
+#if defined (__m88k__)
+#define CLOCK 20 M
+#elif defined (__i386__)
+#define CLOCK (16.666667 M)
+#elif defined (__m68k__)
+#define CLOCK (20 M)
+#elif defined (_IBMR2)
+#define CLOCK (25 M)
+#elif defined (__sparc__)
+#define CLOCK (20 M)
+#elif defined (__sun__)
+#define CLOCK (20 M)
+#elif defined (__mips)
+#define CLOCK (40 M)
+#elif defined (__hppa__)
+#define CLOCK (50 M)
+#elif defined (__alpha)
+#define CLOCK (133 M)
+#else
+#error "Don't know CLOCK of your machine"
+#endif
+#endif
+
+#ifndef OPS
+#define OPS 20000000
+#endif
+#ifndef SIZE
+#define SIZE 496
+#endif
+#ifndef TIMES
+#define TIMES OPS/SIZE
+#else
+#undef OPS
+#define OPS (SIZE*TIMES)
+#endif
+
+/* Reference limb-vector by limb multiplication: stores the low words of
+ s1_ptr[i] * s2_limb plus the running carry in res_ptr[i] for S1_SIZE
+ limbs and returns the carry limb left over at the top. The do/while form
+ runs the body before testing, as in the original. */
+mp_limb_t
+refmpn_mul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ mp_size_t s1_size;
+ register mp_limb_t s2_limb;
+{
+ register mp_limb_t carry;
+ register mp_size_t k;
+ register mp_limb_t hi, lo;
+
+ carry = 0;
+ k = 0;
+ do
+ {
+ umul_ppmm (hi, lo, s1_ptr[k], s2_limb);
+
+ /* Add the incoming carry limb; a wrap bumps the high word. */
+ lo += carry;
+ carry = hi + (lo < carry);
+
+ res_ptr[k] = lo;
+ }
+ while (++k != s1_size);
+
+ return carry;
+}
+
+/* Endless benchmark and self-check driver for mpn_mul_1.
+ Each round builds a random source vector and multiplier limb, times
+ TIMES calls of the reference routine and of mpn_mul_1, then runs each
+ once (reference into dx, routine under test into dy) and calls abort ()
+ if the results differ. The loop has no exit condition. */
+main (argc, argv)
+ int argc;
+ char **argv;
+{
+ mp_limb_t s1[SIZE];
+ mp_limb_t dx[SIZE+2];
+ mp_limb_t dy[SIZE+2];
+ mp_limb_t cyx, cyy;
+ int i;
+ long t0, t;
+ int test;
+ mp_limb_t xlimb;
+ mp_size_t size;
+
+ for (test = 0; ; test++)
+ {
+#ifdef RANDOM
+ size = (random () % SIZE + 1);
+#else
+ size = SIZE;
+#endif
+
+ mpn_random2 (s1, size);
+ mpn_random2 (dy+1, size);
+
+ /* Roughly one round in 256 uses a zero multiplier. */
+ if (random () % 0x100 == 0)
+ xlimb = 0;
+ else
+ mpn_random2 (&xlimb, 1);
+
+ /* Guard limbs on both sides of the result area. */
+ dy[size+1] = 0x12345678;
+ dy[0] = 0x87654321;
+
+#if defined (PRINT) || defined (XPRINT)
+ printf ("xlimb=%*lX\n", (int) (2 * sizeof(mp_limb_t)), xlimb);
+#endif
+#ifdef PRINT
+ mpn_print (s1, size);
+#endif
+
+ /* Time the reference implementation. T is in milliseconds, so
+ t * CLOCK / (OPS * 1000.0) is cycles per limb. */
+ MPN_COPY (dx, dy, size+2);
+ t0 = cputime();
+ for (i = 0; i < TIMES; i++)
+ cyx = refmpn_mul_1 (dx+1, s1, size, xlimb);
+ t = cputime() - t0;
+#if TIMES != 1
+ printf ("refmpn_mul_1: %5ldms (%.2f cycles/limb)\n",
+ t,
+ ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+
+ /* Time the routine under test. The copy also leaves the guard limbs
+ of dy in dx for the check below. */
+ MPN_COPY (dx, dy, size+2);
+ t0 = cputime();
+ for (i = 0; i < TIMES; i++)
+ cyy = mpn_mul_1 (dx+1, s1, size, xlimb);
+ t = cputime() - t0;
+#if TIMES != 1
+ printf ("mpn_mul_1: %5ldms (%.2f cycles/limb)\n",
+ t,
+ ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+
+ /* Check run. */
+ cyx = refmpn_mul_1 (dx+1, s1, size, xlimb);
+ cyy = mpn_mul_1 (dy+1, s1, size, xlimb);
+
+#ifdef PRINT
+ printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
+ mpn_print (dx+1, size);
+ printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
+ mpn_print (dy+1, size);
+#endif
+
+#ifndef NOCHECK
+ /* The comparison spans the guard limbs, so checking the guards of dx
+ covers dy too. */
+ if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0
+ || dx[size+1] != 0x12345678 || dx[0] != 0x87654321)
+ {
+#ifndef PRINT
+ printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
+ mpn_print (dx+1, size);
+ printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
+ mpn_print (dy+1, size);
+#endif
+ abort();
+ }
+#endif
+ }
+}
+
+/* Print the SIZE limbs at P in hexadecimal, most significant limb first,
+ each zero-padded to two digits per byte, followed by a newline. With
+ SPACE defined the limbs are separated by single blanks. */
+mpn_print (mp_ptr p, mp_size_t size)
+{
+ mp_size_t idx = size;
+
+ while (idx-- > 0)
+ {
+ printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[idx]);
+#ifdef SPACE
+ if (idx != 0)
+ printf (" ");
+#endif
+ }
+ puts ("");
+}
--- /dev/null
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef USG
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* Return the user CPU time reported by getrusage, in milliseconds. */
+unsigned long
+cputime ()
+{
+ struct rusage usage;
+ unsigned long msec;
+
+ getrusage (0, &usage);
+ msec = usage.ru_utime.tv_sec * 1000;
+ msec += usage.ru_utime.tv_usec / 1000;
+ return msec;
+}
+#else
+#include <time.h>
+
+#ifndef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC 1000000
+#endif
+
+#if CLOCKS_PER_SEC >= 10000
+#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000))
+#else
+#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC)
+#endif
+
+/* Return the value of clock () converted to milliseconds. */
+unsigned long
+cputime ()
+{
+ clock_t ticks = clock ();
+
+ return CLOCK_TO_MILLISEC (ticks);
+}
+#endif
+
+/* Suffix that lets clock rates be written in MHz, e.g. "20 M".  */
+#define M * 1000000
+
+/* CLOCK is the CPU clock rate in Hz used to turn measured milliseconds into
+   cycles per limb.  It may be predefined on the compiler command line;
+   otherwise a fixed per-architecture value is chosen here.  Every
+   replacement list is parenthesized so it behaves as one operand wherever
+   the macro is expanded.  */
+#ifndef CLOCK
+#if defined (__m88k__)
+#define CLOCK (20 M)
+#elif defined (__i386__)
+#define CLOCK (16.666667 M)
+#elif defined (__m68k__)
+#define CLOCK (20 M)
+#elif defined (_IBMR2)
+#define CLOCK (25 M)
+#elif defined (__sparc__)
+#define CLOCK (20 M)
+#elif defined (__sun__)
+#define CLOCK (20 M)
+#elif defined (__mips)
+#define CLOCK (40 M)
+#elif defined (__hppa__)
+#define CLOCK (50 M)
+#elif defined (__alpha)
+#define CLOCK (133 M)
+#else
+#error "Don't know CLOCK of your machine"
+#endif
+#endif
+
+/* OPS: target number of limb operations per timing run.
+   SIZE: maximum operand length in limbs.
+   TIMES: repetitions of each timed call.  When TIMES is given explicitly,
+   OPS is recomputed from it so the cycles/limb figure stays consistent.  */
+#ifndef OPS
+#define OPS 10000000
+#endif
+#ifndef SIZE
+#define SIZE 496
+#endif
+#ifndef TIMES
+#define TIMES (OPS/SIZE)
+#else
+#undef OPS
+#define OPS (SIZE*TIMES)
+#endif
+
+/* Reference implementation of a multi-limb right shift.
+
+   Shift the USIZE limbs at UP right by CNT bits and store the USIZE result
+   limbs at WP.  The bits shifted out of the least significant limb are
+   returned in the high end of the return value.
+
+   CNT must be non-zero: the code shifts by BITS_PER_MP_LIMB - CNT, and the
+   DEBUG build aborts when USIZE or CNT is zero.  Limbs are processed from
+   the low end upwards.  */
+mp_limb_t
+refmpn_rshift (wp, up, usize, cnt)
+     register mp_ptr wp;
+     register mp_srcptr up;
+     mp_size_t usize;
+     register unsigned int cnt;
+{
+  register mp_limb_t cur, next;
+  register unsigned rbits, lbits;
+  register mp_size_t i;
+  mp_limb_t shifted_out;
+
+#ifdef DEBUG
+  if (usize == 0 || cnt == 0)
+    abort ();
+#endif
+
+  rbits = cnt;
+  lbits = BITS_PER_MP_LIMB - rbits;
+
+  /* The low CNT bits of limb 0 fall off the end; hand them back.  */
+  cur = up[0];
+  shifted_out = cur << lbits;
+
+  /* Each result limb combines the upper part of one source limb with the
+     lower part of the next higher one.  */
+  for (i = 1; i < usize; i++)
+    {
+      next = up[i];
+      wp[i - 1] = (cur >> rbits) | (next << lbits);
+      cur = next;
+    }
+
+  /* The top result limb only receives bits from the top source limb.  */
+  wp[i - 1] = cur >> rbits;
+
+  return shifted_out;
+}
+
+/* Shift count used by the test; override with -DCNT=n.  */
+#ifndef CNT
+#define CNT 4
+#endif
+
+/* Timing and consistency test for mpn_rshift against refmpn_rshift.
+
+   Loops forever: each round fills a random source operand, times both
+   implementations (unless TIMES is 1), then, unless NOCHECK is defined,
+   compares their results and return values and verifies the guard limbs
+   placed just below and above the destination.  Any mismatch prints both
+   results and aborts.  */
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  mp_limb_t src[SIZE];
+  mp_limb_t dx[SIZE+2];
+  mp_limb_t dy[SIZE+2];
+  mp_limb_t cyx, cyy;
+  int idx;
+  long start, elapsed;
+  int round;
+  int cnt = CNT;
+  mp_size_t size;
+
+  round = 0;
+  for (;;)
+    {
+#ifdef RANDOM
+      size = (random () % SIZE + 1);
+#else
+      size = SIZE;
+#endif
+      mpn_random2 (src, size);
+
+      /* Guard limbs on both sides of the destination area dx[1..size].  */
+      dx[0] = 0x87654321;
+      dy[0] = 0x87654321;
+      dx[size+1] = 0x12345678;
+      dy[size+1] = 0x12345678;
+
+#ifdef PRINT
+      mpn_print (src, size);
+#endif
+
+      /* Time the reference implementation.  */
+      start = cputime();
+      for (idx = 0; idx < TIMES; idx++)
+        cyx = refmpn_rshift (dx+1, src, size, cnt);
+      elapsed = cputime() - start;
+#if TIMES != 1
+      printf ("refmpn_rshift: %5ldms (%.2f cycles/limb)\n",
+              elapsed,
+              ((double) elapsed * CLOCK) / (OPS * 1000.0));
+#endif
+#ifdef PRINT
+      printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); mpn_print (dx+1, size);
+#endif
+
+      /* Time the implementation under test.  */
+      start = cputime();
+      for (idx = 0; idx < TIMES; idx++)
+        cyy = mpn_rshift (dx+1, src, size, cnt);
+      elapsed = cputime() - start;
+#if TIMES != 1
+      printf ("mpn_rshift: %5ldms (%.2f cycles/limb)\n",
+              elapsed,
+              ((double) elapsed * CLOCK) / (OPS * 1000.0));
+#endif
+#ifdef PRINT
+      printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); mpn_print (dx+1, size);
+#endif
+
+#ifndef NOCHECK
+      /* Pre-fill both destinations with different junk so that limbs left
+         unwritten by either routine show up as a mismatch.  */
+      for (idx = 1; idx <= size; idx++)
+        {
+          dx[idx] = 0x7654321;
+          dy[idx] = 0x1234567;
+        }
+
+      cyx = refmpn_rshift (dx+1, src, size, cnt);
+      cyy = mpn_rshift (dy+1, src, size, cnt);
+
+      if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0
+          || dx[size+1] != 0x12345678 || dx[0] != 0x87654321)
+        {
+#ifndef PRINT
+          printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
+          mpn_print (dx+1, size);
+          printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
+          mpn_print (dy+1, size);
+#endif
+          abort();
+        }
+#endif
+
+      round++;
+    }
+}
+
+/* Print the SIZE limbs starting at P as hexadecimal, most significant limb
+   first, followed by a newline.  Each limb is zero-padded to two hex digits
+   per byte of mp_limb_t.  When SPACE is defined at compile time, adjacent
+   limbs are separated by a single blank.  */
+mpn_print (mp_ptr p, mp_size_t size)
+{
+  int digits = (int) (2 * sizeof(mp_limb_t));
+  mp_size_t k = size;
+
+  /* Walk from the top limb down to limb 0.  */
+  while (--k >= 0)
+    {
+      printf ("%0*lX", digits, p[k]);
+#ifdef SPACE
+      if (k != 0)
+        printf (" ");
+#endif
+    }
+  puts ("");
+}
--- /dev/null
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifndef USG
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* Return the user CPU time reported by getrusage for `who' == 0, expressed
+   in milliseconds.  */
+unsigned long
+cputime ()
+{
+  struct rusage usage;
+  unsigned long millis;
+
+  getrusage (0, &usage);
+
+  /* Whole seconds first, then the sub-second part truncated to ms.  */
+  millis = usage.ru_utime.tv_sec * 1000;
+  millis += usage.ru_utime.tv_usec / 1000;
+  return millis;
+}
+#else
+#include <time.h>
+
+/* Fall back to one tick per microsecond when <time.h> does not provide
+   CLOCKS_PER_SEC.  */
+#ifndef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC 1000000
+#endif
+
+/* Convert a clock () tick count to milliseconds.  For fine-grained clocks
+   divide by ticks-per-millisecond; for coarse clocks multiply first so the
+   divisor does not become zero.  */
+#if CLOCKS_PER_SEC >= 10000
+#define CLOCK_TO_MILLISEC(ticks) ((ticks) / (CLOCKS_PER_SEC / 1000))
+#else
+#define CLOCK_TO_MILLISEC(ticks) ((ticks) * 1000 / CLOCKS_PER_SEC)
+#endif
+
+/* Return the processor time reported by clock (), in milliseconds.  */
+unsigned long
+cputime ()
+{
+  clock_t now = clock ();
+
+  return CLOCK_TO_MILLISEC (now);
+}
+#endif
+
+/* Suffix that lets clock rates be written in MHz, e.g. "20 M".  */
+#define M * 1000000
+
+/* CLOCK is the CPU clock rate in Hz used to turn measured milliseconds into
+   cycles per limb.  It may be predefined on the compiler command line;
+   otherwise a fixed per-architecture value is chosen here.  Every
+   replacement list is parenthesized so it behaves as one operand wherever
+   the macro is expanded.  */
+#ifndef CLOCK
+#if defined (__m88k__)
+#define CLOCK (20 M)
+#elif defined (__i386__)
+#define CLOCK (16.666667 M)
+#elif defined (__m68k__)
+#define CLOCK (20 M)
+#elif defined (_IBMR2)
+#define CLOCK (25 M)
+#elif defined (__sparc__)
+#define CLOCK (20 M)
+#elif defined (__sun__)
+#define CLOCK (20 M)
+#elif defined (__mips)
+#define CLOCK (40 M)
+#elif defined (__hppa__)
+#define CLOCK (50 M)
+#elif defined (__alpha)
+#define CLOCK (133 M)
+#else
+#error "Don't know CLOCK of your machine"
+#endif
+#endif
+
+/* OPS: target number of limb operations per timing run.
+   SIZE: maximum operand length in limbs.
+   TIMES: repetitions of each timed call.  When TIMES is given explicitly,
+   OPS is recomputed from it so the cycles/limb figure stays consistent.  */
+#ifndef OPS
+#define OPS 10000000
+#endif
+#ifndef SIZE
+#define SIZE 328
+#endif
+#ifndef TIMES
+#define TIMES (OPS/SIZE)
+#else
+#undef OPS
+#define OPS (SIZE*TIMES)
+#endif
+
+
+/* Reference implementation of multi-limb subtraction.
+
+   Compute {s1_ptr, size} - {s2_ptr, size}, store the SIZE difference limbs
+   at RES_PTR and return the borrow out of the most significant limb.  The
+   loop body always runs at least once, so SIZE must be at least 1.  */
+mp_limb_t
+#if __STDC__
+refmpn_sub_n (mp_ptr res_ptr,
+              mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+#else
+refmpn_sub_n (res_ptr, s1_ptr, s2_ptr, size)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     register mp_srcptr s2_ptr;
+     mp_size_t size;
+#endif
+{
+  register mp_limb_t minuend, subtrahend, diff, borrow;
+  register mp_size_t i;
+
+  borrow = 0;
+  i = 0;
+  do
+    {
+      subtrahend = s2_ptr[i];
+      minuend = s1_ptr[i];
+
+      /* Fold the incoming borrow into the subtrahend; a wrap-around here
+         is itself a borrow into the next limb.  */
+      subtrahend += borrow;
+      borrow = (subtrahend < borrow);
+
+      /* Subtract; a result larger than the minuend means it wrapped.  */
+      diff = minuend - subtrahend;
+      borrow += (diff > minuend);
+
+      res_ptr[i] = diff;
+    }
+  while (++i != size);
+
+  return borrow;
+}
+
+/* Timing and consistency test for mpn_sub_n against refmpn_sub_n.
+
+   Loops forever: each round fills two random operands, times both
+   implementations (unless TIMES is 1), then, unless NOCHECK is defined,
+   compares results and borrows and verifies the guard limb placed just
+   above each destination.  Any mismatch prints both results and aborts.
+
+   The borrow from the routine under test is kept in CYY throughout, so
+   CYX always refers to the reference result.  */
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  mp_limb_t s1[SIZE];
+  mp_limb_t s2[SIZE];
+  mp_limb_t dx[SIZE+1];
+  mp_limb_t dy[SIZE+1];
+  int cyx, cyy;
+  int i;
+  long t0, t;
+  int test;
+  mp_size_t size;
+
+  for (test = 0; ; test++)
+    {
+#ifdef RANDOM
+      size = (random () % SIZE + 1);
+#else
+      size = SIZE;
+#endif
+
+      mpn_random2 (s1, size);
+      mpn_random2 (s2, size);
+
+      /* Guard limb just past the end of each destination.  */
+      dx[size] = 0x12345678;
+      dy[size] = 0x12345678;
+
+#ifdef PRINT
+      mpn_print (s1, size);
+      mpn_print (s2, size);
+#endif
+      /* Time the reference implementation.  */
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+        cyx = refmpn_sub_n (dx, s1, s2, size);
+      t = cputime() - t0;
+#if TIMES != 1
+      printf ("refmpn_sub_n: %ldms (%.2f cycles/limb)\n",
+              t,
+              ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+#ifdef PRINT
+      printf ("%d ", cyx); mpn_print (dx, size);
+#endif
+
+      /* Time the implementation under test.  */
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+        cyy = mpn_sub_n (dx, s1, s2, size);
+      t = cputime() - t0;
+#if TIMES != 1
+      printf ("mpn_sub_n: %ldms (%.2f cycles/limb)\n",
+              t,
+              ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+#ifdef PRINT
+      printf ("%d ", cyy); mpn_print (dx, size);
+#endif
+
+#ifndef NOCHECK
+      /* Put different garbage in the two destinations so that limbs left
+         unwritten by either routine show up as a mismatch.  */
+      for (i = 0; i < size; i++)
+        {
+          dx[i] = 0x7654321;
+          dy[i] = 0x1234567;
+        }
+
+      cyx = refmpn_sub_n (dx, s1, s2, size);
+      cyy = mpn_sub_n (dy, s1, s2, size);
+      if (cyx != cyy || mpn_cmp (dx, dy, size) != 0
+          || dx[size] != 0x12345678 || dy[size] != 0x12345678)
+        {
+#ifndef PRINT
+          printf ("%d ", cyx); mpn_print (dx, size);
+          printf ("%d ", cyy); mpn_print (dy, size);
+#endif
+          abort();
+        }
+#endif
+    }
+}
+
+/* Print the SIZE limbs starting at P as hexadecimal, most significant limb
+   first, followed by a newline.  Each limb is zero-padded to two hex digits
+   per byte of mp_limb_t.  When SPACE is defined at compile time, adjacent
+   limbs are separated by a single blank.  */
+mpn_print (mp_ptr p, mp_size_t size)
+{
+  int digits = (int) (2 * sizeof(mp_limb_t));
+  mp_size_t k = size;
+
+  /* Walk from the top limb down to limb 0.  */
+  while (--k >= 0)
+    {
+      printf ("%0*lX", digits, p[k]);
+#ifdef SPACE
+      if (k != 0)
+        printf (" ");
+#endif
+    }
+  puts ("");
+}
--- /dev/null
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef USG
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* Return the user CPU time reported by getrusage for `who' == 0, expressed
+   in milliseconds.  */
+unsigned long
+cputime ()
+{
+  struct rusage usage;
+  unsigned long millis;
+
+  getrusage (0, &usage);
+
+  /* Whole seconds first, then the sub-second part truncated to ms.  */
+  millis = usage.ru_utime.tv_sec * 1000;
+  millis += usage.ru_utime.tv_usec / 1000;
+  return millis;
+}
+#else
+#include <time.h>
+
+/* Fall back to one tick per microsecond when <time.h> does not provide
+   CLOCKS_PER_SEC.  */
+#ifndef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC 1000000
+#endif
+
+/* Convert a clock () tick count to milliseconds.  For fine-grained clocks
+   divide by ticks-per-millisecond; for coarse clocks multiply first so the
+   divisor does not become zero.  */
+#if CLOCKS_PER_SEC >= 10000
+#define CLOCK_TO_MILLISEC(ticks) ((ticks) / (CLOCKS_PER_SEC / 1000))
+#else
+#define CLOCK_TO_MILLISEC(ticks) ((ticks) * 1000 / CLOCKS_PER_SEC)
+#endif
+
+/* Return the processor time reported by clock (), in milliseconds.  */
+unsigned long
+cputime ()
+{
+  clock_t now = clock ();
+
+  return CLOCK_TO_MILLISEC (now);
+}
+#endif
+
+/* Suffix that lets clock rates be written in MHz, e.g. "20 M".  */
+#define M * 1000000
+
+/* CLOCK is the CPU clock rate in Hz used to turn measured milliseconds into
+   cycles per limb.  It may be predefined on the compiler command line;
+   otherwise a fixed per-architecture value is chosen here.  Every
+   replacement list is parenthesized so it behaves as one operand wherever
+   the macro is expanded.  */
+#ifndef CLOCK
+#if defined (__m88k__)
+#define CLOCK (20 M)
+#elif defined (__i386__)
+#define CLOCK (16.666667 M)
+#elif defined (__m68k__)
+#define CLOCK (20 M)
+#elif defined (_IBMR2)
+#define CLOCK (25 M)
+#elif defined (__sparc__)
+#define CLOCK (20 M)
+#elif defined (__sun__)
+#define CLOCK (20 M)
+#elif defined (__mips)
+#define CLOCK (40 M)
+#elif defined (__hppa__)
+#define CLOCK (50 M)
+#elif defined (__alpha)
+#define CLOCK (133 M)
+#else
+#error "Don't know CLOCK of your machine"
+#endif
+#endif
+
+/* OPS: target number of limb operations per timing run.
+   SIZE: maximum operand length in limbs.
+   TIMES: repetitions of each timed call.  When TIMES is given explicitly,
+   OPS is recomputed from it so the cycles/limb figure stays consistent.  */
+#ifndef OPS
+#define OPS 20000000
+#endif
+#ifndef SIZE
+#define SIZE 496
+#endif
+#ifndef TIMES
+#define TIMES (OPS/SIZE)
+#else
+#undef OPS
+#define OPS (SIZE*TIMES)
+#endif
+
+/* Reference implementation of multiply-and-subtract.
+
+   Multiply the S1_SIZE limbs at S1_PTR by S2_LIMB and subtract the product
+   from the S1_SIZE limbs at RES_PTR in place.  The return value is the
+   limb that would have to be subtracted from the next higher position
+   (high product part plus borrows).  The loop body always runs at least
+   once, so S1_SIZE must be at least 1.  */
+mp_limb_t
+refmpn_submul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     mp_size_t s1_size;
+     register mp_limb_t s2_limb;
+{
+  register mp_limb_t carry;
+  register mp_size_t i;
+  register mp_limb_t hi, lo;
+  register mp_limb_t old, diff;
+
+  carry = 0;
+  i = 0;
+  do
+    {
+      /* hi:lo = s1_ptr[i] * s2_limb as a double-limb product.  */
+      umul_ppmm (hi, lo, s1_ptr[i], s2_limb);
+
+      /* Add the carry from the previous limb into the low half and
+         propagate any overflow into the new carry.  */
+      lo += carry;
+      carry = (lo < carry) + hi;
+
+      /* Subtract from the destination; a wrapped result is a borrow.  */
+      old = res_ptr[i];
+      diff = old - lo;
+      carry += (diff > old);
+      res_ptr[i] = diff;
+    }
+  while (++i != s1_size);
+
+  return carry;
+}
+
+/* Timing and consistency test for mpn_submul_1 against refmpn_submul_1.
+
+   Loops forever: each round picks a random source operand, a random
+   destination and a random multiplier limb (forced to zero roughly once in
+   0x100 rounds), times both implementations (unless TIMES is 1), then runs
+   each once on identical input and, unless NOCHECK is defined, compares the
+   results, the returned limbs and the guard limbs around the destination.
+   Any mismatch prints both results and aborts.  */
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  mp_limb_t src[SIZE];
+  mp_limb_t dx[SIZE+2];
+  mp_limb_t dy[SIZE+2];
+  mp_limb_t cyx, cyy;
+  int rep;
+  long start, elapsed;
+  int round;
+  mp_limb_t xlimb;
+  mp_size_t size;
+
+  round = 0;
+  for (;;)
+    {
+#ifdef RANDOM
+      size = (random () % SIZE + 1);
+#else
+      size = SIZE;
+#endif
+
+      mpn_random2 (src, size);
+      mpn_random2 (dy+1, size);
+
+      /* Occasionally exercise the zero-multiplier case.  */
+      if (random () % 0x100 == 0)
+        xlimb = 0;
+      else
+        mpn_random2 (&xlimb, 1);
+
+      /* Guard limbs on both sides of the destination area dy[1..size];
+         dy serves as the pristine copy of the destination until the final
+         comparison run.  */
+      dy[0] = 0x87654321;
+      dy[size+1] = 0x12345678;
+
+#if defined (PRINT) || defined (XPRINT)
+      printf ("xlimb=%*lX\n", (int) (2 * sizeof(mp_limb_t)), xlimb);
+#endif
+#ifdef PRINT
+      mpn_print (dy+1, size);
+      mpn_print (src, size);
+#endif
+
+      /* Time the reference implementation.  */
+      MPN_COPY (dx, dy, size+2);
+      start = cputime();
+      for (rep = 0; rep < TIMES; rep++)
+        cyx = refmpn_submul_1 (dx+1, src, size, xlimb);
+      elapsed = cputime() - start;
+#if TIMES != 1
+      printf ("refmpn_submul_1: %5ldms (%.2f cycles/limb)\n",
+              elapsed,
+              ((double) elapsed * CLOCK) / (OPS * 1000.0));
+#endif
+
+      /* Time the implementation under test.  */
+      MPN_COPY (dx, dy, size+2);
+      start = cputime();
+      for (rep = 0; rep < TIMES; rep++)
+        cyy = mpn_submul_1 (dx+1, src, size, xlimb);
+      elapsed = cputime() - start;
+#if TIMES != 1
+      printf ("mpn_submul_1: %5ldms (%.2f cycles/limb)\n",
+              elapsed,
+              ((double) elapsed * CLOCK) / (OPS * 1000.0));
+#endif
+
+      /* One run of each on identical input for the comparison.  */
+      MPN_COPY (dx, dy, size+2);
+      cyx = refmpn_submul_1 (dx+1, src, size, xlimb);
+      cyy = mpn_submul_1 (dy+1, src, size, xlimb);
+
+#ifdef PRINT
+      printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
+      mpn_print (dx+1, size);
+      printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
+      mpn_print (dy+1, size);
+#endif
+
+#ifndef NOCHECK
+      if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0
+          || dx[size+1] != 0x12345678 || dx[0] != 0x87654321)
+        {
+#ifndef PRINT
+          printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
+          mpn_print (dx+1, size);
+          printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
+          mpn_print (dy+1, size);
+#endif
+          abort();
+        }
+#endif
+
+      round++;
+    }
+}
+
+/* Print the SIZE limbs starting at P as hexadecimal, most significant limb
+   first, followed by a newline.  Each limb is zero-padded to two hex digits
+   per byte of mp_limb_t.  When SPACE is defined at compile time, adjacent
+   limbs are separated by a single blank.  */
+mpn_print (mp_ptr p, mp_size_t size)
+{
+  int digits = (int) (2 * sizeof(mp_limb_t));
+  mp_size_t k = size;
+
+  /* Walk from the top limb down to limb 0.  */
+  while (--k >= 0)
+    {
+      printf ("%0*lX", digits, p[k]);
+#ifdef SPACE
+      if (k != 0)
+        printf (" ");
+#endif
+    }
+  puts ("");
+}
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#define ADD 1
+#define SUB 2
+
+#ifndef METHOD
+#define METHOD ADD
+#endif
+
+#if METHOD == ADD
+#define REFCALL refmpn_add_n
+#define TESTCALL mpn_add_n
+#endif
+
+#if METHOD == SUB
+#define REFCALL refmpn_sub_n
+#define TESTCALL mpn_sub_n
+#endif
+
+mp_limb_t refmpn_add_n ();
+mp_limb_t refmpn_sub_n ();
+
+#define SIZE 100
+
+/* Randomized alignment/size test of TESTCALL against REFCALL.
+
+   Usage: prog [max_size]
+
+   Runs a fixed number of passes.  Each pass picks random limb offsets
+   (0..31) for both sources and the destination and a random size in
+   1..max_size, fills the sources with random data, pre-fills both
+   destination buffers with the same recognizable pattern, runs both
+   routines and aborts if the return values or any limb of the complete
+   destination buffers differ (which also catches writes outside the
+   intended destination range).  The pass number is printed each time
+   another 1000000 limbs have been processed.  */
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  mp_size_t alloc_size, max_size, size, i, cumul_size;
+  mp_ptr s1, s2, dx, dy;
+  int s1_align, s2_align, d_align;
+  long pass, n_passes;
+  mp_limb_t cx, cy;
+
+  max_size = SIZE;
+  n_passes = 1000000;
+
+  argc--; argv++;
+  if (argc)
+    {
+      max_size = atol (*argv);
+      argc--; argv++;
+    }
+
+  /* The size is drawn with `random () % max_size', so a non-positive
+     maximum would divide by zero or yield nonsensical sizes.  */
+  if (max_size <= 0)
+    {
+      fprintf (stderr, "maximum size must be positive\n");
+      exit (1);
+    }
+
+  /* Room for the largest operand plus the largest alignment offset.  */
+  alloc_size = max_size + 32;
+  s1 = malloc (alloc_size * BYTES_PER_MP_LIMB);
+  s2 = malloc (alloc_size * BYTES_PER_MP_LIMB);
+  dx = malloc (alloc_size * BYTES_PER_MP_LIMB);
+  dy = malloc (alloc_size * BYTES_PER_MP_LIMB);
+  if (s1 == NULL || s2 == NULL || dx == NULL || dy == NULL)
+    {
+      fprintf (stderr, "out of memory\n");
+      exit (1);
+    }
+
+  cumul_size = 0;
+  /* SIZE of the previous pass is accumulated at the top of the loop, so it
+     must have a defined value before the first pass.  */
+  size = 0;
+  for (pass = 0; pass < n_passes; pass++)
+    {
+      cumul_size += size;
+      if (cumul_size >= 1000000)
+        {
+          cumul_size -= 1000000;
+          printf ("%ld ", pass); fflush (stdout);
+        }
+      s1_align = random () % 32;
+      s2_align = random () % 32;
+      d_align = random () % 32;
+
+      size = random () % max_size + 1;
+
+      mpn_random2 (s1 + s1_align, size);
+      mpn_random2 (s2 + s2_align, size);
+
+      /* Identical background pattern in both destinations.  */
+      for (i = 0; i < alloc_size; i++)
+        dx[i] = dy[i] = i + 0x9876500;
+
+      cx = TESTCALL (dx + d_align, s1 + s1_align, s2 + s2_align, size);
+      cy = REFCALL (dy + d_align, s1 + s1_align, s2 + s2_align, size);
+
+      if (cx != cy || mpn_cmp (dx, dy, alloc_size) != 0)
+        abort ();
+    }
+
+  printf ("%ld passes OK\n", n_passes);
+  exit (0);
+}
+
+/* Reference implementation of multi-limb addition.
+
+   Compute {s1_ptr, size} + {s2_ptr, size}, store the SIZE sum limbs at
+   RES_PTR and return the carry out of the most significant limb.  The loop
+   body always runs at least once, so SIZE must be at least 1.  */
+mp_limb_t
+#if __STDC__
+refmpn_add_n (mp_ptr res_ptr,
+              mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+#else
+refmpn_add_n (res_ptr, s1_ptr, s2_ptr, size)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     register mp_srcptr s2_ptr;
+     mp_size_t size;
+#endif
+{
+  register mp_limb_t a, b, sum, carry;
+  register mp_size_t i;
+
+  carry = 0;
+  i = 0;
+  do
+    {
+      b = s2_ptr[i];
+      a = s1_ptr[i];
+
+      /* Fold the incoming carry into one addend; a wrap-around here is
+         itself a carry into the next limb.  */
+      b += carry;
+      carry = (b < carry);
+
+      /* Add the other addend; a sum smaller than it means it wrapped.  */
+      sum = a + b;
+      carry += (sum < a);
+
+      res_ptr[i] = sum;
+    }
+  while (++i != size);
+
+  return carry;
+}
+
+/* Reference implementation of multi-limb subtraction.
+
+   Compute {s1_ptr, size} - {s2_ptr, size}, store the SIZE difference limbs
+   at RES_PTR and return the borrow out of the most significant limb.  The
+   loop body always runs at least once, so SIZE must be at least 1.  */
+mp_limb_t
+#if __STDC__
+refmpn_sub_n (mp_ptr res_ptr,
+              mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+#else
+refmpn_sub_n (res_ptr, s1_ptr, s2_ptr, size)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     register mp_srcptr s2_ptr;
+     mp_size_t size;
+#endif
+{
+  register mp_limb_t minuend, subtrahend, diff, borrow;
+  register mp_size_t i;
+
+  borrow = 0;
+  i = 0;
+  do
+    {
+      subtrahend = s2_ptr[i];
+      minuend = s1_ptr[i];
+
+      /* Fold the incoming borrow into the subtrahend; a wrap-around here
+         is itself a borrow into the next limb.  */
+      subtrahend += borrow;
+      borrow = (subtrahend < borrow);
+
+      /* Subtract; a result larger than the minuend means it wrapped.  */
+      diff = minuend - subtrahend;
+      borrow += (diff > minuend);
+
+      res_ptr[i] = diff;
+    }
+  while (++i != size);
+
+  return borrow;
+}
--- /dev/null
+/* i80386 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+sum in a third limb vector.
+
+Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ s2_ptr (sp + 12)
+ size (sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+/* __mpn_add_n: add the size-limb vectors at s1_ptr and s2_ptr, store the
+   sum at res_ptr and return the carry-out (0 or 1) in %eax.
+   Registers: %edi = res_ptr, %esi = s1_ptr, %edx = s2_ptr,
+   %ecx = passes through the 8-way unrolled loop, %eax = scratch.
+   %edi and %esi are saved and restored here.  */
+.text
+ ALIGN (3)
+ .globl C_SYMBOL_NAME(__mpn_add_n)
+C_SYMBOL_NAME(__mpn_add_n:)
+ pushl %edi
+ pushl %esi
+
+/* Two registers were pushed, so the arguments now start at 12(%esp).  */
+ movl 12(%esp),%edi /* res_ptr */
+ movl 16(%esp),%esi /* s1_ptr */
+ movl 20(%esp),%edx /* s2_ptr */
+ movl 24(%esp),%ecx /* size */
+
+/* %eax = (-size) & 7 is the number of limb slots to skip in the first pass
+   when size is not a multiple of 8; the pointers are moved back by that
+   many limbs so the fixed displacements in the loop still address the
+   right limbs.  */
+ movl %ecx,%eax
+ shrl $3,%ecx /* compute count for unrolled loop */
+ negl %eax
+ andl $7,%eax /* get index where to start loop */
+ jz Loop /* necessary special case for 0 */
+ incl %ecx /* adjust loop count */
+ shll $2,%eax /* adjustment for pointers... */
+ subl %eax,%edi /* ... since they are offset ... */
+ subl %eax,%esi /* ... by a constant when we ... */
+ subl %eax,%edx /* ... enter the loop */
+ shrl $2,%eax /* restore previous value */
+/* Entry address = Loop - 3 + 9 * %eax.
+   NOTE(review): this presumably relies on the first load/adc/store group
+   assembling to 6 bytes and each later group to 9 bytes -- confirm with the
+   assembler in use.
+   NOTE(review): the adcl chain needs CF clear on entry to the loop; on the
+   PIC path the addl instructions are the last flag writers -- confirm.  */
+#ifdef PIC
+/* Calculate start address in loop for PIC. Due to limitations in some
+ assemblers, Loop-L0-3 cannot be put into the leal */
+ call L0
+L0: leal (%eax,%eax,8),%eax
+ addl (%esp),%eax
+ addl $(Loop-L0-3),%eax
+ addl $4,%esp
+#else
+/* Calculate start address in loop for non-PIC. */
+ leal (Loop - 3)(%eax,%eax,8),%eax
+#endif
+ jmp *%eax /* jump into loop */
+ ALIGN (3)
+/* Eight limbs per pass; the carry flag links consecutive adcl's.  */
+Loop: movl (%esi),%eax
+ adcl (%edx),%eax
+ movl %eax,(%edi)
+ movl 4(%esi),%eax
+ adcl 4(%edx),%eax
+ movl %eax,4(%edi)
+ movl 8(%esi),%eax
+ adcl 8(%edx),%eax
+ movl %eax,8(%edi)
+ movl 12(%esi),%eax
+ adcl 12(%edx),%eax
+ movl %eax,12(%edi)
+ movl 16(%esi),%eax
+ adcl 16(%edx),%eax
+ movl %eax,16(%edi)
+ movl 20(%esi),%eax
+ adcl 20(%edx),%eax
+ movl %eax,20(%edi)
+ movl 24(%esi),%eax
+ adcl 24(%edx),%eax
+ movl %eax,24(%edi)
+ movl 28(%esi),%eax
+ adcl 28(%edx),%eax
+ movl %eax,28(%edi)
+/* Pointers are advanced with leal and the counter with decl because
+   neither modifies CF, which must survive into the next pass.  */
+ leal 32(%edi),%edi
+ leal 32(%esi),%esi
+ leal 32(%edx),%edx
+ decl %ecx
+ jnz Loop
+
+/* Turn the final carry flag into the return value: sbbl gives 0 or -1,
+   negl makes that 0 or 1.  */
+ sbbl %eax,%eax
+ negl %eax
+
+ popl %esi
+ popl %edi
+ ret
--- /dev/null
+/* i80386 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+ the result to a second limb vector.
+
+Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ size (sp + 12)
+ s2_limb (sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+/* Register assignment for the arguments.  */
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebp
+
+/* __mpn_addmul_1: multiply the size limbs at s1_ptr by s2_limb, add the
+   product to the limbs at res_ptr in place and return the final carry limb
+   in %eax.  %ebx carries the high limb from one iteration to the next.
+   NOTE(review): the INSN/R/MEM_* macros come from asm-syntax.h, which is
+   not visible here; the comments below assume destination-first operand
+   order, as the argument loads suggest.  */
+ TEXT
+ ALIGN (3)
+ GLOBL C_SYMBOL_NAME(__mpn_addmul_1)
+C_SYMBOL_NAME(__mpn_addmul_1:)
+
+ INSN1(push,l ,R(edi))
+ INSN1(push,l ,R(esi))
+ INSN1(push,l ,R(ebx))
+ INSN1(push,l ,R(ebp))
+
+/* Four registers were pushed, so the arguments now start at esp+20.  */
+ INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
+ INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
+ INSN2(mov,l ,R(size),MEM_DISP(esp,28))
+ INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
+
+/* Point both pointers past the end of their vectors and negate size, so
+   the index counts up from -size to 0 and the inc at the bottom of the
+   loop doubles as the termination test.  */
+ INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
+ INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
+ INSN1(neg,l ,R(size))
+ INSN2(xor,l ,R(ebx),R(ebx))
+ ALIGN (3)
+Loop:
+/* edx:eax = s1 limb * s2_limb; add the incoming carry limb, then add the
+   low half into the destination, folding each carry-out into edx.  */
+ INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
+ INSN1(mul,l ,R(s2_limb))
+ INSN2(add,l ,R(eax),R(ebx))
+ INSN2(adc,l ,R(edx),$0)
+ INSN2(add,l ,MEM_INDEX(res_ptr,size,4),R(eax))
+ INSN2(adc,l ,R(edx),$0)
+ INSN2(mov,l ,R(ebx),R(edx))
+
+ INSN1(inc,l ,R(size))
+ INSN1(jnz, ,Loop)
+/* Return the last carry limb.  */
+ INSN2(mov,l ,R(eax),R(ebx))
+
+ INSN1(pop,l ,R(ebp))
+ INSN1(pop,l ,R(ebx))
+ INSN1(pop,l ,R(esi))
+ INSN1(pop,l ,R(edi))
+ ret
--- /dev/null
+/* i80386 __mpn_lshift -- Shift a limb vector left by cnt bits and store the
+ result in a second limb vector.
+
+Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s_ptr (sp + 8)
+ size (sp + 12)
+ cnt (sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+/* __mpn_lshift: shift the size limbs at s_ptr left by cnt bits, store the
+   result at res_ptr and return the bits shifted out of the most significant
+   limb in %eax.  Limbs are processed from the most significant end
+   downwards, two per loop pass.
+   Registers: %edi = res_ptr, %esi = s_ptr - 4, %edx = limb index,
+   %cl = shift count, %eax/%ebx = alternating limb buffers.
+   %edi, %esi and %ebx are saved and restored here.  */
+.text
+ ALIGN (3)
+ .globl C_SYMBOL_NAME(__mpn_lshift)
+C_SYMBOL_NAME(__mpn_lshift:)
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+
+/* Three registers were pushed, so the arguments now start at 16(%esp).  */
+ movl 16(%esp),%edi /* res_ptr */
+ movl 20(%esp),%esi /* s_ptr */
+ movl 24(%esp),%edx /* size */
+ movl 28(%esp),%ecx /* cnt */
+
+ subl $4,%esi /* adjust s_ptr */
+
+ movl (%esi,%edx,4),%ebx /* read most significant limb */
+ xorl %eax,%eax
+ shldl %cl,%ebx,%eax /* compute carry limb */
+ decl %edx
+ jz Lend
+ pushl %eax /* push carry limb onto stack */
+/* Only the low bit of the remaining count matters for choosing the loop
+   entry; the byte-sized test must name the byte register %dl.  */
+ testb $1,%dl
+ jnz L1 /* enter loop in the middle */
+ movl %ebx,%eax
+
+ ALIGN (3)
+Loop: movl (%esi,%edx,4),%ebx /* load next lower limb */
+ shldl %cl,%ebx,%eax /* compute result limb */
+ movl %eax,(%edi,%edx,4) /* store it */
+ decl %edx
+/* Second half of the pass: same step with the roles of %eax and %ebx
+   exchanged.  */
+L1: movl (%esi,%edx,4),%eax
+ shldl %cl,%eax,%ebx
+ movl %ebx,(%edi,%edx,4)
+ decl %edx
+ jnz Loop
+
+ shll %cl,%eax /* compute least significant limb */
+ movl %eax,(%edi) /* store it */
+
+ popl %eax /* pop carry limb */
+
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+/* Single-limb case: the carry limb is still in %eax and was never pushed.  */
+Lend: shll %cl,%ebx /* compute least significant limb */
+ movl %ebx,(%edi) /* store it */
+
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
--- /dev/null
+/* i80386 __mpn_mul_1 -- Multiply a limb vector with a limb and store
+ the result in a second limb vector.
+
+Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ size (sp + 12)
+ s2_limb (sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+/* Register assignment for the arguments.  */
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebp
+
+/* __mpn_mul_1: multiply the size limbs at s1_ptr by s2_limb, store the low
+   limbs of the product at res_ptr and return the final carry limb in %eax.
+   %ebx carries the high limb from one iteration to the next.
+   NOTE(review): the INSN/R/MEM_* macros come from asm-syntax.h, which is
+   not visible here; the comments below assume destination-first operand
+   order, as the argument loads suggest.  */
+ TEXT
+ ALIGN (3)
+ GLOBL C_SYMBOL_NAME(__mpn_mul_1)
+C_SYMBOL_NAME(__mpn_mul_1:)
+
+ INSN1(push,l ,R(edi))
+ INSN1(push,l ,R(esi))
+ INSN1(push,l ,R(ebx))
+ INSN1(push,l ,R(ebp))
+
+/* Four registers were pushed, so the arguments now start at esp+20.  */
+ INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
+ INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
+ INSN2(mov,l ,R(size),MEM_DISP(esp,28))
+ INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
+
+/* Point both pointers past the end of their vectors and negate size, so
+   the index counts up from -size to 0 and the inc at the bottom of the
+   loop doubles as the termination test.  */
+ INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
+ INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
+ INSN1(neg,l ,R(size))
+ INSN2(xor,l ,R(ebx),R(ebx))
+ ALIGN (3)
+Loop:
+/* edx:eax = s1 limb * s2_limb; add the incoming carry limb to the low half,
+   store it (mov leaves the flags alone), then fold the carry-out into
+   edx.  */
+ INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
+ INSN1(mul,l ,R(s2_limb))
+ INSN2(add,l ,R(eax),R(ebx))
+ INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(eax))
+ INSN2(adc,l ,R(edx),$0)
+ INSN2(mov,l ,R(ebx),R(edx))
+
+ INSN1(inc,l ,R(size))
+ INSN1(jnz, ,Loop)
+/* Return the last carry limb.  */
+ INSN2(mov,l ,R(eax),R(ebx))
+
+ INSN1(pop,l ,R(ebp))
+ INSN1(pop,l ,R(ebx))
+ INSN1(pop,l ,R(esi))
+ INSN1(pop,l ,R(edi))
+ ret
--- /dev/null
+/* i80386 __mpn_rshift -- Shift a limb vector right by cnt bits and store the
+ result in a second limb vector.
+
+Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s_ptr (sp + 8)
+ size (sp + 12)
+ cnt (sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+/* __mpn_rshift: shift the size limbs at s_ptr right by cnt bits, store the
+   result at res_ptr and return the bits shifted out of the least
+   significant limb in %eax.  Limbs are processed from the least significant
+   end upwards, two per loop pass.
+   Registers: %edi = res_ptr + 4*size - 4, %esi = s_ptr + 4*size,
+   %edx = negative limb index counting up towards 0, %cl = shift count,
+   %eax/%ebx = alternating limb buffers.
+   %edi, %esi and %ebx are saved and restored here.  */
+.text
+ ALIGN (3)
+ .globl C_SYMBOL_NAME(__mpn_rshift)
+C_SYMBOL_NAME(__mpn_rshift:)
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+
+/* Three registers were pushed, so the arguments now start at 16(%esp).  */
+ movl 16(%esp),%edi /* res_ptr */
+ movl 20(%esp),%esi /* s_ptr */
+ movl 24(%esp),%edx /* size */
+ movl 28(%esp),%ecx /* cnt */
+
+/* Bias the pointers to the high end and negate the index so that reaching
+   zero terminates the loop.  */
+ leal -4(%edi,%edx,4),%edi
+ leal (%esi,%edx,4),%esi
+ negl %edx
+
+ movl (%esi,%edx,4),%ebx /* read least significant limb */
+ xorl %eax,%eax
+ shrdl %cl,%ebx,%eax /* compute carry limb */
+ incl %edx
+ jz Lend
+ pushl %eax /* push carry limb onto stack */
+/* Only the low bit of the remaining count matters for choosing the loop
+   entry; the byte-sized test must name the byte register %dl.  */
+ testb $1,%dl
+ jnz L1 /* enter loop in the middle */
+ movl %ebx,%eax
+
+ ALIGN (3)
+Loop: movl (%esi,%edx,4),%ebx /* load next higher limb */
+ shrdl %cl,%ebx,%eax /* compute result limb */
+ movl %eax,(%edi,%edx,4) /* store it */
+ incl %edx
+/* Second half of the pass: same step with the roles of %eax and %ebx
+   exchanged.  */
+L1: movl (%esi,%edx,4),%eax
+ shrdl %cl,%eax,%ebx
+ movl %ebx,(%edi,%edx,4)
+ incl %edx
+ jnz Loop
+
+ shrl %cl,%eax /* compute most significant limb */
+ movl %eax,(%edi) /* store it */
+
+ popl %eax /* pop carry limb */
+
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+/* Single-limb case: the carry limb is still in %eax and was never pushed.  */
+Lend: shrl %cl,%ebx /* compute most significant limb */
+ movl %ebx,(%edi) /* store it */
+
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
--- /dev/null
+/* i80386 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ store difference in a third limb vector.
+
+Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ s2_ptr (sp + 12)
+ size (sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+/* __mpn_sub_n: subtract the size-limb vector at s2_ptr from the one at
+   s1_ptr, store the difference at res_ptr and return the borrow-out
+   (0 or 1) in %eax.
+   Registers: %edi = res_ptr, %esi = s1_ptr, %edx = s2_ptr,
+   %ecx = passes through the 8-way unrolled loop, %eax = scratch.
+   %edi and %esi are saved and restored here.  */
+.text
+ ALIGN (3)
+ .globl C_SYMBOL_NAME(__mpn_sub_n)
+C_SYMBOL_NAME(__mpn_sub_n:)
+ pushl %edi
+ pushl %esi
+
+/* Two registers were pushed, so the arguments now start at 12(%esp).  */
+ movl 12(%esp),%edi /* res_ptr */
+ movl 16(%esp),%esi /* s1_ptr */
+ movl 20(%esp),%edx /* s2_ptr */
+ movl 24(%esp),%ecx /* size */
+
+/* %eax = (-size) & 7 is the number of limb slots to skip in the first pass
+   when size is not a multiple of 8; the pointers are moved back by that
+   many limbs so the fixed displacements in the loop still address the
+   right limbs.  */
+ movl %ecx,%eax
+ shrl $3,%ecx /* compute count for unrolled loop */
+ negl %eax
+ andl $7,%eax /* get index where to start loop */
+ jz Loop /* necessary special case for 0 */
+ incl %ecx /* adjust loop count */
+ shll $2,%eax /* adjustment for pointers... */
+ subl %eax,%edi /* ... since they are offset ... */
+ subl %eax,%esi /* ... by a constant when we ... */
+ subl %eax,%edx /* ... enter the loop */
+ shrl $2,%eax /* restore previous value */
+/* Entry address = Loop - 3 + 9 * %eax.
+   NOTE(review): this presumably relies on the first load/sbb/store group
+   assembling to 6 bytes and each later group to 9 bytes -- confirm with the
+   assembler in use.
+   NOTE(review): the sbbl chain needs CF clear on entry to the loop; on the
+   PIC path the addl instructions are the last flag writers -- confirm.  */
+#ifdef PIC
+/* Calculate start address in loop for PIC. Due to limitations in some
+ assemblers, Loop-L0-3 cannot be put into the leal */
+ call L0
+L0: leal (%eax,%eax,8),%eax
+ addl (%esp),%eax
+ addl $(Loop-L0-3),%eax
+ addl $4,%esp
+#else
+/* Calculate start address in loop for non-PIC. */
+ leal (Loop - 3)(%eax,%eax,8),%eax
+#endif
+ jmp *%eax /* jump into loop */
+ ALIGN (3)
+/* Eight limbs per pass; the carry flag links consecutive sbbl's as the
+   running borrow.  */
+Loop: movl (%esi),%eax
+ sbbl (%edx),%eax
+ movl %eax,(%edi)
+ movl 4(%esi),%eax
+ sbbl 4(%edx),%eax
+ movl %eax,4(%edi)
+ movl 8(%esi),%eax
+ sbbl 8(%edx),%eax
+ movl %eax,8(%edi)
+ movl 12(%esi),%eax
+ sbbl 12(%edx),%eax
+ movl %eax,12(%edi)
+ movl 16(%esi),%eax
+ sbbl 16(%edx),%eax
+ movl %eax,16(%edi)
+ movl 20(%esi),%eax
+ sbbl 20(%edx),%eax
+ movl %eax,20(%edi)
+ movl 24(%esi),%eax
+ sbbl 24(%edx),%eax
+ movl %eax,24(%edi)
+ movl 28(%esi),%eax
+ sbbl 28(%edx),%eax
+ movl %eax,28(%edi)
+/* Pointers are advanced with leal and the counter with decl because
+   neither modifies CF, which must survive into the next pass.  */
+ leal 32(%edi),%edi
+ leal 32(%esi),%esi
+ leal 32(%edx),%edx
+ decl %ecx
+ jnz Loop
+
+/* Turn the final borrow flag into the return value: sbbl gives 0 or -1,
+   negl makes that 0 or 1.  */
+ sbbl %eax,%eax
+ negl %eax
+
+ popl %esi
+ popl %edi
+ ret