From a23f5f26772ecfc9ba745eb8deacdde7f293a547 Mon Sep 17 00:00:00 2001 From: simonm Date: Fri, 5 Jun 1998 14:37:57 +0000 Subject: [PATCH] [project @ 1998-06-05 14:37:56 by simonm] Initial revision --- ghc/rts/gmp/mpn/sparc32/v8/addmul_1.S | 124 ++++++++++++++ ghc/rts/gmp/mpn/sparc32/v8/mul_1.S | 99 +++++++++++ ghc/rts/gmp/mpn/sparc32/v8/submul_1.S | 58 +++++++ ghc/rts/gmp/mpn/sparc32/v8/supersparc/udiv.S | 109 +++++++++++++ ghc/rts/gmp/mpn/tests/add_n.c | 211 ++++++++++++++++++++++++ ghc/rts/gmp/mpn/tests/addmul_1.c | 223 +++++++++++++++++++++++++ ghc/rts/gmp/mpn/tests/divmod_1.c | 120 ++++++++++++++ ghc/rts/gmp/mpn/tests/divrem.c | 129 +++++++++++++++ ghc/rts/gmp/mpn/tests/lshift.c | 226 +++++++++++++++++++++++++ ghc/rts/gmp/mpn/tests/mul_1.c | 212 ++++++++++++++++++++++++ ghc/rts/gmp/mpn/tests/rshift.c | 227 ++++++++++++++++++++++++++ ghc/rts/gmp/mpn/tests/sub_n.c | 211 ++++++++++++++++++++++++ ghc/rts/gmp/mpn/tests/submul_1.c | 218 +++++++++++++++++++++++++ ghc/rts/gmp/mpn/tests/tst-addsub.c | 164 +++++++++++++++++++ ghc/rts/gmp/mpn/x86/add_n.S | 106 ++++++++++++ ghc/rts/gmp/mpn/x86/addmul_1.S | 76 +++++++++ ghc/rts/gmp/mpn/x86/lshift.S | 85 ++++++++++ ghc/rts/gmp/mpn/x86/mul_1.S | 75 +++++++++ ghc/rts/gmp/mpn/x86/rshift.S | 87 ++++++++++ ghc/rts/gmp/mpn/x86/sub_n.S | 106 ++++++++++++ 20 files changed, 2866 insertions(+) create mode 100644 ghc/rts/gmp/mpn/sparc32/v8/addmul_1.S create mode 100644 ghc/rts/gmp/mpn/sparc32/v8/mul_1.S create mode 100644 ghc/rts/gmp/mpn/sparc32/v8/submul_1.S create mode 100644 ghc/rts/gmp/mpn/sparc32/v8/supersparc/udiv.S create mode 100644 ghc/rts/gmp/mpn/tests/add_n.c create mode 100644 ghc/rts/gmp/mpn/tests/addmul_1.c create mode 100644 ghc/rts/gmp/mpn/tests/divmod_1.c create mode 100644 ghc/rts/gmp/mpn/tests/divrem.c create mode 100644 ghc/rts/gmp/mpn/tests/lshift.c create mode 100644 ghc/rts/gmp/mpn/tests/mul_1.c create mode 100644 ghc/rts/gmp/mpn/tests/rshift.c create mode 100644 ghc/rts/gmp/mpn/tests/sub_n.c create mode 100644 ghc/rts/gmp/mpn/tests/submul_1.c create mode 100644 ghc/rts/gmp/mpn/tests/tst-addsub.c create mode 100644 ghc/rts/gmp/mpn/x86/add_n.S create mode 100644 ghc/rts/gmp/mpn/x86/addmul_1.S create mode 100644 ghc/rts/gmp/mpn/x86/lshift.S create mode 100644 ghc/rts/gmp/mpn/x86/mul_1.S create mode 100644 ghc/rts/gmp/mpn/x86/rshift.S create mode 100644 ghc/rts/gmp/mpn/x86/sub_n.S diff --git a/ghc/rts/gmp/mpn/sparc32/v8/addmul_1.S b/ghc/rts/gmp/mpn/sparc32/v8/addmul_1.S new file mode 100644 index 0000000..fb9ea7c --- /dev/null +++ b/ghc/rts/gmp/mpn/sparc32/v8/addmul_1.S @@ -0,0 +1,124 @@ +! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and +! add the result to a second limb vector. + +! Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 + +#include "sysdep.h" + +.text + .align 4 + .global C_SYMBOL_NAME(__mpn_addmul_1) +C_SYMBOL_NAME(__mpn_addmul_1): + orcc %g0,%g0,%g2 + ld [%o1+0],%o4 ! 1 + + sll %o2,4,%g1 + and %g1,(4-1)<<4,%g1 +#if PIC + mov %o7,%g4 ! Save return address register + call 1f + add %o7,LL-1f,%g3 +1: mov %g4,%o7 ! Restore return address register +#else + sethi %hi(LL),%g3 + or %g3,%lo(LL),%g3 +#endif + jmp %g3+%g1 + nop +LL: +LL00: add %o0,-4,%o0 + b Loop00 /* 4, 8, 12, ... */ + add %o1,-4,%o1 + nop +LL01: b Loop01 /* 1, 5, 9, ... */ + nop + nop + nop +LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */ + b Loop10 + add %o1,4,%o1 + nop +LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */ + b Loop11 + add %o1,-8,%o1 + nop + +1: addcc %g3,%g2,%g3 ! 1 + ld [%o1+4],%o4 ! 2 + rd %y,%g2 ! 1 + addx %g0,%g2,%g2 + ld [%o0+0],%g1 ! 2 + addcc %g1,%g3,%g3 + st %g3,[%o0+0] ! 1 +Loop00: umul %o4,%o3,%g3 ! 2 + ld [%o0+4],%g1 ! 2 + addxcc %g3,%g2,%g3 ! 2 + ld [%o1+8],%o4 ! 3 + rd %y,%g2 ! 2 + addx %g0,%g2,%g2 + nop + addcc %g1,%g3,%g3 + st %g3,[%o0+4] ! 2 +Loop11: umul %o4,%o3,%g3 ! 3 + addxcc %g3,%g2,%g3 ! 3 + ld [%o1+12],%o4 ! 4 + rd %y,%g2 ! 3 + add %o1,16,%o1 + addx %g0,%g2,%g2 + ld [%o0+8],%g1 ! 2 + addcc %g1,%g3,%g3 + st %g3,[%o0+8] ! 3 +Loop10: umul %o4,%o3,%g3 ! 4 + addxcc %g3,%g2,%g3 ! 4 + ld [%o1+0],%o4 ! 1 + rd %y,%g2 ! 4 + addx %g0,%g2,%g2 + ld [%o0+12],%g1 ! 2 + addcc %g1,%g3,%g3 + st %g3,[%o0+12] ! 4 + add %o0,16,%o0 + addx %g0,%g2,%g2 +Loop01: addcc %o2,-4,%o2 + bg 1b + umul %o4,%o3,%g3 ! 1 + + addcc %g3,%g2,%g3 ! 4 + rd %y,%g2 ! 4 + addx %g0,%g2,%g2 + ld [%o0+0],%g1 ! 2 + addcc %g1,%g3,%g3 + st %g3,[%o0+0] ! 4 + addx %g0,%g2,%o0 + + retl + nop + + +! umul, ld, addxcc, rd, st + +! umul, ld, addxcc, rd, ld, addcc, st, addx + diff --git a/ghc/rts/gmp/mpn/sparc32/v8/mul_1.S b/ghc/rts/gmp/mpn/sparc32/v8/mul_1.S new file mode 100644 index 0000000..b641feb --- /dev/null +++ b/ghc/rts/gmp/mpn/sparc32/v8/mul_1.S @@ -0,0 +1,99 @@ +! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and +! store the product in a second limb vector. + +! Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 + +#include "sysdep.h" + +.text + .align 8 + .global C_SYMBOL_NAME(__mpn_mul_1) +C_SYMBOL_NAME(__mpn_mul_1): + sll %o2,4,%g1 + and %g1,(4-1)<<4,%g1 +#if PIC + mov %o7,%g4 ! Save return address register + call 1f + add %o7,LL-1f,%g3 +1: mov %g4,%o7 ! Restore return address register +#else + sethi %hi(LL),%g3 + or %g3,%lo(LL),%g3 +#endif + jmp %g3+%g1 + ld [%o1+0],%o4 ! 1 +LL: +LL00: add %o0,-4,%o0 + add %o1,-4,%o1 + b Loop00 /* 4, 8, 12, ... */ + orcc %g0,%g0,%g2 +LL01: b Loop01 /* 1, 5, 9, ... */ + orcc %g0,%g0,%g2 + nop + nop +LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */ + add %o1,4,%o1 + b Loop10 + orcc %g0,%g0,%g2 + nop +LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */ + add %o1,-8,%o1 + b Loop11 + orcc %g0,%g0,%g2 + +Loop: addcc %g3,%g2,%g3 ! 1 + ld [%o1+4],%o4 ! 2 + st %g3,[%o0+0] ! 1 + rd %y,%g2 ! 1 +Loop00: umul %o4,%o3,%g3 ! 2 + addxcc %g3,%g2,%g3 ! 2 + ld [%o1+8],%o4 ! 3 + st %g3,[%o0+4] ! 2 + rd %y,%g2 ! 2 +Loop11: umul %o4,%o3,%g3 ! 3 + addxcc %g3,%g2,%g3 ! 3 + ld [%o1+12],%o4 ! 4 + add %o1,16,%o1 + st %g3,[%o0+8] ! 3 + rd %y,%g2 ! 3 +Loop10: umul %o4,%o3,%g3 ! 4 + addxcc %g3,%g2,%g3 ! 4 + ld [%o1+0],%o4 ! 1 + st %g3,[%o0+12] ! 4 + add %o0,16,%o0 + rd %y,%g2 ! 4 + addx %g0,%g2,%g2 +Loop01: addcc %o2,-4,%o2 + bg Loop + umul %o4,%o3,%g3 ! 1 + + addcc %g3,%g2,%g3 ! 4 + st %g3,[%o0+0] ! 4 + rd %y,%g2 ! 4 + + retl + addx %g0,%g2,%o0 diff --git a/ghc/rts/gmp/mpn/sparc32/v8/submul_1.S b/ghc/rts/gmp/mpn/sparc32/v8/submul_1.S new file mode 100644 index 0000000..e40119d --- /dev/null +++ b/ghc/rts/gmp/mpn/sparc32/v8/submul_1.S @@ -0,0 +1,58 @@ +! SPARC v8 __mpn_submul_1 -- Multiply a limb vector with a limb and +! subtract the result from a second limb vector. + +! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 + +#include "sysdep.h" + +.text + .align 4 + .global C_SYMBOL_NAME(__mpn_submul_1) +C_SYMBOL_NAME(__mpn_submul_1): + sub %g0,%o2,%o2 ! negate ... + sll %o2,2,%o2 ! ... and scale size + sub %o1,%o2,%o1 ! o1 is offset s1_ptr + sub %o0,%o2,%g1 ! g1 is offset res_ptr + + mov 0,%o0 ! clear cy_limb + +Loop: ld [%o1+%o2],%o4 + ld [%g1+%o2],%g2 + umul %o4,%o3,%o5 + rd %y,%g3 + addcc %o5,%o0,%o5 + addx %g3,0,%o0 + subcc %g2,%o5,%g2 + addx %o0,0,%o0 + st %g2,[%g1+%o2] + + addcc %o2,4,%o2 + bne Loop + nop + + retl + nop diff --git a/ghc/rts/gmp/mpn/sparc32/v8/supersparc/udiv.S b/ghc/rts/gmp/mpn/sparc32/v8/supersparc/udiv.S new file mode 100644 index 0000000..ed688ee --- /dev/null +++ b/ghc/rts/gmp/mpn/sparc32/v8/supersparc/udiv.S @@ -0,0 +1,109 @@ +! SuperSPARC __udiv_qrnnd division support, used from longlong.h. +! This is for SuperSPARC only, to compensate for its semi-functional +! udiv instruction. + +! Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! rem_ptr i0 +! n1 i1 +! n0 i2 +! d i3 + +#include "sysdep.h" +#undef ret /* Kludge for glibc */ + + .text + .align 8 +LC0: .double 0r4294967296 +LC1: .double 0r2147483648 + + .align 4 + .global C_SYMBOL_NAME(__udiv_qrnnd) +C_SYMBOL_NAME(__udiv_qrnnd): + !#PROLOGUE# 0 + save %sp,-104,%sp + !#PROLOGUE# 1 + st %i1,[%fp-8] + ld [%fp-8],%f10 + sethi %hi(LC0),%o7 + fitod %f10,%f4 + ldd [%o7+%lo(LC0)],%f8 + cmp %i1,0 + bge L248 + mov %i0,%i5 + faddd %f4,%f8,%f4 +L248: + st %i2,[%fp-8] + ld [%fp-8],%f10 + fmuld %f4,%f8,%f6 + cmp %i2,0 + bge L249 + fitod %f10,%f2 + faddd %f2,%f8,%f2 +L249: + st %i3,[%fp-8] + faddd %f6,%f2,%f2 + ld [%fp-8],%f10 + cmp %i3,0 + bge L250 + fitod %f10,%f4 + faddd %f4,%f8,%f4 +L250: + fdivd %f2,%f4,%f2 + sethi %hi(LC1),%o7 + ldd [%o7+%lo(LC1)],%f4 + fcmped %f2,%f4 + nop + fbge,a L251 + fsubd %f2,%f4,%f2 + fdtoi %f2,%f2 + st %f2,[%fp-8] + b L252 + ld [%fp-8],%i4 +L251: + fdtoi %f2,%f2 + st %f2,[%fp-8] + ld [%fp-8],%i4 + sethi %hi(-2147483648),%g2 + xor %i4,%g2,%i4 +L252: + umul %i3,%i4,%g3 + rd %y,%i0 + subcc %i2,%g3,%o7 + subxcc %i1,%i0,%g0 + be L253 + cmp %o7,%i3 + + add %i4,-1,%i0 + add %o7,%i3,%o7 + st %o7,[%i5] + ret + restore +L253: + blu L246 + mov %i4,%i0 + add %i4,1,%i0 + sub %o7,%i3,%o7 +L246: + st %o7,[%i5] + ret + restore diff --git a/ghc/rts/gmp/mpn/tests/add_n.c b/ghc/rts/gmp/mpn/tests/add_n.c new file mode 100644 index 0000000..c27d347 --- /dev/null +++ b/ghc/rts/gmp/mpn/tests/add_n.c @@ -0,0 +1,211 @@ +#include +#include "gmp.h" +#include "gmp-impl.h" + +#ifndef USG +#include +#include + +unsigned long +cputime () +{ + struct rusage rus; + + getrusage (0, &rus); + return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; +} +#else +#include + +#ifndef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC 1000000 +#endif + +#if CLOCKS_PER_SEC >= 10000 +#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000)) +#else +#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC) +#endif + +unsigned long +cputime () +{ + return CLOCK_TO_MILLISEC (clock ()); +} +#endif + +#define M * 1000000 + +#ifndef CLOCK +#if defined (__m88k__) +#define CLOCK 20 M +#elif defined (__i386__) +#define CLOCK (16.666667 M) +#elif defined (__m68k__) +#define CLOCK (20 M) +#elif defined (_IBMR2) +#define CLOCK (25 M) +#elif defined (__sparc__) +#define CLOCK (20 M) +#elif defined (__sun__) +#define CLOCK (20 M) +#elif defined (__mips) +#define CLOCK (40 M) +#elif defined (__hppa__) +#define CLOCK (50 M) +#elif defined (__alpha) +#define CLOCK (133 M) +#else +#error "Don't know CLOCK of your machine" +#endif +#endif + +#ifndef OPS +#define OPS 10000000 +#endif +#ifndef SIZE +#define SIZE 328 +#endif +#ifndef TIMES +#define TIMES OPS/SIZE +#else +#undef OPS +#define OPS (SIZE*TIMES) +#endif + + +mp_limb_t +#if __STDC__ +refmpn_add_n (mp_ptr res_ptr, + mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size) +#else +refmpn_add_n (res_ptr, s1_ptr, s2_ptr, size) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + register mp_srcptr s2_ptr; + mp_size_t size; +#endif +{ + register mp_limb_t x, y, cy; + register mp_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do + { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to one addend */ + cy = (y < cy); /* get out carry from that addition */ + y = x + y; /* add other addend */ + cy = (y < x) + cy; /* get out carry from that add, combine */ + res_ptr[j] = y; + } + while (++j != 0); + + return cy; +} + +main (argc, argv) + int argc; + char **argv; +{ + mp_limb_t s1[SIZE]; + mp_limb_t s2[SIZE]; + mp_limb_t dx[SIZE+1]; + mp_limb_t dy[SIZE+1]; + int cyx, cyy; + int i; + long t0, t; + int test; + mp_size_t size; + + for (test = 0; ; test++) + { +#ifdef RANDOM + size = (random () % SIZE + 1); +#else + size = SIZE; +#endif + + mpn_random2 (s1, size); + mpn_random2 (s2, size); + + dx[size] = 0x12345678; + dy[size] = 0x12345678; + +#ifdef PRINT + mpn_print (s1, size); + mpn_print (s2, size); +#endif + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyx = refmpn_add_n (dx, s1, s2, size); + t = cputime() - t0; +#if TIMES != 1 + printf ("refmpn_add_n: %ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif +#ifdef PRINT + printf ("%d ", cyx); mpn_print (dx, size); +#endif + + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyx = mpn_add_n (dx, s1, s2, size); + t = cputime() - t0; +#if TIMES != 1 + printf ("mpn_add_n: %ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif +#ifdef PRINT + printf ("%d ", cyx); mpn_print (dx, size); +#endif + +#ifndef NOCHECK + /* Put garbage in the destination. */ + for (i = 0; i < size; i++) + { + dx[i] = 0x7654321; + dy[i] = 0x1234567; + } + + cyx = refmpn_add_n (dx, s1, s2, size); + cyy = mpn_add_n (dy, s1, s2, size); + if (cyx != cyy || mpn_cmp (dx, dy, size) != 0 + || dx[size] != 0x12345678 || dy[size] != 0x12345678) + { +#ifndef PRINT + printf ("%d ", cyx); mpn_print (dx, size); + printf ("%d ", cyy); mpn_print (dy, size); +#endif + abort(); + } +#endif + } +} + +mpn_print (mp_ptr p, mp_size_t size) +{ + mp_size_t i; + + for (i = size - 1; i >= 0; i--) + { + printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]); +#ifdef SPACE + if (i != 0) + printf (" "); +#endif + } + puts (""); +} diff --git a/ghc/rts/gmp/mpn/tests/addmul_1.c b/ghc/rts/gmp/mpn/tests/addmul_1.c new file mode 100644 index 0000000..23952a1 --- /dev/null +++ b/ghc/rts/gmp/mpn/tests/addmul_1.c @@ -0,0 +1,223 @@ +#include +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef USG +#include +#include + +unsigned long +cputime () +{ + struct rusage rus; + + getrusage (0, &rus); + return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; +} +#else +#include + +#ifndef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC 1000000 +#endif + +#if CLOCKS_PER_SEC >= 10000 +#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000)) +#else +#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC) +#endif + +unsigned long +cputime () +{ + return CLOCK_TO_MILLISEC (clock ()); +} +#endif + +#define M * 1000000 + +#ifndef CLOCK +#if defined (__m88k__) +#define CLOCK 20 M +#elif defined (__i386__) +#define CLOCK (16.666667 M) +#elif defined (__m68k__) +#define CLOCK (20 M) +#elif defined (_IBMR2) +#define CLOCK (25 M) +#elif defined (__sparc__) +#define CLOCK (20 M) +#elif defined (__sun__) +#define CLOCK (20 M) +#elif defined (__mips) +#define CLOCK (40 M) +#elif defined (__hppa__) +#define CLOCK (50 M) +#elif defined (__alpha) +#define CLOCK (133 M) +#else +#error "Don't know CLOCK of your machine" +#endif +#endif + +#ifndef OPS +#define OPS 20000000 +#endif +#ifndef SIZE +#define SIZE 496 +#endif +#ifndef TIMES +#define TIMES OPS/SIZE +#else +#undef OPS +#define OPS (SIZE*TIMES) +#endif + +mp_limb_t +refmpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + mp_size_t s1_size; + register mp_limb_t s2_limb; +{ + register mp_limb_t cy_limb; + register mp_size_t j; + register mp_limb_t prod_high, prod_low; + register mp_limb_t x; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. */ + res_ptr -= j; + s1_ptr -= j; + + cy_limb = 0; + do + { + umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb) + prod_high; + + x = res_ptr[j]; + prod_low = x + prod_low; + cy_limb += (prod_low < x); + res_ptr[j] = prod_low; + } + while (++j != 0); + + return cy_limb; +} + +main (argc, argv) + int argc; + char **argv; +{ + mp_limb_t s1[SIZE]; + mp_limb_t dx[SIZE+2]; + mp_limb_t dy[SIZE+2]; + mp_limb_t cyx, cyy; + int i; + long t0, t; + int test; + mp_limb_t xlimb; + mp_size_t size; + double cyc; + + for (test = 0; ; test++) + { +#ifdef RANDOM + size = (random () % SIZE + 1); +#else + size = SIZE; +#endif + + mpn_random2 (s1, size); + mpn_random2 (dy+1, size); + + if (random () % 0x100 == 0) + xlimb = 0; + else + mpn_random2 (&xlimb, 1); + + dy[size+1] = 0x12345678; + dy[0] = 0x87654321; + +#if defined (PRINT) || defined (XPRINT) + printf ("xlimb=%*lX\n", (int) (2 * sizeof(mp_limb_t)), xlimb); +#endif +#ifdef PRINT + mpn_print (dy+1, size); + mpn_print (s1, size); +#endif + + MPN_COPY (dx, dy, size+2); + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyx = refmpn_addmul_1 (dx+1, s1, size, xlimb); + t = cputime() - t0; +#if TIMES != 1 + cyc = ((double) t * CLOCK) / (OPS * 1000.0); + printf ("refmpn_addmul_1: %5ldms (%.2f cycles/limb) [%.2f Gb/s]\n", + t, + cyc, + CLOCK/cyc*BITS_PER_MP_LIMB*BITS_PER_MP_LIMB); +#endif + + MPN_COPY (dx, dy, size+2); + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyy = mpn_addmul_1 (dx+1, s1, size, xlimb); + t = cputime() - t0; +#if TIMES != 1 + cyc = ((double) t * CLOCK) / (OPS * 1000.0); + printf ("mpn_addmul_1: %5ldms (%.2f cycles/limb) [%.2f Gb/s]\n", + t, + cyc, + CLOCK/cyc*BITS_PER_MP_LIMB*BITS_PER_MP_LIMB); +#endif + + MPN_COPY (dx, dy, size+2); + cyx = refmpn_addmul_1 (dx+1, s1, size, xlimb); + cyy = mpn_addmul_1 (dy+1, s1, size, xlimb); + +#ifdef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); + mpn_print (dx+1, size); + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); + mpn_print (dy+1, size); +#endif + +#ifndef NOCHECK + if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0 + || dx[size+1] != 0x12345678 || dx[0] != 0x87654321) + { +#ifndef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); + mpn_print (dx+1, size); + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); + mpn_print (dy+1, size); +#endif + abort(); + } +#endif + } +} + +mpn_print (mp_ptr p, mp_size_t size) +{ + mp_size_t i; + + for (i = size - 1; i >= 0; i--) + { + printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]); +#ifdef SPACE + if (i != 0) + printf (" "); +#endif + } + puts (""); +} diff --git a/ghc/rts/gmp/mpn/tests/divmod_1.c b/ghc/rts/gmp/mpn/tests/divmod_1.c new file mode 100644 index 0000000..f6b541e --- /dev/null +++ b/ghc/rts/gmp/mpn/tests/divmod_1.c @@ -0,0 +1,120 @@ +#include +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef USG +#include +#include + +unsigned long +cputime () +{ + struct rusage rus; + + getrusage (0, &rus); + return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; +} +#else +#include + +#ifndef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC 1000000 +#endif + +#if CLOCKS_PER_SEC >= 10000 +#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000)) +#else +#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC) +#endif + +unsigned long +cputime () +{ + return CLOCK_TO_MILLISEC (clock ()); +} +#endif + +#define M * 1000000 + +#ifndef CLOCK +#if defined (__m88k__) +#define CLOCK 20 M +#elif defined (__i386__) +#define CLOCK (16.666667 M) +#elif defined (__m68k__) +#define CLOCK (20 M) +#elif defined (_IBMR2) +#define CLOCK (25 M) +#elif defined (__sparc__) +#define CLOCK (20 M) +#elif defined (__sun__) +#define CLOCK (20 M) +#elif defined (__mips) +#define CLOCK (40 M) +#elif defined (__hppa__) +#define CLOCK (50 M) +#elif defined (__alpha) +#define CLOCK (133 M) +#else +#error "Don't know CLOCK of your machine" +#endif +#endif + +#ifndef OPS +#define OPS 20000000 +#endif +#ifndef SIZE +#define SIZE 1000 +#endif +#ifndef TIMES +#define TIMES OPS/SIZE +#else +#undef OPS +#define OPS (SIZE*TIMES) +#endif + +main () +{ + mp_limb_t nptr[SIZE]; + mp_limb_t qptr[SIZE]; + mp_limb_t pptr[SIZE]; + mp_limb_t dlimb, rlimb, plimb; + mp_size_t nsize, qsize, psize; + int test; + + for (test = 0; ; test++) + { +#ifdef RANDOM + nsize = random () % SIZE + 1; +#else + nsize = SIZE; +#endif + + mpn_random2 (nptr, nsize); + + mpn_random2 (&dlimb, 1); + if (dlimb == 0) + abort (); + + rlimb = mpn_divmod_1 (qptr, nptr, nsize, dlimb); + qsize = nsize - (qptr[nsize - 1] == 0); + if (qsize == 0) + { + plimb = rlimb; + psize = qsize; + } + else + { + plimb = mpn_mul_1 (pptr, qptr, qsize, dlimb); + psize = qsize; + plimb += mpn_add_1 (pptr, pptr, psize, rlimb); + } + if (plimb != 0) + pptr[psize++] = plimb; + + + if (nsize != psize || mpn_cmp (nptr, pptr, nsize) != 0) + abort (); + } +} diff --git a/ghc/rts/gmp/mpn/tests/divrem.c b/ghc/rts/gmp/mpn/tests/divrem.c new file mode 100644 index 0000000..6eafc99 --- /dev/null +++ b/ghc/rts/gmp/mpn/tests/divrem.c @@ -0,0 +1,129 @@ +#include +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef USG +#include +#include + +unsigned long +cputime () +{ + struct rusage rus; + + getrusage (0, &rus); + return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; +} +#else +#include + +#ifndef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC 1000000 +#endif + +#if CLOCKS_PER_SEC >= 10000 +#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000)) +#else +#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC) +#endif + +unsigned long +cputime () +{ + return CLOCK_TO_MILLISEC (clock ()); +} +#endif + +#define M * 1000000 + +#ifndef CLOCK +#if defined (__m88k__) +#define CLOCK 20 M +#elif defined (__i386__) +#define CLOCK (16.666667 M) +#elif defined (__m68k__) +#define CLOCK (20 M) +#elif defined (_IBMR2) +#define CLOCK (25 M) +#elif defined (__sparc__) +#define CLOCK (20 M) +#elif defined (__sun__) +#define CLOCK (20 M) +#elif defined (__mips) +#define CLOCK (40 M) +#elif defined (__hppa__) +#define CLOCK (50 M) +#elif defined (__alpha) +#define CLOCK (133 M) +#else +#error "Don't know CLOCK of your machine" +#endif +#endif + +#ifndef OPS +#define OPS 20000000 +#endif +#ifndef SIZE +#define SIZE 100 +#endif +#ifndef TIMES +#define TIMES OPS/SIZE +#else +#undef OPS +#define OPS (SIZE*TIMES) +#endif + +main () +{ + mp_limb_t nptr[2 * SIZE]; + mp_limb_t dptr[SIZE]; + mp_limb_t qptr[2 * SIZE]; + mp_limb_t pptr[2 * SIZE]; + mp_limb_t rptr[2 * SIZE]; + mp_size_t nsize, dsize, qsize, rsize, psize; + int test; + mp_limb_t qlimb; + + for (test = 0; ; test++) + { +#ifdef RANDOM + nsize = random () % (2 * SIZE) + 1; + dsize = random () % nsize + 1; +#else + nsize = 2 * SIZE; + dsize = SIZE; +#endif + + mpn_random2 (nptr, nsize); + mpn_random2 (dptr, dsize); + dptr[dsize - 1] |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1); + + MPN_COPY (rptr, nptr, nsize); + qlimb = mpn_divrem (qptr, (mp_size_t) 0, rptr, nsize, dptr, dsize); + rsize = dsize; + qsize = nsize - dsize; + qptr[qsize] = qlimb; + qsize += qlimb; + if (qsize == 0 || qsize > 2 * SIZE) + { + continue; /* bogus */ + } + else + { + mp_limb_t cy; + if (qsize > dsize) + mpn_mul (pptr, qptr, qsize, dptr, dsize); + else + mpn_mul (pptr, dptr, dsize, qptr, qsize); + psize = qsize + dsize; + psize -= pptr[psize - 1] == 0; + cy = mpn_add (pptr, pptr, psize, rptr, rsize); + pptr[psize] = cy; + psize += cy; + } + + if (nsize != psize || mpn_cmp (nptr, pptr, nsize) != 0) + abort (); + } +} diff --git a/ghc/rts/gmp/mpn/tests/lshift.c b/ghc/rts/gmp/mpn/tests/lshift.c new file mode 100644 index 0000000..f50c5dc --- /dev/null +++ b/ghc/rts/gmp/mpn/tests/lshift.c @@ -0,0 +1,226 @@ +#include +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef USG +#include +#include + +unsigned long +cputime () +{ + struct rusage rus; + + getrusage (0, &rus); + return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; +} +#else +#include + +#ifndef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC 1000000 +#endif + +#if CLOCKS_PER_SEC >= 10000 +#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000)) +#else +#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC) +#endif + +unsigned long +cputime () +{ + return CLOCK_TO_MILLISEC (clock ()); +} +#endif + +#define M * 1000000 + +#ifndef CLOCK +#if defined (__m88k__) +#define CLOCK 20 M +#elif defined (__i386__) +#define CLOCK (16.666667 M) +#elif defined (__m68k__) +#define CLOCK (20 M) +#elif defined (_IBMR2) +#define CLOCK (25 M) +#elif defined (__sparc__) +#define CLOCK (20 M) +#elif defined (__sun__) +#define CLOCK (20 M) +#elif defined (__mips) +#define CLOCK (40 M) +#elif defined (__hppa__) +#define CLOCK (50 M) +#elif defined (__alpha) +#define CLOCK (133 M) +#else +#error "Don't know CLOCK of your machine" +#endif +#endif + +#ifndef OPS +#define OPS 10000000 +#endif +#ifndef SIZE +#define SIZE 496 +#endif +#ifndef TIMES +#define TIMES OPS/SIZE +#else +#undef OPS +#define OPS (SIZE*TIMES) +#endif + +mp_limb_t +refmpn_lshift (wp, up, usize, cnt) + register mp_ptr wp; + register mp_srcptr up; + mp_size_t usize; + register unsigned int cnt; +{ + register mp_limb_t high_limb, low_limb; + register unsigned sh_1, sh_2; + register mp_size_t i; + mp_limb_t retval; + +#ifdef DEBUG + if (usize == 0 || cnt == 0) + abort (); +#endif + + sh_1 = cnt; +#if 0 + if (sh_1 == 0) + { + if (wp != up) + { + /* Copy from high end to low end, to allow specified input/output + overlapping. */ + for (i = usize - 1; i >= 0; i--) + wp[i] = up[i]; + } + return 0; + } +#endif + + wp += 1; + sh_2 = BITS_PER_MP_LIMB - sh_1; + i = usize - 1; + low_limb = up[i]; + retval = low_limb >> sh_2; + high_limb = low_limb; + while (--i >= 0) + { + low_limb = up[i]; + wp[i] = (high_limb << sh_1) | (low_limb >> sh_2); + high_limb = low_limb; + } + wp[i] = high_limb << sh_1; + + return retval; +} + +#ifndef CNT +#define CNT 4 +#endif + +main (argc, argv) + int argc; + char **argv; +{ + mp_limb_t s1[SIZE]; + mp_limb_t dx[SIZE+2]; + mp_limb_t dy[SIZE+2]; + mp_limb_t cyx, cyy; + int i; + long t0, t; + int test; + int cnt = CNT; + mp_size_t size; + + for (test = 0; ; test++) + { +#ifdef RANDOM + size = (random () % SIZE + 1); +#else + size = SIZE; +#endif + mpn_random2 (s1, size); + + dx[size+1] = 0x12345678; + dy[size+1] = 0x12345678; + dx[0] = 0x87654321; + dy[0] = 0x87654321; + +#ifdef PRINT + mpn_print (s1, size); +#endif + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyx = refmpn_lshift (dx+1, s1, size, cnt); + t = cputime() - t0; +#if TIMES != 1 + printf ("refmpn_lshift: %5ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif +#ifdef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); mpn_print (dx+1, size); +#endif + + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyy = mpn_lshift (dx+1, s1, size, cnt); + t = cputime() - t0; +#if TIMES != 1 + printf ("mpn_lshift: %5ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif +#ifdef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); mpn_print (dx+1, size); +#endif + +#ifndef NOCHECK + /* Put garbage in the destination. */ + for (i = 1; i <= size; i++) + { + dx[i] = 0x7654321; + dy[i] = 0x1234567; + } + + cyx = refmpn_lshift (dx+1, s1, size, cnt); + cyy = mpn_lshift (dy+1, s1, size, cnt); + + if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0 + || dx[size+1] != 0x12345678 || dx[0] != 0x87654321) + { +#ifndef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); + mpn_print (dx+1, size); + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); + mpn_print (dy+1, size); +#endif + abort(); + } +#endif + } +} + +mpn_print (mp_ptr p, mp_size_t size) +{ + mp_size_t i; + + for (i = size - 1; i >= 0; i--) + { + printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]); +#ifdef SPACE + if (i != 0) + printf (" "); +#endif + } + puts (""); +} diff --git a/ghc/rts/gmp/mpn/tests/mul_1.c b/ghc/rts/gmp/mpn/tests/mul_1.c new file mode 100644 index 0000000..2b522fa --- /dev/null +++ b/ghc/rts/gmp/mpn/tests/mul_1.c @@ -0,0 +1,212 @@ +#include +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef USG +#include +#include + +unsigned long +cputime () +{ + struct rusage rus; + + getrusage (0, &rus); + return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; +} +#else +#include + +#ifndef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC 1000000 +#endif + +#if CLOCKS_PER_SEC >= 10000 +#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000)) +#else +#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC) +#endif + +unsigned long +cputime () +{ + return CLOCK_TO_MILLISEC (clock ()); +} +#endif + +#define M * 1000000 + +#ifndef CLOCK +#if defined (__m88k__) +#define CLOCK 20 M +#elif defined (__i386__) +#define CLOCK (16.666667 M) +#elif defined (__m68k__) +#define CLOCK (20 M) +#elif defined (_IBMR2) +#define CLOCK (25 M) +#elif defined (__sparc__) +#define CLOCK (20 M) +#elif defined (__sun__) +#define CLOCK (20 M) +#elif defined (__mips) +#define CLOCK (40 M) +#elif defined (__hppa__) +#define CLOCK (50 M) +#elif defined (__alpha) +#define CLOCK (133 M) +#else +#error "Don't know CLOCK of your machine" +#endif +#endif + +#ifndef OPS +#define OPS 20000000 +#endif +#ifndef SIZE +#define SIZE 496 +#endif +#ifndef TIMES +#define TIMES OPS/SIZE +#else +#undef OPS +#define OPS (SIZE*TIMES) +#endif + +mp_limb_t +refmpn_mul_1 (res_ptr, s1_ptr, s1_size, s2_limb) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + mp_size_t s1_size; + register mp_limb_t s2_limb; +{ + register mp_limb_t cy_limb; + register mp_size_t j; + register mp_limb_t prod_high, prod_low; + + /* The loop counter and index J goes from -S1_SIZE to -1. This way + the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + res_ptr -= j; + + cy_limb = 0; + do + { + umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb) + prod_high; + + res_ptr[j] = prod_low; + } + while (++j != 0); + + return cy_limb; +} + +main (argc, argv) + int argc; + char **argv; +{ + mp_limb_t s1[SIZE]; + mp_limb_t dx[SIZE+2]; + mp_limb_t dy[SIZE+2]; + mp_limb_t cyx, cyy; + int i; + long t0, t; + int test; + mp_limb_t xlimb; + mp_size_t size; + + for (test = 0; ; test++) + { +#ifdef RANDOM + size = (random () % SIZE + 1); +#else + size = SIZE; +#endif + + mpn_random2 (s1, size); + mpn_random2 (dy+1, size); + + if (random () % 0x100 == 0) + xlimb = 0; + else + mpn_random2 (&xlimb, 1); + + dy[size+1] = 0x12345678; + dy[0] = 0x87654321; + +#if defined (PRINT) || defined (XPRINT) + printf ("xlimb=%*lX\n", (int) (2 * sizeof(mp_limb_t)), xlimb); +#endif +#ifdef PRINT + mpn_print (s1, size); +#endif + + MPN_COPY (dx, dy, size+2); + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyx = refmpn_mul_1 (dx+1, s1, size, xlimb); + t = cputime() - t0; +#if TIMES != 1 + printf ("refmpn_mul_1: %5ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif + + MPN_COPY (dx, dy, size+2); + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyy = mpn_mul_1 (dx+1, s1, size, xlimb); + t = cputime() - t0; +#if TIMES != 1 + printf ("mpn_mul_1: %5ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif + + cyx = refmpn_mul_1 (dx+1, s1, size, xlimb); + cyy = mpn_mul_1 (dy+1, s1, size, xlimb); + +#ifdef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); + mpn_print (dx+1, size); + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); + mpn_print (dy+1, size); +#endif + +#ifndef NOCHECK + if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0 + || dx[size+1] != 0x12345678 || dx[0] != 0x87654321) + { +#ifndef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); + mpn_print (dx+1, size); + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); + mpn_print (dy+1, size); +#endif + abort(); + } +#endif + } +} + +mpn_print (mp_ptr p, mp_size_t size) +{ + mp_size_t i; + + for (i = size - 1; i >= 0; i--) + { + printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]); +#ifdef SPACE + if (i != 0) + printf (" "); +#endif + } + puts (""); +} diff --git a/ghc/rts/gmp/mpn/tests/rshift.c b/ghc/rts/gmp/mpn/tests/rshift.c new file mode 100644 index 0000000..2482bf3 --- /dev/null +++ b/ghc/rts/gmp/mpn/tests/rshift.c @@ -0,0 +1,227 @@ +#include +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef USG +#include +#include + +unsigned long +cputime () +{ + struct rusage rus; + + getrusage (0, &rus); + return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; +} +#else +#include + +#ifndef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC 1000000 +#endif + +#if CLOCKS_PER_SEC >= 10000 +#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000)) +#else +#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC) +#endif + +unsigned long +cputime () +{ + return CLOCK_TO_MILLISEC (clock ()); +} +#endif + +#define M * 1000000 + +#ifndef CLOCK +#if defined (__m88k__) +#define CLOCK 20 M +#elif defined (__i386__) +#define CLOCK (16.666667 M) +#elif defined (__m68k__) +#define CLOCK (20 M) +#elif defined (_IBMR2) +#define CLOCK (25 M) +#elif defined (__sparc__) +#define CLOCK (20 M) +#elif defined (__sun__) +#define CLOCK (20 M) +#elif defined (__mips) +#define CLOCK (40 M) +#elif defined (__hppa__) +#define CLOCK (50 M) +#elif defined (__alpha) +#define CLOCK (133 M) +#else +#error "Don't know CLOCK of your machine" +#endif +#endif + +#ifndef OPS +#define OPS 10000000 +#endif +#ifndef SIZE +#define SIZE 496 +#endif +#ifndef TIMES +#define TIMES OPS/SIZE +#else +#undef OPS +#define OPS (SIZE*TIMES) +#endif + +mp_limb_t +refmpn_rshift (wp, up, usize, cnt) + register mp_ptr wp; + register mp_srcptr up; + mp_size_t usize; + register unsigned int cnt; +{ + register mp_limb_t high_limb, low_limb; + register unsigned sh_1, sh_2; + register mp_size_t i; + mp_limb_t retval; + +#ifdef DEBUG + if (usize == 0 || cnt == 0) + abort (); +#endif + + sh_1 = cnt; +#if 0 + if (sh_1 == 0) + { + if (wp != up) + { + /* Copy from low end to high end, to allow specified input/output + overlapping. */ + for (i = 0; i < usize; i++) + wp[i] = up[i]; + } + return 0; + } +#endif + + wp -= 1; + sh_2 = BITS_PER_MP_LIMB - sh_1; + high_limb = up[0]; + retval = high_limb << sh_2; + low_limb = high_limb; + + for (i = 1; i < usize; i++) + { + high_limb = up[i]; + wp[i] = (low_limb >> sh_1) | (high_limb << sh_2); + low_limb = high_limb; + } + low_limb >>= sh_1; + wp[i] = low_limb; + + return retval; +} + +#ifndef CNT +#define CNT 4 +#endif + +main (argc, argv) + int argc; + char **argv; +{ + mp_limb_t s1[SIZE]; + mp_limb_t dx[SIZE+2]; + mp_limb_t dy[SIZE+2]; + mp_limb_t cyx, cyy; + int i; + long t0, t; + int test; + int cnt = CNT; + mp_size_t size; + + for (test = 0; ; test++) + { +#ifdef RANDOM + size = (random () % SIZE + 1); +#else + size = SIZE; +#endif + mpn_random2 (s1, size); + + dx[size+1] = 0x12345678; + dy[size+1] = 0x12345678; + dx[0] = 0x87654321; + dy[0] = 0x87654321; + +#ifdef PRINT + mpn_print (s1, size); +#endif + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyx = refmpn_rshift (dx+1, s1, size, cnt); + t = cputime() - t0; +#if TIMES != 1 + printf ("refmpn_rshift: %5ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif +#ifdef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); mpn_print (dx+1, size); +#endif + + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyy = mpn_rshift (dx+1, s1, size, cnt); + t = cputime() - t0; +#if TIMES != 1 + printf ("mpn_rshift: %5ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif +#ifdef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); mpn_print (dx+1, size); +#endif + +#ifndef NOCHECK + /* Put garbage in the destination. */ + for (i = 1; i <= size; i++) + { + dx[i] = 0x7654321; + dy[i] = 0x1234567; + } + + cyx = refmpn_rshift (dx+1, s1, size, cnt); + cyy = mpn_rshift (dy+1, s1, size, cnt); + + if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0 + || dx[size+1] != 0x12345678 || dx[0] != 0x87654321) + { +#ifndef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); + mpn_print (dx+1, size); + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); + mpn_print (dy+1, size); +#endif + abort(); + } +#endif + } +} + +mpn_print (mp_ptr p, mp_size_t size) +{ + mp_size_t i; + + for (i = size - 1; i >= 0; i--) + { + printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]); +#ifdef SPACE + if (i != 0) + printf (" "); +#endif + } + puts (""); +} diff --git a/ghc/rts/gmp/mpn/tests/sub_n.c b/ghc/rts/gmp/mpn/tests/sub_n.c new file mode 100644 index 0000000..2b9031b --- /dev/null +++ b/ghc/rts/gmp/mpn/tests/sub_n.c @@ -0,0 +1,211 @@ +#include +#include "gmp.h" +#include "gmp-impl.h" + +#ifndef USG +#include +#include + +unsigned long +cputime () +{ + struct rusage rus; + + getrusage (0, &rus); + return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; +} +#else +#include + +#ifndef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC 1000000 +#endif + +#if CLOCKS_PER_SEC >= 10000 +#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000)) +#else +#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC) +#endif + +unsigned long +cputime () +{ + return CLOCK_TO_MILLISEC (clock ()); +} +#endif + +#define M * 1000000 + +#ifndef CLOCK +#if defined (__m88k__) +#define CLOCK 20 M +#elif defined (__i386__) +#define CLOCK (16.666667 M) +#elif defined (__m68k__) +#define CLOCK (20 M) +#elif defined (_IBMR2) +#define CLOCK (25 M) +#elif defined (__sparc__) +#define CLOCK (20 M) +#elif defined (__sun__) +#define CLOCK (20 M) +#elif defined (__mips) +#define CLOCK (40 M) +#elif defined (__hppa__) +#define CLOCK (50 M) +#elif defined (__alpha) +#define CLOCK (133 M) +#else +#error "Don't know CLOCK of your machine" +#endif +#endif + +#ifndef OPS +#define OPS 10000000 +#endif +#ifndef SIZE +#define SIZE 328 +#endif +#ifndef TIMES +#define TIMES OPS/SIZE +#else +#undef OPS +#define OPS (SIZE*TIMES) +#endif + + +mp_limb_t +#if __STDC__ +refmpn_sub_n (mp_ptr res_ptr, + mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size) +#else +refmpn_sub_n (res_ptr, s1_ptr, s2_ptr, size) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + register mp_srcptr s2_ptr; + mp_size_t size; +#endif +{ + register mp_limb_t x, y, cy; + register mp_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do + { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to subtrahend */ + cy = (y < cy); /* get out carry from that addition */ + y = x - y; /* main subtract */ + cy = (y > x) + cy; /* get out carry from the subtract, combine */ + res_ptr[j] = y; + } + while (++j != 0); + + return cy; +} + +main (argc, argv) + int argc; + char **argv; +{ + mp_limb_t s1[SIZE]; + mp_limb_t s2[SIZE]; + mp_limb_t dx[SIZE+1]; + mp_limb_t dy[SIZE+1]; + int cyx, cyy; + int i; + long t0, t; + int test; + mp_size_t size; + + for (test = 0; ; test++) + { +#ifdef RANDOM + size = (random () % SIZE + 1); +#else + size = SIZE; +#endif + + mpn_random2 (s1, size); + mpn_random2 (s2, size); + + dx[size] = 0x12345678; + dy[size] = 0x12345678; + +#ifdef PRINT + mpn_print (s1, size); + mpn_print (s2, size); +#endif + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyx = refmpn_sub_n (dx, s1, s2, size); + t = cputime() - t0; +#if TIMES != 1 + printf ("refmpn_sub_n: %ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif +#ifdef PRINT + printf ("%d ", cyx); mpn_print (dx, size); +#endif + + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyx = mpn_sub_n (dx, s1, s2, size); + t = cputime() - t0; +#if TIMES != 1 + printf ("mpn_sub_n: %ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif +#ifdef PRINT + printf ("%d ", cyx); mpn_print (dx, size); +#endif + +#ifndef NOCHECK + /* Put garbage in the destination. */ + for (i = 0; i < size; i++) + { + dx[i] = 0x7654321; + dy[i] = 0x1234567; + } + + cyx = refmpn_sub_n (dx, s1, s2, size); + cyy = mpn_sub_n (dy, s1, s2, size); + if (cyx != cyy || mpn_cmp (dx, dy, size) != 0 + || dx[size] != 0x12345678 || dy[size] != 0x12345678) + { +#ifndef PRINT + printf ("%d ", cyx); mpn_print (dx, size); + printf ("%d ", cyy); mpn_print (dy, size); +#endif + abort(); + } +#endif + } +} + +mpn_print (mp_ptr p, mp_size_t size) +{ + mp_size_t i; + + for (i = size - 1; i >= 0; i--) + { + printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]); +#ifdef SPACE + if (i != 0) + printf (" "); +#endif + } + puts (""); +} diff --git a/ghc/rts/gmp/mpn/tests/submul_1.c b/ghc/rts/gmp/mpn/tests/submul_1.c new file mode 100644 index 0000000..0e464e6 --- /dev/null +++ b/ghc/rts/gmp/mpn/tests/submul_1.c @@ -0,0 +1,218 @@ +#include +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef USG +#include +#include + +unsigned long +cputime () +{ + struct rusage rus; + + getrusage (0, &rus); + return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; +} +#else +#include + +#ifndef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC 1000000 +#endif + +#if CLOCKS_PER_SEC >= 10000 +#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000)) +#else +#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC) +#endif + +unsigned long +cputime () +{ + return CLOCK_TO_MILLISEC (clock ()); +} +#endif + +#define M * 1000000 + +#ifndef CLOCK +#if defined (__m88k__) +#define CLOCK 20 M +#elif defined (__i386__) +#define CLOCK (16.666667 M) +#elif defined (__m68k__) +#define CLOCK (20 M) +#elif defined (_IBMR2) +#define CLOCK (25 M) +#elif defined (__sparc__) +#define CLOCK (20 M) +#elif defined (__sun__) +#define CLOCK (20 M) +#elif defined (__mips) +#define CLOCK (40 M) +#elif defined (__hppa__) +#define CLOCK (50 M) +#elif defined (__alpha) +#define CLOCK (133 M) +#else +#error "Don't know CLOCK of your machine" +#endif +#endif + +#ifndef OPS +#define OPS 20000000 +#endif +#ifndef SIZE +#define SIZE 496 +#endif +#ifndef TIMES +#define TIMES OPS/SIZE +#else +#undef OPS +#define OPS (SIZE*TIMES) +#endif + +mp_limb_t +refmpn_submul_1 (res_ptr, s1_ptr, s1_size, s2_limb) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + mp_size_t s1_size; + register mp_limb_t s2_limb; +{ + register mp_limb_t cy_limb; + register mp_size_t j; + register mp_limb_t prod_high, prod_low; + register mp_limb_t x; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. */ + res_ptr -= j; + s1_ptr -= j; + + cy_limb = 0; + do + { + umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb) + prod_high; + + x = res_ptr[j]; + prod_low = x - prod_low; + cy_limb += (prod_low > x); + res_ptr[j] = prod_low; + } + while (++j != 0); + + return cy_limb; +} + +main (argc, argv) + int argc; + char **argv; +{ + mp_limb_t s1[SIZE]; + mp_limb_t dx[SIZE+2]; + mp_limb_t dy[SIZE+2]; + mp_limb_t cyx, cyy; + int i; + long t0, t; + int test; + mp_limb_t xlimb; + mp_size_t size; + + for (test = 0; ; test++) + { +#ifdef RANDOM + size = (random () % SIZE + 1); +#else + size = SIZE; +#endif + + mpn_random2 (s1, size); + mpn_random2 (dy+1, size); + + if (random () % 0x100 == 0) + xlimb = 0; + else + mpn_random2 (&xlimb, 1); + + dy[size+1] = 0x12345678; + dy[0] = 0x87654321; + +#if defined (PRINT) || defined (XPRINT) + printf ("xlimb=%*lX\n", (int) (2 * sizeof(mp_limb_t)), xlimb); +#endif +#ifdef PRINT + mpn_print (dy+1, size); + mpn_print (s1, size); +#endif + + MPN_COPY (dx, dy, size+2); + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyx = refmpn_submul_1 (dx+1, s1, size, xlimb); + t = cputime() - t0; +#if TIMES != 1 + printf ("refmpn_submul_1: %5ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif + + MPN_COPY (dx, dy, size+2); + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyy = mpn_submul_1 (dx+1, s1, size, xlimb); + t = cputime() - t0; +#if TIMES != 1 + printf ("mpn_submul_1: %5ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif + + MPN_COPY (dx, dy, size+2); + cyx = refmpn_submul_1 (dx+1, s1, size, xlimb); + cyy = mpn_submul_1 (dy+1, s1, size, xlimb); + +#ifdef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); + mpn_print (dx+1, size); + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); + mpn_print (dy+1, size); +#endif + +#ifndef NOCHECK + if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0 + || dx[size+1] != 0x12345678 || dx[0] != 0x87654321) + { +#ifndef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); + mpn_print (dx+1, size); + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); + mpn_print (dy+1, size); +#endif + abort(); + } +#endif + } +} + +mpn_print (mp_ptr p, mp_size_t size) +{ + mp_size_t i; + + for (i = size - 1; i >= 0; i--) + { + printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]); +#ifdef SPACE + if (i != 0) + printf (" "); +#endif + } + puts (""); +} diff --git a/ghc/rts/gmp/mpn/tests/tst-addsub.c b/ghc/rts/gmp/mpn/tests/tst-addsub.c new file mode 100644 index 0000000..e02b9d5 --- /dev/null +++ b/ghc/rts/gmp/mpn/tests/tst-addsub.c @@ -0,0 +1,164 @@ +#include +#include +#include "gmp.h" +#include "gmp-impl.h" + +#define ADD 1 +#define SUB 2 + +#ifndef METHOD +#define METHOD ADD +#endif + +#if METHOD == ADD +#define REFCALL refmpn_add_n +#define TESTCALL mpn_add_n +#endif + +#if METHOD == SUB +#define REFCALL refmpn_sub_n +#define TESTCALL mpn_sub_n +#endif + +mp_limb_t refmpn_add_n (); +mp_limb_t refmpn_sub_n (); + +#define SIZE 100 + +main (argc, argv) + int argc; + char **argv; +{ + mp_size_t alloc_size, max_size, size, i, cumul_size; + mp_ptr s1, s2, dx, dy; + int s1_align, s2_align, d_align; + long pass, n_passes; + mp_limb_t cx, cy; + + max_size = SIZE; + n_passes = 1000000; + + argc--; argv++; + if (argc) + { + max_size = atol (*argv); + argc--; argv++; + } + + alloc_size = max_size + 32; + s1 = malloc (alloc_size * BYTES_PER_MP_LIMB); + s2 = malloc (alloc_size * BYTES_PER_MP_LIMB); + dx = malloc (alloc_size * BYTES_PER_MP_LIMB); + dy = malloc (alloc_size * BYTES_PER_MP_LIMB); + + cumul_size = 0; + for (pass = 0; pass < n_passes; pass++) + { + cumul_size += size; + if (cumul_size >= 1000000) + { + cumul_size -= 1000000; + printf ("%d ", pass); fflush (stdout); + } + s1_align = random () % 32; + s2_align = random () % 32; + d_align = random () % 32; + + size = random () % max_size + 1; + + mpn_random2 (s1 + s1_align, size); + mpn_random2 (s2 + s2_align, size); + + for (i = 0; i < alloc_size; i++) + dx[i] = dy[i] = i + 0x9876500; + + cx = TESTCALL (dx + d_align, s1 + s1_align, s2 + s2_align, size); + cy = REFCALL (dy + d_align, s1 + s1_align, s2 + s2_align, size); + + if (cx != cy || mpn_cmp (dx, dy, alloc_size) != 0) + abort (); + } + + printf ("%d passes OK\n", n_passes); + exit (0); +} + +mp_limb_t +#if __STDC__ +refmpn_add_n (mp_ptr res_ptr, + mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size) +#else +refmpn_add_n (res_ptr, s1_ptr, s2_ptr, size) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + register mp_srcptr s2_ptr; + mp_size_t size; +#endif +{ + register mp_limb_t x, y, cy; + register mp_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do + { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to one addend */ + cy = (y < cy); /* get out carry from that addition */ + y = x + y; /* add other addend */ + cy = (y < x) + cy; /* get out carry from that add, combine */ + res_ptr[j] = y; + } + while (++j != 0); + + return cy; +} + +mp_limb_t +#if __STDC__ +refmpn_sub_n (mp_ptr res_ptr, + mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size) +#else +refmpn_sub_n (res_ptr, s1_ptr, s2_ptr, size) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + register mp_srcptr s2_ptr; + mp_size_t size; +#endif +{ + register mp_limb_t x, y, cy; + register mp_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do + { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to subtrahend */ + cy = (y < cy); /* get out carry from that addition */ + y = x - y; /* main subtract */ + cy = (y > x) + cy; /* get out carry from the subtract, combine */ + res_ptr[j] = y; + } + while (++j != 0); + + return cy; +} diff --git a/ghc/rts/gmp/mpn/x86/add_n.S b/ghc/rts/gmp/mpn/x86/add_n.S new file mode 100644 index 0000000..5bbbd0d --- /dev/null +++ b/ghc/rts/gmp/mpn/x86/add_n.S @@ -0,0 +1,106 @@ +/* i80386 __mpn_add_n -- Add two limb vectors of the same length > 0 and store +sum in a third limb vector. + +Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + s2_ptr (sp + 12) + size (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(__mpn_add_n) +C_SYMBOL_NAME(__mpn_add_n:) + pushl %edi + pushl %esi + + movl 12(%esp),%edi /* res_ptr */ + movl 16(%esp),%esi /* s1_ptr */ + movl 20(%esp),%edx /* s2_ptr */ + movl 24(%esp),%ecx /* size */ + + movl %ecx,%eax + shrl $3,%ecx /* compute count for unrolled loop */ + negl %eax + andl $7,%eax /* get index where to start loop */ + jz Loop /* necessary special case for 0 */ + incl %ecx /* adjust loop count */ + shll $2,%eax /* adjustment for pointers... */ + subl %eax,%edi /* ... since they are offset ... */ + subl %eax,%esi /* ... by a constant when we ... */ + subl %eax,%edx /* ... enter the loop */ + shrl $2,%eax /* restore previous value */ +#ifdef PIC +/* Calculate start address in loop for PIC. Due to limitations in some + assemblers, Loop-L0-3 cannot be put into the leal */ + call L0 +L0: leal (%eax,%eax,8),%eax + addl (%esp),%eax + addl $(Loop-L0-3),%eax + addl $4,%esp +#else +/* Calculate start address in loop for non-PIC. */ + leal (Loop - 3)(%eax,%eax,8),%eax +#endif + jmp *%eax /* jump into loop */ + ALIGN (3) +Loop: movl (%esi),%eax + adcl (%edx),%eax + movl %eax,(%edi) + movl 4(%esi),%eax + adcl 4(%edx),%eax + movl %eax,4(%edi) + movl 8(%esi),%eax + adcl 8(%edx),%eax + movl %eax,8(%edi) + movl 12(%esi),%eax + adcl 12(%edx),%eax + movl %eax,12(%edi) + movl 16(%esi),%eax + adcl 16(%edx),%eax + movl %eax,16(%edi) + movl 20(%esi),%eax + adcl 20(%edx),%eax + movl %eax,20(%edi) + movl 24(%esi),%eax + adcl 24(%edx),%eax + movl %eax,24(%edi) + movl 28(%esi),%eax + adcl 28(%edx),%eax + movl %eax,28(%edi) + leal 32(%edi),%edi + leal 32(%esi),%esi + leal 32(%edx),%edx + decl %ecx + jnz Loop + + sbbl %eax,%eax + negl %eax + + popl %esi + popl %edi + ret diff --git a/ghc/rts/gmp/mpn/x86/addmul_1.S b/ghc/rts/gmp/mpn/x86/addmul_1.S new file mode 100644 index 0000000..c11209d --- /dev/null +++ b/ghc/rts/gmp/mpn/x86/addmul_1.S @@ -0,0 +1,76 @@ +/* i80386 __mpn_addmul_1 -- Multiply a limb vector with a limb and add + the result to a second limb vector. + +Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + size (sp + 12) + s2_limb (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(__mpn_addmul_1) +C_SYMBOL_NAME(__mpn_addmul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) + INSN2(xor,l ,R(ebx),R(ebx)) + ALIGN (3) +Loop: + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + INSN1(mul,l ,R(s2_limb)) + INSN2(add,l ,R(eax),R(ebx)) + INSN2(adc,l ,R(edx),$0) + INSN2(add,l ,MEM_INDEX(res_ptr,size,4),R(eax)) + INSN2(adc,l ,R(edx),$0) + INSN2(mov,l ,R(ebx),R(edx)) + + INSN1(inc,l ,R(size)) + INSN1(jnz, ,Loop) + INSN2(mov,l ,R(eax),R(ebx)) + + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret diff --git a/ghc/rts/gmp/mpn/x86/lshift.S b/ghc/rts/gmp/mpn/x86/lshift.S new file mode 100644 index 0000000..8173b92 --- /dev/null +++ b/ghc/rts/gmp/mpn/x86/lshift.S @@ -0,0 +1,85 @@ +/* i80386 __mpn_lshift -- + +Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s_ptr (sp + 8) + size (sp + 12) + cnt (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(__mpn_lshift) +C_SYMBOL_NAME(__mpn_lshift:) + pushl %edi + pushl %esi + pushl %ebx + + movl 16(%esp),%edi /* res_ptr */ + movl 20(%esp),%esi /* s_ptr */ + movl 24(%esp),%edx /* size */ + movl 28(%esp),%ecx /* cnt */ + + subl $4,%esi /* adjust s_ptr */ + + movl (%esi,%edx,4),%ebx /* read most significant limb */ + xorl %eax,%eax + shldl %cl,%ebx,%eax /* compute carry limb */ + decl %edx + jz Lend + pushl %eax /* push carry limb onto stack */ + testb $1,%edx + jnz L1 /* enter loop in the middle */ + movl %ebx,%eax + + ALIGN (3) +Loop: movl (%esi,%edx,4),%ebx /* load next lower limb */ + shldl %cl,%ebx,%eax /* compute result limb */ + movl %eax,(%edi,%edx,4) /* store it */ + decl %edx +L1: movl (%esi,%edx,4),%eax + shldl %cl,%eax,%ebx + movl %ebx,(%edi,%edx,4) + decl %edx + jnz Loop + + shll %cl,%eax /* compute least significant limb */ + movl %eax,(%edi) /* store it */ + + popl %eax /* pop carry limb */ + + popl %ebx + popl %esi + popl %edi + ret + +Lend: shll %cl,%ebx /* compute least significant limb */ + movl %ebx,(%edi) /* store it */ + + popl %ebx + popl %esi + popl %edi + ret diff --git a/ghc/rts/gmp/mpn/x86/mul_1.S b/ghc/rts/gmp/mpn/x86/mul_1.S new file mode 100644 index 0000000..b271399 --- /dev/null +++ b/ghc/rts/gmp/mpn/x86/mul_1.S @@ -0,0 +1,75 @@ +/* i80386 __mpn_mul_1 -- Multiply a limb vector with a limb and store + the result in a second limb vector. + +Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + size (sp + 12) + s2_limb (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(__mpn_mul_1) +C_SYMBOL_NAME(__mpn_mul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) + INSN2(xor,l ,R(ebx),R(ebx)) + ALIGN (3) +Loop: + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + INSN1(mul,l ,R(s2_limb)) + INSN2(add,l ,R(eax),R(ebx)) + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(eax)) + INSN2(adc,l ,R(edx),$0) + INSN2(mov,l ,R(ebx),R(edx)) + + INSN1(inc,l ,R(size)) + INSN1(jnz, ,Loop) + INSN2(mov,l ,R(eax),R(ebx)) + + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret diff --git a/ghc/rts/gmp/mpn/x86/rshift.S b/ghc/rts/gmp/mpn/x86/rshift.S new file mode 100644 index 0000000..9abbf9a --- /dev/null +++ b/ghc/rts/gmp/mpn/x86/rshift.S @@ -0,0 +1,87 @@ +/* i80386 __mpn_rshift -- + +Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s_ptr (sp + 8) + size (sp + 12) + cnt (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(__mpn_rshift) +C_SYMBOL_NAME(__mpn_rshift:) + pushl %edi + pushl %esi + pushl %ebx + + movl 16(%esp),%edi /* res_ptr */ + movl 20(%esp),%esi /* s_ptr */ + movl 24(%esp),%edx /* size */ + movl 28(%esp),%ecx /* cnt */ + + leal -4(%edi,%edx,4),%edi + leal (%esi,%edx,4),%esi + negl %edx + + movl (%esi,%edx,4),%ebx /* read least significant limb */ + xorl %eax,%eax + shrdl %cl,%ebx,%eax /* compute carry limb */ + incl %edx + jz Lend + pushl %eax /* push carry limb onto stack */ + testb $1,%edx + jnz L1 /* enter loop in the middle */ + movl %ebx,%eax + + ALIGN (3) +Loop: movl (%esi,%edx,4),%ebx /* load next higher limb */ + shrdl %cl,%ebx,%eax /* compute result limb */ + movl %eax,(%edi,%edx,4) /* store it */ + incl %edx +L1: movl (%esi,%edx,4),%eax + shrdl %cl,%eax,%ebx + movl %ebx,(%edi,%edx,4) + incl %edx + jnz Loop + + shrl %cl,%eax /* compute most significant limb */ + movl %eax,(%edi) /* store it */ + + popl %eax /* pop carry limb */ + + popl %ebx + popl %esi + popl %edi + ret + +Lend: shrl %cl,%ebx /* compute most significant limb */ + movl %ebx,(%edi) /* store it */ + + popl %ebx + popl %esi + popl %edi + ret diff --git a/ghc/rts/gmp/mpn/x86/sub_n.S b/ghc/rts/gmp/mpn/x86/sub_n.S new file mode 100644 index 0000000..1774f3a --- /dev/null +++ b/ghc/rts/gmp/mpn/x86/sub_n.S @@ -0,0 +1,106 @@ +/* i80386 __mpn_sub_n -- Add two limb vectors of the same length > 0 and store + sum in a third limb vector. + +Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + s2_ptr (sp + 12) + size (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(__mpn_sub_n) +C_SYMBOL_NAME(__mpn_sub_n:) + pushl %edi + pushl %esi + + movl 12(%esp),%edi /* res_ptr */ + movl 16(%esp),%esi /* s1_ptr */ + movl 20(%esp),%edx /* s2_ptr */ + movl 24(%esp),%ecx /* size */ + + movl %ecx,%eax + shrl $3,%ecx /* compute count for unrolled loop */ + negl %eax + andl $7,%eax /* get index where to start loop */ + jz Loop /* necessary special case for 0 */ + incl %ecx /* adjust loop count */ + shll $2,%eax /* adjustment for pointers... */ + subl %eax,%edi /* ... since they are offset ... */ + subl %eax,%esi /* ... by a constant when we ... */ + subl %eax,%edx /* ... enter the loop */ + shrl $2,%eax /* restore previous value */ +#ifdef PIC +/* Calculate start address in loop for PIC. Due to limitations in some + assemblers, Loop-L0-3 cannot be put into the leal */ + call L0 +L0: leal (%eax,%eax,8),%eax + addl (%esp),%eax + addl $(Loop-L0-3),%eax + addl $4,%esp +#else +/* Calculate start address in loop for non-PIC. */ + leal (Loop - 3)(%eax,%eax,8),%eax +#endif + jmp *%eax /* jump into loop */ + ALIGN (3) +Loop: movl (%esi),%eax + sbbl (%edx),%eax + movl %eax,(%edi) + movl 4(%esi),%eax + sbbl 4(%edx),%eax + movl %eax,4(%edi) + movl 8(%esi),%eax + sbbl 8(%edx),%eax + movl %eax,8(%edi) + movl 12(%esi),%eax + sbbl 12(%edx),%eax + movl %eax,12(%edi) + movl 16(%esi),%eax + sbbl 16(%edx),%eax + movl %eax,16(%edi) + movl 20(%esi),%eax + sbbl 20(%edx),%eax + movl %eax,20(%edi) + movl 24(%esi),%eax + sbbl 24(%edx),%eax + movl %eax,24(%edi) + movl 28(%esi),%eax + sbbl 28(%edx),%eax + movl %eax,28(%edi) + leal 32(%edi),%edi + leal 32(%esi),%esi + leal 32(%edx),%edx + decl %ecx + jnz Loop + + sbbl %eax,%eax + negl %eax + + popl %esi + popl %edi + ret -- 1.7.10.4