From 04e5053030f1312e1a8b0fd5793ab5553b48c12a Mon Sep 17 00:00:00 2001 From: simonm Date: Fri, 5 Jun 1998 14:37:54 +0000 Subject: [PATCH] [project @ 1998-06-05 14:37:53 by simonm] Import GMP 2.0.2 --- ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S | 189 ++++++++++++++++ ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/submul_1.S | 195 +++++++++++++++++ ghc/rts/gmp/mpn/i960/README | 9 + ghc/rts/gmp/mpn/m68k/add_n.S | 80 +++++++ ghc/rts/gmp/mpn/m68k/lshift.S | 151 +++++++++++++ ghc/rts/gmp/mpn/m68k/mc68020/addmul_1.S | 84 ++++++++ ghc/rts/gmp/mpn/m68k/mc68020/mul_1.S | 91 ++++++++ ghc/rts/gmp/mpn/m68k/mc68020/submul_1.S | 84 ++++++++ ghc/rts/gmp/mpn/m68k/rshift.S | 150 +++++++++++++ ghc/rts/gmp/mpn/m68k/sub_n.S | 80 +++++++ ghc/rts/gmp/mpn/m68k/syntax.h | 177 +++++++++++++++ ghc/rts/gmp/mpn/m88k/mc88110/add_n.S | 200 +++++++++++++++++ ghc/rts/gmp/mpn/m88k/mc88110/sub_n.S | 276 ++++++++++++++++++++++++ ghc/rts/gmp/mpn/mips3/README | 23 ++ ghc/rts/gmp/mpn/mips3/gmp-mparam.h | 27 +++ ghc/rts/gmp/mpn/powerpc64/gmp-mparam.h | 27 +++ 16 files changed, 1843 insertions(+) create mode 100644 ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S create mode 100644 ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/submul_1.S create mode 100644 ghc/rts/gmp/mpn/i960/README create mode 100644 ghc/rts/gmp/mpn/m68k/add_n.S create mode 100644 ghc/rts/gmp/mpn/m68k/lshift.S create mode 100644 ghc/rts/gmp/mpn/m68k/mc68020/addmul_1.S create mode 100644 ghc/rts/gmp/mpn/m68k/mc68020/mul_1.S create mode 100644 ghc/rts/gmp/mpn/m68k/mc68020/submul_1.S create mode 100644 ghc/rts/gmp/mpn/m68k/rshift.S create mode 100644 ghc/rts/gmp/mpn/m68k/sub_n.S create mode 100644 ghc/rts/gmp/mpn/m68k/syntax.h create mode 100644 ghc/rts/gmp/mpn/m88k/mc88110/add_n.S create mode 100644 ghc/rts/gmp/mpn/m88k/mc88110/sub_n.S create mode 100644 ghc/rts/gmp/mpn/mips3/README create mode 100644 ghc/rts/gmp/mpn/mips3/gmp-mparam.h create mode 100644 ghc/rts/gmp/mpn/powerpc64/gmp-mparam.h diff --git a/ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S b/ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S new file mode 100644 index 0000000..eb1d12b --- /dev/null +++ b/ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S @@ -0,0 +1,189 @@ +; HP-PA 7100/7200 __mpn_addmul_1 -- Multiply a limb vector with a limb and +; add the result to a second limb vector. + +; Copyright (C) 1995 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + +; INPUT PARAMETERS +#define res_ptr %r26 +#define s1_ptr %r25 +#define size %r24 +#define s2_limb %r23 + +#define cylimb %r28 +#define s0 %r19 +#define s1 %r20 +#define s2 %r3 +#define s3 %r4 +#define lo0 %r21 +#define lo1 %r5 +#define lo2 %r6 +#define lo3 %r7 +#define hi0 %r22 +#define hi1 %r23 /* safe to reuse */ +#define hi2 %r29 +#define hi3 %r1 + + .code + .export __mpn_addmul_1 +__mpn_addmul_1 + .proc + .callinfo frame=128,no_calls + .entry + + ldo 128(%r30),%r30 + stws s2_limb,-16(%r30) + add %r0,%r0,cylimb ; clear cy and cylimb + addib,< -4,size,L$few_limbs + fldws -16(%r30),%fr31R + + ldo -112(%r30),%r31 + stw %r3,-96(%r30) + stw %r4,-92(%r30) + stw %r5,-88(%r30) + stw %r6,-84(%r30) + stw %r7,-80(%r30) + + bb,>=,n s1_ptr,29,L$0 + + fldws,ma 4(s1_ptr),%fr4 + ldws 0(res_ptr),s0 + xmpyu %fr4,%fr31R,%fr5 + fstds %fr5,-16(%r31) + ldws -16(%r31),cylimb + ldws -12(%r31),lo0 + add s0,lo0,s0 + addib,< -1,size,L$few_limbs + stws,ma s0,4(res_ptr) + +; start software pipeline ---------------------------------------------------- +L$0 fldds,ma 8(s1_ptr),%fr4 + fldds,ma 8(s1_ptr),%fr8 + + xmpyu %fr4L,%fr31R,%fr5 + xmpyu %fr4R,%fr31R,%fr6 + xmpyu %fr8L,%fr31R,%fr9 + xmpyu %fr8R,%fr31R,%fr10 + + fstds %fr5,-16(%r31) + fstds %fr6,-8(%r31) + fstds %fr9,0(%r31) + fstds %fr10,8(%r31) + + ldws -16(%r31),hi0 + ldws -12(%r31),lo0 + ldws -8(%r31),hi1 + ldws -4(%r31),lo1 + ldws 0(%r31),hi2 + ldws 4(%r31),lo2 + ldws 8(%r31),hi3 + ldws 12(%r31),lo3 + + addc lo0,cylimb,lo0 + addc lo1,hi0,lo1 + addc lo2,hi1,lo2 + addc lo3,hi2,lo3 + + addib,< -4,size,L$end + addc %r0,hi3,cylimb ; propagate carry into cylimb +; main loop ------------------------------------------------------------------ +L$loop fldds,ma 8(s1_ptr),%fr4 + fldds,ma 8(s1_ptr),%fr8 + + ldws 0(res_ptr),s0 + xmpyu %fr4L,%fr31R,%fr5 + ldws 4(res_ptr),s1 + xmpyu %fr4R,%fr31R,%fr6 + ldws 8(res_ptr),s2 + xmpyu %fr8L,%fr31R,%fr9 + ldws 12(res_ptr),s3 + xmpyu %fr8R,%fr31R,%fr10 + + fstds %fr5,-16(%r31) + add s0,lo0,s0 + fstds %fr6,-8(%r31) + addc s1,lo1,s1 + fstds %fr9,0(%r31) + addc s2,lo2,s2 + fstds %fr10,8(%r31) + addc s3,lo3,s3 + + ldws -16(%r31),hi0 + ldws -12(%r31),lo0 + ldws -8(%r31),hi1 + ldws -4(%r31),lo1 + ldws 0(%r31),hi2 + ldws 4(%r31),lo2 + ldws 8(%r31),hi3 + ldws 12(%r31),lo3 + + addc lo0,cylimb,lo0 + stws,ma s0,4(res_ptr) + addc lo1,hi0,lo1 + stws,ma s1,4(res_ptr) + addc lo2,hi1,lo2 + stws,ma s2,4(res_ptr) + addc lo3,hi2,lo3 + stws,ma s3,4(res_ptr) + + addib,>= -4,size,L$loop + addc %r0,hi3,cylimb ; propagate carry into cylimb +; finish software pipeline --------------------------------------------------- +L$end ldws 0(res_ptr),s0 + ldws 4(res_ptr),s1 + ldws 8(res_ptr),s2 + ldws 12(res_ptr),s3 + + add s0,lo0,s0 + stws,ma s0,4(res_ptr) + addc s1,lo1,s1 + stws,ma s1,4(res_ptr) + addc s2,lo2,s2 + stws,ma s2,4(res_ptr) + addc s3,lo3,s3 + stws,ma s3,4(res_ptr) + +; restore callee-saves registers --------------------------------------------- + ldw -96(%r30),%r3 + ldw -92(%r30),%r4 + ldw -88(%r30),%r5 + ldw -84(%r30),%r6 + ldw -80(%r30),%r7 + +L$few_limbs + addib,=,n 4,size,L$ret +L$loop2 fldws,ma 4(s1_ptr),%fr4 + ldws 0(res_ptr),s0 + xmpyu %fr4,%fr31R,%fr5 + fstds %fr5,-16(%r30) + ldws -16(%r30),hi0 + ldws -12(%r30),lo0 + addc lo0,cylimb,lo0 + addc %r0,hi0,cylimb + add s0,lo0,s0 + stws,ma s0,4(res_ptr) + addib,<> -1,size,L$loop2 + nop + +L$ret addc %r0,cylimb,cylimb + bv 0(%r2) + ldo -128(%r30),%r30 + + .exit + .procend diff --git a/ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/submul_1.S b/ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/submul_1.S new file mode 100644 index 0000000..a71176e --- /dev/null +++ b/ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/submul_1.S @@ -0,0 +1,195 @@ +; HP-PA 7100/7200 __mpn_submul_1 -- Multiply a limb vector with a limb and +; subtract the result from a second limb vector. + +; Copyright (C) 1995 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + +; INPUT PARAMETERS +#define res_ptr %r26 +#define s1_ptr %r25 +#define size %r24 +#define s2_limb %r23 + +#define cylimb %r28 +#define s0 %r19 +#define s1 %r20 +#define s2 %r3 +#define s3 %r4 +#define lo0 %r21 +#define lo1 %r5 +#define lo2 %r6 +#define lo3 %r7 +#define hi0 %r22 +#define hi1 %r23 /* safe to reuse */ +#define hi2 %r29 +#define hi3 %r1 + + .code + .export __mpn_submul_1 +__mpn_submul_1 + .proc + .callinfo frame=128,no_calls + .entry + + ldo 128(%r30),%r30 + stws s2_limb,-16(%r30) + add %r0,%r0,cylimb ; clear cy and cylimb + addib,< -4,size,L$few_limbs + fldws -16(%r30),%fr31R + + ldo -112(%r30),%r31 + stw %r3,-96(%r30) + stw %r4,-92(%r30) + stw %r5,-88(%r30) + stw %r6,-84(%r30) + stw %r7,-80(%r30) + + bb,>=,n s1_ptr,29,L$0 + + fldws,ma 4(s1_ptr),%fr4 + ldws 0(res_ptr),s0 + xmpyu %fr4,%fr31R,%fr5 + fstds %fr5,-16(%r31) + ldws -16(%r31),cylimb + ldws -12(%r31),lo0 + sub s0,lo0,s0 + add s0,lo0,%r0 ; invert cy + addib,< -1,size,L$few_limbs + stws,ma s0,4(res_ptr) + +; start software pipeline ---------------------------------------------------- +L$0 fldds,ma 8(s1_ptr),%fr4 + fldds,ma 8(s1_ptr),%fr8 + + xmpyu %fr4L,%fr31R,%fr5 + xmpyu %fr4R,%fr31R,%fr6 + xmpyu %fr8L,%fr31R,%fr9 + xmpyu %fr8R,%fr31R,%fr10 + + fstds %fr5,-16(%r31) + fstds %fr6,-8(%r31) + fstds %fr9,0(%r31) + fstds %fr10,8(%r31) + + ldws -16(%r31),hi0 + ldws -12(%r31),lo0 + ldws -8(%r31),hi1 + ldws -4(%r31),lo1 + ldws 0(%r31),hi2 + ldws 4(%r31),lo2 + ldws 8(%r31),hi3 + ldws 12(%r31),lo3 + + addc lo0,cylimb,lo0 + addc lo1,hi0,lo1 + addc lo2,hi1,lo2 + addc lo3,hi2,lo3 + + addib,< -4,size,L$end + addc %r0,hi3,cylimb ; propagate carry into cylimb +; main loop ------------------------------------------------------------------ +L$loop fldds,ma 8(s1_ptr),%fr4 + fldds,ma 8(s1_ptr),%fr8 + + ldws 0(res_ptr),s0 + xmpyu %fr4L,%fr31R,%fr5 + ldws 4(res_ptr),s1 + xmpyu %fr4R,%fr31R,%fr6 + ldws 8(res_ptr),s2 + xmpyu %fr8L,%fr31R,%fr9 + ldws 12(res_ptr),s3 + xmpyu %fr8R,%fr31R,%fr10 + + fstds %fr5,-16(%r31) + sub s0,lo0,s0 + fstds %fr6,-8(%r31) + subb s1,lo1,s1 + fstds %fr9,0(%r31) + subb s2,lo2,s2 + fstds %fr10,8(%r31) + subb s3,lo3,s3 + subb %r0,%r0,lo0 ; these two insns ... + add lo0,lo0,%r0 ; ... just invert cy + + ldws -16(%r31),hi0 + ldws -12(%r31),lo0 + ldws -8(%r31),hi1 + ldws -4(%r31),lo1 + ldws 0(%r31),hi2 + ldws 4(%r31),lo2 + ldws 8(%r31),hi3 + ldws 12(%r31),lo3 + + addc lo0,cylimb,lo0 + stws,ma s0,4(res_ptr) + addc lo1,hi0,lo1 + stws,ma s1,4(res_ptr) + addc lo2,hi1,lo2 + stws,ma s2,4(res_ptr) + addc lo3,hi2,lo3 + stws,ma s3,4(res_ptr) + + addib,>= -4,size,L$loop + addc %r0,hi3,cylimb ; propagate carry into cylimb +; finish software pipeline --------------------------------------------------- +L$end ldws 0(res_ptr),s0 + ldws 4(res_ptr),s1 + ldws 8(res_ptr),s2 + ldws 12(res_ptr),s3 + + sub s0,lo0,s0 + stws,ma s0,4(res_ptr) + subb s1,lo1,s1 + stws,ma s1,4(res_ptr) + subb s2,lo2,s2 + stws,ma s2,4(res_ptr) + subb s3,lo3,s3 + stws,ma s3,4(res_ptr) + subb %r0,%r0,lo0 ; these two insns ... + add lo0,lo0,%r0 ; ... invert cy + +; restore callee-saves registers --------------------------------------------- + ldw -96(%r30),%r3 + ldw -92(%r30),%r4 + ldw -88(%r30),%r5 + ldw -84(%r30),%r6 + ldw -80(%r30),%r7 + +L$few_limbs + addib,=,n 4,size,L$ret +L$loop2 fldws,ma 4(s1_ptr),%fr4 + ldws 0(res_ptr),s0 + xmpyu %fr4,%fr31R,%fr5 + fstds %fr5,-16(%r30) + ldws -16(%r30),hi0 + ldws -12(%r30),lo0 + addc lo0,cylimb,lo0 + addc %r0,hi0,cylimb + sub s0,lo0,s0 + add s0,lo0,%r0 ; invert cy + stws,ma s0,4(res_ptr) + addib,<> -1,size,L$loop2 + nop + +L$ret addc %r0,cylimb,cylimb + bv 0(%r2) + ldo -128(%r30),%r30 + + .exit + .procend diff --git a/ghc/rts/gmp/mpn/i960/README b/ghc/rts/gmp/mpn/i960/README new file mode 100644 index 0000000..d68a0a8 --- /dev/null +++ b/ghc/rts/gmp/mpn/i960/README @@ -0,0 +1,9 @@ +This directory contains mpn functions for Intel i960 processors. + +RELEVANT OPTIMIZATION ISSUES + +The code in this directory is not well optimized. + +STATUS + +The code in this directory has not been tested. diff --git a/ghc/rts/gmp/mpn/m68k/add_n.S b/ghc/rts/gmp/mpn/m68k/add_n.S new file mode 100644 index 0000000..7ca5b95 --- /dev/null +++ b/ghc/rts/gmp/mpn/m68k/add_n.S @@ -0,0 +1,80 @@ +/* mc68020 __mpn_add_n -- Add two limb vectors of the same length > 0 and store + sum in a third limb vector. + +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + s2_ptr (sp + 16) + size (sp + 12) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(__mpn_add_n) + +C_SYMBOL_NAME(__mpn_add_n:) +PROLOG(__mpn_add_n) +/* Save used registers on the stack. */ + movel R(d2),MEM_PREDEC(sp) + movel R(a2),MEM_PREDEC(sp) + +/* Copy the arguments to registers. Better use movem? */ + movel MEM_DISP(sp,12),R(a2) + movel MEM_DISP(sp,16),R(a0) + movel MEM_DISP(sp,20),R(a1) + movel MEM_DISP(sp,24),R(d2) + + eorw #1,R(d2) + lsrl #1,R(d2) + bcc L(L1) + subql #1,R(d2) /* clears cy as side effect */ + +L(Loop:) + movel MEM_POSTINC(a0),R(d0) + movel MEM_POSTINC(a1),R(d1) + addxl R(d1),R(d0) + movel R(d0),MEM_POSTINC(a2) +L(L1:) movel MEM_POSTINC(a0),R(d0) + movel MEM_POSTINC(a1),R(d1) + addxl R(d1),R(d0) + movel R(d0),MEM_POSTINC(a2) + + dbf R(d2),L(Loop) /* loop until 16 lsb of %4 == -1 */ + subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */ + subl #0x10000,R(d2) + bcs L(L2) + addl R(d0),R(d0) /* restore cy */ + bra L(Loop) + +L(L2:) + negl R(d0) + +/* Restore used registers from stack frame. */ + movel MEM_POSTINC(sp),R(a2) + movel MEM_POSTINC(sp),R(d2) + + rts +EPILOG(__mpn_add_n) diff --git a/ghc/rts/gmp/mpn/m68k/lshift.S b/ghc/rts/gmp/mpn/m68k/lshift.S new file mode 100644 index 0000000..77184d6 --- /dev/null +++ b/ghc/rts/gmp/mpn/m68k/lshift.S @@ -0,0 +1,151 @@ +/* mc68020 __mpn_lshift -- Shift left a low-level natural-number integer. + +Copyright (C) 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s_ptr (sp + 8) + s_size (sp + 16) + cnt (sp + 12) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define res_ptr a1 +#define s_ptr a0 +#define s_size d6 +#define cnt d4 + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(__mpn_lshift) + +C_SYMBOL_NAME(__mpn_lshift:) +PROLOG(__mpn_lshift) + +/* Save used registers on the stack. */ + moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp) + +/* Copy the arguments to registers. */ + movel MEM_DISP(sp,28),R(res_ptr) + movel MEM_DISP(sp,32),R(s_ptr) + movel MEM_DISP(sp,36),R(s_size) + movel MEM_DISP(sp,40),R(cnt) + + moveql #1,R(d5) + cmpl R(d5),R(cnt) + bne L(Lnormal) + cmpl R(s_ptr),R(res_ptr) + bls L(Lspecial) /* jump if s_ptr >= res_ptr */ +#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) + lea MEM_INDX1(s_ptr,s_size,l,4),R(a2) +#else /* not mc68020 */ + movel R(s_size),R(d0) + asll #2,R(d0) + lea MEM_INDX(s_ptr,d0,l),R(a2) +#endif + cmpl R(res_ptr),R(a2) + bls L(Lspecial) /* jump if res_ptr >= s_ptr + s_size */ + +L(Lnormal:) + moveql #32,R(d5) + subl R(cnt),R(d5) + +#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) + lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr) + lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr) +#else /* not mc68000 */ + movel R(s_size),R(d0) + asll #2,R(d0) + addl R(s_size),R(s_ptr) + addl R(s_size),R(res_ptr) +#endif + movel MEM_PREDEC(s_ptr),R(d2) + movel R(d2),R(d0) + lsrl R(d5),R(d0) /* compute carry limb */ + + lsll R(cnt),R(d2) + movel R(d2),R(d1) + subql #1,R(s_size) + beq L(Lend) + lsrl #1,R(s_size) + bcs L(L1) + subql #1,R(s_size) + +L(Loop:) + movel MEM_PREDEC(s_ptr),R(d2) + movel R(d2),R(d3) + lsrl R(d5),R(d3) + orl R(d3),R(d1) + movel R(d1),MEM_PREDEC(res_ptr) + lsll R(cnt),R(d2) +L(L1:) + movel MEM_PREDEC(s_ptr),R(d1) + movel R(d1),R(d3) + lsrl R(d5),R(d3) + orl R(d3),R(d2) + movel R(d2),MEM_PREDEC(res_ptr) + lsll R(cnt),R(d1) + + dbf R(s_size),L(Loop) + subl #0x10000,R(s_size) + bcc L(Loop) + +L(Lend:) + movel R(d1),MEM_PREDEC(res_ptr) /* store least significant limb */ + +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2) + rts + +/* We loop from least significant end of the arrays, which is only + permissable if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination. */ + +L(Lspecial:) + clrl R(d0) /* initialize carry */ + eorw #1,R(s_size) + lsrl #1,R(s_size) + bcc L(LL1) + subql #1,R(s_size) + +L(LLoop:) + movel MEM_POSTINC(s_ptr),R(d2) + addxl R(d2),R(d2) + movel R(d2),MEM_POSTINC(res_ptr) +L(LL1:) + movel MEM_POSTINC(s_ptr),R(d2) + addxl R(d2),R(d2) + movel R(d2),MEM_POSTINC(res_ptr) + + dbf R(s_size),L(LLoop) + addxl R(d0),R(d0) /* save cy in lsb */ + subl #0x10000,R(s_size) + bcs L(LLend) + lsrl #1,R(d0) /* restore cy */ + bra L(LLoop) + +L(LLend:) +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2) + rts +EPILOG(__mpn_lshift) diff --git a/ghc/rts/gmp/mpn/m68k/mc68020/addmul_1.S b/ghc/rts/gmp/mpn/m68k/mc68020/addmul_1.S new file mode 100644 index 0000000..4b99c21 --- /dev/null +++ b/ghc/rts/gmp/mpn/m68k/mc68020/addmul_1.S @@ -0,0 +1,84 @@ +/* mc68020 __mpn_addmul_1 -- Multiply a limb vector with a limb and add + the result to a second limb vector. + +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + s1_size (sp + 12) + s2_limb (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(__mpn_addmul_1) + +C_SYMBOL_NAME(__mpn_addmul_1:) +PROLOG(__mpn_addmul_1) + +#define res_ptr a0 +#define s1_ptr a1 +#define s1_size d2 +#define s2_limb d4 + +/* Save used registers on the stack. */ + moveml R(d2)-R(d5),MEM_PREDEC(sp) + +/* Copy the arguments to registers. Better use movem? */ + movel MEM_DISP(sp,20),R(res_ptr) + movel MEM_DISP(sp,24),R(s1_ptr) + movel MEM_DISP(sp,28),R(s1_size) + movel MEM_DISP(sp,32),R(s2_limb) + + eorw #1,R(s1_size) + clrl R(d1) + clrl R(d5) + lsrl #1,R(s1_size) + bcc L(L1) + subql #1,R(s1_size) + subl R(d0),R(d0) /* (d0,cy) <= (0,0) */ + +L(Loop:) + movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d1):R(d3) + addxl R(d0),R(d3) + addxl R(d5),R(d1) + addl R(d3),MEM_POSTINC(res_ptr) +L(L1:) movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d0):R(d3) + addxl R(d1),R(d3) + addxl R(d5),R(d0) + addl R(d3),MEM_POSTINC(res_ptr) + + dbf R(s1_size),L(Loop) + addxl R(d5),R(d0) + subl #0x10000,R(s1_size) + bcc L(Loop) + +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d5) + + rts +EPILOG(__mpn_addmul_1) diff --git a/ghc/rts/gmp/mpn/m68k/mc68020/mul_1.S b/ghc/rts/gmp/mpn/m68k/mc68020/mul_1.S new file mode 100644 index 0000000..ef7d937 --- /dev/null +++ b/ghc/rts/gmp/mpn/m68k/mc68020/mul_1.S @@ -0,0 +1,91 @@ +/* mc68020 __mpn_mul_1 -- Multiply a limb vector with a limb and store + the result in a second limb vector. + +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + s1_size (sp + 12) + s2_limb (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(__mpn_mul_1) + +C_SYMBOL_NAME(__mpn_mul_1:) +PROLOG(__mpn_mul_1) + +#define res_ptr a0 +#define s1_ptr a1 +#define s1_size d2 +#define s2_limb d4 + +/* Save used registers on the stack. */ + moveml R(d2)-R(d4),MEM_PREDEC(sp) +#if 0 + movel R(d2),MEM_PREDEC(sp) + movel R(d3),MEM_PREDEC(sp) + movel R(d4),MEM_PREDEC(sp) +#endif + +/* Copy the arguments to registers. Better use movem? */ + movel MEM_DISP(sp,16),R(res_ptr) + movel MEM_DISP(sp,20),R(s1_ptr) + movel MEM_DISP(sp,24),R(s1_size) + movel MEM_DISP(sp,28),R(s2_limb) + + eorw #1,R(s1_size) + clrl R(d1) + lsrl #1,R(s1_size) + bcc L(L1) + subql #1,R(s1_size) + subl R(d0),R(d0) /* (d0,cy) <= (0,0) */ + +L(Loop:) + movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d1):R(d3) + addxl R(d0),R(d3) + movel R(d3),MEM_POSTINC(res_ptr) +L(L1:) movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d0):R(d3) + addxl R(d1),R(d3) + movel R(d3),MEM_POSTINC(res_ptr) + + dbf R(s1_size),L(Loop) + clrl R(d3) + addxl R(d3),R(d0) + subl #0x10000,R(s1_size) + bcc L(Loop) + +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d4) +#if 0 + movel MEM_POSTINC(sp),R(d4) + movel MEM_POSTINC(sp),R(d3) + movel MEM_POSTINC(sp),R(d2) +#endif + rts +EPILOG(__mpn_mul_1) diff --git a/ghc/rts/gmp/mpn/m68k/mc68020/submul_1.S b/ghc/rts/gmp/mpn/m68k/mc68020/submul_1.S new file mode 100644 index 0000000..9770c6c --- /dev/null +++ b/ghc/rts/gmp/mpn/m68k/mc68020/submul_1.S @@ -0,0 +1,84 @@ +/* mc68020 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract + the result from a second limb vector. + +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + s1_size (sp + 12) + s2_limb (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(__mpn_submul_1) + +C_SYMBOL_NAME(__mpn_submul_1:) +PROLOG(__mpn_submul_1) + +#define res_ptr a0 +#define s1_ptr a1 +#define s1_size d2 +#define s2_limb d4 + +/* Save used registers on the stack. */ + moveml R(d2)-R(d5),MEM_PREDEC(sp) + +/* Copy the arguments to registers. Better use movem? */ + movel MEM_DISP(sp,20),R(res_ptr) + movel MEM_DISP(sp,24),R(s1_ptr) + movel MEM_DISP(sp,28),R(s1_size) + movel MEM_DISP(sp,32),R(s2_limb) + + eorw #1,R(s1_size) + clrl R(d1) + clrl R(d5) + lsrl #1,R(s1_size) + bcc L(L1) + subql #1,R(s1_size) + subl R(d0),R(d0) /* (d0,cy) <= (0,0) */ + +L(Loop:) + movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d1):R(d3) + addxl R(d0),R(d3) + addxl R(d5),R(d1) + subl R(d3),MEM_POSTINC(res_ptr) +L(L1:) movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d0):R(d3) + addxl R(d1),R(d3) + addxl R(d5),R(d0) + subl R(d3),MEM_POSTINC(res_ptr) + + dbf R(s1_size),L(Loop) + addxl R(d5),R(d0) + subl #0x10000,R(s1_size) + bcc L(Loop) + +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d5) + + rts +EPILOG(__mpn_submul_1) diff --git a/ghc/rts/gmp/mpn/m68k/rshift.S b/ghc/rts/gmp/mpn/m68k/rshift.S new file mode 100644 index 0000000..2ca5c79 --- /dev/null +++ b/ghc/rts/gmp/mpn/m68k/rshift.S @@ -0,0 +1,150 @@ +/* mc68020 __mpn_rshift -- Shift right a low-level natural-number integer. + +Copyright (C) 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s_ptr (sp + 8) + s_size (sp + 16) + cnt (sp + 12) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define res_ptr a1 +#define s_ptr a0 +#define s_size d6 +#define cnt d4 + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(__mpn_rshift) + +C_SYMBOL_NAME(__mpn_rshift:) +PROLOG(__mpn_rshift) +/* Save used registers on the stack. */ + moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp) + +/* Copy the arguments to registers. */ + movel MEM_DISP(sp,28),R(res_ptr) + movel MEM_DISP(sp,32),R(s_ptr) + movel MEM_DISP(sp,36),R(s_size) + movel MEM_DISP(sp,40),R(cnt) + + moveql #1,R(d5) + cmpl R(d5),R(cnt) + bne L(Lnormal) + cmpl R(res_ptr),R(s_ptr) + bls L(Lspecial) /* jump if res_ptr >= s_ptr */ +#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) + lea MEM_INDX1(res_ptr,s_size,l,4),R(a2) +#else /* not mc68020 */ + movel R(s_size),R(d0) + asll #2,R(d0) + lea MEM_INDX(res_ptr,d0,l),R(a2) +#endif + cmpl R(s_ptr),R(a2) + bls L(Lspecial) /* jump if s_ptr >= res_ptr + s_size */ + +L(Lnormal:) + moveql #32,R(d5) + subl R(cnt),R(d5) + movel MEM_POSTINC(s_ptr),R(d2) + movel R(d2),R(d0) + lsll R(d5),R(d0) /* compute carry limb */ + + lsrl R(cnt),R(d2) + movel R(d2),R(d1) + subql #1,R(s_size) + beq L(Lend) + lsrl #1,R(s_size) + bcs L(L1) + subql #1,R(s_size) + +L(Loop:) + movel MEM_POSTINC(s_ptr),R(d2) + movel R(d2),R(d3) + lsll R(d5),R(d3) + orl R(d3),R(d1) + movel R(d1),MEM_POSTINC(res_ptr) + lsrl R(cnt),R(d2) +L(L1:) + movel MEM_POSTINC(s_ptr),R(d1) + movel R(d1),R(d3) + lsll R(d5),R(d3) + orl R(d3),R(d2) + movel R(d2),MEM_POSTINC(res_ptr) + lsrl R(cnt),R(d1) + + dbf R(s_size),L(Loop) + subl #0x10000,R(s_size) + bcc L(Loop) + +L(Lend:) + movel R(d1),MEM(res_ptr) /* store most significant limb */ + +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2) + rts + +/* We loop from most significant end of the arrays, which is only + permissable if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination. */ + +L(Lspecial:) +#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) + lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr) + lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr) +#else /* not mc68000 */ + movel R(s_size),R(d0) + asll #2,R(d0) + addl R(s_size),R(s_ptr) + addl R(s_size),R(res_ptr) +#endif + + clrl R(d0) /* initialize carry */ + eorw #1,R(s_size) + lsrl #1,R(s_size) + bcc L(LL1) + subql #1,R(s_size) + +L(LLoop:) + movel MEM_PREDEC(s_ptr),R(d2) + roxrl #1,R(d2) + movel R(d2),MEM_PREDEC(res_ptr) +L(LL1:) + movel MEM_PREDEC(s_ptr),R(d2) + roxrl #1,R(d2) + movel R(d2),MEM_PREDEC(res_ptr) + + dbf R(s_size),L(LLoop) + roxrl #1,R(d0) /* save cy in msb */ + subl #0x10000,R(s_size) + bcs L(LLend) + addl R(d0),R(d0) /* restore cy */ + bra L(LLoop) + +L(LLend:) +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2) + rts +EPILOG(__mpn_rshift) diff --git a/ghc/rts/gmp/mpn/m68k/sub_n.S b/ghc/rts/gmp/mpn/m68k/sub_n.S new file mode 100644 index 0000000..f94b0c7 --- /dev/null +++ b/ghc/rts/gmp/mpn/m68k/sub_n.S @@ -0,0 +1,80 @@ +/* mc68020 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and + store difference in a third limb vector. + +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + s2_ptr (sp + 16) + size (sp + 12) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(__mpn_sub_n) + +C_SYMBOL_NAME(__mpn_sub_n:) +PROLOG(__mpn_sub_n) +/* Save used registers on the stack. */ + movel R(d2),MEM_PREDEC(sp) + movel R(a2),MEM_PREDEC(sp) + +/* Copy the arguments to registers. Better use movem? */ + movel MEM_DISP(sp,12),R(a2) + movel MEM_DISP(sp,16),R(a0) + movel MEM_DISP(sp,20),R(a1) + movel MEM_DISP(sp,24),R(d2) + + eorw #1,R(d2) + lsrl #1,R(d2) + bcc L(L1) + subql #1,R(d2) /* clears cy as side effect */ + +L(Loop:) + movel MEM_POSTINC(a0),R(d0) + movel MEM_POSTINC(a1),R(d1) + subxl R(d1),R(d0) + movel R(d0),MEM_POSTINC(a2) +L(L1:) movel MEM_POSTINC(a0),R(d0) + movel MEM_POSTINC(a1),R(d1) + subxl R(d1),R(d0) + movel R(d0),MEM_POSTINC(a2) + + dbf R(d2),L(Loop) /* loop until 16 lsb of %4 == -1 */ + subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */ + subl #0x10000,R(d2) + bcs L(L2) + addl R(d0),R(d0) /* restore cy */ + bra L(Loop) + +L(L2:) + negl R(d0) + +/* Restore used registers from stack frame. */ + movel MEM_POSTINC(sp),R(a2) + movel MEM_POSTINC(sp),R(d2) + + rts +EPILOG(__mpn_sub_n) diff --git a/ghc/rts/gmp/mpn/m68k/syntax.h b/ghc/rts/gmp/mpn/m68k/syntax.h new file mode 100644 index 0000000..9d6f352 --- /dev/null +++ b/ghc/rts/gmp/mpn/m68k/syntax.h @@ -0,0 +1,177 @@ +/* asm.h -- Definitions for 68k syntax variations. + +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#undef ALIGN + +#ifdef MIT_SYNTAX +#define PROLOG(name) +#define EPILOG(name) +#define R(r)r +#define MEM(base)base@ +#define MEM_DISP(base,displacement)base@(displacement) +#define MEM_INDX(base,idx,size_suffix)base@(idx:size_suffix) +#define MEM_INDX1(base,idx,size_suffix,scale)base@(idx:size_suffix:scale) +#define MEM_PREDEC(memory_base)memory_base@- +#define MEM_POSTINC(memory_base)memory_base@+ +#define L(label) label +#define TEXT .text +#define ALIGN .even +#define GLOBL .globl +#define moveql moveq +/* Use variable sized opcodes. */ +#define bcc jcc +#define bcs jcs +#define bls jls +#define beq jeq +#define bne jne +#define bra jra +#endif + +#ifdef SONY_SYNTAX +#define PROLOG(name) +#define EPILOG(name) +#define R(r)r +#define MEM(base)(base) +#define MEM_DISP(base,displacement)(displacement,base) +#define MEM_INDX(base,idx,size_suffix)(base,idx.size_suffix) +#define MEM_INDX1(base,idx,size_suffix,scale)(base,idx.size_suffix*scale) +#define MEM_PREDEC(memory_base)-(memory_base) +#define MEM_POSTINC(memory_base)(memory_base)+ +#define L(label) label +#define TEXT .text +#define ALIGN .even +#define GLOBL .globl +#endif + +#ifdef MOTOROLA_SYNTAX +#define PROLOG(name) +#define EPILOG(name) +#define R(r)r +#define MEM(base)(base) +#define MEM_DISP(base,displacement)(displacement,base) +#define MEM_INDX(base,idx,size_suffix)(base,idx.size_suffix) +#define MEM_INDX1(base,idx,size_suffix,scale)(base,idx.size_suffix*scale) +#define MEM_PREDEC(memory_base)-(memory_base) +#define MEM_POSTINC(memory_base)(memory_base)+ +#define L(label) label +#define TEXT +#define ALIGN +#define GLOBL XDEF +#define lea LEA +#define movel MOVE.L +#define moveml MOVEM.L +#define moveql MOVEQ.L +#define cmpl CMP.L +#define orl OR.L +#define clrl CLR.L +#define eorw EOR.W +#define lsrl LSR.L +#define lsll LSL.L +#define roxrl ROXR.L +#define roxll ROXL.L +#define addl ADD.L +#define addxl ADDX.L +#define addql ADDQ.L +#define subl SUB.L +#define subxl SUBX.L +#define subql SUBQ.L +#define negl NEG.L +#define mulul MULU.L +#define bcc BCC +#define bcs BCS +#define bls BLS +#define beq BEQ +#define bne BNE +#define bra BRA +#define dbf DBF +#define rts RTS +#define d0 D0 +#define d1 D1 +#define d2 D2 +#define d3 D3 +#define d4 D4 +#define d5 D5 +#define d6 D6 +#define d7 D7 +#define a0 A0 +#define a1 A1 +#define a2 A2 +#define a3 A3 +#define a4 A4 +#define a5 A5 +#define a6 A6 +#define a7 A7 +#define sp SP +#endif + +#ifdef ELF_SYNTAX +#define PROLOG(name) .type name,@function +#define EPILOG(name) .size name,.-name +#define MEM(base)(R(base)) +#define MEM_DISP(base,displacement)(displacement,R(base)) +#define MEM_PREDEC(memory_base)-(R(memory_base)) +#define MEM_POSTINC(memory_base)(R(memory_base))+ +#ifdef __STDC__ +#define R_(r)%##r +#define R(r)R_(r) +#define MEM_INDX_(base,idx,size_suffix)(R(base),R(idx##.##size_suffix)) +#define MEM_INDX(base,idx,size_suffix)MEM_INDX_(base,idx,size_suffix) +#define MEM_INDX1_(base,idx,size_suffix,scale)(R(base),R(idx##.##size_suffix*scale)) +#define MEM_INDX1(base,idx,size_suffix,scale)MEM_INDX1_(base,idx,size_suffix,scale) +#define L(label) .##label +#else +#define R(r)%/**/r +#define MEM_INDX(base,idx,size_suffix)(R(base),R(idx).size_suffix) +#define MEM_INDX1(base,idx,size_suffix,scale)(R(base),R(idx).size_suffix*scale) +#define L(label) ./**/label +#endif +#define TEXT .text +#define ALIGN .align 2 +#define GLOBL .globl +#define bcc jbcc +#define bcs jbcs +#define bls jbls +#define beq jbeq +#define bne jbne +#define bra jbra +#endif + +#if defined (SONY_SYNTAX) || defined (ELF_SYNTAX) +#define movel move.l +#define moveml movem.l +#define moveql moveq.l +#define cmpl cmp.l +#define orl or.l +#define clrl clr.l +#define eorw eor.w +#define lsrl lsr.l +#define lsll lsl.l +#define roxrl roxr.l +#define roxll roxl.l +#define addl add.l +#define addxl addx.l +#define addql addq.l +#define subl sub.l +#define subxl subx.l +#define subql subq.l +#define negl neg.l +#define mulul mulu.l +#endif diff --git a/ghc/rts/gmp/mpn/m88k/mc88110/add_n.S b/ghc/rts/gmp/mpn/m88k/mc88110/add_n.S new file mode 100644 index 0000000..39a44e5 --- /dev/null +++ b/ghc/rts/gmp/mpn/m88k/mc88110/add_n.S @@ -0,0 +1,200 @@ +; mc88110 __mpn_add_n -- Add two limb vectors of the same length > 0 and store +; sum in a third limb vector. + +; Copyright (C) 1995, 1996 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +#define res_ptr r2 +#define s1_ptr r3 +#define s2_ptr r4 +#define size r5 + +#include "sysdep.h" + + text + align 16 + global C_SYMBOL_NAME(__mpn_add_n) +C_SYMBOL_NAME(__mpn_add_n): + addu.co r0,r0,r0 ; clear cy flag + xor r12,s2_ptr,res_ptr + bb1 2,r12,L1 +; ** V1a ** +L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned? +/* Add least significant limb separately to align res_ptr and s2_ptr */ + ld r10,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + ld r8,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + subu size,size,1 + addu.co r6,r10,r8 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 +L_v1: cmp r12,size,2 + bb1 lt,r12,Lend2 + + ld r10,s1_ptr,0 + ld r12,s1_ptr,4 + ld.d r8,s2_ptr,0 + subu size,size,10 + bcnd lt0,size,Lfin1 +/* Add blocks of 8 limbs until less than 8 limbs remain */ + align 8 +Loop1: subu size,size,8 + addu.cio r6,r10,r8 + ld r10,s1_ptr,8 + addu.cio r7,r12,r9 + ld r12,s1_ptr,12 + ld.d r8,s2_ptr,8 + st.d r6,res_ptr,0 + addu.cio r6,r10,r8 + ld r10,s1_ptr,16 + addu.cio r7,r12,r9 + ld r12,s1_ptr,20 + ld.d r8,s2_ptr,16 + st.d r6,res_ptr,8 + addu.cio r6,r10,r8 + ld r10,s1_ptr,24 + addu.cio r7,r12,r9 + ld r12,s1_ptr,28 + ld.d r8,s2_ptr,24 + st.d r6,res_ptr,16 + addu.cio r6,r10,r8 + ld r10,s1_ptr,32 + addu.cio r7,r12,r9 + ld r12,s1_ptr,36 + addu s1_ptr,s1_ptr,32 + ld.d r8,s2_ptr,32 + addu s2_ptr,s2_ptr,32 + st.d r6,res_ptr,24 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop1 + +Lfin1: addu size,size,8-2 + bcnd lt0,size,Lend1 +/* Add blocks of 2 limbs until less than 2 limbs remain */ +Loope1: addu.cio r6,r10,r8 + ld r10,s1_ptr,8 + addu.cio r7,r12,r9 + ld r12,s1_ptr,12 + ld.d r8,s2_ptr,8 + st.d r6,res_ptr,0 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope1 +Lend1: addu.cio r6,r10,r8 + addu.cio r7,r12,r9 + st.d r6,res_ptr,0 + + bb0 0,size,Lret1 +/* Add last limb */ + ld r10,s1_ptr,8 + ld r8,s2_ptr,8 + addu.cio r6,r10,r8 + st r6,res_ptr,8 + +Lret1: jmp.n r1 + addu.ci r2,r0,r0 ; return carry-out from most sign. limb + +L1: xor r12,s1_ptr,res_ptr + bb1 2,r12,L2 +; ** V1b ** + or r12,r0,s2_ptr + or s2_ptr,r0,s1_ptr + or s1_ptr,r0,r12 + br L0 + +; ** V2 ** +/* If we come here, the alignment of s1_ptr and res_ptr as well as the + alignment of s2_ptr and res_ptr differ. Since there are only two ways + things can be aligned (that we care about) we now know that the alignment + of s1_ptr and s2_ptr are the same. */ + +L2: cmp r12,size,1 + bb1 eq,r12,Ljone + bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned +/* Add least significant limb separately to align res_ptr and s2_ptr */ + ld r10,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + ld r8,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + subu size,size,1 + addu.co r6,r10,r8 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 + +L_v2: subu size,size,8 + bcnd lt0,size,Lfin2 +/* Add blocks of 8 limbs until less than 8 limbs remain */ + align 8 +Loop2: subu size,size,8 + ld.d r8,s1_ptr,0 + ld.d r6,s2_ptr,0 + addu.cio r8,r8,r6 + st r8,res_ptr,0 + addu.cio r9,r9,r7 + st r9,res_ptr,4 + ld.d r8,s1_ptr,8 + ld.d r6,s2_ptr,8 + addu.cio r8,r8,r6 + st r8,res_ptr,8 + addu.cio r9,r9,r7 + st r9,res_ptr,12 + ld.d r8,s1_ptr,16 + ld.d r6,s2_ptr,16 + addu.cio r8,r8,r6 + st r8,res_ptr,16 + addu.cio r9,r9,r7 + st r9,res_ptr,20 + ld.d r8,s1_ptr,24 + ld.d r6,s2_ptr,24 + addu.cio r8,r8,r6 + st r8,res_ptr,24 + addu.cio r9,r9,r7 + st r9,res_ptr,28 + addu s1_ptr,s1_ptr,32 + addu s2_ptr,s2_ptr,32 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop2 + +Lfin2: addu size,size,8-2 + bcnd lt0,size,Lend2 +Loope2: ld.d r8,s1_ptr,0 + ld.d r6,s2_ptr,0 + addu.cio r8,r8,r6 + st r8,res_ptr,0 + addu.cio r9,r9,r7 + st r9,res_ptr,4 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope2 +Lend2: bb0 0,size,Lret2 +/* Add last limb */ +Ljone: ld r10,s1_ptr,0 + ld r8,s2_ptr,0 + addu.cio r6,r10,r8 + st r6,res_ptr,0 + +Lret2: jmp.n r1 + addu.ci r2,r0,r0 ; return carry-out from most sign. limb diff --git a/ghc/rts/gmp/mpn/m88k/mc88110/sub_n.S b/ghc/rts/gmp/mpn/m88k/mc88110/sub_n.S new file mode 100644 index 0000000..685f024 --- /dev/null +++ b/ghc/rts/gmp/mpn/m88k/mc88110/sub_n.S @@ -0,0 +1,276 @@ +; mc88110 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +; store difference in a third limb vector. + +; Copyright (C) 1995, 1996 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +#define res_ptr r2 +#define s1_ptr r3 +#define s2_ptr r4 +#define size r5 + +#include "sysdep.h" + + text + align 16 + global C_SYMBOL_NAME(__mpn_sub_n) +C_SYMBOL_NAME(__mpn_sub_n): + subu.co r0,r0,r0 ; set cy flag + xor r12,s2_ptr,res_ptr + bb1 2,r12,L1 +; ** V1a ** +L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned +/* Add least significant limb separately to align res_ptr and s2_ptr */ + ld r10,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + ld r8,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + subu size,size,1 + subu.co r6,r10,r8 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 +L_v1: cmp r12,size,2 + bb1 lt,r12,Lend2 + + ld r10,s1_ptr,0 + ld r12,s1_ptr,4 + ld.d r8,s2_ptr,0 + subu size,size,10 + bcnd lt0,size,Lfin1 +/* Add blocks of 8 limbs until less than 8 limbs remain */ + align 8 +Loop1: subu size,size,8 + subu.cio r6,r10,r8 + ld r10,s1_ptr,8 + subu.cio r7,r12,r9 + ld r12,s1_ptr,12 + ld.d r8,s2_ptr,8 + st.d r6,res_ptr,0 + subu.cio r6,r10,r8 + ld r10,s1_ptr,16 + subu.cio r7,r12,r9 + ld r12,s1_ptr,20 + ld.d r8,s2_ptr,16 + st.d r6,res_ptr,8 + subu.cio r6,r10,r8 + ld r10,s1_ptr,24 + subu.cio r7,r12,r9 + ld r12,s1_ptr,28 + ld.d r8,s2_ptr,24 + st.d r6,res_ptr,16 + subu.cio r6,r10,r8 + ld r10,s1_ptr,32 + subu.cio r7,r12,r9 + ld r12,s1_ptr,36 + addu s1_ptr,s1_ptr,32 + ld.d r8,s2_ptr,32 + addu s2_ptr,s2_ptr,32 + st.d r6,res_ptr,24 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop1 + +Lfin1: addu size,size,8-2 + bcnd lt0,size,Lend1 +/* Add blocks of 2 limbs until less than 2 limbs remain */ +Loope1: subu.cio r6,r10,r8 + ld r10,s1_ptr,8 + subu.cio r7,r12,r9 + ld r12,s1_ptr,12 + ld.d r8,s2_ptr,8 + st.d r6,res_ptr,0 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope1 +Lend1: subu.cio r6,r10,r8 + subu.cio r7,r12,r9 + st.d r6,res_ptr,0 + + bb0 0,size,Lret1 +/* Add last limb */ + ld r10,s1_ptr,8 + ld r8,s2_ptr,8 + subu.cio r6,r10,r8 + st r6,res_ptr,8 + +Lret1: addu.ci r2,r0,r0 ; return carry-out from most sign. limb + jmp.n r1 + xor r2,r2,1 + +L1: xor r12,s1_ptr,res_ptr + bb1 2,r12,L2 +; ** V1b ** + bb0 2,res_ptr,L_v1b ; branch if res_ptr is aligned +/* Add least significant limb separately to align res_ptr and s1_ptr */ + ld r10,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + ld r8,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + subu size,size,1 + subu.co r6,r8,r10 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 +L_v1b: cmp r12,size,2 + bb1 lt,r12,Lend2 + + ld r10,s2_ptr,0 + ld r12,s2_ptr,4 + ld.d r8,s1_ptr,0 + subu size,size,10 + bcnd lt0,size,Lfin1b +/* Add blocks of 8 limbs until less than 8 limbs remain */ + align 8 +Loop1b: subu size,size,8 + subu.cio r6,r8,r10 + ld r10,s2_ptr,8 + subu.cio r7,r9,r12 + ld r12,s2_ptr,12 + ld.d r8,s1_ptr,8 + st.d r6,res_ptr,0 + subu.cio r6,r8,r10 + ld r10,s2_ptr,16 + subu.cio r7,r9,r12 + ld r12,s2_ptr,20 + ld.d r8,s1_ptr,16 + st.d r6,res_ptr,8 + subu.cio r6,r8,r10 + ld r10,s2_ptr,24 + subu.cio r7,r9,r12 + ld r12,s2_ptr,28 + ld.d r8,s1_ptr,24 + st.d r6,res_ptr,16 + subu.cio r6,r8,r10 + ld r10,s2_ptr,32 + subu.cio r7,r9,r12 + ld r12,s2_ptr,36 + addu s2_ptr,s2_ptr,32 + ld.d r8,s1_ptr,32 + addu s1_ptr,s1_ptr,32 + st.d r6,res_ptr,24 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop1b + +Lfin1b: addu size,size,8-2 + bcnd lt0,size,Lend1b +/* Add blocks of 2 limbs until less than 2 limbs remain */ +Loope1b:subu.cio r6,r8,r10 + ld r10,s2_ptr,8 + subu.cio r7,r9,r12 + ld r12,s2_ptr,12 + ld.d r8,s1_ptr,8 + st.d r6,res_ptr,0 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope1b +Lend1b: subu.cio r6,r8,r10 + subu.cio r7,r9,r12 + st.d r6,res_ptr,0 + + bb0 0,size,Lret1b +/* Add last limb */ + ld r10,s2_ptr,8 + ld r8,s1_ptr,8 + subu.cio r6,r8,r10 + st r6,res_ptr,8 + +Lret1b: addu.ci r2,r0,r0 ; return carry-out from most sign. limb + jmp.n r1 + xor r2,r2,1 + +; ** V2 ** +/* If we come here, the alignment of s1_ptr and res_ptr as well as the + alignment of s2_ptr and res_ptr differ. Since there are only two ways + things can be aligned (that we care about) we now know that the alignment + of s1_ptr and s2_ptr are the same. */ + +L2: cmp r12,size,1 + bb1 eq,r12,Ljone + bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned +/* Add least significant limb separately to align res_ptr and s2_ptr */ + ld r10,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + ld r8,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + subu size,size,1 + subu.co r6,r10,r8 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 + +L_v2: subu size,size,8 + bcnd lt0,size,Lfin2 +/* Add blocks of 8 limbs until less than 8 limbs remain */ + align 8 +Loop2: subu size,size,8 + ld.d r8,s1_ptr,0 + ld.d r6,s2_ptr,0 + subu.cio r8,r8,r6 + st r8,res_ptr,0 + subu.cio r9,r9,r7 + st r9,res_ptr,4 + ld.d r8,s1_ptr,8 + ld.d r6,s2_ptr,8 + subu.cio r8,r8,r6 + st r8,res_ptr,8 + subu.cio r9,r9,r7 + st r9,res_ptr,12 + ld.d r8,s1_ptr,16 + ld.d r6,s2_ptr,16 + subu.cio r8,r8,r6 + st r8,res_ptr,16 + subu.cio r9,r9,r7 + st r9,res_ptr,20 + ld.d r8,s1_ptr,24 + ld.d r6,s2_ptr,24 + subu.cio r8,r8,r6 + st r8,res_ptr,24 + subu.cio r9,r9,r7 + st r9,res_ptr,28 + addu s1_ptr,s1_ptr,32 + addu s2_ptr,s2_ptr,32 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop2 + +Lfin2: addu size,size,8-2 + bcnd lt0,size,Lend2 +Loope2: ld.d r8,s1_ptr,0 + ld.d r6,s2_ptr,0 + subu.cio r8,r8,r6 + st r8,res_ptr,0 + subu.cio r9,r9,r7 + st r9,res_ptr,4 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope2 +Lend2: bb0 0,size,Lret2 +/* Add last limb */ +Ljone: ld r10,s1_ptr,0 + ld r8,s2_ptr,0 + subu.cio r6,r10,r8 + st r6,res_ptr,0 + +Lret2: addu.ci r2,r0,r0 ; return carry-out from most sign. limb + jmp.n r1 + xor r2,r2,1 diff --git a/ghc/rts/gmp/mpn/mips3/README b/ghc/rts/gmp/mpn/mips3/README new file mode 100644 index 0000000..e94b2c7 --- /dev/null +++ b/ghc/rts/gmp/mpn/mips3/README @@ -0,0 +1,23 @@ +This directory contains mpn functions optimized for MIPS3. Example of +processors that implement MIPS3 are R4000, R4400, R4600, R4700, and R8000. + +RELEVANT OPTIMIZATION ISSUES + +1. On the R4000 and R4400, branches, both the plain and the "likely" ones, + take 3 cycles to execute. (The fastest possible loop will take 4 cycles, + because of the delay insn.) + + On the R4600, branches takes a single cycle + + On the R8000, branches often take no noticable cycles, as they are + executed in a separate function unit.. + +2. The R4000 and R4400 have a load latency of 4 cycles. + +3. On the R4000 and R4400, multiplies take a data-dependent number of + cycles, contrary to the SGI documentation. There seem to be 3 or 4 + possible latencies. + +STATUS + +Good... diff --git a/ghc/rts/gmp/mpn/mips3/gmp-mparam.h b/ghc/rts/gmp/mpn/mips3/gmp-mparam.h new file mode 100644 index 0000000..f3df7ff --- /dev/null +++ b/ghc/rts/gmp/mpn/mips3/gmp-mparam.h @@ -0,0 +1,27 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 +#define BITS_PER_LONGINT 32 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 diff --git a/ghc/rts/gmp/mpn/powerpc64/gmp-mparam.h b/ghc/rts/gmp/mpn/powerpc64/gmp-mparam.h new file mode 100644 index 0000000..48eb85d --- /dev/null +++ b/ghc/rts/gmp/mpn/powerpc64/gmp-mparam.h @@ -0,0 +1,27 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 +#define BITS_PER_LONGINT 64 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 -- 1.7.10.4