From a2fb1fcc265f4e968497c126733e9530da1b4d6a Mon Sep 17 00:00:00 2001 From: simonm Date: Fri, 5 Jun 1998 14:44:05 +0000 Subject: [PATCH 1/1] [project @ 1998-06-05 14:43:59 by simonm] Import GMP 2.0.2 --- ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/rshift.s | 80 ++++++++++++++++ ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/sub_n.s | 76 +++++++++++++++ ghc/rts/gmp/mpn/i960/add_n.s | 43 +++++++++ ghc/rts/gmp/mpn/i960/addmul_1.s | 48 ++++++++++ ghc/rts/gmp/mpn/i960/mul_1.s | 45 +++++++++ ghc/rts/gmp/mpn/i960/sub_n.s | 43 +++++++++ ghc/rts/gmp/mpn/m88k/add_n.s | 104 +++++++++++++++++++++ ghc/rts/gmp/mpn/m88k/mc88110/addmul_1.s | 61 ++++++++++++ ghc/rts/gmp/mpn/m88k/mc88110/mul_1.s | 59 ++++++++++++ ghc/rts/gmp/mpn/m88k/mul_1.s | 127 +++++++++++++++++++++++++ ghc/rts/gmp/mpn/m88k/sub_n.s | 106 +++++++++++++++++++++ ghc/rts/gmp/mpn/mips2/add_n.s | 120 ++++++++++++++++++++++++ ghc/rts/gmp/mpn/mips2/addmul_1.s | 97 +++++++++++++++++++ ghc/rts/gmp/mpn/mips2/lshift.s | 95 +++++++++++++++++++ ghc/rts/gmp/mpn/mips2/mul_1.s | 85 +++++++++++++++++ ghc/rts/gmp/mpn/mips2/rshift.s | 92 ++++++++++++++++++ ghc/rts/gmp/mpn/mips2/sub_n.s | 120 ++++++++++++++++++++++++ ghc/rts/gmp/mpn/mips2/submul_1.s | 97 +++++++++++++++++++ ghc/rts/gmp/mpn/mips3/add_n.s | 120 ++++++++++++++++++++++++ ghc/rts/gmp/mpn/mips3/addmul_1.s | 97 +++++++++++++++++++ ghc/rts/gmp/mpn/mips3/lshift.s | 95 +++++++++++++++++++ ghc/rts/gmp/mpn/mips3/mul_1.s | 85 +++++++++++++++++ ghc/rts/gmp/mpn/mips3/rshift.s | 92 ++++++++++++++++++ ghc/rts/gmp/mpn/mips3/sub_n.s | 120 ++++++++++++++++++++++++ ghc/rts/gmp/mpn/mips3/submul_1.s | 97 +++++++++++++++++++ ghc/rts/gmp/mpn/ns32k/add_n.s | 46 +++++++++ ghc/rts/gmp/mpn/ns32k/addmul_1.s | 48 ++++++++++ ghc/rts/gmp/mpn/ns32k/mul_1.s | 47 ++++++++++ ghc/rts/gmp/mpn/ns32k/sub_n.s | 46 +++++++++ ghc/rts/gmp/mpn/ns32k/submul_1.s | 48 ++++++++++ ghc/rts/gmp/mpn/power/add_n.s | 81 ++++++++++++++++ ghc/rts/gmp/mpn/power/addmul_1.s | 123 +++++++++++++++++++++++++ ghc/rts/gmp/mpn/power/lshift.s | 59 ++++++++++++ ghc/rts/gmp/mpn/power/mul_1.s | 110 ++++++++++++++++++++++ ghc/rts/gmp/mpn/power/rshift.s | 57 ++++++++++++ ghc/rts/gmp/mpn/power/sub_n.s | 82 +++++++++++++++++ ghc/rts/gmp/mpn/power/submul_1.s | 128 ++++++++++++++++++++++++++ 37 files changed, 3079 insertions(+) create mode 100644 ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/rshift.s create mode 100644 ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/sub_n.s create mode 100644 ghc/rts/gmp/mpn/i960/add_n.s create mode 100644 ghc/rts/gmp/mpn/i960/addmul_1.s create mode 100644 ghc/rts/gmp/mpn/i960/mul_1.s create mode 100644 ghc/rts/gmp/mpn/i960/sub_n.s create mode 100644 ghc/rts/gmp/mpn/m88k/add_n.s create mode 100644 ghc/rts/gmp/mpn/m88k/mc88110/addmul_1.s create mode 100644 ghc/rts/gmp/mpn/m88k/mc88110/mul_1.s create mode 100644 ghc/rts/gmp/mpn/m88k/mul_1.s create mode 100644 ghc/rts/gmp/mpn/m88k/sub_n.s create mode 100644 ghc/rts/gmp/mpn/mips2/add_n.s create mode 100644 ghc/rts/gmp/mpn/mips2/addmul_1.s create mode 100644 ghc/rts/gmp/mpn/mips2/lshift.s create mode 100644 ghc/rts/gmp/mpn/mips2/mul_1.s create mode 100644 ghc/rts/gmp/mpn/mips2/rshift.s create mode 100644 ghc/rts/gmp/mpn/mips2/sub_n.s create mode 100644 ghc/rts/gmp/mpn/mips2/submul_1.s create mode 100644 ghc/rts/gmp/mpn/mips3/add_n.s create mode 100644 ghc/rts/gmp/mpn/mips3/addmul_1.s create mode 100644 ghc/rts/gmp/mpn/mips3/lshift.s create mode 100644 ghc/rts/gmp/mpn/mips3/mul_1.s create mode 100644 ghc/rts/gmp/mpn/mips3/rshift.s create mode 100644 ghc/rts/gmp/mpn/mips3/sub_n.s create mode 100644 ghc/rts/gmp/mpn/mips3/submul_1.s create mode 100644 ghc/rts/gmp/mpn/ns32k/add_n.s create mode 100644 ghc/rts/gmp/mpn/ns32k/addmul_1.s create mode 100644 ghc/rts/gmp/mpn/ns32k/mul_1.s create mode 100644 ghc/rts/gmp/mpn/ns32k/sub_n.s create mode 100644 ghc/rts/gmp/mpn/ns32k/submul_1.s create mode 100644 ghc/rts/gmp/mpn/power/add_n.s create mode 100644 ghc/rts/gmp/mpn/power/addmul_1.s create mode 100644 ghc/rts/gmp/mpn/power/lshift.s create mode 100644 ghc/rts/gmp/mpn/power/mul_1.s create mode 100644 ghc/rts/gmp/mpn/power/rshift.s create mode 100644 ghc/rts/gmp/mpn/power/sub_n.s create mode 100644 ghc/rts/gmp/mpn/power/submul_1.s diff --git a/ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/rshift.s b/ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/rshift.s new file mode 100644 index 0000000..845418c --- /dev/null +++ b/ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/rshift.s @@ -0,0 +1,80 @@ +; HP-PA __mpn_rshift -- +; This is optimized for the PA7100, where is runs at 3.25 cycles/limb + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s_ptr gr25 +; size gr24 +; cnt gr23 + + .code + .export __mpn_rshift +__mpn_rshift + .proc + .callinfo frame=64,no_calls + .entry + + ldws,ma 4(0,%r25),%r22 + mtsar %r23 + addib,= -1,%r24,L$0004 + vshd %r22,%r0,%r28 ; compute carry out limb + ldws,ma 4(0,%r25),%r29 + addib,<= -5,%r24,L$rest + vshd %r29,%r22,%r20 + +L$loop ldws,ma 4(0,%r25),%r22 + stws,ma %r20,4(0,%r26) + vshd %r22,%r29,%r20 + ldws,ma 4(0,%r25),%r29 + stws,ma %r20,4(0,%r26) + vshd %r29,%r22,%r20 + ldws,ma 4(0,%r25),%r22 + stws,ma %r20,4(0,%r26) + vshd %r22,%r29,%r20 + ldws,ma 4(0,%r25),%r29 + stws,ma %r20,4(0,%r26) + addib,> -4,%r24,L$loop + vshd %r29,%r22,%r20 + +L$rest addib,= 4,%r24,L$end1 + nop +L$eloop ldws,ma 4(0,%r25),%r22 + stws,ma %r20,4(0,%r26) + addib,<= -1,%r24,L$end2 + vshd %r22,%r29,%r20 + ldws,ma 4(0,%r25),%r29 + stws,ma %r20,4(0,%r26) + addib,> -1,%r24,L$eloop + vshd %r29,%r22,%r20 + +L$end1 stws,ma %r20,4(0,%r26) + vshd %r0,%r29,%r20 + bv 0(%r2) + stw %r20,0(0,%r26) +L$end2 stws,ma %r20,4(0,%r26) +L$0004 vshd %r0,%r22,%r20 + bv 0(%r2) + stw %r20,0(0,%r26) + + .exit + .procend diff --git a/ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/sub_n.s b/ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/sub_n.s new file mode 100644 index 0000000..1e1ebcf --- /dev/null +++ b/ghc/rts/gmp/mpn/hppa/hppa1_1/pa7100/sub_n.s @@ -0,0 +1,76 @@ +; HP-PA __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +; store difference in a third limb vector. +; This is optimized for the PA7100, where is runs at 4.25 cycles/limb + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; s2_ptr gr24 +; size gr23 + + .code + .export __mpn_sub_n +__mpn_sub_n + .proc + .callinfo frame=0,no_calls + .entry + + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + + addib,<= -5,%r23,L$rest + sub %r20,%r19,%r28 ; subtract first limbs ignoring cy + +L$loop ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + subb %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + subb %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + subb %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,> -4,%r23,L$loop + subb %r20,%r19,%r28 + +L$rest addib,= 4,%r23,L$end + nop +L$eloop ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,> -1,%r23,L$eloop + subb %r20,%r19,%r28 + +L$end stws %r28,0(0,%r26) + addc %r0,%r0,%r28 + bv 0(%r2) + subi 1,%r28,%r28 + + .exit + .procend diff --git a/ghc/rts/gmp/mpn/i960/add_n.s b/ghc/rts/gmp/mpn/i960/add_n.s new file mode 100644 index 0000000..6e67482 --- /dev/null +++ b/ghc/rts/gmp/mpn/i960/add_n.s @@ -0,0 +1,43 @@ +# I960 __mpn_add_n -- Add two limb vectors of the same length > 0 and store +# sum in a third limb vector. + +# Copyright (C) 1995 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + +.text + .align 4 + .globl ___mpn_add_n +___mpn_add_n: + mov 0,g6 # clear carry-save register + cmpo 1,0 # clear cy + +Loop: subo 1,g3,g3 # update loop counter + ld (g1),g5 # load from s1_ptr + addo 4,g1,g1 # s1_ptr++ + ld (g2),g4 # load from s2_ptr + addo 4,g2,g2 # s2_ptr++ + cmpo g6,1 # restore cy from g6, relies on cy being 0 + addc g4,g5,g4 # main add + subc 0,0,g6 # save cy in g6 + st g4,(g0) # store result to res_ptr + addo 4,g0,g0 # res_ptr++ + cmpobne 0,g3,Loop # when branch is taken, clears C bit + + mov g6,g0 + ret diff --git a/ghc/rts/gmp/mpn/i960/addmul_1.s b/ghc/rts/gmp/mpn/i960/addmul_1.s new file mode 100644 index 0000000..db53f64 --- /dev/null +++ b/ghc/rts/gmp/mpn/i960/addmul_1.s @@ -0,0 +1,48 @@ +# I960 __mpn_addmul_1 -- Multiply a limb vector with a limb and add +# the result to a second limb vector. + +# Copyright (C) 1995 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + +.text + .align 4 + .globl ___mpn_mul_1 +___mpn_mul_1: + subo g2,0,g2 + shlo 2,g2,g4 + subo g4,g1,g1 + subo g4,g0,g13 + mov 0,g0 + + cmpo 1,0 # clear C bit on AC.cc + +Loop: ld (g1)[g2*4],g5 + emul g3,g5,g6 + ld (g13)[g2*4],g5 + + addc g0,g6,g6 # relies on that C bit is clear + addc 0,g7,g7 + addc g5,g6,g6 # relies on that C bit is clear + st g6,(g13)[g2*4] + addc 0,g7,g0 + + addo g2,1,g2 + cmpobne 0,g2,Loop # when branch is taken, clears C bit + + ret diff --git a/ghc/rts/gmp/mpn/i960/mul_1.s b/ghc/rts/gmp/mpn/i960/mul_1.s new file mode 100644 index 0000000..4ccaeab --- /dev/null +++ b/ghc/rts/gmp/mpn/i960/mul_1.s @@ -0,0 +1,45 @@ +# I960 __mpn_mul_1 -- Multiply a limb vector with a limb and store +# the result in a second limb vector. + +# Copyright (C) 1995 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + +.text + .align 4 + .globl ___mpn_mul_1 +___mpn_mul_1: + subo g2,0,g2 + shlo 2,g2,g4 + subo g4,g1,g1 + subo g4,g0,g13 + mov 0,g0 + + cmpo 1,0 # clear C bit on AC.cc + +Loop: ld (g1)[g2*4],g5 + emul g3,g5,g6 + + addc g0,g6,g6 # relies on that C bit is clear + st g6,(g13)[g2*4] + addc 0,g7,g0 + + addo g2,1,g2 + cmpobne 0,g2,Loop # when branch is taken, clears C bit + + ret diff --git a/ghc/rts/gmp/mpn/i960/sub_n.s b/ghc/rts/gmp/mpn/i960/sub_n.s new file mode 100644 index 0000000..01b94a1 --- /dev/null +++ b/ghc/rts/gmp/mpn/i960/sub_n.s @@ -0,0 +1,43 @@ +# I960 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +# store difference in a third limb vector. + +# Copyright (C) 1995 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + +.text + .align 4 + .globl ___mpn_sub_n +___mpn_sub_n: + mov 1,g6 # set carry-save register + cmpo 1,0 # clear cy + +Loop: subo 1,g3,g3 # update loop counter + ld (g1),g5 # load from s1_ptr + addo 4,g1,g1 # s1_ptr++ + ld (g2),g4 # load from s2_ptr + addo 4,g2,g2 # s2_ptr++ + cmpo g6,1 # restore cy from g6, relies on cy being 0 + subc g4,g5,g4 # main subtract + subc 0,0,g6 # save cy in g6 + st g4,(g0) # store result to res_ptr + addo 4,g0,g0 # res_ptr++ + cmpobne 0,g3,Loop # when branch is taken, cy will be 0 + + mov g6,g0 + ret diff --git a/ghc/rts/gmp/mpn/m88k/add_n.s b/ghc/rts/gmp/mpn/m88k/add_n.s new file mode 100644 index 0000000..1b09cce --- /dev/null +++ b/ghc/rts/gmp/mpn/m88k/add_n.s @@ -0,0 +1,104 @@ +; mc88100 __mpn_add -- Add two limb vectors of the same length > 0 and store +; sum in a third limb vector. + +; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr r2 +; s1_ptr r3 +; s2_ptr r4 +; size r5 + +; This code has been optimized to run one instruction per clock, avoiding +; load stalls and writeback contention. As a result, the instruction +; order is not always natural. + +; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100, +; but on the 88110, it seems to run much slower, 6.6 clocks/limb. + + text + align 16 + global ___mpn_add_n +___mpn_add_n: + ld r6,r3,0 ; read first limb from s1_ptr + extu r10,r5,3 + ld r7,r4,0 ; read first limb from s2_ptr + + subu.co r5,r0,r5 ; (clear carry as side effect) + mak r5,r5,3<4> + bcnd eq0,r5,Lzero + + or r12,r0,lo16(Lbase) + or.u r12,r12,hi16(Lbase) + addu r12,r12,r5 ; r12 is address for entering in loop + + extu r5,r5,2 ; divide by 4 + subu r2,r2,r5 ; adjust res_ptr + subu r3,r3,r5 ; adjust s1_ptr + subu r4,r4,r5 ; adjust s2_ptr + + or r8,r6,r0 + + jmp.n r12 + or r9,r7,r0 + +Loop: addu r3,r3,32 + st r8,r2,28 + addu r4,r4,32 + ld r6,r3,0 + addu r2,r2,32 + ld r7,r4,0 +Lzero: subu r10,r10,1 ; add 0 + 8r limbs (adj loop cnt) +Lbase: ld r8,r3,4 + addu.cio r6,r6,r7 + ld r9,r4,4 + st r6,r2,0 + ld r6,r3,8 ; add 7 + 8r limbs + addu.cio r8,r8,r9 + ld r7,r4,8 + st r8,r2,4 + ld r8,r3,12 ; add 6 + 8r limbs + addu.cio r6,r6,r7 + ld r9,r4,12 + st r6,r2,8 + ld r6,r3,16 ; add 5 + 8r limbs + addu.cio r8,r8,r9 + ld r7,r4,16 + st r8,r2,12 + ld r8,r3,20 ; add 4 + 8r limbs + addu.cio r6,r6,r7 + ld r9,r4,20 + st r6,r2,16 + ld r6,r3,24 ; add 3 + 8r limbs + addu.cio r8,r8,r9 + ld r7,r4,24 + st r8,r2,20 + ld r8,r3,28 ; add 2 + 8r limbs + addu.cio r6,r6,r7 + ld r9,r4,28 + st r6,r2,24 + bcnd.n ne0,r10,Loop ; add 1 + 8r limbs + addu.cio r8,r8,r9 + + st r8,r2,28 ; store most significant limb + + jmp.n r1 + addu.ci r2,r0,r0 ; return carry-out from most sign. limb diff --git a/ghc/rts/gmp/mpn/m88k/mc88110/addmul_1.s b/ghc/rts/gmp/mpn/m88k/mc88110/addmul_1.s new file mode 100644 index 0000000..2bd6f21 --- /dev/null +++ b/ghc/rts/gmp/mpn/m88k/mc88110/addmul_1.s @@ -0,0 +1,61 @@ +; mc88110 __mpn_addmul_1 -- Multiply a limb vector with a single limb and +; store the product in a second limb vector. + +; Copyright (C) 1996 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr r2 +; s1_ptr r3 +; size r4 +; s2_limb r5 + + text + align 16 + global ___mpn_addmul_1 +___mpn_addmul_1: + lda r3,r3[r4] + lda r8,r2[r4] ; RES_PTR in r8 since r2 is retval + subu r4,r0,r4 + addu.co r2,r0,r0 ; r2 = cy = 0 + + ld r6,r3[r4] + addu r4,r4,1 + subu r8,r8,4 + bcnd.n eq0,r4,Lend + mulu.d r10,r6,r5 + +Loop: ld r7,r8[r4] + ld r6,r3[r4] + addu.cio r9,r11,r2 + addu.ci r2,r10,r0 + addu.co r9,r9,r7 + st r9,r8[r4] + addu r4,r4,1 + mulu.d r10,r6,r5 + bcnd ne0,r4,Loop + +Lend: ld r7,r8,0 + addu.cio r9,r11,r2 + addu.ci r2,r10,r0 + addu.co r9,r9,r7 + st r9,r8,0 + jmp.n r1 + addu.ci r2,r2,r0 diff --git a/ghc/rts/gmp/mpn/m88k/mc88110/mul_1.s b/ghc/rts/gmp/mpn/m88k/mc88110/mul_1.s new file mode 100644 index 0000000..1518900 --- /dev/null +++ b/ghc/rts/gmp/mpn/m88k/mc88110/mul_1.s @@ -0,0 +1,59 @@ +; mc88110 __mpn_mul_1 -- Multiply a limb vector with a single limb and +; store the product in a second limb vector. + +; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr r2 +; s1_ptr r3 +; size r4 +; s2_limb r5 + + text + align 16 + global ___mpn_mul_1 +___mpn_mul_1: + ; Make S1_PTR and RES_PTR point at the end of their blocks + ; and negate SIZE. + lda r3,r3[r4] + lda r8,r2[r4] ; RES_PTR in r8 since r2 is retval + subu r4,r0,r4 + + addu.co r2,r0,r0 ; r2 = cy = 0 + + ld r6,r3[r4] + addu r4,r4,1 + mulu.d r10,r6,r5 + bcnd.n eq0,r4,Lend + subu r8,r8,8 + +Loop: ld r6,r3[r4] + addu.cio r9,r11,r2 + or r2,r10,r0 ; could be avoided if unrolled + addu r4,r4,1 + mulu.d r10,r6,r5 + bcnd.n ne0,r4,Loop + st r9,r8[r4] + +Lend: addu.cio r9,r11,r2 + st r9,r8,4 + jmp.n r1 + addu.ci r2,r10,r0 diff --git a/ghc/rts/gmp/mpn/m88k/mul_1.s b/ghc/rts/gmp/mpn/m88k/mul_1.s new file mode 100644 index 0000000..26626bf --- /dev/null +++ b/ghc/rts/gmp/mpn/m88k/mul_1.s @@ -0,0 +1,127 @@ +; mc88100 __mpn_mul_1 -- Multiply a limb vector with a single limb and +; store the product in a second limb vector. + +; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr r2 +; s1_ptr r3 +; size r4 +; s2_limb r5 + +; Common overhead is about 11 cycles/invocation. + +; The speed for S2_LIMB >= 0x10000 is approximately 21 cycles/limb. (The +; pipeline stalls 2 cycles due to WB contention.) + +; The speed for S2_LIMB < 0x10000 is approximately 16 cycles/limb. (The +; pipeline stalls 2 cycles due to WB contention and 1 cycle due to latency.) + +; To enhance speed: +; 1. Unroll main loop 4-8 times. +; 2. Schedule code to avoid WB contention. It might be tempting to move the +; ld instruction in the loops down to save 2 cycles (less WB contention), +; but that looses because the ultimate value will be read from outside +; the allocated space. But if we handle the ultimate multiplication in +; the tail, we can do this. +; 3. Make the multiplication with less instructions. I think the code for +; (S2_LIMB >= 0x10000) is not minimal. +; With these techniques the (S2_LIMB >= 0x10000) case would run in 17 or +; less cycles/limb; the (S2_LIMB < 0x10000) case would run in 11 +; cycles/limb. (Assuming infinite unrolling.) + + text + align 16 + global ___mpn_mul_1 +___mpn_mul_1: + + ; Make S1_PTR and RES_PTR point at the end of their blocks + ; and negate SIZE. + lda r3,r3[r4] + lda r6,r2[r4] ; RES_PTR in r6 since r2 is retval + subu r4,r0,r4 + + addu.co r2,r0,r0 ; r2 = cy = 0 + ld r9,r3[r4] + mask r7,r5,0xffff ; r7 = lo(S2_LIMB) + extu r8,r5,16 ; r8 = hi(S2_LIMB) + bcnd.n eq0,r8,Lsmall ; jump if (hi(S2_LIMB) == 0) + subu r6,r6,4 + +; General code for any value of S2_LIMB. + + ; Make a stack frame and save r25 and r26 + subu r31,r31,16 + st.d r25,r31,8 + + ; Enter the loop in the middle + br.n L1 + addu r4,r4,1 + +Loop: ld r9,r3[r4] + st r26,r6[r4] +; bcnd ne0,r0,0 ; bubble + addu r4,r4,1 +L1: mul r26,r9,r5 ; low word of product mul_1 WB ld + mask r12,r9,0xffff ; r12 = lo(s1_limb) mask_1 + mul r11,r12,r7 ; r11 = prod_0 mul_2 WB mask_1 + mul r10,r12,r8 ; r10 = prod_1a mul_3 + extu r13,r9,16 ; r13 = hi(s1_limb) extu_1 WB mul_1 + mul r12,r13,r7 ; r12 = prod_1b mul_4 WB extu_1 + mul r25,r13,r8 ; r25 = prod_2 mul_5 WB mul_2 + extu r11,r11,16 ; r11 = hi(prod_0) extu_2 WB mul_3 + addu r10,r10,r11 ; addu_1 WB extu_2 +; bcnd ne0,r0,0 ; bubble WB addu_1 + addu.co r10,r10,r12 ; WB mul_4 + mask.u r10,r10,0xffff ; move the 16 most significant bits... + addu.ci r10,r10,r0 ; ...to the low half of the word... + rot r10,r10,16 ; ...and put carry in pos 16. + addu.co r26,r26,r2 ; add old carry limb + bcnd.n ne0,r4,Loop + addu.ci r2,r25,r10 ; compute new carry limb + + st r26,r6[r4] + ld.d r25,r31,8 + jmp.n r1 + addu r31,r31,16 + +; Fast code for S2_LIMB < 0x10000 +Lsmall: + ; Enter the loop in the middle + br.n SL1 + addu r4,r4,1 + +SLoop: ld r9,r3[r4] ; + st r8,r6[r4] ; + addu r4,r4,1 ; +SL1: mul r8,r9,r5 ; low word of product + mask r12,r9,0xffff ; r12 = lo(s1_limb) + extu r13,r9,16 ; r13 = hi(s1_limb) + mul r11,r12,r7 ; r11 = prod_0 + mul r12,r13,r7 ; r12 = prod_1b + addu.cio r8,r8,r2 ; add old carry limb + extu r10,r11,16 ; r11 = hi(prod_0) + addu r10,r10,r12 ; + bcnd.n ne0,r4,SLoop + extu r2,r10,16 ; r2 = new carry limb + + jmp.n r1 + st r8,r6[r4] diff --git a/ghc/rts/gmp/mpn/m88k/sub_n.s b/ghc/rts/gmp/mpn/m88k/sub_n.s new file mode 100644 index 0000000..7dfffc9 --- /dev/null +++ b/ghc/rts/gmp/mpn/m88k/sub_n.s @@ -0,0 +1,106 @@ +; mc88100 __mpn_sub -- Subtract two limb vectors of the same length > 0 and +; store difference in a third limb vector. + +; Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr r2 +; s1_ptr r3 +; s2_ptr r4 +; size r5 + +; This code has been optimized to run one instruction per clock, avoiding +; load stalls and writeback contention. As a result, the instruction +; order is not always natural. + +; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100, +; but on the 88110, it seems to run much slower, 6.6 clocks/limb. + + text + align 16 + global ___mpn_sub_n +___mpn_sub_n: + ld r6,r3,0 ; read first limb from s1_ptr + extu r10,r5,3 + ld r7,r4,0 ; read first limb from s2_ptr + + subu r5,r0,r5 + mak r5,r5,3<4> + bcnd.n eq0,r5,Lzero + subu.co r0,r0,r0 ; initialize carry + + or r12,r0,lo16(Lbase) + or.u r12,r12,hi16(Lbase) + addu r12,r12,r5 ; r12 is address for entering in loop + + extu r5,r5,2 ; divide by 4 + subu r2,r2,r5 ; adjust res_ptr + subu r3,r3,r5 ; adjust s1_ptr + subu r4,r4,r5 ; adjust s2_ptr + + or r8,r6,r0 + + jmp.n r12 + or r9,r7,r0 + +Loop: addu r3,r3,32 + st r8,r2,28 + addu r4,r4,32 + ld r6,r3,0 + addu r2,r2,32 + ld r7,r4,0 +Lzero: subu r10,r10,1 ; subtract 0 + 8r limbs (adj loop cnt) +Lbase: ld r8,r3,4 + subu.cio r6,r6,r7 + ld r9,r4,4 + st r6,r2,0 + ld r6,r3,8 ; subtract 7 + 8r limbs + subu.cio r8,r8,r9 + ld r7,r4,8 + st r8,r2,4 + ld r8,r3,12 ; subtract 6 + 8r limbs + subu.cio r6,r6,r7 + ld r9,r4,12 + st r6,r2,8 + ld r6,r3,16 ; subtract 5 + 8r limbs + subu.cio r8,r8,r9 + ld r7,r4,16 + st r8,r2,12 + ld r8,r3,20 ; subtract 4 + 8r limbs + subu.cio r6,r6,r7 + ld r9,r4,20 + st r6,r2,16 + ld r6,r3,24 ; subtract 3 + 8r limbs + subu.cio r8,r8,r9 + ld r7,r4,24 + st r8,r2,20 + ld r8,r3,28 ; subtract 2 + 8r limbs + subu.cio r6,r6,r7 + ld r9,r4,28 + st r6,r2,24 + bcnd.n ne0,r10,Loop ; subtract 1 + 8r limbs + subu.cio r8,r8,r9 + + st r8,r2,28 ; store most significant limb + + addu.ci r2,r0,r0 ; return carry-out from most sign. limb + jmp.n r1 + xor r2,r2,1 diff --git a/ghc/rts/gmp/mpn/mips2/add_n.s b/ghc/rts/gmp/mpn/mips2/add_n.s new file mode 100644 index 0000000..f5525ce --- /dev/null +++ b/ghc/rts/gmp/mpn/mips2/add_n.s @@ -0,0 +1,120 @@ + # MIPS2 __mpn_add_n -- Add two limb vectors of the same length > 0 and + # store sum in a third limb vector. + + # Copyright (C) 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # s2_ptr $6 + # size $7 + + .text + .align 2 + .globl __mpn_add_n + .ent __mpn_add_n +__mpn_add_n: + .set noreorder + .set nomacro + + lw $10,0($5) + lw $11,0($6) + + addiu $7,$7,-1 + and $9,$7,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + move $2,$0 + + subu $7,$7,$9 + +.Loop0: addiu $9,$9,-1 + lw $12,4($5) + addu $11,$11,$2 + lw $13,4($6) + sltu $8,$11,$2 + addu $11,$10,$11 + sltu $2,$11,$10 + sw $11,0($4) + or $2,$2,$8 + + addiu $5,$5,4 + addiu $6,$6,4 + move $10,$12 + move $11,$13 + bne $9,$0,.Loop0 + addiu $4,$4,4 + +.L0: beq $7,$0,.Lend + nop + +.Loop: addiu $7,$7,-4 + + lw $12,4($5) + addu $11,$11,$2 + lw $13,4($6) + sltu $8,$11,$2 + addu $11,$10,$11 + sltu $2,$11,$10 + sw $11,0($4) + or $2,$2,$8 + + lw $10,8($5) + addu $13,$13,$2 + lw $11,8($6) + sltu $8,$13,$2 + addu $13,$12,$13 + sltu $2,$13,$12 + sw $13,4($4) + or $2,$2,$8 + + lw $12,12($5) + addu $11,$11,$2 + lw $13,12($6) + sltu $8,$11,$2 + addu $11,$10,$11 + sltu $2,$11,$10 + sw $11,8($4) + or $2,$2,$8 + + lw $10,16($5) + addu $13,$13,$2 + lw $11,16($6) + sltu $8,$13,$2 + addu $13,$12,$13 + sltu $2,$13,$12 + sw $13,12($4) + or $2,$2,$8 + + addiu $5,$5,16 + addiu $6,$6,16 + + bne $7,$0,.Loop + addiu $4,$4,16 + +.Lend: addu $11,$11,$2 + sltu $8,$11,$2 + addu $11,$10,$11 + sltu $2,$11,$10 + sw $11,0($4) + j $31 + or $2,$2,$8 + + .end __mpn_add_n diff --git a/ghc/rts/gmp/mpn/mips2/addmul_1.s b/ghc/rts/gmp/mpn/mips2/addmul_1.s new file mode 100644 index 0000000..6145771 --- /dev/null +++ b/ghc/rts/gmp/mpn/mips2/addmul_1.s @@ -0,0 +1,97 @@ + # MIPS __mpn_addmul_1 -- Multiply a limb vector with a single limb and + # add the product to a second limb vector. + + # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # size $6 + # s2_limb $7 + + .text + .align 4 + .globl __mpn_addmul_1 + .ent __mpn_addmul_1 +__mpn_addmul_1: + .set noreorder + .set nomacro + + # warm up phase 0 + lw $8,0($5) + + # warm up phase 1 + addiu $5,$5,4 + multu $8,$7 + + addiu $6,$6,-1 + beq $6,$0,$LC0 + move $2,$0 # zero cy2 + + addiu $6,$6,-1 + beq $6,$0,$LC1 + lw $8,0($5) # load new s1 limb as early as possible + +Loop: lw $10,0($4) + mflo $3 + mfhi $9 + addiu $5,$5,4 + addu $3,$3,$2 # add old carry limb to low product limb + multu $8,$7 + lw $8,0($5) # load new s1 limb as early as possible + addiu $6,$6,-1 # decrement loop counter + sltu $2,$3,$2 # carry from previous addition -> $2 + addu $3,$10,$3 + sltu $10,$3,$10 + addu $2,$2,$10 + sw $3,0($4) + addiu $4,$4,4 + bne $6,$0,Loop + addu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 1 +$LC1: lw $10,0($4) + mflo $3 + mfhi $9 + addu $3,$3,$2 + sltu $2,$3,$2 + multu $8,$7 + addu $3,$10,$3 + sltu $10,$3,$10 + addu $2,$2,$10 + sw $3,0($4) + addiu $4,$4,4 + addu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 0 +$LC0: lw $10,0($4) + mflo $3 + mfhi $9 + addu $3,$3,$2 + sltu $2,$3,$2 + addu $3,$10,$3 + sltu $10,$3,$10 + addu $2,$2,$10 + sw $3,0($4) + j $31 + addu $2,$9,$2 # add high product limb and carry from addition + + .end __mpn_addmul_1 diff --git a/ghc/rts/gmp/mpn/mips2/lshift.s b/ghc/rts/gmp/mpn/mips2/lshift.s new file mode 100644 index 0000000..ee92d79 --- /dev/null +++ b/ghc/rts/gmp/mpn/mips2/lshift.s @@ -0,0 +1,95 @@ + # MIPS2 __mpn_lshift -- + + # Copyright (C) 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # src_ptr $5 + # size $6 + # cnt $7 + + .text + .align 2 + .globl __mpn_lshift + .ent __mpn_lshift +__mpn_lshift: + .set noreorder + .set nomacro + + sll $2,$6,2 + addu $5,$5,$2 # make r5 point at end of src + lw $10,-4($5) # load first limb + subu $13,$0,$7 + addu $4,$4,$2 # make r4 point at end of res + addiu $6,$6,-1 + and $9,$6,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + srl $2,$10,$13 # compute function result + + subu $6,$6,$9 + +.Loop0: lw $3,-8($5) + addiu $4,$4,-4 + addiu $5,$5,-4 + addiu $9,$9,-1 + sll $11,$10,$7 + srl $12,$3,$13 + move $10,$3 + or $8,$11,$12 + bne $9,$0,.Loop0 + sw $8,0($4) + +.L0: beq $6,$0,.Lend + nop + +.Loop: lw $3,-8($5) + addiu $4,$4,-16 + addiu $6,$6,-4 + sll $11,$10,$7 + srl $12,$3,$13 + + lw $10,-12($5) + sll $14,$3,$7 + or $8,$11,$12 + sw $8,12($4) + srl $9,$10,$13 + + lw $3,-16($5) + sll $11,$10,$7 + or $8,$14,$9 + sw $8,8($4) + srl $12,$3,$13 + + lw $10,-20($5) + sll $14,$3,$7 + or $8,$11,$12 + sw $8,4($4) + srl $9,$10,$13 + + addiu $5,$5,-16 + or $8,$14,$9 + bgtz $6,.Loop + sw $8,0($4) + +.Lend: sll $8,$10,$7 + j $31 + sw $8,-4($4) + .end __mpn_lshift diff --git a/ghc/rts/gmp/mpn/mips2/mul_1.s b/ghc/rts/gmp/mpn/mips2/mul_1.s new file mode 100644 index 0000000..d006fa1 --- /dev/null +++ b/ghc/rts/gmp/mpn/mips2/mul_1.s @@ -0,0 +1,85 @@ + # MIPS __mpn_mul_1 -- Multiply a limb vector with a single limb and + # store the product in a second limb vector. + + # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # size $6 + # s2_limb $7 + + .text + .align 4 + .globl __mpn_mul_1 + .ent __mpn_mul_1 +__mpn_mul_1: + .set noreorder + .set nomacro + + # warm up phase 0 + lw $8,0($5) + + # warm up phase 1 + addiu $5,$5,4 + multu $8,$7 + + addiu $6,$6,-1 + beq $6,$0,$LC0 + move $2,$0 # zero cy2 + + addiu $6,$6,-1 + beq $6,$0,$LC1 + lw $8,0($5) # load new s1 limb as early as possible + +Loop: mflo $10 + mfhi $9 + addiu $5,$5,4 + addu $10,$10,$2 # add old carry limb to low product limb + multu $8,$7 + lw $8,0($5) # load new s1 limb as early as possible + addiu $6,$6,-1 # decrement loop counter + sltu $2,$10,$2 # carry from previous addition -> $2 + sw $10,0($4) + addiu $4,$4,4 + bne $6,$0,Loop + addu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 1 +$LC1: mflo $10 + mfhi $9 + addu $10,$10,$2 + sltu $2,$10,$2 + multu $8,$7 + sw $10,0($4) + addiu $4,$4,4 + addu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 0 +$LC0: mflo $10 + mfhi $9 + addu $10,$10,$2 + sltu $2,$10,$2 + sw $10,0($4) + j $31 + addu $2,$9,$2 # add high product limb and carry from addition + + .end __mpn_mul_1 diff --git a/ghc/rts/gmp/mpn/mips2/rshift.s b/ghc/rts/gmp/mpn/mips2/rshift.s new file mode 100644 index 0000000..a8beb40 --- /dev/null +++ b/ghc/rts/gmp/mpn/mips2/rshift.s @@ -0,0 +1,92 @@ + # MIPS2 __mpn_rshift -- + + # Copyright (C) 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # src_ptr $5 + # size $6 + # cnt $7 + + .text + .align 2 + .globl __mpn_rshift + .ent __mpn_rshift +__mpn_rshift: + .set noreorder + .set nomacro + + lw $10,0($5) # load first limb + subu $13,$0,$7 + addiu $6,$6,-1 + and $9,$6,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + sll $2,$10,$13 # compute function result + + subu $6,$6,$9 + +.Loop0: lw $3,4($5) + addiu $4,$4,4 + addiu $5,$5,4 + addiu $9,$9,-1 + srl $11,$10,$7 + sll $12,$3,$13 + move $10,$3 + or $8,$11,$12 + bne $9,$0,.Loop0 + sw $8,-4($4) + +.L0: beq $6,$0,.Lend + nop + +.Loop: lw $3,4($5) + addiu $4,$4,16 + addiu $6,$6,-4 + srl $11,$10,$7 + sll $12,$3,$13 + + lw $10,8($5) + srl $14,$3,$7 + or $8,$11,$12 + sw $8,-16($4) + sll $9,$10,$13 + + lw $3,12($5) + srl $11,$10,$7 + or $8,$14,$9 + sw $8,-12($4) + sll $12,$3,$13 + + lw $10,16($5) + srl $14,$3,$7 + or $8,$11,$12 + sw $8,-8($4) + sll $9,$10,$13 + + addiu $5,$5,16 + or $8,$14,$9 + bgtz $6,.Loop + sw $8,-4($4) + +.Lend: srl $8,$10,$7 + j $31 + sw $8,0($4) + .end __mpn_rshift diff --git a/ghc/rts/gmp/mpn/mips2/sub_n.s b/ghc/rts/gmp/mpn/mips2/sub_n.s new file mode 100644 index 0000000..3368ef2 --- /dev/null +++ b/ghc/rts/gmp/mpn/mips2/sub_n.s @@ -0,0 +1,120 @@ + # MIPS2 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and + # store difference in a third limb vector. + + # Copyright (C) 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # s2_ptr $6 + # size $7 + + .text + .align 2 + .globl __mpn_sub_n + .ent __mpn_sub_n +__mpn_sub_n: + .set noreorder + .set nomacro + + lw $10,0($5) + lw $11,0($6) + + addiu $7,$7,-1 + and $9,$7,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + move $2,$0 + + subu $7,$7,$9 + +.Loop0: addiu $9,$9,-1 + lw $12,4($5) + addu $11,$11,$2 + lw $13,4($6) + sltu $8,$11,$2 + subu $11,$10,$11 + sltu $2,$10,$11 + sw $11,0($4) + or $2,$2,$8 + + addiu $5,$5,4 + addiu $6,$6,4 + move $10,$12 + move $11,$13 + bne $9,$0,.Loop0 + addiu $4,$4,4 + +.L0: beq $7,$0,.Lend + nop + +.Loop: addiu $7,$7,-4 + + lw $12,4($5) + addu $11,$11,$2 + lw $13,4($6) + sltu $8,$11,$2 + subu $11,$10,$11 + sltu $2,$10,$11 + sw $11,0($4) + or $2,$2,$8 + + lw $10,8($5) + addu $13,$13,$2 + lw $11,8($6) + sltu $8,$13,$2 + subu $13,$12,$13 + sltu $2,$12,$13 + sw $13,4($4) + or $2,$2,$8 + + lw $12,12($5) + addu $11,$11,$2 + lw $13,12($6) + sltu $8,$11,$2 + subu $11,$10,$11 + sltu $2,$10,$11 + sw $11,8($4) + or $2,$2,$8 + + lw $10,16($5) + addu $13,$13,$2 + lw $11,16($6) + sltu $8,$13,$2 + subu $13,$12,$13 + sltu $2,$12,$13 + sw $13,12($4) + or $2,$2,$8 + + addiu $5,$5,16 + addiu $6,$6,16 + + bne $7,$0,.Loop + addiu $4,$4,16 + +.Lend: addu $11,$11,$2 + sltu $8,$11,$2 + subu $11,$10,$11 + sltu $2,$10,$11 + sw $11,0($4) + j $31 + or $2,$2,$8 + + .end __mpn_sub_n diff --git a/ghc/rts/gmp/mpn/mips2/submul_1.s b/ghc/rts/gmp/mpn/mips2/submul_1.s new file mode 100644 index 0000000..1324b66 --- /dev/null +++ b/ghc/rts/gmp/mpn/mips2/submul_1.s @@ -0,0 +1,97 @@ + # MIPS __mpn_submul_1 -- Multiply a limb vector with a single limb and + # subtract the product from a second limb vector. + + # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # size $6 + # s2_limb $7 + + .text + .align 4 + .globl __mpn_submul_1 + .ent __mpn_submul_1 +__mpn_submul_1: + .set noreorder + .set nomacro + + # warm up phase 0 + lw $8,0($5) + + # warm up phase 1 + addiu $5,$5,4 + multu $8,$7 + + addiu $6,$6,-1 + beq $6,$0,$LC0 + move $2,$0 # zero cy2 + + addiu $6,$6,-1 + beq $6,$0,$LC1 + lw $8,0($5) # load new s1 limb as early as possible + +Loop: lw $10,0($4) + mflo $3 + mfhi $9 + addiu $5,$5,4 + addu $3,$3,$2 # add old carry limb to low product limb + multu $8,$7 + lw $8,0($5) # load new s1 limb as early as possible + addiu $6,$6,-1 # decrement loop counter + sltu $2,$3,$2 # carry from previous addition -> $2 + subu $3,$10,$3 + sgtu $10,$3,$10 + addu $2,$2,$10 + sw $3,0($4) + addiu $4,$4,4 + bne $6,$0,Loop + addu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 1 +$LC1: lw $10,0($4) + mflo $3 + mfhi $9 + addu $3,$3,$2 + sltu $2,$3,$2 + multu $8,$7 + subu $3,$10,$3 + sgtu $10,$3,$10 + addu $2,$2,$10 + sw $3,0($4) + addiu $4,$4,4 + addu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 0 +$LC0: lw $10,0($4) + mflo $3 + mfhi $9 + addu $3,$3,$2 + sltu $2,$3,$2 + subu $3,$10,$3 + sgtu $10,$3,$10 + addu $2,$2,$10 + sw $3,0($4) + j $31 + addu $2,$9,$2 # add high product limb and carry from addition + + .end __mpn_submul_1 diff --git a/ghc/rts/gmp/mpn/mips3/add_n.s b/ghc/rts/gmp/mpn/mips3/add_n.s new file mode 100644 index 0000000..996a449 --- /dev/null +++ b/ghc/rts/gmp/mpn/mips3/add_n.s @@ -0,0 +1,120 @@ + # MIPS3 __mpn_add_n -- Add two limb vectors of the same length > 0 and + # store sum in a third limb vector. + + # Copyright (C) 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # s2_ptr $6 + # size $7 + + .text + .align 2 + .globl __mpn_add_n + .ent __mpn_add_n +__mpn_add_n: + .set noreorder + .set nomacro + + ld $10,0($5) + ld $11,0($6) + + daddiu $7,$7,-1 + and $9,$7,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + move $2,$0 + + dsubu $7,$7,$9 + +.Loop0: daddiu $9,$9,-1 + ld $12,8($5) + daddu $11,$11,$2 + ld $13,8($6) + sltu $8,$11,$2 + daddu $11,$10,$11 + sltu $2,$11,$10 + sd $11,0($4) + or $2,$2,$8 + + daddiu $5,$5,8 + daddiu $6,$6,8 + move $10,$12 + move $11,$13 + bne $9,$0,.Loop0 + daddiu $4,$4,8 + +.L0: beq $7,$0,.Lend + nop + +.Loop: daddiu $7,$7,-4 + + ld $12,8($5) + daddu $11,$11,$2 + ld $13,8($6) + sltu $8,$11,$2 + daddu $11,$10,$11 + sltu $2,$11,$10 + sd $11,0($4) + or $2,$2,$8 + + ld $10,16($5) + daddu $13,$13,$2 + ld $11,16($6) + sltu $8,$13,$2 + daddu $13,$12,$13 + sltu $2,$13,$12 + sd $13,8($4) + or $2,$2,$8 + + ld $12,24($5) + daddu $11,$11,$2 + ld $13,24($6) + sltu $8,$11,$2 + daddu $11,$10,$11 + sltu $2,$11,$10 + sd $11,16($4) + or $2,$2,$8 + + ld $10,32($5) + daddu $13,$13,$2 + ld $11,32($6) + sltu $8,$13,$2 + daddu $13,$12,$13 + sltu $2,$13,$12 + sd $13,24($4) + or $2,$2,$8 + + daddiu $5,$5,32 + daddiu $6,$6,32 + + bne $7,$0,.Loop + daddiu $4,$4,32 + +.Lend: daddu $11,$11,$2 + sltu $8,$11,$2 + daddu $11,$10,$11 + sltu $2,$11,$10 + sd $11,0($4) + j $31 + or $2,$2,$8 + + .end __mpn_add_n diff --git a/ghc/rts/gmp/mpn/mips3/addmul_1.s b/ghc/rts/gmp/mpn/mips3/addmul_1.s new file mode 100644 index 0000000..cd75c18 --- /dev/null +++ b/ghc/rts/gmp/mpn/mips3/addmul_1.s @@ -0,0 +1,97 @@ + # MIPS3 __mpn_addmul_1 -- Multiply a limb vector with a single limb and + # add the product to a second limb vector. + + # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # size $6 + # s2_limb $7 + + .text + .align 4 + .globl __mpn_addmul_1 + .ent __mpn_addmul_1 +__mpn_addmul_1: + .set noreorder + .set nomacro + + # warm up phase 0 + ld $8,0($5) + + # warm up phase 1 + daddiu $5,$5,8 + dmultu $8,$7 + + daddiu $6,$6,-1 + beq $6,$0,$LC0 + move $2,$0 # zero cy2 + + daddiu $6,$6,-1 + beq $6,$0,$LC1 + ld $8,0($5) # load new s1 limb as early as possible + +Loop: ld $10,0($4) + mflo $3 + mfhi $9 + daddiu $5,$5,8 + daddu $3,$3,$2 # add old carry limb to low product limb + dmultu $8,$7 + ld $8,0($5) # load new s1 limb as early as possible + daddiu $6,$6,-1 # decrement loop counter + sltu $2,$3,$2 # carry from previous addition -> $2 + daddu $3,$10,$3 + sltu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + daddiu $4,$4,8 + bne $6,$0,Loop + daddu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 1 +$LC1: ld $10,0($4) + mflo $3 + mfhi $9 + daddu $3,$3,$2 + sltu $2,$3,$2 + dmultu $8,$7 + daddu $3,$10,$3 + sltu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + daddiu $4,$4,8 + daddu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 0 +$LC0: ld $10,0($4) + mflo $3 + mfhi $9 + daddu $3,$3,$2 + sltu $2,$3,$2 + daddu $3,$10,$3 + sltu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + j $31 + daddu $2,$9,$2 # add high product limb and carry from addition + + .end __mpn_addmul_1 diff --git a/ghc/rts/gmp/mpn/mips3/lshift.s b/ghc/rts/gmp/mpn/mips3/lshift.s new file mode 100644 index 0000000..324a602 --- /dev/null +++ b/ghc/rts/gmp/mpn/mips3/lshift.s @@ -0,0 +1,95 @@ + # MIPS3 __mpn_lshift -- + + # Copyright (C) 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # src_ptr $5 + # size $6 + # cnt $7 + + .text + .align 2 + .globl __mpn_lshift + .ent __mpn_lshift +__mpn_lshift: + .set noreorder + .set nomacro + + dsll $2,$6,3 + daddu $5,$5,$2 # make r5 point at end of src + ld $10,-8($5) # load first limb + dsubu $13,$0,$7 + daddu $4,$4,$2 # make r4 point at end of res + daddiu $6,$6,-1 + and $9,$6,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + dsrl $2,$10,$13 # compute function result + + dsubu $6,$6,$9 + +.Loop0: ld $3,-16($5) + daddiu $4,$4,-8 + daddiu $5,$5,-8 + daddiu $9,$9,-1 + dsll $11,$10,$7 + dsrl $12,$3,$13 + move $10,$3 + or $8,$11,$12 + bne $9,$0,.Loop0 + sd $8,0($4) + +.L0: beq $6,$0,.Lend + nop + +.Loop: ld $3,-16($5) + daddiu $4,$4,-32 + daddiu $6,$6,-4 + dsll $11,$10,$7 + dsrl $12,$3,$13 + + ld $10,-24($5) + dsll $14,$3,$7 + or $8,$11,$12 + sd $8,24($4) + dsrl $9,$10,$13 + + ld $3,-32($5) + dsll $11,$10,$7 + or $8,$14,$9 + sd $8,16($4) + dsrl $12,$3,$13 + + ld $10,-40($5) + dsll $14,$3,$7 + or $8,$11,$12 + sd $8,8($4) + dsrl $9,$10,$13 + + daddiu $5,$5,-32 + or $8,$14,$9 + bgtz $6,.Loop + sd $8,0($4) + +.Lend: dsll $8,$10,$7 + j $31 + sd $8,-8($4) + .end __mpn_lshift diff --git a/ghc/rts/gmp/mpn/mips3/mul_1.s b/ghc/rts/gmp/mpn/mips3/mul_1.s new file mode 100644 index 0000000..281d057 --- /dev/null +++ b/ghc/rts/gmp/mpn/mips3/mul_1.s @@ -0,0 +1,85 @@ + # MIPS3 __mpn_mul_1 -- Multiply a limb vector with a single limb and + # store the product in a second limb vector. + + # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # size $6 + # s2_limb $7 + + .text + .align 4 + .globl __mpn_mul_1 + .ent __mpn_mul_1 +__mpn_mul_1: + .set noreorder + .set nomacro + + # warm up phase 0 + ld $8,0($5) + + # warm up phase 1 + daddiu $5,$5,8 + dmultu $8,$7 + + daddiu $6,$6,-1 + beq $6,$0,$LC0 + move $2,$0 # zero cy2 + + daddiu $6,$6,-1 + beq $6,$0,$LC1 + ld $8,0($5) # load new s1 limb as early as possible + +Loop: mflo $10 + mfhi $9 + daddiu $5,$5,8 + daddu $10,$10,$2 # add old carry limb to low product limb + dmultu $8,$7 + ld $8,0($5) # load new s1 limb as early as possible + daddiu $6,$6,-1 # decrement loop counter + sltu $2,$10,$2 # carry from previous addition -> $2 + sd $10,0($4) + daddiu $4,$4,8 + bne $6,$0,Loop + daddu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 1 +$LC1: mflo $10 + mfhi $9 + daddu $10,$10,$2 + sltu $2,$10,$2 + dmultu $8,$7 + sd $10,0($4) + daddiu $4,$4,8 + daddu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 0 +$LC0: mflo $10 + mfhi $9 + daddu $10,$10,$2 + sltu $2,$10,$2 + sd $10,0($4) + j $31 + daddu $2,$9,$2 # add high product limb and carry from addition + + .end __mpn_mul_1 diff --git a/ghc/rts/gmp/mpn/mips3/rshift.s b/ghc/rts/gmp/mpn/mips3/rshift.s new file mode 100644 index 0000000..9920e1a --- /dev/null +++ b/ghc/rts/gmp/mpn/mips3/rshift.s @@ -0,0 +1,92 @@ + # MIPS3 __mpn_rshift -- + + # Copyright (C) 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # src_ptr $5 + # size $6 + # cnt $7 + + .text + .align 2 + .globl __mpn_rshift + .ent __mpn_rshift +__mpn_rshift: + .set noreorder + .set nomacro + + ld $10,0($5) # load first limb + dsubu $13,$0,$7 + daddiu $6,$6,-1 + and $9,$6,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + dsll $2,$10,$13 # compute function result + + dsubu $6,$6,$9 + +.Loop0: ld $3,8($5) + daddiu $4,$4,8 + daddiu $5,$5,8 + daddiu $9,$9,-1 + dsrl $11,$10,$7 + dsll $12,$3,$13 + move $10,$3 + or $8,$11,$12 + bne $9,$0,.Loop0 + sd $8,-8($4) + +.L0: beq $6,$0,.Lend + nop + +.Loop: ld $3,8($5) + daddiu $4,$4,32 + daddiu $6,$6,-4 + dsrl $11,$10,$7 + dsll $12,$3,$13 + + ld $10,16($5) + dsrl $14,$3,$7 + or $8,$11,$12 + sd $8,-32($4) + dsll $9,$10,$13 + + ld $3,24($5) + dsrl $11,$10,$7 + or $8,$14,$9 + sd $8,-24($4) + dsll $12,$3,$13 + + ld $10,32($5) + dsrl $14,$3,$7 + or $8,$11,$12 + sd $8,-16($4) + dsll $9,$10,$13 + + daddiu $5,$5,32 + or $8,$14,$9 + bgtz $6,.Loop + sd $8,-8($4) + +.Lend: dsrl $8,$10,$7 + j $31 + sd $8,0($4) + .end __mpn_rshift diff --git a/ghc/rts/gmp/mpn/mips3/sub_n.s b/ghc/rts/gmp/mpn/mips3/sub_n.s new file mode 100644 index 0000000..56c77d8 --- /dev/null +++ b/ghc/rts/gmp/mpn/mips3/sub_n.s @@ -0,0 +1,120 @@ + # MIPS3 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and + # store difference in a third limb vector. + + # Copyright (C) 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # s2_ptr $6 + # size $7 + + .text + .align 2 + .globl __mpn_sub_n + .ent __mpn_sub_n +__mpn_sub_n: + .set noreorder + .set nomacro + + ld $10,0($5) + ld $11,0($6) + + daddiu $7,$7,-1 + and $9,$7,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + move $2,$0 + + dsubu $7,$7,$9 + +.Loop0: daddiu $9,$9,-1 + ld $12,8($5) + daddu $11,$11,$2 + ld $13,8($6) + sltu $8,$11,$2 + dsubu $11,$10,$11 + sltu $2,$10,$11 + sd $11,0($4) + or $2,$2,$8 + + daddiu $5,$5,8 + daddiu $6,$6,8 + move $10,$12 + move $11,$13 + bne $9,$0,.Loop0 + daddiu $4,$4,8 + +.L0: beq $7,$0,.Lend + nop + +.Loop: daddiu $7,$7,-4 + + ld $12,8($5) + daddu $11,$11,$2 + ld $13,8($6) + sltu $8,$11,$2 + dsubu $11,$10,$11 + sltu $2,$10,$11 + sd $11,0($4) + or $2,$2,$8 + + ld $10,16($5) + daddu $13,$13,$2 + ld $11,16($6) + sltu $8,$13,$2 + dsubu $13,$12,$13 + sltu $2,$12,$13 + sd $13,8($4) + or $2,$2,$8 + + ld $12,24($5) + daddu $11,$11,$2 + ld $13,24($6) + sltu $8,$11,$2 + dsubu $11,$10,$11 + sltu $2,$10,$11 + sd $11,16($4) + or $2,$2,$8 + + ld $10,32($5) + daddu $13,$13,$2 + ld $11,32($6) + sltu $8,$13,$2 + dsubu $13,$12,$13 + sltu $2,$12,$13 + sd $13,24($4) + or $2,$2,$8 + + daddiu $5,$5,32 + daddiu $6,$6,32 + + bne $7,$0,.Loop + daddiu $4,$4,32 + +.Lend: daddu $11,$11,$2 + sltu $8,$11,$2 + dsubu $11,$10,$11 + sltu $2,$10,$11 + sd $11,0($4) + j $31 + or $2,$2,$8 + + .end __mpn_sub_n diff --git a/ghc/rts/gmp/mpn/mips3/submul_1.s b/ghc/rts/gmp/mpn/mips3/submul_1.s new file mode 100644 index 0000000..a9c9fa2 --- /dev/null +++ b/ghc/rts/gmp/mpn/mips3/submul_1.s @@ -0,0 +1,97 @@ + # MIPS3 __mpn_submul_1 -- Multiply a limb vector with a single limb and + # subtract the product from a second limb vector. + + # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # size $6 + # s2_limb $7 + + .text + .align 4 + .globl __mpn_submul_1 + .ent __mpn_submul_1 +__mpn_submul_1: + .set noreorder + .set nomacro + + # warm up phase 0 + ld $8,0($5) + + # warm up phase 1 + daddiu $5,$5,8 + dmultu $8,$7 + + daddiu $6,$6,-1 + beq $6,$0,$LC0 + move $2,$0 # zero cy2 + + daddiu $6,$6,-1 + beq $6,$0,$LC1 + ld $8,0($5) # load new s1 limb as early as possible + +Loop: ld $10,0($4) + mflo $3 + mfhi $9 + daddiu $5,$5,8 + daddu $3,$3,$2 # add old carry limb to low product limb + dmultu $8,$7 + ld $8,0($5) # load new s1 limb as early as possible + daddiu $6,$6,-1 # decrement loop counter + sltu $2,$3,$2 # carry from previous addition -> $2 + dsubu $3,$10,$3 + sgtu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + daddiu $4,$4,8 + bne $6,$0,Loop + daddu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 1 +$LC1: ld $10,0($4) + mflo $3 + mfhi $9 + daddu $3,$3,$2 + sltu $2,$3,$2 + dmultu $8,$7 + dsubu $3,$10,$3 + sgtu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + daddiu $4,$4,8 + daddu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 0 +$LC0: ld $10,0($4) + mflo $3 + mfhi $9 + daddu $3,$3,$2 + sltu $2,$3,$2 + dsubu $3,$10,$3 + sgtu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + j $31 + daddu $2,$9,$2 # add high product limb and carry from addition + + .end __mpn_submul_1 diff --git a/ghc/rts/gmp/mpn/ns32k/add_n.s b/ghc/rts/gmp/mpn/ns32k/add_n.s new file mode 100644 index 0000000..dde2e15 --- /dev/null +++ b/ghc/rts/gmp/mpn/ns32k/add_n.s @@ -0,0 +1,46 @@ +# ns32000 __mpn_add_n -- Add two limb vectors of the same length > 0 and store +# sum in a third limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + + .align 1 +.globl ___mpn_add_n +___mpn_add_n: + save [r3,r4,r5] + negd 28(sp),r3 + movd r3,r0 + lshd 2,r0 + movd 24(sp),r4 + subd r0,r4 # r4 -> to end of S2 + movd 20(sp),r5 + subd r0,r5 # r5 -> to end of S1 + movd 16(sp),r2 + subd r0,r2 # r2 -> to end of RES + subd r0,r0 # cy = 0 + +Loop: movd r5[r3:d],r0 + addcd r4[r3:d],r0 + movd r0,r2[r3:d] + acbd 1,r3,Loop + + scsd r0 # r0 = cy. + restore [r5,r4,r3] + ret 0 diff --git a/ghc/rts/gmp/mpn/ns32k/addmul_1.s b/ghc/rts/gmp/mpn/ns32k/addmul_1.s new file mode 100644 index 0000000..205bfe3 --- /dev/null +++ b/ghc/rts/gmp/mpn/ns32k/addmul_1.s @@ -0,0 +1,48 @@ +# ns32000 __mpn_addmul_1 -- Multiply a limb vector with a limb and add +# the result to a second limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + + .align 1 +.globl ___mpn_addmul_1 +___mpn_addmul_1: + save [r3,r4,r5,r6,r7] + negd 24(sp),r4 + movd r4,r0 + lshd 2,r0 + movd 20(sp),r5 + subd r0,r5 # r5 -> to end of S1 + movd 16(sp),r6 + subd r0,r6 # r6 -> to end of RES + subd r0,r0 # r0 = 0, cy = 0 + movd 28(sp),r7 # r7 = s2_limb + +Loop: movd r5[r4:d],r2 + meid r7,r2 # r2 = low_prod, r3 = high_prod + addcd r0,r2 # r2 = low_prod + cy_limb + movd r3,r0 # r0 = new cy_limb + addcd 0,r0 + addd r2,r6[r4:d] + acbd 1,r4,Loop + + addcd 0,r0 + restore [r7,r6,r5,r4,r3] + ret 0 diff --git a/ghc/rts/gmp/mpn/ns32k/mul_1.s b/ghc/rts/gmp/mpn/ns32k/mul_1.s new file mode 100644 index 0000000..64e4abb --- /dev/null +++ b/ghc/rts/gmp/mpn/ns32k/mul_1.s @@ -0,0 +1,47 @@ +# ns32000 __mpn_mul_1 -- Multiply a limb vector with a limb and store +# the result in a second limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + + .align 1 +.globl ___mpn_mul_1 +___mpn_mul_1: + save [r3,r4,r5,r6,r7] + negd 24(sp),r4 + movd r4,r0 + lshd 2,r0 + movd 20(sp),r5 + subd r0,r5 # r5 -> to end of S1 + movd 16(sp),r6 + subd r0,r6 # r6 -> to end of RES + subd r0,r0 # r0 = 0, cy = 0 + movd 28(sp),r7 # r7 = s2_limb + +Loop: movd r5[r4:d],r2 + meid r7,r2 # r2 = low_prod, r3 = high_prod + addcd r0,r2 # r2 = low_prod + cy_limb + movd r3,r0 # r0 = new cy_limb + movd r2,r6[r4:d] + acbd 1,r4,Loop + + addcd 0,r0 + restore [r7,r6,r5,r4,r3] + ret 0 diff --git a/ghc/rts/gmp/mpn/ns32k/sub_n.s b/ghc/rts/gmp/mpn/ns32k/sub_n.s new file mode 100644 index 0000000..ef6c889 --- /dev/null +++ b/ghc/rts/gmp/mpn/ns32k/sub_n.s @@ -0,0 +1,46 @@ +# ns32000 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +# store difference in a third limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + + .align 1 +.globl ___mpn_sub_n +___mpn_sub_n: + save [r3,r4,r5] + negd 28(sp),r3 + movd r3,r0 + lshd 2,r0 + movd 24(sp),r4 + subd r0,r4 # r4 -> to end of S2 + movd 20(sp),r5 + subd r0,r5 # r5 -> to end of S1 + movd 16(sp),r2 + subd r0,r2 # r2 -> to end of RES + subd r0,r0 # cy = 0 + +Loop: movd r5[r3:d],r0 + subcd r4[r3:d],r0 + movd r0,r2[r3:d] + acbd 1,r3,Loop + + scsd r0 # r0 = cy. + restore [r5,r4,r3] + ret 0 diff --git a/ghc/rts/gmp/mpn/ns32k/submul_1.s b/ghc/rts/gmp/mpn/ns32k/submul_1.s new file mode 100644 index 0000000..5093095 --- /dev/null +++ b/ghc/rts/gmp/mpn/ns32k/submul_1.s @@ -0,0 +1,48 @@ +# ns32000 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract +# the result from a second limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + + .align 1 +.globl ___mpn_submul_1 +___mpn_submul_1: + save [r3,r4,r5,r6,r7] + negd 24(sp),r4 + movd r4,r0 + lshd 2,r0 + movd 20(sp),r5 + subd r0,r5 # r5 -> to end of S1 + movd 16(sp),r6 + subd r0,r6 # r6 -> to end of RES + subd r0,r0 # r0 = 0, cy = 0 + movd 28(sp),r7 # r7 = s2_limb + +Loop: movd r5[r4:d],r2 + meid r7,r2 # r2 = low_prod, r3 = high_prod + addcd r0,r2 # r2 = low_prod + cy_limb + movd r3,r0 # r0 = new cy_limb + addcd 0,r0 + subd r2,r6[r4:d] + acbd 1,r4,Loop + + addcd 0,r0 + restore [r7,r6,r5,r4,r3] + ret 0 diff --git a/ghc/rts/gmp/mpn/power/add_n.s b/ghc/rts/gmp/mpn/power/add_n.s new file mode 100644 index 0000000..9e1c948 --- /dev/null +++ b/ghc/rts/gmp/mpn/power/add_n.s @@ -0,0 +1,81 @@ +# IBM POWER __mpn_add_n -- Add two limb vectors of equal, non-zero length. + +# Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# s2_ptr r5 +# size r6 + + .toc + .extern __mpn_add_n[DS] + .extern .__mpn_add_n +.csect [PR] + .align 2 + .globl __mpn_add_n + .globl .__mpn_add_n + .csect __mpn_add_n[DS] +__mpn_add_n: + .long .__mpn_add_n, TOC[tc0], 0 + .csect [PR] +.__mpn_add_n: + andil. 10,6,1 # odd or even number of limbs? + l 8,0(4) # load least significant s1 limb + l 0,0(5) # load least significant s2 limb + cal 3,-4(3) # offset res_ptr, it's updated before it's used + sri 10,6,1 # count for unrolled loop + a 7,0,8 # add least significant limbs, set cy + mtctr 10 # copy count into CTR + beq 0,Leven # branch if even # of limbs (# of limbs >= 2) + +# We have an odd # of limbs. Add the first limbs separately. + cmpi 1,10,0 # is count for unrolled loop zero? + bne 1,L1 # branch if not + st 7,4(3) + aze 3,10 # use the fact that r10 is zero... + br # return + +# We added least significant limbs. Now reload the next limbs to enter loop. +L1: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + stu 7,4(3) + ae 7,0,8 # add limbs, set cy +Leven: lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + bdz Lend # If done, skip loop + +Loop: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + ae 11,9,10 # add previous limbs with cy, set cy + stu 7,4(3) # + lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + ae 7,0,8 # add previous limbs with cy, set cy + stu 11,4(3) # + bdn Loop # decrement CTR and loop back + +Lend: ae 11,9,10 # add limbs with cy, set cy + st 7,4(3) # + st 11,8(3) # + lil 3,0 # load cy into ... + aze 3,3 # ... return value register + br diff --git a/ghc/rts/gmp/mpn/power/addmul_1.s b/ghc/rts/gmp/mpn/power/addmul_1.s new file mode 100644 index 0000000..2db6984 --- /dev/null +++ b/ghc/rts/gmp/mpn/power/addmul_1.s @@ -0,0 +1,123 @@ +# IBM POWER __mpn_addmul_1 -- Multiply a limb vector with a limb and add +# the result to a second limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To +# obtain that operation, we have to use the 32x32->64 signed multiplication +# instruction, and add the appropriate compensation to the high limb of the +# result. We add the multiplicand if the multiplier has its most significant +# bit set, and we add the multiplier if the multiplicand has its most +# significant bit set. We need to preserve the carry flag between each +# iteration, so we have to compute the compensation carefully (the natural, +# srai+and doesn't work). Since the POWER architecture has a branch unit +# we can branch in zero cycles, so that's how we perform the additions. + + .toc + .csect .__mpn_addmul_1[PR] + .align 2 + .globl __mpn_addmul_1 + .globl .__mpn_addmul_1 + .csect __mpn_addmul_1[DS] +__mpn_addmul_1: + .long .__mpn_addmul_1[PR], TOC[tc0], 0 + .csect .__mpn_addmul_1[PR] +.__mpn_addmul_1: + + cal 3,-4(3) + l 0,0(4) + cmpi 0,6,0 + mtctr 5 + mul 9,0,6 + srai 7,0,31 + and 7,7,6 + mfmq 8 + cax 9,9,7 + l 7,4(3) + a 8,8,7 # add res_limb + blt Lneg +Lpos: bdz Lend + +Lploop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 0 + ae 8,0,9 # low limb + old_cy_limb + old cy + l 7,4(3) + aze 10,10 # propagate cy to new cy_limb + a 8,8,7 # add res_limb + bge Lp0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Lp0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 0 + ae 8,0,10 + l 7,4(3) + aze 9,9 + a 8,8,7 + bge Lp1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Lp1: bdn Lploop + + b Lend + +Lneg: cax 9,9,0 + bdz Lend +Lnloop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 7 + ae 8,7,9 + l 7,4(3) + ae 10,10,0 # propagate cy to new cy_limb + a 8,8,7 # add res_limb + bge Ln0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Ln0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 7 + ae 8,7,10 + l 7,4(3) + ae 9,9,0 # propagate cy to new cy_limb + a 8,8,7 # add res_limb + bge Ln1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Ln1: bdn Lnloop + b Lend + +Lend0: cal 9,0(10) +Lend: st 8,4(3) + aze 3,9 + br diff --git a/ghc/rts/gmp/mpn/power/lshift.s b/ghc/rts/gmp/mpn/power/lshift.s new file mode 100644 index 0000000..38169bf --- /dev/null +++ b/ghc/rts/gmp/mpn/power/lshift.s @@ -0,0 +1,59 @@ +# IBM POWER __mpn_lshift -- + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s_ptr r4 +# size r5 +# cnt r6 + + .toc + .extern __mpn_lshift[DS] + .extern .__mpn_lshift +.csect [PR] + .align 2 + .globl __mpn_lshift + .globl .__mpn_lshift + .csect __mpn_lshift[DS] +__mpn_lshift: + .long .__mpn_lshift, TOC[tc0], 0 + .csect [PR] +.__mpn_lshift: + sli 0,5,2 + cax 9,3,0 + cax 4,4,0 + sfi 8,6,32 + mtctr 5 # put limb count in CTR loop register + lu 0,-4(4) # read most significant limb + sre 3,0,8 # compute carry out limb, and init MQ register + bdz Lend2 # if just one limb, skip loop + lu 0,-4(4) # read 2:nd most significant limb + sreq 7,0,8 # compute most significant limb of result + bdz Lend # if just two limb, skip loop +Loop: lu 0,-4(4) # load next lower limb + stu 7,-4(9) # store previous result during read latency + sreq 7,0,8 # compute result limb + bdn Loop # loop back until CTR is zero +Lend: stu 7,-4(9) # store 2:nd least significant limb +Lend2: sle 7,0,6 # compute least significant limb + st 7,-4(9) # store it" \ + br diff --git a/ghc/rts/gmp/mpn/power/mul_1.s b/ghc/rts/gmp/mpn/power/mul_1.s new file mode 100644 index 0000000..a72bce6 --- /dev/null +++ b/ghc/rts/gmp/mpn/power/mul_1.s @@ -0,0 +1,110 @@ +# IBM POWER __mpn_mul_1 -- Multiply a limb vector with a limb and store +# the result in a second limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To +# obtain that operation, we have to use the 32x32->64 signed multiplication +# instruction, and add the appropriate compensation to the high limb of the +# result. We add the multiplicand if the multiplier has its most significant +# bit set, and we add the multiplier if the multiplicand has its most +# significant bit set. We need to preserve the carry flag between each +# iteration, so we have to compute the compensation carefully (the natural, +# srai+and doesn't work). Since the POWER architecture has a branch unit +# we can branch in zero cycles, so that's how we perform the additions. + + .toc + .csect .__mpn_mul_1[PR] + .align 2 + .globl __mpn_mul_1 + .globl .__mpn_mul_1 + .csect __mpn_mul_1[DS] +__mpn_mul_1: + .long .__mpn_mul_1[PR], TOC[tc0], 0 + .csect .__mpn_mul_1[PR] +.__mpn_mul_1: + + cal 3,-4(3) + l 0,0(4) + cmpi 0,6,0 + mtctr 5 + mul 9,0,6 + srai 7,0,31 + and 7,7,6 + mfmq 8 + ai 0,0,0 # reset carry + cax 9,9,7 + blt Lneg +Lpos: bdz Lend +Lploop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 0 + ae 8,0,9 + bge Lp0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Lp0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 0 + ae 8,0,10 + bge Lp1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Lp1: bdn Lploop + b Lend + +Lneg: cax 9,9,0 + bdz Lend +Lnloop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + cax 10,10,0 # adjust high limb for negative s2_limb + mfmq 0 + ae 8,0,9 + bge Ln0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Ln0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + cax 9,9,0 # adjust high limb for negative s2_limb + mfmq 0 + ae 8,0,10 + bge Ln1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Ln1: bdn Lnloop + b Lend + +Lend0: cal 9,0(10) +Lend: st 8,4(3) + aze 3,9 + br diff --git a/ghc/rts/gmp/mpn/power/rshift.s b/ghc/rts/gmp/mpn/power/rshift.s new file mode 100644 index 0000000..30d408a --- /dev/null +++ b/ghc/rts/gmp/mpn/power/rshift.s @@ -0,0 +1,57 @@ +# IBM POWER __mpn_rshift -- + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s_ptr r4 +# size r5 +# cnt r6 + + .toc + .extern __mpn_rshift[DS] + .extern .__mpn_rshift +.csect [PR] + .align 2 + .globl __mpn_rshift + .globl .__mpn_rshift + .csect __mpn_rshift[DS] +__mpn_rshift: + .long .__mpn_rshift, TOC[tc0], 0 + .csect [PR] +.__mpn_rshift: + sfi 8,6,32 + mtctr 5 # put limb count in CTR loop register + l 0,0(4) # read least significant limb + ai 9,3,-4 # adjust res_ptr since it's offset in the stu:s + sle 3,0,8 # compute carry limb, and init MQ register + bdz Lend2 # if just one limb, skip loop + lu 0,4(4) # read 2:nd least significant limb + sleq 7,0,8 # compute least significant limb of result + bdz Lend # if just two limb, skip loop +Loop: lu 0,4(4) # load next higher limb + stu 7,4(9) # store previous result during read latency + sleq 7,0,8 # compute result limb + bdn Loop # loop back until CTR is zero +Lend: stu 7,4(9) # store 2:nd most significant limb +Lend2: sre 7,0,6 # compute most significant limb + st 7,4(9) # store it" \ + br diff --git a/ghc/rts/gmp/mpn/power/sub_n.s b/ghc/rts/gmp/mpn/power/sub_n.s new file mode 100644 index 0000000..30d4fee --- /dev/null +++ b/ghc/rts/gmp/mpn/power/sub_n.s @@ -0,0 +1,82 @@ +# IBM POWER __mpn_sub_n -- Subtract two limb vectors of equal, non-zero length. + +# Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# s2_ptr r5 +# size r6 + + .toc + .extern __mpn_sub_n[DS] + .extern .__mpn_sub_n +.csect [PR] + .align 2 + .globl __mpn_sub_n + .globl .__mpn_sub_n + .csect __mpn_sub_n[DS] +__mpn_sub_n: + .long .__mpn_sub_n, TOC[tc0], 0 + .csect [PR] +.__mpn_sub_n: + andil. 10,6,1 # odd or even number of limbs? + l 8,0(4) # load least significant s1 limb + l 0,0(5) # load least significant s2 limb + cal 3,-4(3) # offset res_ptr, it's updated before it's used + sri 10,6,1 # count for unrolled loop + sf 7,0,8 # subtract least significant limbs, set cy + mtctr 10 # copy count into CTR + beq 0,Leven # branch if even # of limbs (# of limbs >= 2) + +# We have an odd # of limbs. Add the first limbs separately. + cmpi 1,10,0 # is count for unrolled loop zero? + bne 1,L1 # branch if not + st 7,4(3) + sfe 3,0,0 # load !cy into ... + sfi 3,3,0 # ... return value register + br # return + +# We added least significant limbs. Now reload the next limbs to enter loop. +L1: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + stu 7,4(3) + sfe 7,0,8 # subtract limbs, set cy +Leven: lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + bdz Lend # If done, skip loop + +Loop: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + sfe 11,10,9 # subtract previous limbs with cy, set cy + stu 7,4(3) # + lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + sfe 7,0,8 # subtract previous limbs with cy, set cy + stu 11,4(3) # + bdn Loop # decrement CTR and loop back + +Lend: sfe 11,10,9 # subtract limbs with cy, set cy + st 7,4(3) # + st 11,8(3) # + sfe 3,0,0 # load !cy into ... + sfi 3,3,0 # ... return value register + br diff --git a/ghc/rts/gmp/mpn/power/submul_1.s b/ghc/rts/gmp/mpn/power/submul_1.s new file mode 100644 index 0000000..8e5946f --- /dev/null +++ b/ghc/rts/gmp/mpn/power/submul_1.s @@ -0,0 +1,128 @@ +# IBM POWER __mpn_submul_1 -- Multiply a limb vector with a limb and subtract +# the result from a second limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To +# obtain that operation, we have to use the 32x32->64 signed multiplication +# instruction, and add the appropriate compensation to the high limb of the +# result. We add the multiplicand if the multiplier has its most significant +# bit set, and we add the multiplier if the multiplicand has its most +# significant bit set. We need to preserve the carry flag between each +# iteration, so we have to compute the compensation carefully (the natural, +# srai+and doesn't work). Since the POWER architecture has a branch unit +# we can branch in zero cycles, so that's how we perform the additions. + + .toc + .csect .__mpn_submul_1[PR] + .align 2 + .globl __mpn_submul_1 + .globl .__mpn_submul_1 + .csect __mpn_submul_1[DS] +__mpn_submul_1: + .long .__mpn_submul_1[PR], TOC[tc0], 0 + .csect .__mpn_submul_1[PR] +.__mpn_submul_1: + + cal 3,-4(3) + l 0,0(4) + cmpi 0,6,0 + mtctr 5 + mul 9,0,6 + srai 7,0,31 + and 7,7,6 + mfmq 11 + cax 9,9,7 + l 7,4(3) + sf 8,11,7 # add res_limb + a 11,8,11 # invert cy (r11 is junk) + blt Lneg +Lpos: bdz Lend + +Lploop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 0 + ae 11,0,9 # low limb + old_cy_limb + old cy + l 7,4(3) + aze 10,10 # propagate cy to new cy_limb + sf 8,11,7 # add res_limb + a 11,8,11 # invert cy (r11 is junk) + bge Lp0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Lp0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 0 + ae 11,0,10 + l 7,4(3) + aze 9,9 + sf 8,11,7 + a 11,8,11 # invert cy (r11 is junk) + bge Lp1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Lp1: bdn Lploop + + b Lend + +Lneg: cax 9,9,0 + bdz Lend +Lnloop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 7 + ae 11,7,9 + l 7,4(3) + ae 10,10,0 # propagate cy to new cy_limb + sf 8,11,7 # add res_limb + a 11,8,11 # invert cy (r11 is junk) + bge Ln0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Ln0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 7 + ae 11,7,10 + l 7,4(3) + ae 9,9,0 # propagate cy to new cy_limb + sf 8,11,7 # add res_limb + a 11,8,11 # invert cy (r11 is junk) + bge Ln1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Ln1: bdn Lnloop + b Lend + +Lend0: cal 9,0(10) +Lend: st 8,4(3) + aze 3,9 + br -- 1.7.10.4