+++ /dev/null
-; HP-PA 7100/7200 __gmpn_submul_1 -- Multiply a limb vector with a limb and
-; subtract the result from a second limb vector.
-
-; Copyright (C) 1995, 2000 Free Software Foundation, Inc.
-
-; This file is part of the GNU MP Library.
-
-; The GNU MP Library is free software; you can redistribute it and/or modify
-; it under the terms of the GNU Lesser General Public License as published by
-; the Free Software Foundation; either version 2.1 of the License, or (at your
-; option) any later version.
-
-; The GNU MP Library is distributed in the hope that it will be useful, but
-; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-; License for more details.
-
-; You should have received a copy of the GNU Lesser General Public License
-; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-; MA 02111-1307, USA.
-
-; INPUT PARAMETERS: vector that is read and updated, multiplicand vector, limb count, multiplier limb
-#define res_ptr %r26
-#define s1_ptr %r25
-#define size %r24
-#define s2_limb %r23
-; WORKING REGISTERS: cylimb = running carry limb, sN = limbs read from res_ptr, loN/hiN = low/high word of product N
-#define cylimb %r28
-#define s0 %r19
-#define s1 %r20
-#define s2 %r3
-#define s3 %r4
-#define lo0 %r21
-#define lo1 %r5
-#define lo2 %r6
-#define lo3 %r7
-#define hi0 %r22
-#define hi1 %r23 /* safe to reuse */
-#define hi2 %r29
-#define hi3 %r1
-; __gmpn_submul_1: res_ptr[i] -= s1_ptr[i] * s2_limb over size 32-bit limbs with borrow propagation; the final carry limb is left in cylimb.
- .code
- .export __gmpn_submul_1
-__gmpn_submul_1
- .proc
- .callinfo frame=128,no_calls
- .entry
-; Prologue: reserve the frame, park the multiplier in %fr31R, bias size by -4.
- ldo 128(%r30),%r30 ; %r30 += 128, matching frame=128 in .callinfo
- stws s2_limb,-16(%r30) ; multiplier goes through memory to reach the FP registers
- add %r0,%r0,cylimb ; clear cy and cylimb
- addib,< -4,size,L$few_limbs ; size -= 4; negative means fewer than 4 limbs
- fldws -16(%r30),%fr31R ; (delay slot) multiplier now lives in %fr31R
-; Four or more limbs: point %r31 at the product scratch area and save %r3-%r7.
- ldo -112(%r30),%r31 ; scratch is addressed as -16(%r31) through 12(%r31)
- stw %r3,-96(%r30) ; %r3-%r7 back the s2, s3, lo1, lo2, lo3 aliases
- stw %r4,-92(%r30)
- stw %r5,-88(%r30)
- stw %r6,-84(%r30)
- stw %r7,-80(%r30)
-; Bit 29 of s1_ptr set: peel one limb first (presumably so the fldds loads below see an 8-byte aligned s1_ptr -- confirm).
- bb,>=,n s1_ptr,29,L$0 ; skip the peel when the tested bit is clear
-
- fldws,ma 4(s1_ptr),%fr4 ; fetch one limb, s1_ptr += 4
- ldws 0(res_ptr),s0
- xmpyu %fr4,%fr31R,%fr5 ; 64-bit product of limb and multiplier
- fstds %fr5,-16(%r31) ; bounce the product through scratch to the integer side
- ldws -16(%r31),cylimb ; word at -16 is the high half; it seeds the carry limb
- ldws -12(%r31),lo0 ; word at -12 is the low half
- sub s0,lo0,s0
- add s0,lo0,%r0 ; invert cy
- addib,< -1,size,L$few_limbs ; one limb done; fewer than 4 left -> tail loop (%r3-%r7 still untouched)
- stws,ma s0,4(res_ptr) ; (delay slot) store the limb, res_ptr += 4
-
-; start software pipeline ----------------------------------------------------
-L$0 fldds,ma 8(s1_ptr),%fr4 ; two limbs per 8-byte load, s1_ptr += 8
- fldds,ma 8(s1_ptr),%fr8
-
- xmpyu %fr4L,%fr31R,%fr5
- xmpyu %fr4R,%fr31R,%fr6
- xmpyu %fr8L,%fr31R,%fr9
- xmpyu %fr8R,%fr31R,%fr10
-; spill the four 64-bit products to the scratch area
- fstds %fr5,-16(%r31)
- fstds %fr6,-8(%r31)
- fstds %fr9,0(%r31)
- fstds %fr10,8(%r31)
-; reload them as (hi, lo) word pairs in integer registers
- ldws -16(%r31),hi0
- ldws -12(%r31),lo0
- ldws -8(%r31),hi1 ; hi1 shares %r23 with s2_limb; the multiplier is already in %fr31R
- ldws -4(%r31),lo1
- ldws 0(%r31),hi2
- ldws 4(%r31),lo2
- ldws 8(%r31),hi3
- ldws 12(%r31),lo3
-; fold the carry limb, the pending cy and each high word into the next low word
- addc lo0,cylimb,lo0
- addc lo1,hi0,lo1
- addc lo2,hi1,lo2
- addc lo3,hi2,lo3
-
- addib,< -4,size,L$end ; no further full group of 4 -> drain the pipeline
- addc %r0,hi3,cylimb ; propagate carry into cylimb
-; main loop ------------------------------------------------------------------
-L$loop fldds,ma 8(s1_ptr),%fr4 ; next four multiplicand limbs
- fldds,ma 8(s1_ptr),%fr8
-
- ldws 0(res_ptr),s0 ; res limbs matching the group multiplied on the previous pass
- xmpyu %fr4L,%fr31R,%fr5
- ldws 4(res_ptr),s1
- xmpyu %fr4R,%fr31R,%fr6
- ldws 8(res_ptr),s2
- xmpyu %fr8L,%fr31R,%fr9
- ldws 12(res_ptr),s3
- xmpyu %fr8R,%fr31R,%fr10
-; spill the new products while subtracting the previous group from res
- fstds %fr5,-16(%r31)
- sub s0,lo0,s0
- fstds %fr6,-8(%r31)
- subb s1,lo1,s1
- fstds %fr9,0(%r31)
- subb s2,lo2,s2
- fstds %fr10,8(%r31)
- subb s3,lo3,s3
- subb %r0,%r0,lo0 ; these two insns ... (lo0 is dead here and reloaded below)
- add lo0,lo0,%r0 ; ... just invert cy
-
- ldws -16(%r31),hi0
- ldws -12(%r31),lo0
- ldws -8(%r31),hi1
- ldws -4(%r31),lo1
- ldws 0(%r31),hi2
- ldws 4(%r31),lo2
- ldws 8(%r31),hi3
- ldws 12(%r31),lo3
-; build the next subtrahends, interleaved with stores of the finished res limbs
- addc lo0,cylimb,lo0
- stws,ma s0,4(res_ptr)
- addc lo1,hi0,lo1
- stws,ma s1,4(res_ptr)
- addc lo2,hi1,lo2
- stws,ma s2,4(res_ptr)
- addc lo3,hi2,lo3
- stws,ma s3,4(res_ptr)
-
- addib,>= -4,size,L$loop ; repeat while another full group of 4 remains
- addc %r0,hi3,cylimb ; propagate carry into cylimb
-; finish software pipeline ---------------------------------------------------
-L$end ldws 0(res_ptr),s0 ; the last multiplied group still has to be subtracted
- ldws 4(res_ptr),s1
- ldws 8(res_ptr),s2
- ldws 12(res_ptr),s3
-
- sub s0,lo0,s0
- stws,ma s0,4(res_ptr)
- subb s1,lo1,s1
- stws,ma s1,4(res_ptr)
- subb s2,lo2,s2
- stws,ma s2,4(res_ptr)
- subb s3,lo3,s3
- stws,ma s3,4(res_ptr)
- subb %r0,%r0,lo0 ; these two insns ...
- add lo0,lo0,%r0 ; ... invert cy
-
-; restore callee-saves registers ---------------------------------------------
- ldw -96(%r30),%r3
- ldw -92(%r30),%r4
- ldw -88(%r30),%r5
- ldw -84(%r30),%r6
- ldw -80(%r30),%r7
-; Tail: at most 3 limbs remain (also reached directly when fewer than 4 limbs are left); cy and cylimb are live here.
-L$few_limbs
- addib,=,n 4,size,L$ret ; undo the -4 bias; nothing left -> return
-L$loop2 fldws,ma 4(s1_ptr),%fr4 ; one limb per pass, s1_ptr += 4
- ldws 0(res_ptr),s0
- xmpyu %fr4,%fr31R,%fr5
- fstds %fr5,-16(%r30) ; scratch here is the slot the multiplier was parked in
- ldws -16(%r30),hi0
- ldws -12(%r30),lo0
- addc lo0,cylimb,lo0 ; low word + carry limb + pending cy
- addc %r0,hi0,cylimb ; new carry limb = high word + cy
- sub s0,lo0,s0
- add s0,lo0,%r0 ; invert cy
- stws,ma s0,4(res_ptr)
- addib,<> -1,size,L$loop2 ; until size reaches 0; cy is expected to survive into the next addc
- nop ; (delay slot)
-
-L$ret addc %r0,cylimb,cylimb ; fold the pending cy into the carry limb
- bv 0(%r2) ; branch to the address held in %r2
- ldo -128(%r30),%r30 ; (delay slot) release the 128-byte frame
-
- .exit
- .procend