1 ; HP-PA 7100/7200 __mpn_submul_1 -- Multiply a limb vector with a limb and
2 ; subtract the result from a second limb vector.
4 ; Copyright (C) 1995 Free Software Foundation, Inc.
6 ; This file is part of the GNU MP Library.
8 ; The GNU MP Library is free software; you can redistribute it and/or modify
9 ; it under the terms of the GNU Library General Public License as published by
10 ; the Free Software Foundation; either version 2 of the License, or (at your
11 ; option) any later version.
13 ; The GNU MP Library is distributed in the hope that it will be useful, but
14 ; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 ; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
16 ; License for more details.
18 ; You should have received a copy of the GNU Library General Public License
19 ; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20 ; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
39 #define hi1 %r23 /* safe to reuse */
; ----------------------------------------------------------------------------
; __mpn_submul_1: multiply a limb vector by a single limb and subtract the
; product from a second limb vector (see the title comment of this file).
;
; NOTE(review): the symbolic names used below (s1_ptr, res_ptr, size, s2_limb,
; cylimb, s0..s3, lo0, hi3) are register aliases whose definitions are not
; visible in this excerpt; their roles are described here only as far as the
; visible instructions show.  Several instructions of the routine (entry label,
; some branch delay slots, the return sequence) are likewise not visible.
;
; Conventions relied on throughout:
;   * xmpyu forms an unsigned product in the FPU; the multiplier is always
;     %fr31R.
;   * An addib without ",n" is followed by an instruction that executes in its
;     delay slot.
;   * A write to %r0 discards the result; such instructions are used here only
;     for their effect on the carry bit.
; ----------------------------------------------------------------------------
; Export the entry symbol so other objects can link against it.
44 .export __mpn_submul_1
; Declares a 128-byte frame and that this routine makes no calls.
47 .callinfo frame=128,no_calls
; Move s2_limb to the FPU through memory: store it at -16(%r30) here and
; reload it into %fr31R two instructions later.
51 stws s2_limb,-16(%r30)
52 add %r0,%r0,cylimb ; clear cy and cylimb
; size -= 4; if the result is negative go to L$few_limbs (label not visible in
; this excerpt).  The fldws that follows sits in the delay slot.
53 addib,< -4,size,L$few_limbs
54 fldws -16(%r30),%fr31R
; Handle a single limb: load one word from s1_ptr (post-incrementing the
; pointer by 4) and multiply it by the s2_limb copy in %fr31R.
65 fldws,ma 4(s1_ptr),%fr4
67 xmpyu %fr4,%fr31R,%fr5
; The sum is thrown away into %r0; only the resulting carry bit is wanted.
72 add s0,lo0,%r0 ; invert cy
; size -= 1; branch to L$few_limbs if the result is negative.
73 addib,< -1,size,L$few_limbs
76 ; start software pipeline ----------------------------------------------------
; Load four limbs of s1 as two doublewords (s1_ptr advances by 8 each time)
; and start one multiply per 32-bit half (L/R) against %fr31R.
77 L$0 fldds,ma 8(s1_ptr),%fr4
78 fldds,ma 8(s1_ptr),%fr8
80 xmpyu %fr4L,%fr31R,%fr5
81 xmpyu %fr4R,%fr31R,%fr6
82 xmpyu %fr8L,%fr31R,%fr9
83 xmpyu %fr8R,%fr31R,%fr10
; size -= 4; if the result is negative skip the main loop and drain the
; pipeline at L$end.  The addc below is in the delay slot:
; cylimb = hi3 + carry.
104 addib,< -4,size,L$end
105 addc %r0,hi3,cylimb ; propagate carry into cylimb
106 ; main loop ------------------------------------------------------------------
; Each iteration loads the next four s1 limbs and starts their multiplies,
; then stores four result limbs.
107 L$loop fldds,ma 8(s1_ptr),%fr4
108 fldds,ma 8(s1_ptr),%fr8
111 xmpyu %fr4L,%fr31R,%fr5
113 xmpyu %fr4R,%fr31R,%fr6
115 xmpyu %fr8L,%fr31R,%fr9
117 xmpyu %fr8R,%fr31R,%fr10
; Carry inversion: subb leaves lo0 = 0 when carry was set and -1 when it was
; clear; adding lo0 to itself then produces a carry-out only in the -1 case.
127 subb %r0,%r0,lo0 ; these two insns ...
128 add lo0,lo0,%r0 ; ... just invert cy
; Store four result limbs, advancing res_ptr by 4 after each store.
140 stws,ma s0,4(res_ptr)
142 stws,ma s1,4(res_ptr)
144 stws,ma s2,4(res_ptr)
146 stws,ma s3,4(res_ptr)
; size -= 4; keep looping while the result is >= 0.  The addc in the delay
; slot folds the pending carry into cylimb (cylimb = hi3 + carry).
148 addib,>= -4,size,L$loop
149 addc %r0,hi3,cylimb ; propagate carry into cylimb
150 ; finish software pipeline ---------------------------------------------------
; Drain: no further s1 loads are visible here; the last group of four limbs is
; completed and stored.
151 L$end ldws 0(res_ptr),s0
157 stws,ma s0,4(res_ptr)
159 stws,ma s1,4(res_ptr)
161 stws,ma s2,4(res_ptr)
163 stws,ma s3,4(res_ptr)
; Same carry-inversion idiom as in the main loop.
164 subb %r0,%r0,lo0 ; these two insns ...
165 add lo0,lo0,%r0 ; ... invert cy
167 ; restore callee-saved registers ---------------------------------------------
; NOTE(review): the restore instructions themselves are not visible in this
; excerpt.
; size += 4, undoing the earlier -4 bias; if no limbs remain go straight to
; L$ret.  ",n" nullifies the following instruction when this forward branch is
; taken.
175 addib,=,n 4,size,L$ret
; Tail loop: process the remaining limbs one at a time.
176 L$loop2 fldws,ma 4(s1_ptr),%fr4
178 xmpyu %fr4,%fr31R,%fr5
185 add s0,lo0,%r0 ; invert cy
186 stws,ma s0,4(res_ptr)
; size -= 1; repeat until size reaches zero.
187 addib,<> -1,size,L$loop2
; Fold the final carry bit into cylimb (cylimb = cylimb + carry).
; NOTE(review): presumably cylimb is then returned to the caller; the return
; sequence is not visible here -- confirm.
190 L$ret addc %r0,cylimb,cylimb