dnl SPARC 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and dnl subtract the result from a second limb vector. dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. dnl The GNU MP Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published dnl by the Free Software Foundation; either version 2.1 of the License, or (at dnl your option) any later version. dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. dnl You should have received a copy of the GNU Lesser General Public License dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, dnl MA 02111-1307, USA. include(`../config.m4') C INPUT PARAMETERS C res_ptr i0 C s1_ptr i1 C size i2 C s2_limb i3 ASM_START() .register %g2,#scratch .register %g3,#scratch PROLOGUE(mpn_submul_1) save %sp,-256,%sp C We store 0.0 in f10 and keep it invariant accross thw two C function calls below. Note that this is not ABI conformant, C but since the functions are local, that's acceptable. ifdef(`PIC', `L(pc): rd %pc,%o7 ld [%o7+L(noll)-L(pc)],%f10', ` sethi %hh(L(noll)),%g2 sethi %lm(L(noll)),%g1 or %g2,%hm(L(noll)),%g2 or %g1,%lo(L(noll)),%g1 sllx %g2,32,%g2 ld [%g1+%g2],%f10') sub %i1,%i0,%g1 srlx %g1,3,%g1 cmp %g1,%i2 bcc,pt %xcc,L(nooverlap) nop sllx %i2,3,%g2 C compute stack allocation byte count add %g2,15,%o0 and %o0,-16,%o0 sub %sp,%o0,%sp add %sp,2223,%o0 mov %i1,%o1 C copy s1_ptr to mpn_copyi's srcp call mpn_copyi mov %i2,%o2 C copy n to mpn_copyi's count parameter add %sp,2223,%i1 L(nooverlap): C First multiply-add with low 32 bits of s2_limb mov %i0,%o0 mov %i1,%o1 add %i2,%i2,%o2 call submull srl %i3,0,%o3 mov %o0,%l0 C keep carry-out from accmull C Now multiply-add with high 32 bits of s2_limb, unless it is zero. srlx %i3,32,%o3 brz,a,pn %o3,L(small) mov %o0,%i0 mov %i1,%o1 add %i2,%i2,%o2 call submulu add %i0,4,%o0 add %l0,%o0,%i0 L(small): ret restore %g0,%g0,%g0 EPILOGUE(mpn_submul_1) C Put a zero in the text segment to allow us to t the address C quickly when compiling for PIC TEXT ALIGN(4) L(noll): .word 0 define(`LO',`(+4)') define(`HI',`(-4)') define(`DLO',`(+4)') define(`DHI',`(-4)') define(`LOWPART') define(`E',`L(l.$1)') include_mpn(`sparc64/submul1h.asm') define(`DLO',`(-4)') define(`DHI',`(+4)') undefine(`LOWPART') define(`E',`L(u.$1)') include_mpn(`sparc64/submul1h.asm')