! SPARC v9 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
! store difference in a third limb vector.

! Copyright (C) 1999, 2000 Free Software Foundation, Inc.

! This file is part of the GNU MP Library.

! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Lesser General Public License as published by
! the Free Software Foundation; either version 2.1 of the License, or (at your
! option) any later version.

! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
! License for more details.

! You should have received a copy of the GNU Lesser General Public License
! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! MA 02111-1307, USA.


! INPUT PARAMETERS
! res_ptr	%o0
! s1_ptr	%o1
! s2_ptr	%o2
! size		%o3

include(`../config.m4')

ASM_START()
	.register	%g2,#scratch
	.register	%g3,#scratch
PROLOGUE(mpn_sub_n)

! 12 mem ops >= 12 cycles
! 8 shift insns >= 8 cycles
! 8 subccc, executing alone, +8 cycles
! Unrolling not mandatory... perhaps 2-way is best?
! Put one ldx/stx and one s?lx per issue tuple, fill with pointer arithmetic
! and loop control.
! All in all, it runs at 5 cycles/limb.

! The v9 subccc takes its carry-in from the 32-bit icc.c, not from xcc.c, so
! each 64-bit limb subtraction is followed by a dummy subccc of the shifted-out
! high 32-bit halves, which leaves the full 64-bit borrow in icc.c for the
! next limb.  Loop control uses add and br{lz,gz,gez,z}, which leave the
! condition codes untouched, so the borrow chain survives across iterations.
! The !- markers below delimit the intended issue tuples.

	save	%sp,-160,%sp

	addcc	%g0,%g0,%g0		! clear carry (icc.c = 0)

	add	%i3,-4,%i3
	brlz,pn	%i3,L(there)		! fewer than 4 limbs: do them one by one
	nop

	ldx	[%i1+0],%l0		! preload first 4 limbs of each operand
	ldx	[%i2+0],%l4
	ldx	[%i1+8],%l1
	ldx	[%i2+8],%l5
	ldx	[%i1+16],%l2
	ldx	[%i2+16],%l6
	ldx	[%i1+24],%l3
	ldx	[%i2+24],%l7
	add	%i1,32,%i1
	add	%i2,32,%i2

	add	%i3,-4,%i3
	brlz,pn	%i3,L(skip)		! only the preloaded limbs remain
	nop
	b	L(loop1)		! jump instead of executing many NOPs
	nop

	ALIGN(32)
!--------- Start main loop ---------
L(loop1):
	subccc	%l0,%l4,%g1		!-
	srlx	%l0,32,%o0
	ldx	[%i1+0],%l0		!-
	srlx	%l4,32,%o4
	ldx	[%i2+0],%l4		!-
	subccc	%o0,%o4,%g0		!-
	subccc	%l1,%l5,%g2		!-
	srlx	%l1,32,%o1
	ldx	[%i1+8],%l1		!-
	srlx	%l5,32,%o5
	ldx	[%i2+8],%l5		!-
	subccc	%o1,%o5,%g0		!-
	subccc	%l2,%l6,%g3		!-
	srlx	%l2,32,%o2
	ldx	[%i1+16],%l2		!-
	srlx	%l6,32,%g5		! asymmetry
	ldx	[%i2+16],%l6		!-
	subccc	%o2,%g5,%g0		!-
	subccc	%l3,%l7,%g4		!-
	srlx	%l3,32,%o3
	ldx	[%i1+24],%l3
	add	%i1,32,%i1		!-
	srlx	%l7,32,%o7
	ldx	[%i2+24],%l7
	add	%i2,32,%i2		!-
	subccc	%o3,%o7,%g0		!-
	stx	%g1,[%i0+0]		!-
	stx	%g2,[%i0+8]		!-
	stx	%g3,[%i0+16]
	add	%i3,-4,%i3		!-
	stx	%g4,[%i0+24]
	add	%i0,32,%i0
	brgez,pt %i3,L(loop1)
	nop
!--------- End main loop ---------
L(skip):
	subccc	%l0,%l4,%g1		! same pattern, but nothing left to load
	srlx	%l0,32,%o0
	srlx	%l4,32,%o4
	subccc	%o0,%o4,%g0
	subccc	%l1,%l5,%g2
	srlx	%l1,32,%o1
	srlx	%l5,32,%o5
	subccc	%o1,%o5,%g0
	subccc	%l2,%l6,%g3
	srlx	%l2,32,%o2
	srlx	%l6,32,%g5		! asymmetry
	subccc	%o2,%g5,%g0
	subccc	%l3,%l7,%g4
	srlx	%l3,32,%o3
	srlx	%l7,32,%o7
	subccc	%o3,%o7,%g0
	stx	%g1,[%i0+0]
	stx	%g2,[%i0+8]
	stx	%g3,[%i0+16]
	stx	%g4,[%i0+24]
	add	%i0,32,%i0

L(there):
	add	%i3,4,%i3		! restore count of leftover limbs (0-3)
	brz,pt	%i3,L(end)
	nop

L(loop2):
	ldx	[%i1+0],%l0
	add	%i1,8,%i1
	ldx	[%i2+0],%l4
	add	%i2,8,%i2
	srlx	%l0,32,%g2
	srlx	%l4,32,%g3
	subccc	%l0,%l4,%g1
	subccc	%g2,%g3,%g0		! propagate borrow into icc.c
	stx	%g1,[%i0+0]
	add	%i0,8,%i0
	add	%i3,-1,%i3
	brgz,pt	%i3,L(loop2)
	nop

L(end):
	addc	%g0,%g0,%i0		! return the final borrow
	ret
	restore
EPILOGUE(mpn_sub_n)
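
! For reference, a minimal C sketch of the operation this routine performs,
! assuming 64-bit limbs.  The function name and the plain C types are
! illustrative only; this is not GMP's generic C implementation.
!
!   #include <stdint.h>
!   #include <stddef.h>
!
!   /* Subtract {s2p, n} from {s1p, n}, store each 64-bit difference at rp,
!      and return the final borrow (0 or 1).  */
!   uint64_t
!   sub_n_ref (uint64_t *rp, const uint64_t *s1p, const uint64_t *s2p,
!              size_t n)
!   {
!     uint64_t cy = 0;
!     for (size_t i = 0; i < n; i++)
!       {
!         uint64_t a = s1p[i], b = s2p[i];
!         rp[i] = a - b - cy;
!         /* Borrow out: a < b, or a == b while a borrow was pending.  */
!         cy = (a < b) | ((a == b) & cy);
!       }
!     return cy;
!   }
!
! The assembly above computes the same borrow chain, but keeps it in the
! condition codes rather than in a register, at the cost of one dummy
! subtraction per limb to work around subccc's 32-bit carry-in.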