+++ /dev/null
-! SPARC v9 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
-! sum in a third limb vector.
-
-! Copyright (C) 1999, 2000 Free Software Foundation, Inc.
-
-! This file is part of the GNU MP Library.
-
-! The GNU MP Library is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 2.1 of the License, or (at your
-! option) any later version.
-
-! The GNU MP Library is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-! License for more details.
-
-! You should have received a copy of the GNU Lesser General Public License
-! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-! MA 02111-1307, USA.
-
-
-! INPUT PARAMETERS
-! res_ptr %o0
-! s1_ptr %o1
-! s2_ptr %o2
-! size %o3
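-
-! Roughly, in C (a sketch of the semantics only; the declared prototype is
-! the usual gmp.h one, mp_limb_t mpn_add_n (mp_ptr, mp_srcptr, mp_srcptr,
-! mp_size_t)):
-!
-!   mp_limb_t cy = 0;
-!   mp_size_t i;
-!   for (i = 0; i < size; i++)
-!     {
-!       mp_limb_t s = s1_ptr[i] + s2_ptr[i];
-!       res_ptr[i] = s + cy;
-!       cy = (s < s1_ptr[i]) | (s + cy < s);
-!     }
-!   return cy;   /* the carry out is the return value, left in %o0 */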
-
-include(`../config.m4')
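-! (config.m4 pulls in GMP's m4 asm support, which supplies the ASM_START,
-! PROLOGUE, EPILOGUE and ALIGN macros used below.)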
-
-ASM_START()
- .register %g2,#scratch
- .register %g3,#scratch
-PROLOGUE(mpn_add_n)
-
-! 12 mem ops >= 12 cycles
-! 8 shift insns >= 8 cycles
-! 8 addccc, executing alone, +8 cycles
-! Unrolling not mandatory...perhaps 2-way is best?
-! Put one ldx/stx and one s?lx per issue tuple, fill with pointer arith and loop ctl
-! All in all, it runs at 5 cycles/limb
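-!
-! Carry handling: addccc takes its carry-in from the 32-bit carry bit
-! (icc.c), not from xcc.c, so each limb is added with two addccc's.  The
-! full 64-bit addccc yields the low result (and the carry out of bit 31 in
-! icc.c); a second addccc on the high 32-bit halves (srlx by 32, result
-! discarded into %g0) turns that into the carry out of bit 63, which is the
-! carry-in for the next limb.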
-
- save %sp,-160,%sp
-
- addcc %g0,%g0,%g0 ! clear the carry flag (icc.c)
-
- add %i3,-4,%i3 ! size -= 4
- brlz,pn %i3,L(there) ! fewer than 4 limbs: only the cleanup loop is needed
- nop
-
- ldx [%i1+0],%l0
- ldx [%i2+0],%l4
- ldx [%i1+8],%l1
- ldx [%i2+8],%l5
- ldx [%i1+16],%l2
- ldx [%i2+16],%l6
- ldx [%i1+24],%l3
- ldx [%i2+24],%l7
- add %i1,32,%i1
- add %i2,32,%i2
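-
-! Four limbs from each operand are now in %l0-%l7; the loop below adds them
-! while fetching the next four.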
-
- add %i3,-4,%i3 ! account for the four limbs just loaded
- brlz,pn %i3,L(skip) ! fewer than 8 limbs in all: bypass the unrolled loop
- nop
- b L(loop1) ! jump instead of executing many NOPs
- nop
- ALIGN(32)
-!--------- Start main loop ---------
-L(loop1):
- addccc %l0,%l4,%g1
-!-
- srlx %l0,32,%o0
- ldx [%i1+0],%l0
-!-
- srlx %l4,32,%o4
- ldx [%i2+0],%l4
-!-
- addccc %o0,%o4,%g0
-!-
- addccc %l1,%l5,%g2
-!-
- srlx %l1,32,%o1
- ldx [%i1+8],%l1
-!-
- srlx %l5,32,%o5
- ldx [%i2+8],%l5
-!-
- addccc %o1,%o5,%g0
-!-
- addccc %l2,%l6,%g3
-!-
- srlx %l2,32,%o2
- ldx [%i1+16],%l2
-!-
- srlx %l6,32,%g5 ! asymmetry
- ldx [%i2+16],%l6
-!-
- addccc %o2,%g5,%g0
-!-
- addccc %l3,%l7,%g4
-!-
- srlx %l3,32,%o3
- ldx [%i1+24],%l3
- add %i1,32,%i1
-!-
- srlx %l7,32,%o7
- ldx [%i2+24],%l7
- add %i2,32,%i2
-!-
- addccc %o3,%o7,%g0
-!-
- stx %g1,[%i0+0]
-!-
- stx %g2,[%i0+8]
-!-
- stx %g3,[%i0+16]
- add %i3,-4,%i3
-!-
- stx %g4,[%i0+24]
- add %i0,32,%i0
-
- brgez,pt %i3,L(loop1)
- nop
-!--------- End main loop ---------
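-! Fold in the four limbs that were loaded ahead of the loop but not yet added.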
-L(skip):
- addccc %l0,%l4,%g1
- srlx %l0,32,%o0
- srlx %l4,32,%o4
- addccc %o0,%o4,%g0
- addccc %l1,%l5,%g2
- srlx %l1,32,%o1
- srlx %l5,32,%o5
- addccc %o1,%o5,%g0
- addccc %l2,%l6,%g3
- srlx %l2,32,%o2
- srlx %l6,32,%g5 ! asymmetry
- addccc %o2,%g5,%g0
- addccc %l3,%l7,%g4
- srlx %l3,32,%o3
- srlx %l7,32,%o7
- addccc %o3,%o7,%g0
- stx %g1,[%i0+0]
- stx %g2,[%i0+8]
- stx %g3,[%i0+16]
- stx %g4,[%i0+24]
- add %i0,32,%i0
-
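-! %i3 holds the number of leftover limbs minus 4; restore it and handle the
-! remaining 0-3 limbs one at a time.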
-L(there):
- add %i3,4,%i3
- brz,pt %i3,L(end)
- nop
-
-L(loop2):
- ldx [%i1+0],%l0
- add %i1,8,%i1
- ldx [%i2+0],%l4
- add %i2,8,%i2
- srlx %l0,32,%g2
- srlx %l4,32,%g3
- addccc %l0,%l4,%g1 ! low 64 bits of the sum, carry-in from icc.c
- addccc %g2,%g3,%g0 ! high halves: recompute the carry out of bit 63
- stx %g1,[%i0+0]
- add %i0,8,%i0
- add %i3,-1,%i3
- brgz,pt %i3,L(loop2)
- nop
-
-L(end): addc %g0,%g0,%i0 ! pick up the final carry; it becomes the return value
- ret
- restore
-EPILOGUE(mpn_add_n)