1 dnl Alpha EV5 __gmpn_add_n -- Add two limb vectors of the same length > 0 and
2 dnl store sum in a third limb vector.
4 dnl Copyright (C) 1995, 1999, 2000 Free Software Foundation, Inc.
6 dnl This file is part of the GNU MP Library.
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of the GNU Lesser General Public License as published by
10 dnl the Free Software Foundation; either version 2.1 of the License, or (at your
11 dnl option) any later version.
13 dnl The GNU MP Library is distributed in the hope that it will be useful, but
14 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 dnl License for more details.
18 dnl You should have received a copy of the GNU Lesser General Public License
19 dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20 dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21 dnl MA 02111-1307, USA.
23 include(`../config.m4')
C Instruction stream of the add routine as visible here: limbs are added
C pairwise and a carry is propagated from limb to limb in r25.
C
C Register roles, as named by the existing per-line comments:
C   r16 = res_ptr, r17 = s1_ptr, r18 = s2_ptr, r19 = loop count
C   r0-r3 and r4-r7 = inputs of the 1st..4th main adds
C   r20-r23 = results of the 1st..4th carry adds
C   r28 = result of a main add before the carry-in is added
C   r8  = carry out of a main add
C   r25 = carry out of a carry add and, after the bis, the combined carry
C
C Carry idiom used throughout: after x = a + b, cmpult x,b yields 1 exactly
C when the add wrapped.  Each limb takes two adds, the main add and then the
C carry-in add, and at most one of the two can wrap, so the two carry bits are
C merged with bis, which is a bitwise OR.
C
C NOTE(review): no load, store, backward branch or return instruction appears
C in this view, so where r0-r7 are filled and where r20-r23 are written cannot
C be confirmed from here -- check against the complete file.
33 bis r31,r31,r25 C clear cy
C The count is biased by -4 so that a negative value means fewer than 4 limbs.
34 subq r19,4,r19 C decr loop cnt
35 blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop
36 C Start software pipeline for 1st loop
41 addq r17,32,r17 C update s1_ptr
C The 1st limb has no carry-in yet since r25 was just cleared, so a single add
C writes r20 directly and a single cmpult gives the carry.
43 addq r0,r4,r20 C 1st main add
45 subq r19,4,r19 C decr loop cnt
47 cmpult r20,r0,r25 C compute cy from last add
C 2nd limb: only the carry of the main add is computed here, into r8.  The
C carry of the carry add is computed by the first instruction at $Loop or at
C $Lend1, whichever is reached next.
49 addq r1,r5,r28 C 2nd main add
50 addq r18,32,r18 C update s2_ptr
51 addq r28,r25,r21 C 2nd carry add
52 cmpult r28,r5,r8 C compute cy from last add
53 blt r19,$Lend1 C if less than 4 limbs remain, jump
54 C 1st loop handles groups of 4 limbs in a software pipeline
C Each pass finishes limbs 2, 3 and 4 of the current group, then starts limbs
C 1 and 2 of the next group; the pointers advance by 32 bytes per group.
C NOTE(review): the branch that closes this loop is not visible in this view.
56 $Loop: cmpult r21,r28,r25 C compute cy from last add
58 bis r8,r25,r25 C combine cy from the two adds
60 addq r2,r6,r28 C 3rd main add
62 addq r28,r25,r22 C 3rd carry add
64 cmpult r28,r6,r8 C compute cy from last add
65 cmpult r22,r28,r25 C compute cy from last add
67 bis r8,r25,r25 C combine cy from the two adds
69 addq r3,r7,r28 C 4th main add
70 addq r28,r25,r23 C 4th carry add
71 cmpult r28,r7,r8 C compute cy from last add
72 cmpult r23,r28,r25 C compute cy from last add
73 addq r17,32,r17 C update s1_ptr
74 bis r8,r25,r25 C combine cy from the two adds
75 addq r16,32,r16 C update res_ptr
C Next group: unlike the pipeline start, the 1st limb now has a carry-in.
76 addq r0,r4,r28 C 1st main add
78 addq r25,r28,r20 C 1st carry add
80 cmpult r28,r4,r8 C compute cy from last add
82 cmpult r20,r28,r25 C compute cy from last add
84 bis r8,r25,r25 C combine cy from the two adds
85 subq r19,4,r19 C decr loop cnt
87 addq r1,r5,r28 C 2nd main add
89 addq r25,r28,r21 C 2nd carry add
90 addq r18,32,r18 C update s2_ptr
91 cmpult r28,r5,r8 C compute cy from last add
93 C Finish software pipeline for 1st loop
C Same sequence as the first part of the loop body: completes the pending 2nd
C limb carry, then the 3rd and 4th limbs of the last full group.
94 $Lend1: cmpult r21,r28,r25 C compute cy from last add
95 bis r8,r25,r25 C combine cy from the two adds
96 addq r2,r6,r28 C 3rd main add
97 addq r28,r25,r22 C 3rd carry add
98 cmpult r28,r6,r8 C compute cy from last add
99 cmpult r22,r28,r25 C compute cy from last add
101 bis r8,r25,r25 C combine cy from the two adds
103 addq r3,r7,r28 C 4th main add
104 addq r28,r25,r23 C 4th carry add
105 cmpult r28,r7,r8 C compute cy from last add
106 cmpult r23,r28,r25 C compute cy from last add
107 bis r8,r25,r25 C combine cy from the two adds
108 addq r16,32,r16 C update res_ptr
C Undo the -4 bias so that r19 again counts the limbs still to be added.
111 $Lend2: addq r19,4,r19 C restore loop cnt
113 C Start software pipeline for 2nd loop
118 C 2nd loop handles remaining 1-3 limbs
C One limb per pass, using the same main add, carry add and carry merge.
C NOTE(review): the loop branch and any exit test are not visible in this view.
120 $Loop0: addq r0,r4,r28 C main add
122 cmpult r28,r4,r8 C compute cy from last add
124 addq r28,r25,r20 C carry add
128 cmpult r20,r28,r25 C compute cy from last add
129 subq r19,1,r19 C decr loop cnt
130 bis r8,r25,r25 C combine cy from the two adds
C Final limb: same computation as one pass of the 2nd loop, without the
C count update.
133 $Lend0: addq r0,r4,r28 C main add
134 addq r28,r25,r20 C carry add
135 cmpult r28,r4,r8 C compute cy from last add
136 cmpult r20,r28,r25 C compute cy from last add
138 bis r8,r25,r25 C combine cy from the two adds
C r31 supplies zero here, as in the initial clear of r25, so this bis copies
C the final carry from r25 into r0.
140 $Lret: bis r25,r31,r0 C return cy