remove empty dir
[ghc-hetmet.git] / rts / gmp / mpn / alpha / ev5 / sub_n.asm
1 dnl  Alpha EV5 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0
2 dnl  and store difference in a third limb vector.
3
4 dnl  Copyright (C) 1995, 1999, 2000 Free Software Foundation, Inc.
5
6 dnl  This file is part of the GNU MP Library.
7
8 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl  it under the terms of the GNU Lesser General Public License as published by
10 dnl  the Free Software Foundation; either version 2.1 of the License, or (at your
11 dnl  option) any later version.
12
13 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
14 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 dnl  License for more details.
17
18 dnl  You should have received a copy of the GNU Lesser General Public License
19 dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
20 dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21 dnl  MA 02111-1307, USA.
22
23 include(`../config.m4')
24
25 dnl  INPUT PARAMETERS
26 dnl  res_ptr    r16
27 dnl  s1_ptr     r17
28 dnl  s2_ptr     r18
29 dnl  size       r19
30
31 ASM_START()
32 PROLOGUE(mpn_sub_n)
33         bis     r31,r31,r25             C clear cy: no borrow into the first limb
34         subq    r19,4,r19               C decr loop cnt by one group of 4 limbs
35         blt     r19,$Lend2              C if less than 4 limbs, goto 2nd loop
36 C Start software pipeline for 1st loop: r0-r3 = s2 limbs, r4-r7 = s1 limbs, r20-r23 = results, r25 = cy
37         ldq     r0,0(r18)               C load s2 limb 0
38         ldq     r4,0(r17)               C load s1 limb 0
39         ldq     r1,8(r18)               C load s2 limb 1
40         ldq     r5,8(r17)               C load s1 limb 1
41         addq    r17,32,r17              C update s1_ptr (later s1 loads use negative offsets)
42         ldq     r2,16(r18)              C load s2 limb 2
43         subq    r4,r0,r20               C 1st main subtract (cy is still 0 so no carry subtract)
44         ldq     r3,24(r18)              C load s2 limb 3
45         subq    r19,4,r19               C decr loop cnt
46         ldq     r6,-16(r17)             C load s1 limb 2
47         cmpult  r4,r0,r25               C cy = borrow out of 1st subtract
48         ldq     r7,-8(r17)              C load s1 limb 3
49         subq    r5,r1,r28               C 2nd main subtract
50         addq    r18,32,r18              C update s2_ptr
51         subq    r28,r25,r21             C 2nd carry subtract
52         cmpult  r5,r1,r8                C r8 = borrow from 2nd main subtract
53         blt     r19,$Lend1              C if less than 4 limbs remain, jump
54 C 1st loop handles groups of 4 limbs in a software pipeline; r20 and r21 are computed but not yet stored here
55         ALIGN(16)
56 $Loop:  cmpult  r28,r25,r25             C cy = borrow from 2nd carry subtract
57         ldq     r0,0(r18)               C load s2 limb 0 of next group
58         bis     r8,r25,r25              C combine cy from the two subtracts
59         ldq     r1,8(r18)               C load s2 limb 1 of next group
60         subq    r6,r2,r28               C 3rd main subtract
61         ldq     r4,0(r17)               C load s1 limb 0 of next group
62         subq    r28,r25,r22             C 3rd carry subtract
63         ldq     r5,8(r17)               C load s1 limb 1 of next group
64         cmpult  r6,r2,r8                C r8 = borrow from 3rd main subtract
65         cmpult  r28,r25,r25             C cy = borrow from 3rd carry subtract
66         stq     r20,0(r16)              C store 1st result limb
67         bis     r8,r25,r25              C combine cy from the two subtracts
68         stq     r21,8(r16)              C store 2nd result limb
69         subq    r7,r3,r28               C 4th main subtract
70         subq    r28,r25,r23             C 4th carry subtract
71         cmpult  r7,r3,r8                C r8 = borrow from 4th main subtract
72         cmpult  r28,r25,r25             C cy = borrow from 4th carry subtract
73                 addq    r17,32,r17              C update s1_ptr
74         bis     r8,r25,r25              C combine cy from the two subtracts
75                 addq    r16,32,r16              C update res_ptr
76         subq    r4,r0,r28               C 1st main subtract of next group
77         ldq     r2,16(r18)              C load s2 limb 2 of next group
78         subq    r28,r25,r20             C 1st carry subtract
79         ldq     r3,24(r18)              C load s2 limb 3 of next group
80         cmpult  r4,r0,r8                C r8 = borrow from 1st main subtract
81         ldq     r6,-16(r17)             C load s1 limb 2 of next group
82         cmpult  r28,r25,r25             C cy = borrow from 1st carry subtract
83         ldq     r7,-8(r17)              C load s1 limb 3 of next group
84         bis     r8,r25,r25              C combine cy from the two subtracts
85         subq    r19,4,r19               C decr loop cnt
86         stq     r22,-16(r16)            C store 3rd result limb (res_ptr already updated)
87         subq    r5,r1,r28               C 2nd main subtract
88         stq     r23,-8(r16)             C store 4th result limb
89         subq    r28,r25,r21             C 2nd carry subtract
90                 addq    r18,32,r18              C update s2_ptr
91         cmpult  r5,r1,r8                C r8 = borrow from 2nd main subtract
92         bge     r19,$Loop               C loop while at least 4 more limbs remain
93 C Finish software pipeline for 1st loop: drain the group already loaded, no further loads
94 $Lend1: cmpult  r28,r25,r25             C cy = borrow from 2nd carry subtract
95         bis     r8,r25,r25              C combine cy from the two subtracts
96         subq    r6,r2,r28               C 3rd main subtract
97         subq    r28,r25,r22             C 3rd carry subtract
98         cmpult  r6,r2,r8                C r8 = borrow from 3rd main subtract
99         cmpult  r28,r25,r25             C cy = borrow from 3rd carry subtract
100         stq     r20,0(r16)              C store 1st result limb
101         bis     r8,r25,r25              C combine cy from the two subtracts
102         stq     r21,8(r16)              C store 2nd result limb
103         subq    r7,r3,r28               C 4th main subtract
104         subq    r28,r25,r23             C 4th carry subtract
105         cmpult  r7,r3,r8                C r8 = borrow from 4th main subtract
106         cmpult  r28,r25,r25             C cy = borrow from 4th carry subtract
107         bis     r8,r25,r25              C combine cy from the two subtracts
108         addq    r16,32,r16              C update res_ptr
109         stq     r22,-16(r16)            C store 3rd result limb (res_ptr already updated)
110         stq     r23,-8(r16)             C store 4th result limb
111 $Lend2: addq    r19,4,r19               C restore loop cnt to the number of limbs left
112         beq     r19,$Lret               C nothing left: return cy
113 C Start software pipeline for 2nd loop: r0 = s2 limb, r4 = s1 limb
114         ldq     r0,0(r18)               C load next s2 limb
115         ldq     r4,0(r17)               C load next s1 limb
116         subq    r19,1,r19               C decr loop cnt
117         beq     r19,$Lend0              C only one limb left: skip the loop
118 C 2nd loop handles remaining 1-3 limbs, one limb per iteration
119         ALIGN(16)
120 $Loop0: subq    r4,r0,r28               C main subtract
121         cmpult  r4,r0,r8                C r8 = borrow from main subtract
122         ldq     r0,8(r18)               C preload following s2 limb
123         ldq     r4,8(r17)               C preload following s1 limb
124         subq    r28,r25,r20             C carry subtract
125         addq    r18,8,r18               C update s2_ptr
126         addq    r17,8,r17               C update s1_ptr
127         stq     r20,0(r16)              C store result limb
128         cmpult  r28,r25,r25             C cy = borrow from carry subtract
129         subq    r19,1,r19               C decr loop cnt
130         bis     r8,r25,r25              C combine cy from the two subtracts
131         addq    r16,8,r16               C update res_ptr
132         bne     r19,$Loop0              C loop until one preloaded limb pair is left
133 $Lend0: subq    r4,r0,r28               C main subtract for the last limb
134         subq    r28,r25,r20             C carry subtract
135         cmpult  r4,r0,r8                C r8 = borrow from main subtract
136         cmpult  r28,r25,r25             C cy = borrow from carry subtract
137         stq     r20,0(r16)              C store last result limb
138         bis     r8,r25,r25              C combine cy from the two subtracts
139
140 $Lret:  bis     r25,r31,r0              C return cy: r0 = final borrow
141         ret     r31,(r26),1             C return through the address held in r26
142 EPILOGUE(mpn_sub_n)
143 ASM_END()