Reorganisation of the source tree
[ghc-hetmet.git] / rts / gmp / mpn / alpha / ev5 / rshift.asm
1 dnl  Alpha EV5 __gmpn_rshift -- Shift a number right.
2
3 dnl  Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc.
4
5 dnl  This file is part of the GNU MP Library.
6
7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl  it under the terms of the GNU Lesser General Public License as published by
9 dnl  the Free Software Foundation; either version 2.1 of the License, or (at your
10 dnl  option) any later version.
11
12 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15 dnl  License for more details.
16
17 dnl  You should have received a copy of the GNU Lesser General Public License
18 dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
19 dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20 dnl  MA 02111-1307, USA.
21
22 include(`../config.m4')
23
24 dnl  INPUT PARAMETERS
25 dnl  res_ptr    r16     (all result limbs are stored through this pointer)
26 dnl  s1_ptr     r17     (all source limbs are loaded through this pointer)
27 dnl  size       r18     (limb count)
28 dnl  cnt        r19     (right-shift count in bits)
29 dnl  result     r0      first source limb shifted left by -cnt (see the sll into r0)
30 dnl  This code runs at 3.25 cycles/limb on the EV5.
31 dnl  NOTE(review): assumes size >= 1, the first limb is loaded unconditionally -- confirm with callers
32 ASM_START()
33 PROLOGUE(mpn_rshift)
34         ldq     r4,0(r17)       C load first limb
35         subq    r31,r19,r20     C r20 = -cnt; shifts use the count mod 64, so this acts as 64-cnt
36         subq    r18,1,r18       C r18 = size-1
37         and     r18,4-1,r28     C number of limbs in first loop
38         sll     r4,r20,r0       C compute function result
39
40         beq     r28,$L0         C size-1 is a multiple of 4: skip the first loop
41         subq    r18,r28,r18     C r18 = count left for the 4-way code, a multiple of 4
42
43         ALIGN(8)
44 $Loop0: ldq     r3,8(r17)       C r3 = next source limb
45         addq    r16,8,r16
46         srl     r4,r19,r5       C r5 = current limb >> cnt
47         addq    r17,8,r17
48         subq    r28,1,r28
49         sll     r3,r20,r6       C r6 = next limb << -cnt
50         bis     r3,r3,r4        C next limb becomes the current limb
51         bis     r5,r6,r8        C merge both parts into one result limb
52         stq     r8,-8(r16)      C offset -8 because r16 was already advanced
53         bne     r28,$Loop0
54
55 $L0:    srl     r4,r19,r24      C r24 = current limb >> cnt, pending part of the next result limb
56         beq     r18,$Lend       C no limbs left: r24 alone is the last result limb
57 C warm up phase 1: load the next four source limbs
58         ldq     r1,8(r17)
59         subq    r18,4,r18
60         ldq     r2,16(r17)
61         ldq     r3,24(r17)
62         ldq     r4,32(r17)
63         beq     r18,$Lend1      C only these four limbs remain
64 C warm up phase 2: shift the four loaded limbs while loading the following four
65         sll     r1,r20,r7
66         srl     r1,r19,r21
67         sll     r2,r20,r8
68         ldq     r1,40(r17)
69         srl     r2,r19,r22
70         ldq     r2,48(r17)
71         sll     r3,r20,r5
72         bis     r7,r24,r7       C r7 = first result limb of the group, uses pending r24
73         srl     r3,r19,r23
74         bis     r8,r21,r8       C r8 = second result limb of the group
75         sll     r4,r20,r6
76         ldq     r3,56(r17)
77         srl     r4,r19,r24      C r24 = new pending part, consumed by the next group
78         ldq     r4,64(r17)
79         subq    r18,4,r18
80         beq     r18,$Lend2      C no further group to load: drain the pipeline
81         ALIGN(16)
82 C main loop: each iteration stores four result limbs and loads four source limbs
83 $Loop:  stq     r7,0(r16)
84         bis     r5,r22,r5       C r5 = third result limb of the previous group
85         stq     r8,8(r16)
86         bis     r6,r23,r6       C r6 = fourth result limb of the previous group
87
88         sll     r1,r20,r7
89         subq    r18,4,r18
90         srl     r1,r19,r21
91         unop    C ldq   r31,-96(r17)
92
93         sll     r2,r20,r8
94         ldq     r1,72(r17)
95         srl     r2,r19,r22
96         ldq     r2,80(r17)
97
98         stq     r5,16(r16)
99         bis     r7,r24,r7       C uses r24 from the previous group
100         stq     r6,24(r16)
101         bis     r8,r21,r8
102
103         sll     r3,r20,r5
104         unop    C ldq   r31,-96(r17)
105         srl     r3,r19,r23
106         addq    r16,32,r16      C advance res_ptr past the four limbs just stored
107
108         sll     r4,r20,r6
109         ldq     r3,88(r17)
110         srl     r4,r19,r24      C r24 = pending part for the next group
111         ldq     r4,96(r17)
112
113         addq    r17,32,r17      C advance s1_ptr by four limbs
114         bne     r18,$Loop
115 C cool down phase 2/1: store the finished group and shift the last four loaded limbs
116 $Lend2: stq     r7,0(r16)
117         bis     r5,r22,r5
118         stq     r8,8(r16)
119         bis     r6,r23,r6
120         sll     r1,r20,r7
121         srl     r1,r19,r21
122         sll     r2,r20,r8
123         srl     r2,r19,r22
124         stq     r5,16(r16)
125         bis     r7,r24,r7
126         stq     r6,24(r16)
127         bis     r8,r21,r8
128         sll     r3,r20,r5
129         srl     r3,r19,r23
130         sll     r4,r20,r6
131         srl     r4,r19,r24
132 C cool down phase 2/2: store the final group of four result limbs
133         stq     r7,32(r16)
134         bis     r5,r22,r5
135         stq     r8,40(r16)
136         bis     r6,r23,r6
137         stq     r5,48(r16)
138         stq     r6,56(r16)
139 C cool down phase 2/3: store the last result limb, which has only the srl part
140         stq     r24,64(r16)
141         ret     r31,(r26),1
142
143 C cool down phase 1/1: shift the four limbs loaded in warm up phase 1
144 $Lend1: sll     r1,r20,r7
145         srl     r1,r19,r21
146         sll     r2,r20,r8
147         srl     r2,r19,r22
148         sll     r3,r20,r5
149         bis     r7,r24,r7       C uses r24 computed at $L0
150         srl     r3,r19,r23
151         bis     r8,r21,r8
152         sll     r4,r20,r6
153         srl     r4,r19,r24
154 C cool down phase 1/2: store four result limbs, then the last limb from r24
155         stq     r7,0(r16)
156         bis     r5,r22,r5
157         stq     r8,8(r16)
158         bis     r6,r23,r6
159         stq     r5,16(r16)
160         stq     r6,24(r16)
161         stq     r24,32(r16)
162         ret     r31,(r26),1
163
164 $Lend:  stq     r24,0(r16)      C last result limb = last source limb >> cnt
165         ret     r31,(r26),1
166 EPILOGUE(mpn_rshift)
167 ASM_END()