Reorganisation of the source tree
[ghc-hetmet.git] / rts / gmp / mpn / alpha / ev5 / lshift.asm
1 dnl  Alpha EV5 __gmpn_lshift -- Shift a number left.
2
3 dnl  Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc.
4
5 dnl  This file is part of the GNU MP Library.
6
7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl  it under the terms of the GNU Lesser General Public License as published by
9 dnl  the Free Software Foundation; either version 2.1 of the License, or (at your
10 dnl  option) any later version.
11
12 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15 dnl  License for more details.
16
17 dnl  You should have received a copy of the GNU Lesser General Public License
18 dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
19 dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20 dnl  MA 02111-1307, USA.
21
22 include(`../config.m4')
23
24 dnl  INPUT PARAMETERS
25 dnl  res_ptr    r16     destination limb array (written with stq)
26 dnl  s1_ptr     r17     source limb array (read with ldq)
27 dnl  size       r18     number of limbs; the first limb is loaded unconditionally, so presumably >= 1
28 dnl  cnt        r19     shift count in bits; presumably 1..63 -- TODO confirm against callers
29 dnl  Function result (r0): the first loaded limb shifted right by -cnt, i.e. the bits shifted out at the top.
30 dnl  This code runs at 3.25 cycles/limb on the EV5.
31 dnl  Limbs are processed from the highest address down to the lowest; both pointers only decrease.
32 ASM_START()
33 PROLOGUE(mpn_lshift)
34         s8addq  r18,r17,r17     C make r17 point at end of s1
35         ldq     r4,-8(r17)      C load first limb (highest address, s1[size-1])
36         subq    r31,r19,r20     C r20 = 0 - cnt, the count used for all right shifts
37         s8addq  r18,r16,r16     C make r16 point at end of RES
38         subq    r18,1,r18       C r18 = size-1, limbs still to be loaded
39         and     r18,4-1,r28     C number of limbs in first loop, (size-1) mod 4
40         srl     r4,r20,r0       C compute function result: first limb >> (64-cnt), as Alpha shifts use count bits <5:0>
41 C Peel (size-1) mod 4 limbs one at a time so the remaining count is a multiple of 4.
42         beq     r28,$L0         C nothing to peel
43         subq    r18,r28,r18     C r18 = limbs left for the 4-way unrolled code
44
45         ALIGN(8)
46 $Loop0: ldq     r3,-16(r17)     C r3 = next lower limb
47         subq    r16,8,r16       C step RES pointer down one limb
48         sll     r4,r19,r5       C r5 = current limb << cnt
49         subq    r17,8,r17       C step s1 pointer down one limb
50         subq    r28,1,r28       C one limb fewer to peel
51         srl     r3,r20,r6       C r6 = bits of the next limb that move into this result limb
52         bis     r3,r3,r4        C r4 = r3, the next limb becomes the current one
53         bis     r5,r6,r8        C merge both parts
54         stq     r8,0(r16)       C store result limb
55         bne     r28,$Loop0
56
57 $L0:    sll     r4,r19,r24      C r24 = current limb << cnt, high part of the next result limb
58         beq     r18,$Lend       C no source limbs left, only r24 remains to be stored
59 C warm up phase 1: load the next four limbs into r1..r4
60         ldq     r1,-16(r17)
61         subq    r18,4,r18       C account for the four limbs being loaded
62         ldq     r2,-24(r17)
63         ldq     r3,-32(r17)
64         ldq     r4,-40(r17)
65         beq     r18,$Lend1      C those were the last four, finish without the pipelined loop
66 C warm up phase 2: shift the first group while loading the second group of four
67         srl     r1,r20,r7
68         sll     r1,r19,r21
69         srl     r2,r20,r8
70         ldq     r1,-48(r17)
71         sll     r2,r19,r22
72         ldq     r2,-56(r17)
73         srl     r3,r20,r5       C r5 still needs r22 merged in (done at $Loop/$Lend2)
74         bis     r7,r24,r7       C r7 = first result limb of the group, complete
75         sll     r3,r19,r23
76         bis     r8,r21,r8       C r8 = second result limb of the group, complete
77         srl     r4,r20,r6       C r6 still needs r23 merged in (done at $Loop/$Lend2)
78         ldq     r3,-64(r17)
79         sll     r4,r19,r24      C r24 = high part carried into the next group
80         ldq     r4,-72(r17)
81         subq    r18,4,r18       C account for the second group of four loads
82         beq     r18,$Lend2      C nothing beyond these eight limbs, skip the loop
83         ALIGN(16)
84 C main loop: each pass stores four result limbs, shifts four limbs and loads four new ones
85 $Loop:  stq     r7,-8(r16)
86         bis     r5,r22,r5       C finish third result limb of the pending group
87         stq     r8,-16(r16)
88         bis     r6,r23,r6       C finish fourth result limb of the pending group
89
90         srl     r1,r20,r7
91         subq    r18,4,r18       C four more limbs will be loaded in this pass
92         sll     r1,r19,r21
93         unop    C ldq   r31,-96(r17)
94
95         srl     r2,r20,r8
96         ldq     r1,-80(r17)
97         sll     r2,r19,r22
98         ldq     r2,-88(r17)
99
100         stq     r5,-24(r16)
101         bis     r7,r24,r7       C r24 is the high part left over from the previous group
102         stq     r6,-32(r16)
103         bis     r8,r21,r8
104
105         srl     r3,r20,r5
106         unop    C ldq   r31,-96(r17)
107         sll     r3,r19,r23
108         subq    r16,32,r16      C step RES pointer down four limbs (after the four stores above)
109
110         srl     r4,r20,r6
111         ldq     r3,-96(r17)
112         sll     r4,r19,r24
113         ldq     r4,-104(r17)
114
115         subq    r17,32,r17      C step s1 pointer down four limbs (after the four loads above)
116         bne     r18,$Loop
117 C cool down phase 2/1: store the pending group and shift the last four loaded limbs
118 $Lend2: stq     r7,-8(r16)
119         bis     r5,r22,r5
120         stq     r8,-16(r16)
121         bis     r6,r23,r6
122         srl     r1,r20,r7
123         sll     r1,r19,r21
124         srl     r2,r20,r8
125         sll     r2,r19,r22
126         stq     r5,-24(r16)
127         bis     r7,r24,r7
128         stq     r6,-32(r16)
129         bis     r8,r21,r8
130         srl     r3,r20,r5
131         sll     r3,r19,r23
132         srl     r4,r20,r6
133         sll     r4,r19,r24
134 C cool down phase 2/2: store the four result limbs of the last group
135         stq     r7,-40(r16)
136         bis     r5,r22,r5
137         stq     r8,-48(r16)
138         bis     r6,r23,r6
139         stq     r5,-56(r16)
140         stq     r6,-64(r16)
141 C cool down phase 2/3: store the final result limb, r4 << cnt with nothing merged in
142         stq     r24,-72(r16)
143         ret     r31,(r26),1
144
145 C cool down phase 1/1: only one group of four was loaded, shift it
146 $Lend1: srl     r1,r20,r7
147         sll     r1,r19,r21
148         srl     r2,r20,r8
149         sll     r2,r19,r22
150         srl     r3,r20,r5
151         bis     r7,r24,r7       C r24 comes from $L0 (limb preceding this group)
152         sll     r3,r19,r23
153         bis     r8,r21,r8
154         srl     r4,r20,r6
155         sll     r4,r19,r24
156 C cool down phase 1/2: store the four merged limbs plus the final r4 << cnt
157         stq     r7,-8(r16)
158         bis     r5,r22,r5
159         stq     r8,-16(r16)
160         bis     r6,r23,r6
161         stq     r5,-24(r16)
162         stq     r6,-32(r16)
163         stq     r24,-40(r16)
164         ret     r31,(r26),1
165
166 $Lend:  stq     r24,-8(r16)     C store the final result limb (current limb << cnt from $L0)
167         ret     r31,(r26),1
168 EPILOGUE(mpn_lshift)
169 ASM_END()