ced55b720344e77181e171576228c0cfff89da23
[ghc-hetmet.git] / ghc / rts / gmp / mpn / alpha / ev5 / lshift.s
1  # Alpha EV5 __mpn_lshift -- Shift a limb vector left by a bit count and store the result in a second limb vector.
2
3  # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
4
5  # This file is part of the GNU MP Library.
6
7  # The GNU MP Library is free software; you can redistribute it and/or modify
8  # it under the terms of the GNU Library General Public License as published by
9  # the Free Software Foundation; either version 2 of the License, or (at your
10  # option) any later version.
11
12  # The GNU MP Library is distributed in the hope that it will be useful, but
13  # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14  # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
15  # License for more details.
16
17  # You should have received a copy of the GNU Library General Public License
18  # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
19  # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20  # MA 02111-1307, USA.
21
22  # __mpn_lshift: shift the size-limb vector at s1_ptr left by cnt bits, store the size result limbs at res_ptr, and return the bits shifted out of the top limb.
23  # INPUT PARAMETERS
24  # res_ptr      r16     destination limb vector (result limbs are stored here)
25  # s1_ptr       r17     source limb vector (limbs are loaded from here)
26  # size         r18     number of 64-bit limbs; the top limb is loaded unconditionally, so size >= 1 is required
27  # cnt          r19     left-shift count in bits
28  # RESULT       r0      top source limb shifted right by (64 - cnt), i.e. the bits shifted out
29  # This code runs at 3.25 cycles/limb on the EV5.
30  # NOTE(review): the right-shift count is formed as -cnt, so cnt == 0 would OR each limb with its unshifted lower neighbour; callers presumably guarantee 0 < cnt < 64 -- confirm.
31         .set    noreorder               # ask the assembler to keep the hand-scheduled instruction order
32         .set    noat                    # r28 (the assembler temporary) is used as a scratch counter below
33 .text
34         .align  3
35         .globl  __mpn_lshift
36         .ent    __mpn_lshift
37 __mpn_lshift:
38         .frame  $30,0,$26,0             # zero-size frame; return address is in r26
39  # Setup: both pointers are moved past their last limb; limbs are then processed from the highest address downward.
40         s8addq  $18,$17,$17     # make r17 point at end of s1 (r17 += 8*size)
41         ldq     $4,-8($17)      # load first limb (the one at the highest address)
42         subq    $31,$19,$20     # r20 = -cnt; shifts use only the low 6 count bits, so srl by r20 shifts right by 64-cnt
43         s8addq  $18,$16,$16     # make r16 point at end of RES (r16 += 8*size)
44         subq    $18,1,$18       # r18 = size-1 = source limbs still to be loaded
45         and     $18,4-1,$28     # number of limbs in first loop: (size-1) mod 4
46         srl     $4,$20,$0       # compute function result: bits shifted out of the top limb
47
48         beq     $28,.L0         # nothing to peel: size-1 is already a multiple of 4
49         subq    $18,$28,$18     # r18 = limbs left for the 4-way unrolled code (a multiple of 4)
50
51         .align  3
52 .Loop0: ldq     $3,-16($17)     # r3 = next lower source limb
53         subq    $16,8,$16       # step result pointer down one limb
54         sll     $4,$19,$5       # r5 = current limb << cnt (high part of the result limb)
55         subq    $17,8,$17       # step source pointer down one limb
56         subq    $28,1,$28       # one fewer limb to peel
57         srl     $3,$20,$6       # r6 = lower limb >> (64-cnt) (bits carried up into this result limb)
58         or      $3,$3,$4        # r4 = r3: the lower limb becomes the current limb
59         or      $5,$6,$8        # r8 = combined result limb
60         stq     $8,0($16)       # store it at the already-decremented result pointer
61         bne     $28,.Loop0      # repeat until the peel count reaches zero
62
63 .L0:    sll     $4,$19,$24      # r24 = current limb << cnt: pending high part of the next result limb
64         beq     $18,.Lend       # no limbs left: r24 alone is the lowest result limb
65  # warm up phase 1: load the next group of four source limbs into r1-r4
66         ldq     $1,-16($17)
67         subq    $18,4,$18       # r18 = limbs remaining after this group
68         ldq     $2,-24($17)
69         ldq     $3,-32($17)
70         ldq     $4,-40($17)
71         beq     $18,.Lend1      # this group is the last one: finish in .Lend1
72  # warm up phase 2: shift the first group while loading the second group into r1-r4
73         srl     $1,$20,$7       # r7 = carry-up bits of limb 1 of the group
74         sll     $1,$19,$21      # r21 = limb 1 << cnt (high part for the limb below it)
75         srl     $2,$20,$8
76         ldq     $1,-48($17)     # reload r1 with limb 1 of the second group
77         sll     $2,$19,$22
78         ldq     $2,-56($17)
79         srl     $3,$20,$5       # r5 is only the carry-up part here; ORed with r22 later (.Loop or .Lend2)
80         or      $7,$24,$7       # r7 = first result limb of the group, complete
81         sll     $3,$19,$23
82         or      $8,$21,$8       # r8 = second result limb of the group, complete
83         srl     $4,$20,$6       # r6 is only the carry-up part here; ORed with r23 later (.Loop or .Lend2)
84         ldq     $3,-64($17)
85         sll     $4,$19,$24      # r24 = pending high part carried into the next group
86         ldq     $4,-72($17)
87         subq    $18,4,$18       # r18 = limbs remaining after the second group
88         beq     $18,.Lend2      # no further groups to load: drain the pipeline in .Lend2
89         .align  4
90  # main loop: each iteration stores the four result limbs of the previously shifted group,
91 .Loop:  stq     $7,-8($16)      # shifts the group held in r1-r4, and loads the following group
92         or      $5,$22,$5       # complete third result limb of the previous group
93         stq     $8,-16($16)
94         or      $6,$23,$6       # complete fourth result limb of the previous group
95
96         srl     $1,$20,$7
97         subq    $18,4,$18       # account for the group loaded in this iteration
98         sll     $1,$19,$21
99         unop    # ldq   $31,-96($17)   (no-op filler; the load into r31 is left commented out)
100
101         srl     $2,$20,$8
102         ldq     $1,-80($17)     # loads below use r17 before it is decremented at the loop bottom
103         sll     $2,$19,$22
104         ldq     $2,-88($17)
105
106         stq     $5,-24($16)
107         or      $7,$24,$7       # r24 still holds the high part left by the previous group
108         stq     $6,-32($16)
109         or      $8,$21,$8
110
111         srl     $3,$20,$5
112         unop    # ldq   $31,-96($17)   (no-op filler; the load into r31 is left commented out)
113         sll     $3,$19,$23
114         subq    $16,32,$16      # advance result pointer past the four limbs just stored
115
116         srl     $4,$20,$6
117         ldq     $3,-96($17)
118         sll     $4,$19,$24      # r24 = pending high part for the next group
119         ldq     $4,-104($17)
120
121         subq    $17,32,$17      # advance source pointer by one group
122         bne     $18,.Loop       # loop while limbs remain to be loaded
123  # cool down phase 2/1: store the previous group's results and shift the last loaded group (no more loads)
124 .Lend2: stq     $7,-8($16)
125         or      $5,$22,$5
126         stq     $8,-16($16)
127         or      $6,$23,$6
128         srl     $1,$20,$7
129         sll     $1,$19,$21
130         srl     $2,$20,$8
131         sll     $2,$19,$22
132         stq     $5,-24($16)
133         or      $7,$24,$7
134         stq     $6,-32($16)
135         or      $8,$21,$8
136         srl     $3,$20,$5
137         sll     $3,$19,$23
138         srl     $4,$20,$6
139         sll     $4,$19,$24      # r24 = last loaded limb << cnt
140  # cool down phase 2/2: store the last group's four result limbs (r16 is not advanced, so offsets continue at -40)
141         stq     $7,-40($16)
142         or      $5,$22,$5
143         stq     $8,-48($16)
144         or      $6,$23,$6
145         stq     $5,-56($16)
146         stq     $6,-64($16)
147  # cool down phase 2/3: store the final result limb, which has no lower limb to take bits from
148         stq     $24,-72($16)
149         ret     $31,($26),1     # return to the address in r26; r0 holds the result
150
151  # cool down phase 1/1: only one group was loaded; shift it and combine with the pending r24
152 .Lend1: srl     $1,$20,$7
153         sll     $1,$19,$21
154         srl     $2,$20,$8
155         sll     $2,$19,$22
156         srl     $3,$20,$5
157         or      $7,$24,$7
158         sll     $3,$19,$23
159         or      $8,$21,$8
160         srl     $4,$20,$6
161         sll     $4,$19,$24      # r24 = last loaded limb << cnt
162  # cool down phase 1/2: store the group's four result limbs, then the final limb from r24
163         stq     $7,-8($16)
164         or      $5,$22,$5
165         stq     $8,-16($16)
166         or      $6,$23,$6
167         stq     $5,-24($16)
168         stq     $6,-32($16)
169         stq     $24,-40($16)
170         ret     $31,($26),1     # return to the address in r26; r0 holds the result
171
172 .Lend:  stq     $24,-8($16)     # no unrolled work: store the pending r24 as the final result limb
173         ret     $31,($26),1     # return to the address in r26; r0 holds the result
174         .end    __mpn_lshift