1dnl  Alpha mpn_rshift -- Shift a number right.
2
3dnl  Copyright 1994, 1995, 2000, 2009 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 3 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22C      cycles/limb
23C EV4:     ?
24C EV5:     3.25
25C EV6:     1.75
26
27C  INPUT PARAMETERS
28C  rp	r16
29C  up	r17
30C  n	r18
31C  cnt	r19
32
33
34ASM_START()
C mpn_rshift -- shift the n-limb number at up right by cnt bits and store
C the n result limbs at rp.  The value returned in r0 is up[0] shifted left
C by 64-cnt, i.e. the bits shifted out at the low end, left-justified.
C
C Result limb i is (up[i] >> cnt) | (up[i+1] << (64-cnt)); the top limb is
C just up[n-1] >> cnt.
C
C NOTE(review): the first limb is loaded unconditionally and n-1 is used as
C a count, so this presumably requires n >= 1 -- confirm against the callers.
C NOTE(review): 64-cnt is formed as -cnt and relies on the shifter using only
C the low 6 bits of the count, so presumably 1 <= cnt <= 63 -- confirm.
C
C Register use in this routine:
C   r16      rp, advanced as limbs are stored
C   r17      up, advanced as limbs are consumed
C   r18      count of source limbs still to be handled
C   r19      cnt (right shift count)
C   r20      -cnt (left shift count)
C   r28      trip count of the one-limb-at-a-time loop
C   r1-r4    source limbs
C   r5-r8    left-shifted parts, then merged result limbs
C   r21-r24  right-shifted parts
C   r0       return value
35PROLOGUE(mpn_rshift)
36	ldq	r4,0(r17)	C load first limb
37	subq	r31,r19,r20	C r20 = 0 - cnt (r31 reads as zero); acts as 64-cnt when used as a shift count
38	subq	r18,1,r18	C r18 = n-1, limbs above the one just loaded
39	and	r18,4-1,r28	C number of limbs in first loop
40	sll	r4,r20,r0	C compute function result
41
42	beq	r28,L(L0)	C (n-1) is a multiple of 4: skip the first loop
43	subq	r18,r28,r18	C r18 = limbs left for the 4-way unrolled code, a multiple of 4
44
45	ALIGN(8)
C First loop: one result limb per iteration, r28 iterations.
C On entry to each iteration r4 holds the current source limb.
46L(top0):
47	ldq	r3,8(r17)	C next source limb
48	addq	r16,8,r16
49	srl	r4,r19,r5	C low part from the current limb
50	addq	r17,8,r17
51	subq	r28,1,r28
52	sll	r3,r20,r6	C high part from the next limb
53	bis	r3,r3,r4	C next limb becomes the current limb
54	bis	r5,r6,r8	C merge the two parts
55	stq	r8,-8(r16)	C rp was already advanced, so this is the old rp[0]
56	bne	r28,L(top0)
57
C From here on r18 is a multiple of 4 and r4 is the current source limb.
C r24 always carries the right-shifted part that still needs a high part
C from the following limb (or is the top result limb if there is none).
58L(L0):	srl	r4,r19,r24
59	beq	r18,L(end)	C no limbs left: r24 is the top result limb
60C warm up phase 1
C Load the next four source limbs.  After each subq below, r18 is the number
C of source limbs that have not been loaded yet.
61	ldq	r1,8(r17)
62	subq	r18,4,r18
63	ldq	r2,16(r17)
64	ldq	r3,24(r17)
65	ldq	r4,32(r17)
66C warm up phase 2
C Start shifting the first group while loading the second group, if any.
67	sll	r1,r20,r7
68	srl	r1,r19,r21
69	sll	r2,r20,r8
70	beq	r18,L(end1)	C only those four limbs remained: finish without further loads
71	ldq	r1,40(r17)
72	srl	r2,r19,r22
73	ldq	r2,48(r17)
74	sll	r3,r20,r5
75	bis	r7,r24,r7	C r7 = finished result limb
76	srl	r3,r19,r23
77	bis	r8,r21,r8	C r8 = finished result limb
78	sll	r4,r20,r6
79	ldq	r3,56(r17)
80	srl	r4,r19,r24
81	ldq	r4,64(r17)
82	subq	r18,4,r18
83	beq	r18,L(end2)	C everything is loaded: drain the pipeline
84	ALIGN(16)
85C main loop
C Software pipelined, four limbs per iteration.  At the top of each iteration:
C   r7,r8    finished result limbs, ready to store
C   r5,r6    left-shifted parts still to be merged with r22,r23
C   r24      right-shifted part for the next r7
C   r1-r4    loaded source limbs that have not been shifted yet
C The loop is entered only with r18 != 0, i.e. at least four source limbs are
C still unloaded, which are the four fetched at offsets 72..96 from r17.
86L(top):	stq	r7,0(r16)
87	bis	r5,r22,r5
88	stq	r8,8(r16)
89	bis	r6,r23,r6
90
91	sll	r1,r20,r7
92	subq	r18,4,r18
93	srl	r1,r19,r21
94	unop	C ldq	r31,-96(r17)
95
96	sll	r2,r20,r8
97	ldq	r1,72(r17)
98	srl	r2,r19,r22
99	ldq	r2,80(r17)
100
101	stq	r5,16(r16)
102	bis	r7,r24,r7
103	stq	r6,24(r16)
104	bis	r8,r21,r8
105
106	sll	r3,r20,r5
107	unop	C ldq	r31,-96(r17)
108	srl	r3,r19,r23
109	addq	r16,32,r16
110
111	sll	r4,r20,r6
112	ldq	r3,88(r17)
113	srl	r4,r19,r24
114	ldq	r4,96(r17)
115
116	addq	r17,32,r17
117	bne	r18,L(top)
118C cool down phase 2/1
C No more loads.  Same register state as at the top of the main loop: store
C the four limbs in flight and shift the last four loaded limbs.
119L(end2):
120	stq	r7,0(r16)
121	bis	r5,r22,r5
122	stq	r8,8(r16)
123	bis	r6,r23,r6
124	sll	r1,r20,r7
125	srl	r1,r19,r21
126	sll	r2,r20,r8
127	srl	r2,r19,r22
128	stq	r5,16(r16)
129	bis	r7,r24,r7
130	stq	r6,24(r16)
131	bis	r8,r21,r8
132	sll	r3,r20,r5
133	srl	r3,r19,r23
134	sll	r4,r20,r6
135	srl	r4,r19,r24
136C cool down phase 2/2
C rp is not advanced any more, so the last group is stored at offsets 32..56.
137	stq	r7,32(r16)
138	bis	r5,r22,r5
139	stq	r8,40(r16)
140	bis	r6,r23,r6
141	stq	r5,48(r16)
142	stq	r6,56(r16)
143C cool down phase 2/3
144	stq	r24,64(r16)	C top result limb: right-shifted part only
145	ret	r31,(r26),1	C return through r26; result is in r0
146
147C cool down phase 1/1
C Reached from warm up phase 2 when only one group of four limbs was loaded:
C finish shifting and merging that group.
148L(end1):
149	srl	r2,r19,r22
150	sll	r3,r20,r5
151	bis	r7,r24,r7
152	srl	r3,r19,r23
153	bis	r8,r21,r8
154	sll	r4,r20,r6
155	srl	r4,r19,r24
156C cool down phase 1/2
157	stq	r7,0(r16)
158	bis	r5,r22,r5
159	stq	r8,8(r16)
160	bis	r6,r23,r6
161	stq	r5,16(r16)
162	stq	r6,24(r16)
163	stq	r24,32(r16)	C top result limb: right-shifted part only
164	ret	r31,(r26),1	C return through r26; result is in r0
165
C Reached from L0 when no limbs remain after the first loop.
166L(end):	stq	r24,0(r16)	C top result limb: right-shifted part only
167	ret	r31,(r26),1	C return through r26; result is in r0
168EPILOGUE(mpn_rshift)
169ASM_END()
170