1dnl  PowerPC-32 mpn_rshift -- Shift a number right.
2
3dnl  Copyright 1995, 1998, 2000, 2002-2005 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33C                cycles/limb
34C 603e:            ?
35C 604e:            3.0
36C 75x (G3):        3.0
37C 7400,7410 (G4):  3.0
38C 7445,7455 (G4+): 2.5
39C 7447,7457 (G4+): 2.25
40C power4/ppc970:   2.5
41C power5:          2.5
42
43C INPUT PARAMETERS
44C rp	r3
45C up	r4
46C n	r5
47C cnt	r6
48
49ASM_START()
C mpn_rshift: shift the n-limb number at up (r4) right by cnt (r6) bits and
C store the n result limbs at rp (r3).  The value returned in r3 is the first
C source limb shifted left by 32-cnt, i.e. the bits shifted out at the low end.
C
C Two code paths, selected on n (r5):
C   n <= 30: 2-way unrolled loop, no stack frame.
C   n >  30: 4-way unrolled loop that uses r24-r31 and so sets up a stack
C            frame to save and restore them.
C
C Register roles shared by both paths:
C   r4  source pointer, advanced by the update-form loads
C   r6  cnt
C   r7  destination pointer minus 4, advanced by the update-form stores
C   r8  32 - cnt
50PROLOGUE(mpn_rshift)
51	cmpwi	cr0, r5, 30	C more than 30 limbs?
52	addi	r7, r3, -4	C dst-4
53	bgt	L(BIG)		C branch if more than 30 limbs
54
C Small case.  CTR counts the source limbs; r11 and r10 take turns holding the
C most recently loaded limb.
55	mtctr	r5		C copy size into CTR
56	subfic	r8, r6, 32	C r8 = 32 - cnt
57	lwz	r11, 0(r4)	C load first s1 limb
58	slw	r3, r11, r8	C compute function return value
59	bdz	L(end1)		C n = 1: only the top limb is left to store
60
C Each half of the loop loads the next limb, combines (previous >> cnt) with
C (next << (32-cnt)) and stores one result limb.
61L(oop):	lwzu	r10, 4(r4)	C r10 = next limb
62	srw	r9, r11, r6	C low part, from previous limb
63	slw	r12, r10, r8	C high part, from next limb
64	or	r9, r9, r12
65	stwu	r9, 4(r7)	C store result limb
66	bdz	L(end2)		C out of limbs, r10 holds the top limb
67	lwzu	r11, 4(r4)	C second half: same with r10 and r11 swapped
68	srw	r9, r10, r6
69	slw	r12, r11, r8
70	or	r9, r9, r12
71	stwu	r9, 4(r7)
72	bdnz	L(oop)		C falls through when r11 holds the top limb
73
C Store the top result limb: the last source limb shifted right by cnt.
74L(end1):
75	srw	r0, r11, r6
76	stw	r0, 4(r7)
77	blr
78L(end2):
79	srw	r0, r10, r6
80	stw	r0, 4(r7)
81	blr
82
C Large case.  r0 always holds the right-shifted part of the previous source
C limb, waiting to be combined with the left-shifted part of the next one.
83L(BIG):
84	stwu	r1, -48(r1)	C allocate a 48-byte stack frame
85	stmw	r24, 8(r1)	C save registers we are supposed to preserve
86	lwz	r9, 0(r4)	C r9 = first source limb
87	subfic	r8, r6, 32	C r8 = 32 - cnt
88	slw	r3, r9, r8	C compute function return value
89	srw	r0, r9, r6	C r0 = low part of the first result limb
90	addi	r5, r5, -1	C r5 = n - 1, source limbs not yet loaded
91
C Peel off (n-1) mod 4 limbs one at a time so that the number of limbs left
C for the unrolled code is a multiple of 4.
92	andi.	r10, r5, 3	C count for spill loop
93	beq	L(e)		C nothing to peel
94	mtctr	r10
95	lwzu	r28, 4(r4)	C first peeled limb
96	bdz	L(xe0)		C only one limb to peel
97
98L(loop0):
99	srw	r12, r28, r6	C low part for the following result limb
100	slw	r24, r28, r8	C high part for this result limb
101	lwzu	r28, 4(r4)	C load next peeled limb
102	or	r24, r0, r24
103	stwu	r24, 4(r7)
104	mr	r0, r12		C carry the low part forward
105	bdnz	L(loop0)	C taken at most once!
106
C Last peeled limb: same as the loop body, but without a further load.
107L(xe0):	srw	r12, r28, r6
108	slw	r24, r28, r8
109	or	r24, r0, r24
110	stwu	r24, 4(r7)
111	mr	r0, r12
112
C Unrolled part.  CTR = (n-1)/4 - 1, because the final group of four limbs is
C handled by the straight-line code after the loop.
113L(e):	srwi	r5, r5, 2	C count for unrolled loop
114	addi	r5, r5, -1	C last group is done after the loop
115	mtctr	r5
116	lwz	r28, 4(r4)	C preload the first group of four limbs
117	lwz	r29, 8(r4)
118	lwz	r30, 12(r4)
119	lwzu	r31, 16(r4)	C ... and advance r4 by four limbs
120
C Each pass shifts the four limbs held in r28-r31, reloads r28-r31 with the
C next four limbs, and stores four result limbs.  The loads are placed between
C the shifts, the stores follow at the end of the pass.
121L(loopU):
122	srw	r9, r28, r6
123	slw	r24, r28, r8
124	lwz	r28, 4(r4)
125	srw	r10, r29, r6
126	slw	r25, r29, r8
127	lwz	r29, 8(r4)
128	srw	r11, r30, r6
129	slw	r26, r30, r8
130	lwz	r30, 12(r4)
131	srw	r12, r31, r6
132	slw	r27, r31, r8
133	lwzu	r31, 16(r4)
134	or	r24, r0, r24	C r0 is the low part carried in from before
135	stw	r24, 4(r7)
136	or	r25, r9, r25
137	stw	r25, 8(r7)
138	or	r26, r10, r26
139	stw	r26, 12(r7)
140	or	r27, r11, r27
141	stwu	r27, 16(r7)	C advance r7 by four limbs
142	mr	r0, r12		C carry the low part into the next group
143	bdnz	L(loopU)
144
C Wind down: the same computation for the final four limbs already held in
C r28-r31, with no further loads.
145	srw	r9, r28, r6
146	slw	r24, r28, r8
147	srw	r10, r29, r6
148	slw	r25, r29, r8
149	srw	r11, r30, r6
150	slw	r26, r30, r8
151	srw	r12, r31, r6
152	slw	r27, r31, r8
153	or	r24, r0, r24
154	stw	r24, 4(r7)
155	or	r25, r9, r25
156	stw	r25, 8(r7)
157	or	r26, r10, r26
158	stw	r26, 12(r7)
159	or	r27, r11, r27
160	stw	r27, 16(r7)	C plain store, r7 is not updated here
161
162	stw	r12, 20(r7)	C top result limb: last source limb >> cnt
163	lmw	r24, 8(r1)	C restore registers
164	addi	r1, r1, 48	C release the stack frame
165	blr
166EPILOGUE()
167