1dnl  PowerPC-32 mpn_rshift -- Shift a number right.
2
3dnl  Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005 Free Software
4dnl  Foundation, Inc.
5
6dnl  This file is part of the GNU MP Library.
7
8dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9dnl  it under the terms of the GNU Lesser General Public License as published
10dnl  by the Free Software Foundation; either version 3 of the License, or (at
11dnl  your option) any later version.
12
13dnl  The GNU MP Library is distributed in the hope that it will be useful, but
14dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16dnl  License for more details.
17
18dnl  You should have received a copy of the GNU Lesser General Public License
19dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
20
21include(`../config.m4')
22
23C                cycles/limb
24C 603e:            ?
25C 604e:            3.0
26C 75x (G3):        3.0
27C 7400,7410 (G4):  3.0
28C 7445,7455 (G4+): 2.5
29C 7447,7457 (G4+): 2.25
30C power4/ppc970:   2.5
31C power5:          2.5
32
33C INPUT PARAMETERS
34C rp	r3
35C up	r4
36C n	r5
37C cnt	r6
38
39ASM_START()
C mpn_rshift: shift the n-limb number at up right by cnt bits and store the
C n-limb result at rp.  Limbs are 32 bits wide and are walked from the lowest
C address upwards.
C
C Each stored limb is  (up[i] >> cnt) | (up[i+1] << (32-cnt));  the last
C stored limb is  up[n-1] >> cnt.  The value returned in r3 is
C up[0] << (32-cnt), i.e. the bits shifted out at the low end.
C
C Registers common to both code paths:
C   r4  source pointer, advanced by the update-form loads
C   r7  destination pointer biased by -4 so update-form stores can be used
C   r6  cnt
C   r8  32-cnt
C
C NOTE(review): the code appears to assume n >= 1 and 1 <= cnt <= 31 (n = 0
C would wrap CTR in the short path); confirm against the documented
C mpn_rshift contract.
40PROLOGUE(mpn_rshift)
41	cmpwi	cr0, r5, 12	C more than 12 limbs?
42	addi	r7, r3, -4	C dst-4
43	bgt	L(BIG)		C branch if more than 12 limbs
44
C Short path, n <= 12.  CTR counts source limbs; r11 holds the first limb.
C r3 is free to be overwritten with the return value because rp now lives
C in r7.  With n = 1 the bdz goes straight to the final store.
45	mtctr	r5		C copy size into CTR
46	subfic	r8, r6, 32
47	lwz	r11, 0(r4)	C load first s1 limb
48	slw	r3, r11, r8	C compute function return value
49	bdz	L(end1)
50
C Two limbs per iteration.  r11 and r10 swap roles as current and next limb
C between the two halves, so no register copy is needed.  Each half stores
C (current >> cnt) | (next << (32-cnt)).
51L(oop):	lwzu	r10, 4(r4)
52	srw	r9, r11, r6
53	slw	r12, r10, r8
54	or	r9, r9, r12
55	stwu	r9, 4(r7)
56	bdz	L(end2)
57	lwzu	r11, 4(r4)
58	srw	r9, r10, r6
59	slw	r12, r11, r8
60	or	r9, r9, r12
61	stwu	r9, 4(r7)
62	bdnz	L(oop)
63
C Exit with the most significant source limb in r11: store its right-shifted
C value as the top result limb.
64L(end1):
65	srw	r0, r11, r6
66	stw	r0, 4(r7)
67	blr
C Same as above, but the most significant source limb is in r10.
68L(end2):
69	srw	r0, r10, r6
70	stw	r0, 4(r7)
71	blr
72
C Long path, n > 12.  Uses r24-r31 as extra temporaries, so they are saved
C first: stmw stores r24 through r31 (8 words, 32 bytes) at r1-32.
C NOTE(review): r1 itself is not adjusted, so the save area lies below the
C stack pointer; this presumably relies on the target ABI permitting use of
C that area in a leaf routine - confirm for every supported ABI.
C
C r0 carries the right-shifted part of the previous limb into the next
C stored limb.  After the addi, r5 = n-1 = number of limbs still to be
C loaded and combined.
73L(BIG):
74	stmw	r24, -32(r1)	C save registers we are supposed to preserve
75	lwz	r9, 0(r4)
76	subfic	r8, r6, 32
77	slw	r3, r9, r8	C compute function return value
78	srw	r0, r9, r6
79	addi	r5, r5, -1
80
C Handle (n-1) mod 4 limbs one at a time so that the remaining count is a
C multiple of 4.  If the remainder is zero, skip directly to L(e).
81	andi.	r10, r5, 3	C count for spill loop
82	beq	L(e)
83	mtctr	r10
84	lwzu	r28, 4(r4)
85	bdz	L(xe0)
86
C CTR is 1 or 2 on entry here (it started at 2 or 3 and the bdz above
C already decremented it), hence the remark on the bdnz below.
87L(loop0):
88	srw	r12, r28, r6
89	slw	r24, r28, r8
90	lwzu	r28, 4(r4)
91	or	r24, r0, r24
92	stwu	r24, 4(r7)
93	mr	r0, r12
94	bdnz	L(loop0)	C taken at most once!
95
C Last spill limb: it is already loaded in r28, so no further load is done.
96L(xe0):	srw	r12, r28, r6
97	slw	r24, r28, r8
98	or	r24, r0, r24
99	stwu	r24, 4(r7)
100	mr	r0, r12
101
C 4-way unrolled part.  r5 >> 2 is the number of 4-limb groups; CTR is one
C less because the final group is processed by the load-free code after the
C loop.  Since n > 12, there are at least 3 groups, so CTR >= 2 here.
C The first group is preloaded into r28-r31; the lwzu advances r4 by 16.
102L(e):	srwi	r5, r5, 2	C count for unrolled loop
103	addi	r5, r5, -1
104	mtctr	r5
105	lwz	r28, 4(r4)
106	lwz	r29, 8(r4)
107	lwz	r30, 12(r4)
108	lwzu	r31, 16(r4)
109
C Per iteration: split each of the four current limbs into its right-shifted
C part (r9-r12) and left-shifted part (r24-r27), reloading r28-r31 with the
C next group as soon as each register has been consumed.  Then combine:
C   out0 = r0  | r24    (r0 is the carry from the previous limb)
C   out1 = r9  | r25
C   out2 = r10 | r26
C   out3 = r11 | r27
C and pass r12 on in r0.  Only the last store and last load use the update
C form, moving r7 and r4 forward by 16 bytes per iteration.
110L(loopU):
111	srw	r9, r28, r6
112	slw	r24, r28, r8
113	lwz	r28, 4(r4)
114	srw	r10, r29, r6
115	slw	r25, r29, r8
116	lwz	r29, 8(r4)
117	srw	r11, r30, r6
118	slw	r26, r30, r8
119	lwz	r30, 12(r4)
120	srw	r12, r31, r6
121	slw	r27, r31, r8
122	lwzu	r31, 16(r4)
123	or	r24, r0, r24
124	stw	r24, 4(r7)
125	or	r25, r9, r25
126	stw	r25, 8(r7)
127	or	r26, r10, r26
128	stw	r26, 12(r7)
129	or	r27, r11, r27
130	stwu	r27, 16(r7)
131	mr	r0, r12
132	bdnz	L(loopU)
133
C Final group: same computation as the loop body but with no further loads.
C r7 is not updated by these stores, so the offsets run 4, 8, 12, 16.
134	srw	r9, r28, r6
135	slw	r24, r28, r8
136	srw	r10, r29, r6
137	slw	r25, r29, r8
138	srw	r11, r30, r6
139	slw	r26, r30, r8
140	srw	r12, r31, r6
141	slw	r27, r31, r8
142	or	r24, r0, r24
143	stw	r24, 4(r7)
144	or	r25, r9, r25
145	stw	r25, 8(r7)
146	or	r26, r10, r26
147	stw	r26, 12(r7)
148	or	r27, r11, r27
149	stw	r27, 16(r7)
150
C Top result limb: the right-shifted most significant source limb (r12),
C stored at offset 20 because r7 was not advanced by the stores above.
151	stw	r12, 20(r7)
152	lmw	r24, -32(r1)	C restore registers
153	blr
154EPILOGUE(mpn_rshift)
155