1dnl  PowerPC 750 mpn_rshift -- mpn right shift.
2
3dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 3 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22
23C       cycles/limb
24C 750:     3.0
25C 7400:    3.0
26
27
28C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
29C                       unsigned shift);
30C
31C This code has the same per-limb speed as mpn/powerpc32/rshift.asm, but is
32C smaller and saves about 30 or so cycles of overhead.
33
34ASM_START()
35PROLOGUE(mpn_rshift)
36
	C Shift the size-limb operand at src right by shift bits and store the
	C result at dst, working from the lowest limb upwards.  Limbs are
	C 32-bit words.  Each result limb is
	C     dst[i] = (src[i] >> shift) | (src[i+1] << (32-shift))
	C and the top limb is just src[size-1] >> shift.  The bits shifted out
	C of src[0] are returned in r3, positioned at the top of the word
	C (src[0] << (32-shift)).
	C
	C The main loop is unrolled two limbs per pass, alternating r9 and r10
	C as the "current limb" register.  L(two) and L(odd) are the matching
	C tails (last limb in r9 or r10 respectively), L(one) handles size==1.
	C
	C NOTE(review): size==0 is not handled -- ctr would wrap at the first
	C bdz.  Presumably callers guarantee size >= 1; confirm.
	C
	C On entry:
37	C r3	dst
38	C r4	src
39	C r5	size
40	C r6	shift
41
42	mtctr	r5		C ctr = size, counts limbs still to load
43	lwz	r8, 0(r4)	C src[0]
44
45	subfic	r7, r6, 32	C 32-shift
46	addi	r5, r3, -4	C dst-4, so stwu 4(r5) steps onto dst[0]
47
48	slw	r3, r8, r7	C return value, bits shifted out of src[0]
49	bdz	L(one)		C size==1, no further limbs to load
50
51	lwzu	r9, 4(r4)	C src[1]
52	srw	r8, r8, r6	C src[0] >> shift
53	bdz	L(two)		C size==2, go straight to the tail
54
55
56L(top):
	C First half of the unrolled loop, current limb in r9.
57	C r3	return value
58	C r4	src, incrementing
59	C r5	dst, incrementing
60	C r6	shift
61	C r7	32-shift
62	C r8	src[i-1] >> shift
63	C r9	src[i]
64	C r10	(free, loaded with src[i+1] below)
	C r11	scratch
65
66	lwzu	r10, 4(r4)	C src[i+1]
67	slw	r11, r9, r7	C src[i] << (32-shift)
68
69	or	r8, r8, r11	C combine low and high parts
70	stwu	r8, 4(r5)	C dst[i-1]
71
72	srw	r8, r9, r6	C src[i] >> shift
73	bdz	L(odd)		C no more loads, last limb is in r10
74
	C Second half, same steps with r9 and r10 swapped (i advanced by one).
75	C r8	src[i-1] >> shift
76	C r9	(free, loaded with src[i+1] below)
77	C r10	src[i]
78
79	lwzu	r9, 4(r4)	C src[i+1]
80	slw	r11, r10, r7	C src[i] << (32-shift)
81
82	or	r8, r8, r11	C combine low and high parts
83	stwu	r8, 4(r5)	C dst[i-1]
84
85	srw	r8, r10, r6	C src[i] >> shift
86	bdnz	L(top)		C else fall into L(two), last limb in r9
87
88
89L(two):
	C Tail when the last limb loaded is in r9: store the final two limbs.
90	C r3	return value
91	C r4	(not used from here)
92	C r5	&dst[size-3], stores below use offsets 4 and 8
93	C r6	shift
94	C r7	32-shift
95	C r8	src[size-2] >> shift
96	C r9	src[size-1]
97	C r10	(not used)
98
99	slw	r11, r9, r7	C src[size-1] << (32-shift)
100	srw	r12, r9, r6	C src[size-1] >> shift
101
102	or	r8, r8, r11	C combine low and high parts
103	stw	r12, 8(r5)	C dst[size-1]
104
105	stw	r8, 4(r5)	C dst[size-2]
106	blr
107
108
109L(odd):
	C Tail when the last limb loaded is in r10: same as L(two) but reading
	C r10 in place of r9.
110	C r3	return value
111	C r4	(not used from here)
112	C r5	&dst[size-3], stores below use offsets 4 and 8
113	C r6	shift
114	C r7	32-shift
115	C r8	src[size-2] >> shift
116	C r9	(not used)
117	C r10	src[size-1]
118
119	slw	r11, r10, r7	C src[size-1] << (32-shift)
120	srw	r12, r10, r6	C src[size-1] >> shift
121
122	or	r8, r8, r11	C combine low and high parts
123	stw	r12, 8(r5)	C dst[size-1]
124
125	stw	r8, 4(r5)	C dst[size-2]
126	blr
127
128
129L(one):
	C size==1: the only result limb is src[0] >> shift.
130	C r3	return value
131	C r4	(not used from here)
132	C r5	dst-4
133	C r6	shift
134	C r7	(not used)
135	C r8	src[0]
136
137	srw	r8, r8, r6	C src[0] >> shift
138
139	stw	r8, 4(r5)	C dst[0]
140	blr
141
142EPILOGUE(mpn_rshift)
143