1dnl  PowerPC-32 mpn_lshiftc.
2
3dnl  Copyright 1995, 1998, 2000, 2002-2005, 2010 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33C                cycles/limb
34C 603e:            ?
35C 604e:            3.0
36C 75x (G3):        3.0
37C 7400,7410 (G4):  3.0
38C 7445,7455 (G4+): 2.5
39C 7447,7457 (G4+): 2.25
40C power4/ppc970:   2.5
41C power5:          2.5
42
43C INPUT PARAMETERS
44C rp	r3
45C up	r4
46C n	r5
47C cnt	r6
48
49ASM_START()
C mpn_lshiftc: shift the n-limb operand at up left by cnt bits and store the
C bitwise complement of the shifted limbs at rp.  The bits shifted out of the
C most significant limb are returned in r3 and are not complemented.
C
C Limbs are processed from the most significant end downwards: both pointers
C are first advanced to the end of their operands and then walked backwards
C with negative-offset loads and stores.
C
C NOTE(review): n = 0 is not handled (the first limb is loaded unconditionally
C and the CTR branches expect a nonzero count), and cnt is presumably in the
C range 1..31 -- confirm against the callers.
50PROLOGUE(mpn_lshiftc)
51	cmpwi	cr0, r5, 30	C more than 30 limbs?
52	slwi	r0, r5, 2	C r0 = n * 4, the operand size in bytes
53	add	r4, r4, r0	C make r4 point at end of s1
54	add	r7, r3, r0	C make r7 point at end of res
55	bgt	L(BIG)		C branch if more than 30 limbs
56
C Small operands: a loop handling two limbs per iteration.  r11 and r10
C alternate as the previously loaded limb, so no register copy is needed.
57	mtctr	r5		C copy size into CTR
58	subfic	r8, r6, 32	C r8 = 32 - cnt, the complementary shift count
59	lwzu	r11, -4(r4)	C load first s1 limb
60	srw	r3, r11, r8	C compute function return value
61	bdz	L(end1)		C n = 1: only the final limb remains
62
63L(oop):	lwzu	r10, -4(r4)	C load next lower limb
64	slw	r9, r11, r6	C high part comes from the previous limb
65	srw	r12, r10, r8	C low part comes from the limb just loaded
66	nor	r9, r9, r12	C combine both parts and complement in one go
67	stwu	r9, -4(r7)
68	bdz	L(end2)		C source exhausted, last limb is in r10
69	lwzu	r11, -4(r4)	C same steps again with r10 and r11 swapped
70	slw	r9, r10, r6
71	srw	r12, r11, r8
72	nor	r9, r9, r12
73	stwu	r9, -4(r7)
74	bdnz	L(oop)
75
C Least significant limb: nothing is shifted in from below, so just shift and
C complement.  end1 takes the limb from r11, end2 takes it from r10.
76L(end1):
77	slw	r0, r11, r6
78	nor	r0, r0, r0	C nor of a register with itself is a bitwise not
79	stw	r0, -4(r7)
80	blr
81L(end2):
82	slw	r0, r10, r6
83	nor	r0, r0, r0	C nor of a register with itself is a bitwise not
84	stw	r0, -4(r7)
85	blr
86
C Large operands (n > 30): four limbs per iteration using r24-r31, which are
C saved in a 48-byte stack frame first.  r0 carries the already shifted high
C part of the previous limb from one step to the next.
87L(BIG):
88	stwu	r1, -48(r1)	C allocate a 48-byte stack frame
89	stmw	r24, 8(r1)	C save registers we are supposed to preserve
90	lwzu	r9, -4(r4)	C load most significant limb
91	subfic	r8, r6, 32	C r8 = 32 - cnt, the complementary shift count
92	srw	r3, r9, r8	C compute function return value
93	slw	r0, r9, r6	C pending high part of the top result limb
94	addi	r5, r5, -1	C r5 = number of limbs still to be loaded
95
C Handle (n - 1) mod 4 limbs one at a time so that the number of limbs left
C for the unrolled loop is a multiple of four.
96	andi.	r10, r5, 3	C count for spill loop
97	beq	L(e)		C remaining count is already a multiple of 4
98	mtctr	r10
99	lwzu	r28, -4(r4)
100	bdz	L(xe0)		C exactly one leftover limb
101
102L(loop0):
103	slw	r12, r28, r6	C high part for the next result limb
104	srw	r24, r28, r8	C low part for the current result limb
105	lwzu	r28, -4(r4)
106	nor	r24, r0, r24	C merge with pending high part and complement
107	stwu	r24, -4(r7)
108	mr	r0, r12		C new pending high part
109	bdnz	L(loop0)	C taken at most once!
110
111L(xe0):	slw	r12, r28, r6	C last leftover limb, no further load here
112	srw	r24, r28, r8
113	nor	r24, r0, r24
114	stwu	r24, -4(r7)
115	mr	r0, r12
116
117L(e):	srwi	r5, r5, 2	C count for unrolled loop
118	addi	r5, r5, -1	C last group is finished after the loop; n > 30 keeps this positive
119	mtctr	r5
120	lwz	r28, -4(r4)	C preload the first group of four limbs
121	lwz	r29, -8(r4)
122	lwz	r30, -12(r4)
123	lwzu	r31, -16(r4)	C last load of the group also steps r4 down by 16
124
C Each iteration shifts the four limbs loaded previously, loads the next
C four, then stores four complemented result limbs.
125L(loopU):
126	slw	r9, r28, r6
127	srw	r24, r28, r8
128	lwz	r28, -4(r4)
129	slw	r10, r29, r6
130	srw	r25, r29, r8
131	lwz	r29, -8(r4)
132	slw	r11, r30, r6
133	srw	r26, r30, r8
134	lwz	r30, -12(r4)
135	slw	r12, r31, r6
136	srw	r27, r31, r8
137	lwzu	r31, -16(r4)
138	nor	r24, r0, r24	C r0 is the high part carried in from the previous step
139	stw	r24, -4(r7)
140	nor	r25, r9, r25
141	stw	r25, -8(r7)
142	nor	r26, r10, r26
143	stw	r26, -12(r7)
144	nor	r27, r11, r27
145	stwu	r27, -16(r7)	C last store of the group also steps r7 down by 16
146	mr	r0, r12		C carry the high part of the lowest limb onwards
147	bdnz	L(loopU)
148
C Drain: process the four limbs loaded last without loading any more, then
C emit the least significant result limb.
149	slw	r9, r28, r6
150	srw	r24, r28, r8
151	slw	r10, r29, r6
152	srw	r25, r29, r8
153	slw	r11, r30, r6
154	srw	r26, r30, r8
155	slw	r12, r31, r6
156	srw	r27, r31, r8
157	nor	r24, r0, r24
158	stw	r24, -4(r7)
159	nor	r25, r9, r25
160	stw	r25, -8(r7)
161	nor	r26, r10, r26
162	stw	r26, -12(r7)
163	nor	r27, r11, r27
164	stw	r27, -16(r7)	C plain stw here: r7 is not updated any more
165	nor	r12, r12, r12	C least significant limb has no low part, just complement
166	stw	r12, -20(r7)
167	lmw	r24, 8(r1)	C restore registers
168	addi	r1, r1, 48	C release the stack frame
169	blr
170EPILOGUE()
171