/* Pentium optimized __mpn_lshift --
   Copyright (C) 1992, 94, 95, 96, 97, 98, 2000 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#include "sysdep.h"
#include "asm-syntax.h"
#include "bp-sym.h"
#include "bp-asm.h"
#define PARMS	LINKAGE+16		/* space for 4 saved regs */
#define RES	PARMS			/* mp_limb_t *res_ptr */
#define S	RES+PTR_SIZE		/* const mp_limb_t *s_ptr */
#define SIZE	S+PTR_SIZE		/* mp_size_t size, in limbs */
#define CNT	SIZE+4			/* shift count, 1 <= cnt < 32 */
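
/* For reference, a C-level sketch of the interface this file implements
   (names follow GMP conventions; the limb typedef is an assumption for
   this 32-bit build, not something defined here):

     typedef unsigned long mp_limb_t;	// assumed 32-bit limb
     typedef long mp_size_t;

     // Shift {s_ptr, size} left by cnt bits, store the low size limbs
     // in {res_ptr, size}, and return the bits shifted out at the top.
     mp_limb_t __mpn_lshift (mp_limb_t *res_ptr, const mp_limb_t *s_ptr,
			     mp_size_t size, unsigned int cnt);
*/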

	.text
ENTRY (BP_SYM (__mpn_lshift))
	ENTER

	pushl	%edi
	pushl	%esi
	pushl	%ebp
	pushl	%ebx

	movl	RES(%esp),%edi
	movl	S(%esp),%esi
	movl	SIZE(%esp),%ebx
	movl	CNT(%esp),%ecx
#if __BOUNDED_POINTERS__
	shll	$2, %ebx		/* convert limbs to bytes */
	CHECK_BOUNDS_BOTH_WIDE (%edi, RES(%esp), %ebx)
	CHECK_BOUNDS_BOTH_WIDE (%esi, S(%esp), %ebx)
	shrl	$2, %ebx
#endif
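
/* The bounded-pointers checks above verify that both limb arrays lie
   entirely within their pointers' bounds; conceptually (a sketch with
   illustrative names, not the actual macros from bp-asm.h):

     check_bounds (res_ptr, size * 4);	// res_ptr[0 .. size-1] in bounds
     check_bounds (s_ptr, size * 4);	// s_ptr[0 .. size-1] in bounds
*/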

/* We can use faster code for shift-by-1 under certain conditions.  */
	cmp	$1,%ecx
	jne	L(normal)
	leal	4(%esi),%eax
	cmpl	%edi,%eax
	jnc	L(special)		/* jump if s_ptr + 1 >= res_ptr */
	leal	(%esi,%ebx,4),%eax
	cmpl	%eax,%edi
	jnc	L(special)		/* jump if res_ptr >= s_ptr + size */
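
/* In C terms, the dispatch above is roughly (a sketch):

     if (cnt == 1
	 && (s_ptr + 1 >= res_ptr		// dest not above source
	     || res_ptr >= s_ptr + size))	// dest past the source block
       goto special;				// add-with-carry version
*/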

L(normal):
	leal	-4(%edi,%ebx,4),%edi	/* point at most significant limbs */
	leal	-4(%esi,%ebx,4),%esi

	movl	(%esi),%edx
	subl	$4,%esi
	xorl	%eax,%eax
	shldl	%cl,%edx,%eax		/* compute carry limb */
	pushl	%eax			/* push carry limb onto stack */
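
/* With %eax zeroed, the shld above extracts the return value; in C:

     retval = s_ptr[size - 1] >> (32 - cnt);	// bits shifted out the top
*/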

	decl	%ebx
	pushl	%ebx			/* save size-1 for the cleanup loop */
	shrl	$3,%ebx			/* number of 8-limb chunks */
	jz	L(end)

	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN	(2)
L(oop):	movl	-28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebp

	movl	(%esi),%eax
	movl	-4(%esi),%edx
	shldl	%cl,%eax,%ebp
	shldl	%cl,%edx,%eax
	movl	%ebp,(%edi)
	movl	%eax,-4(%edi)

	movl	-8(%esi),%ebp
	movl	-12(%esi),%eax
	shldl	%cl,%ebp,%edx
	shldl	%cl,%eax,%ebp
	movl	%edx,-8(%edi)
	movl	%ebp,-12(%edi)

	movl	-16(%esi),%edx
	movl	-20(%esi),%ebp
	shldl	%cl,%edx,%eax
	shldl	%cl,%ebp,%edx
	movl	%eax,-16(%edi)
	movl	%edx,-20(%edi)

	movl	-24(%esi),%eax
	movl	-28(%esi),%edx
	shldl	%cl,%eax,%ebp
	shldl	%cl,%edx,%eax
	movl	%ebp,-24(%edi)
	movl	%eax,-28(%edi)

	subl	$32,%esi
	subl	$32,%edi
	decl	%ebx
	jnz	L(oop)
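
/* Each shld step in the loop above implements the per-limb recurrence,
   in C (a sketch, 32-bit limbs):

     res_ptr[i] = (s_ptr[i] << cnt) | (s_ptr[i - 1] >> (32 - cnt));

   applied from the most significant limb downward, eight limbs per
   iteration.  */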

L(end):	popl	%ebx
	andl	$7,%ebx			/* up to 7 limbs left over */
	jz	L(end2)
L(oop2):
	movl	(%esi),%eax
	shldl	%cl,%eax,%edx
	movl	%edx,(%edi)
	movl	%eax,%edx
	subl	$4,%esi
	subl	$4,%edi
	decl	%ebx
	jnz	L(oop2)

L(end2):
	shll	%cl,%edx		/* compute least significant limb */
	movl	%edx,(%edi)		/* store it */

	popl	%eax			/* pop carry limb */

	popl	%ebx
	popl	%ebp
	popl	%esi
	popl	%edi

	LEAVE
	ret
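
/* Putting the normal path together, a C reference version (a sketch
   assuming 32-bit limbs; the loop above is merely an unrolled form):

     mp_limb_t retval = s_ptr[size - 1] >> (32 - cnt);
     for (mp_size_t i = size - 1; i > 0; i--)
       res_ptr[i] = (s_ptr[i] << cnt) | (s_ptr[i - 1] >> (32 - cnt));
     res_ptr[0] = s_ptr[0] << cnt;
     return retval;
*/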

/* The code below loops from the least significant end of the arrays,
   which would clobber not-yet-read source limbs for some overlapping
   operands.  Since the function is documented to work for overlapping
   source and destination, we only reach it after the checks above have
   ruled out the dangerous cases.  */
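
/* The special path exploits x + x == x << 1 and lets the processor's
   carry flag propagate the shifted-out bit between limbs; in C (a
   sketch, 32-bit limbs):

     mp_limb_t carry = 0;
     for (mp_size_t i = 0; i < size; i++)
       {
	 mp_limb_t limb = s_ptr[i];
	 res_ptr[i] = (limb << 1) | carry;
	 carry = limb >> 31;		// bit shifted out of this limb
       }
     return carry;

   In the assembly each iteration of this loop body is a single
   adcl %reg,%reg.  */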

L(special):
	movl	(%esi),%edx
	addl	$4,%esi

	decl	%ebx
	pushl	%ebx			/* save size-1 for the cleanup loop */
	shrl	$3,%ebx			/* number of 8-limb chunks */

	addl	%edx,%edx		/* shift first limb, setting carry */
	incl	%ebx
	decl	%ebx			/* test %ebx for zero, preserving carry */
	jz	L(Lend)

	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN	(2)
L(Loop):
	movl	28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebp

	movl	(%esi),%eax
	movl	4(%esi),%edx
	adcl	%eax,%eax
	movl	%ebp,(%edi)
	adcl	%edx,%edx
	movl	%eax,4(%edi)

	movl	8(%esi),%ebp
	movl	12(%esi),%eax
	adcl	%ebp,%ebp
	movl	%edx,8(%edi)
	adcl	%eax,%eax
	movl	%ebp,12(%edi)

	movl	16(%esi),%edx
	movl	20(%esi),%ebp
	adcl	%edx,%edx
	movl	%eax,16(%edi)
	adcl	%ebp,%ebp
	movl	%edx,20(%edi)

	movl	24(%esi),%eax
	movl	28(%esi),%edx
	adcl	%eax,%eax
	movl	%ebp,24(%edi)
	adcl	%edx,%edx
	movl	%eax,28(%edi)

	leal	32(%esi),%esi		/* use leal not to clobber carry */
	leal	32(%edi),%edi
	decl	%ebx
	jnz	L(Loop)

L(Lend):
	popl	%ebx
	sbbl	%eax,%eax		/* save carry in %eax */
	andl	$7,%ebx			/* up to 7 limbs left over */
	jz	L(Lend2)
	addl	%eax,%eax		/* restore carry from eax */
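
/* The sbbl/addl pair is the usual idiom for preserving the carry flag
   across instructions that clobber it (the popl/andl above):
   sbbl %eax,%eax leaves %eax = -CF (all ones iff carry was set), and
   addl %eax,%eax shifts that sign bit back out into CF.  */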
L(Loop2):
	movl	%edx,%ebp
	movl	(%esi),%edx
	adcl	%edx,%edx
	movl	%ebp,(%edi)

	leal	4(%esi),%esi		/* use leal not to clobber carry */
	leal	4(%edi),%edi
	decl	%ebx
	jnz	L(Loop2)

	jmp	L(L1)
L(Lend2):
	addl	%eax,%eax		/* restore carry from eax */
L(L1):	movl	%edx,(%edi)		/* store last limb */

	sbbl	%eax,%eax		/* %eax = -carry */
	negl	%eax			/* return value: the bit shifted out */

	popl	%ebx
	popl	%ebp
	popl	%esi
	popl	%edi

	LEAVE
	ret
END (BP_SYM (__mpn_lshift))