1/* i80586   rshift
2 *
3 *      Copyright (C) 1992, 1994, 1998,
4 *                    2001, 2002 Free Software Foundation, Inc.
5 *
6 * This file is part of Libgcrypt.
7 *
8 * Libgcrypt is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU Lesser General Public License as
10 * published by the Free Software Foundation; either version 2.1 of
11 * the License, or (at your option) any later version.
12 *
13 * Libgcrypt is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 * GNU Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
21 *
22 * Note: This code is heavily based on the GNU MP Library.
23 *	 Actually it's the same code with only minor changes in the
24 *	 way the data is stored; this is to support the abstraction
25 *	 of an optional secure memory allocation which may be used
26 *	 to avoid revealing of sensitive data due to paging etc.
27 */
28
29
30#include "sysdep.h"
31#include "asm-syntax.h"
32
33
34
35/*******************
36 * mpi_limb_t
37 * _gcry_mpih_rshift( mpi_ptr_t wp,	(sp + 4)
38 *		   mpi_ptr_t up,	(sp + 8)
39 *		   mpi_size_t usize,	(sp + 12)
40 *		   unsigned cnt)	(sp + 16)
41 */
42
.text
	ALIGN (3)
	.globl C_SYMBOL_NAME(_gcry_mpih_rshift)
C_SYMBOL_NAME(_gcry_mpih_rshift:)
/* Shift USIZE limbs at UP right by CNT bits (presumably 0 < cnt < 32 —
 * standard mpn convention; confirm against callers), storing the result
 * at WP and returning (in %eax) the bits shifted out of the low end,
 * left-justified in a limb.  i386 cdecl: args on the stack, all of
 * %ebx/%esi/%edi/%ebp preserved for the caller.
 */
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp

	/* Four registers were pushed above, so the first argument now
	   lives 4 (ret addr) + 16 (saves) = 20 bytes above %esp.  */
	movl	20(%esp),%edi		/* res_ptr */
	movl	24(%esp),%esi		/* s_ptr */
	movl	28(%esp),%ebp		/* size */
	movl	32(%esp),%ecx		/* cnt */

/* We can use faster code for shift-by-1 under certain conditions.
   The Rspecial path walks the arrays from the top end downwards and
   reads each source limb before writing the destination limb that may
   alias it, so it is used when res_ptr + 1 >= s_ptr or when the two
   regions do not overlap at all.  Otherwise fall through to Rnormal,
   which walks upwards (safe for res_ptr < s_ptr).  */
	cmp	$1,%ecx
	jne	Rnormal
	leal	4(%edi),%eax
	cmpl	%esi,%eax
	jnc	Rspecial		/* jump if res_ptr + 1 >= s_ptr */
	leal	(%edi,%ebp,4),%eax
	cmpl	%eax,%esi
	jnc	Rspecial		/* jump if s_ptr >= res_ptr + size */

/* General case, any cnt: loop from the least significant limb upwards.
   Each shrdl combines two adjacent source limbs into one result limb;
   the main loop is unrolled eight limbs per iteration.  */
Rnormal:
	movl	(%esi),%edx
	addl	$4,%esi
	xorl	%eax,%eax
	shrdl	%cl,%edx,%eax		/* compute carry limb: up[0] bits left-justified */
	pushl	%eax			/* push carry limb (return value) onto stack */

	decl	%ebp			/* the top limb is handled after the loops */
	pushl	%ebp			/* save limb count for the Roop2 cleanup */
	shrl	$3,%ebp			/* number of 8-limb groups */
	jz	Rend

	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN	(2)
Roop:	movl	28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebx		/* %edx carries the pending source limb in */

	movl	(%esi),%eax
	movl	4(%esi),%edx
	shrdl	%cl,%eax,%ebx
	shrdl	%cl,%edx,%eax
	movl	%ebx,(%edi)
	movl	%eax,4(%edi)

	movl	8(%esi),%ebx
	movl	12(%esi),%eax
	shrdl	%cl,%ebx,%edx
	shrdl	%cl,%eax,%ebx
	movl	%edx,8(%edi)
	movl	%ebx,12(%edi)

	movl	16(%esi),%edx
	movl	20(%esi),%ebx
	shrdl	%cl,%edx,%eax
	shrdl	%cl,%ebx,%edx
	movl	%eax,16(%edi)
	movl	%edx,20(%edi)

	movl	24(%esi),%eax
	movl	28(%esi),%edx
	shrdl	%cl,%eax,%ebx
	shrdl	%cl,%edx,%eax
	movl	%ebx,24(%edi)
	movl	%eax,28(%edi)

	addl	$32,%esi
	addl	$32,%edi
	decl	%ebp
	jnz	Roop

Rend:	popl	%ebp			/* recover saved limb count */
	andl	$7,%ebp			/* limbs left over after the 8-way loop */
	jz	Rend2
Roop2:	movl	(%esi),%eax
	shrdl	%cl,%eax,%edx		/* compute result limb */
	movl	%edx,(%edi)
	movl	%eax,%edx
	addl	$4,%esi
	addl	$4,%edi
	decl	%ebp
	jnz	Roop2

Rend2:	shrl	%cl,%edx		/* compute most significant limb */
	movl	%edx,(%edi)		/* store it */

	popl	%eax			/* pop carry limb: the return value */

	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi
	ret

/* Shift-by-1 special case.  This loops from the MOST significant end of
   the arrays downwards, which is only permissible because the dispatch
   above guarantees either res_ptr + 1 >= s_ptr (and every source limb is
   loaded before the destination limb that may alias it is stored) or
   that the regions do not overlap at all.  Limbs are shifted with rcrl,
   so the bit shifted out of each limb propagates to the next lower limb
   through the carry flag; CF must therefore survive between iterations.  */

Rspecial:
	leal	-4(%edi,%ebp,4),%edi	/* point at the most significant limbs */
	leal	-4(%esi,%ebp,4),%esi

	movl	(%esi),%edx		/* load most significant source limb */
	subl	$4,%esi

	decl	%ebp			/* the top limb is handled outside the loops */
	pushl	%ebp			/* save limb count for the RLoop2 cleanup */
	shrl	$3,%ebp			/* number of 8-limb groups */

	shrl	$1,%edx			/* shift top limb; its low bit lands in CF */
	incl	%ebp			/* inc/dec pair tests %ebp for zero ... */
	decl	%ebp			/* ... without clobbering the carry flag */
	jz	RLend

	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN	(2)
RLoop:	movl	-28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebx		/* %edx carries the pending shifted limb in */

	movl	(%esi),%eax
	movl	-4(%esi),%edx
	rcrl	$1,%eax			/* rotate CF in at the top, shifted-out bit to CF */
	movl	%ebx,(%edi)
	rcrl	$1,%edx
	movl	%eax,-4(%edi)

	movl	-8(%esi),%ebx
	movl	-12(%esi),%eax
	rcrl	$1,%ebx
	movl	%edx,-8(%edi)
	rcrl	$1,%eax
	movl	%ebx,-12(%edi)

	movl	-16(%esi),%edx
	movl	-20(%esi),%ebx
	rcrl	$1,%edx
	movl	%eax,-16(%edi)
	rcrl	$1,%ebx
	movl	%edx,-20(%edi)

	movl	-24(%esi),%eax
	movl	-28(%esi),%edx
	rcrl	$1,%eax
	movl	%ebx,-24(%edi)
	rcrl	$1,%edx
	movl	%eax,-28(%edi)

	leal	-32(%esi),%esi		/* use leal not to clobber carry */
	leal	-32(%edi),%edi
	decl	%ebp
	jnz	RLoop

RLend:	popl	%ebp			/* recover limb count (pop leaves CF intact) */
	sbbl	%eax,%eax		/* save carry in %eax (0 or -1) */
	andl	$7,%ebp			/* remaining limbs; note this clobbers CF */
	jz	RLend2
	addl	%eax,%eax		/* restore carry from eax */
RLoop2: movl	%edx,%ebx
	movl	(%esi),%edx
	rcrl	$1,%edx
	movl	%ebx,(%edi)

	leal	-4(%esi),%esi		/* use leal not to clobber carry */
	leal	-4(%edi),%edi
	decl	%ebp
	jnz	RLoop2

	jmp	RL1
RLend2: addl	%eax,%eax		/* restore carry from eax */
RL1:	movl	%edx,(%edi)		/* store least significant result limb */

	movl	$0,%eax			/* movl, not xorl: must preserve CF */
	rcrl	$1,%eax			/* return value: bit shifted out of up[0], in bit 31 */

	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi
	ret
228
229