# x86-gf2m.s revision 256281
	# $FreeBSD: stable/10/secure/lib/libcrypto/i386/x86-gf2m.s 238405 2012-07-12 19:30:53Z jkim $
# Syntax: AT&T operand order (source first), GNU as directives.
# Target: x86 in 32-bit mode, ELF object format.
.file	"x86-gf2m.s"
.text
# _mul_1x1_mmx: carry-less (XOR-accumulated, i.e. polynomial over GF(2))
# multiplication of two 32-bit words into a 64-bit product, using MMX.
#
# Private register convention (this is not a C-callable function):
#   In:        %eax = a, %ebx = b
#   Out:       %mm0 = 64-bit product
#   Clobbers:  %ebx, %ecx, %edx, %ebp, %esi, %edi, %mm1-%mm5, flags
#   Preserves: %eax, %mm6, %mm7 (never written here)
#   Stack:     reserves and releases 36 bytes; the lookup table occupies
#              the eight dwords at 0..31(%esp)
#
# Method: with a1 = a & 0x3fffffff, a2 = a1 << 1 and a4 = a1 << 2, the table
# entry tab[i] is the XOR of those of {a1, a2, a4} selected by the bits of i
# (i = 0..7). b is consumed three bits at a time in eleven windows; window k
# selects tab[window] which is shifted left by 3*k and XORed into %mm0.
# Bits 30 and 31 of a are kept out of the table so that every entry fits in
# 32 bits; their contributions, (b << 30) and (b << 31), are formed without
# branches in %mm4 and %mm5 and XORed in near the end.
#
# Table indices are taken from b, so which stack slot is read depends on b.
.type	_mul_1x1_mmx,@function
.align	16
_mul_1x1_mmx:
	subl	$36,%esp		# scratch space for the table
	movl	%eax,%ecx
	leal	(%eax,%eax,1),%edx	# edx = a << 1
	andl	$1073741823,%ecx	# ecx = a1 = a & 0x3fffffff
	leal	(%edx,%edx,1),%ebp	# ebp = a4 = a << 2 (top two bits of a fall off)
	movl	$0,(%esp)		# tab[0] = 0
	andl	$2147483647,%edx	# edx = a2 = (a << 1) & 0x7fffffff
	movd	%eax,%mm2		# mm2 = a
	movd	%ebx,%mm3		# mm3 = b
	movl	%ecx,4(%esp)		# tab[1] = a1
	xorl	%edx,%ecx		# ecx = a1^a2
	pxor	%mm5,%mm5
	pxor	%mm4,%mm4
	movl	%edx,8(%esp)		# tab[2] = a2
	xorl	%ebp,%edx		# edx = a2^a4
	movl	%ecx,12(%esp)		# tab[3] = a1^a2
	pcmpgtd	%mm2,%mm5		# low dword of mm5 = all ones iff bit 31 of a is set
	paddd	%mm2,%mm2		# mm2 = a << 1: bit 30 of a becomes the sign bit
	xorl	%edx,%ecx		# ecx = a1^a4
	movl	%ebp,16(%esp)		# tab[4] = a4
	xorl	%edx,%ebp		# ebp = a2
	pand	%mm3,%mm5		# mm5 = b if bit 31 of a is set, else 0
	pcmpgtd	%mm2,%mm4		# low dword of mm4 = all ones iff bit 30 of a is set
	movl	%ecx,20(%esp)		# tab[5] = a1^a4
	xorl	%ecx,%ebp		# ebp = a1^a2^a4
	psllq	$31,%mm5		# mm5 = (b << 31) or 0
	pand	%mm3,%mm4		# mm4 = b if bit 30 of a is set, else 0
	movl	%edx,24(%esp)		# tab[6] = a2^a4
	movl	$7,%esi
	movl	%ebp,28(%esp)		# tab[7] = a1^a2^a4
	movl	%esi,%ebp		# ebp = 7: window mask for the rest of the routine
	andl	%ebx,%esi		# esi = window 0 = b & 7
	shrl	$3,%ebx
	movl	%ebp,%edi
	psllq	$30,%mm4		# mm4 = (b << 30) or 0
	andl	%ebx,%edi		# edi = window 1
	shrl	$3,%ebx
	movd	(%esp,%esi,4),%mm0	# mm0 = tab[window 0] (zero-extended to 64 bits)
	movl	%ebp,%esi
	andl	%ebx,%esi		# esi = window 2
	shrl	$3,%ebx
	movd	(%esp,%edi,4),%mm2	# tab[window 1]
	movl	%ebp,%edi
	psllq	$3,%mm2
	andl	%ebx,%edi		# edi = window 3
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1	# tab[window 2]
	movl	%ebp,%esi
	psllq	$6,%mm1
	andl	%ebx,%esi		# esi = window 4
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2	# tab[window 3]
	movl	%ebp,%edi
	psllq	$9,%mm2
	andl	%ebx,%edi		# edi = window 5
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1	# tab[window 4]
	movl	%ebp,%esi
	psllq	$12,%mm1
	andl	%ebx,%esi		# esi = window 6
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2	# tab[window 5]
	movl	%ebp,%edi
	psllq	$15,%mm2
	andl	%ebx,%edi		# edi = window 7
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1	# tab[window 6]
	movl	%ebp,%esi
	psllq	$18,%mm1
	andl	%ebx,%esi		# esi = window 8
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2	# tab[window 7]
	movl	%ebp,%edi
	psllq	$21,%mm2
	andl	%ebx,%edi		# edi = window 9
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1	# tab[window 8]
	movl	%ebp,%esi
	psllq	$24,%mm1
	andl	%ebx,%esi		# esi = window 10 (only two bits of b remain)
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2	# tab[window 9]
	pxor	%mm4,%mm0		# fold in the bit-30 contribution
	psllq	$27,%mm2
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1	# tab[window 10]
	pxor	%mm5,%mm0		# fold in the bit-31 contribution
	psllq	$30,%mm1
	addl	$36,%esp		# release the table
	pxor	%mm1,%mm0		# mm0 = final 64-bit product
	ret
.size	_mul_1x1_mmx,.-_mul_1x1_mmx
# _mul_1x1_ialu: carry-less (XOR-accumulated, i.e. polynomial over GF(2))
# multiplication of two 32-bit words into a 64-bit product, using only the
# integer registers.
#
# Private register convention (this is not a C-callable function):
#   In:        %eax = a, %ebx = b
#   Out:       %edx:%eax = 64-bit product (%edx high word, %eax low word)
#   Clobbers:  %ebx, %ecx, %ebp, %esi, %edi, flags
#   Stack:     reserves and releases 36 bytes; the lookup table occupies
#              the eight dwords at 0..31(%esp)
#
# Method: with a1 = a & 0x3fffffff, a2 = a1 << 1 and a4 = a1 << 2, the table
# entry tab[i] is the XOR of those of {a1, a2, a4} selected by the bits of i
# (i = 0..7). b is consumed three bits at a time in eleven windows; for
# window k the entry t = tab[window] contributes (t << 3k) to the low word
# and (t >> (32 - 3k)) to the high word. Bits 30 and 31 of a are kept out of
# the table so every entry fits in 32 bits; they are turned into all-ones or
# all-zero masks with arithmetic shifts and contribute (b << 30), (b << 31).
#
# Table indices are taken from b, so which stack slot is read depends on b.
.type	_mul_1x1_ialu,@function
.align	16
_mul_1x1_ialu:
	subl	$36,%esp		# scratch space for the table
	movl	%eax,%ecx
	leal	(%eax,%eax,1),%edx	# edx = a << 1
	leal	(,%eax,4),%ebp		# ebp = a4 = a << 2 (top two bits of a fall off)
	andl	$1073741823,%ecx	# ecx = a1 = a & 0x3fffffff
	leal	(%eax,%eax,1),%edi	# edi = a << 1: bit 30 of a becomes the sign bit
	sarl	$31,%eax		# eax = all ones iff bit 31 of a is set
	movl	$0,(%esp)		# tab[0] = 0
	andl	$2147483647,%edx	# edx = a2 = (a << 1) & 0x7fffffff
	movl	%ecx,4(%esp)		# tab[1] = a1
	xorl	%edx,%ecx		# ecx = a1^a2
	movl	%edx,8(%esp)		# tab[2] = a2
	xorl	%ebp,%edx		# edx = a2^a4
	movl	%ecx,12(%esp)		# tab[3] = a1^a2
	xorl	%edx,%ecx		# ecx = a1^a4
	movl	%ebp,16(%esp)		# tab[4] = a4
	xorl	%edx,%ebp		# ebp = a2
	movl	%ecx,20(%esp)		# tab[5] = a1^a4
	xorl	%ecx,%ebp		# ebp = a1^a2^a4
	sarl	$31,%edi		# edi = all ones iff bit 30 of a is set
	andl	%ebx,%eax		# eax = b if bit 31 of a is set, else 0
	movl	%edx,24(%esp)		# tab[6] = a2^a4
	andl	%ebx,%edi		# edi = b if bit 30 of a is set, else 0
	movl	%ebp,28(%esp)		# tab[7] = a1^a2^a4
	movl	%eax,%edx
	shll	$31,%eax		# eax = low word of the bit-31 contribution
	movl	%edi,%ecx
	shrl	$1,%edx			# edx = high word of the bit-31 contribution
	movl	$7,%esi
	shll	$30,%edi		# edi = low word of the bit-30 contribution
	andl	%ebx,%esi		# esi = window 0 = b & 7
	shrl	$2,%ecx			# ecx = high word of the bit-30 contribution
	xorl	%edi,%eax
	shrl	$3,%ebx
	movl	$7,%edi
	andl	%ebx,%edi		# edi = window 1
	shrl	$3,%ebx
	xorl	%ecx,%edx
	xorl	(%esp,%esi,4),%eax	# low ^= tab[window 0]
	movl	$7,%esi
	andl	%ebx,%esi		# esi = window 2
	shrl	$3,%ebx
	movl	(%esp,%edi,4),%ebp	# tab[window 1]
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$3,%ebp
	andl	%ebx,%edi		# edi = window 3
	shrl	$29,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx	# tab[window 2]
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$6,%ecx
	andl	%ebx,%esi		# esi = window 4
	shrl	$26,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp	# tab[window 3]
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$9,%ebp
	andl	%ebx,%edi		# edi = window 5
	shrl	$23,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx	# tab[window 4]
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$12,%ecx
	andl	%ebx,%esi		# esi = window 6
	shrl	$20,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp	# tab[window 5]
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$15,%ebp
	andl	%ebx,%edi		# edi = window 7
	shrl	$17,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx	# tab[window 6]
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$18,%ecx
	andl	%ebx,%esi		# esi = window 8
	shrl	$14,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp	# tab[window 7]
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$21,%ebp
	andl	%ebx,%edi		# edi = window 9
	shrl	$11,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx	# tab[window 8]
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$24,%ecx
	andl	%ebx,%esi		# esi = window 10 (only two bits of b remain)
	shrl	$8,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp	# tab[window 9]
	movl	%ebp,%ecx
	shll	$27,%ebp
	movl	(%esp,%esi,4),%edi	# tab[window 10]
	shrl	$5,%ecx
	movl	%edi,%esi
	xorl	%ebp,%eax
	shll	$30,%edi
	xorl	%ecx,%edx
	shrl	$2,%esi
	xorl	%edi,%eax		# eax = final low word
	xorl	%esi,%edx		# edx = final high word
	addl	$36,%esp		# release the table
	ret
.size	_mul_1x1_ialu,.-_mul_1x1_ialu
# bn_GF2m_mul_2x2: carry-less (polynomial over GF(2)) multiplication of two
# 64-bit operands, each passed as two 32-bit words, into a 128-bit result.
#
# Stack arguments on entry, as read by the code below:
#   r   at  4(%esp)  pointer to the result: four 32-bit words, least
#                    significant word first
#   a1  at  8(%esp)  high word of the first operand
#   a0  at 12(%esp)  low word of the first operand
#   b1  at 16(%esp)  high word of the second operand
#   b0  at 20(%esp)  low word of the second operand
# Presumed C prototype (confirm against the declaring header):
#   void bn_GF2m_mul_2x2(uint32_t *r, uint32_t a1, uint32_t a0,
#                        uint32_t b1, uint32_t b0);
# No return value is produced; %eax holds whatever the chosen path left in it.
# %ebx, %esi, %edi and %ebp are saved and restored on the paths that use them.
#
# Three implementations are selected at run time from the two 32-bit words
# of OPENSSL_ia32cap_P:
#   word 0 bit 23 clear                      -> integer-only path (.L000ialu)
#   word 0 bit 24 clear or word 1 bit 1 clear -> MMX path (.L001mmx)
#   otherwise                                -> PCLMULQDQ path (fall through)
# NOTE(review): these are presumably CPUID feature flags (MMX, FXSR and
# PCLMULQDQ respectively); the bit meanings are not visible in this file.
#
# The integer and MMX paths use three 32x32 products (Karatsuba):
#   hi  = a1*b1,  lo = a0*b0,  mid = (a1^a0)*(b1^b0) ^ hi ^ lo
#   result = hi << 64  ^  mid << 32  ^  lo
.globl	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,@function
.align	16
bn_GF2m_mul_2x2:
.L_bn_GF2m_mul_2x2_begin:
	leal	OPENSSL_ia32cap_P,%edx	# absolute address (not position independent)
	movl	(%edx),%eax		# eax = capability word 0
	movl	4(%edx),%edx		# edx = capability word 1
	testl	$8388608,%eax		# word 0, bit 23 (0x00800000)
	jz	.L000ialu
	testl	$16777216,%eax		# word 0, bit 24 (0x01000000)
	jz	.L001mmx
	testl	$2,%edx			# word 1, bit 1
	jz	.L001mmx
	# PCLMULQDQ path: one instruction multiplies the two 64-bit operands.
	movups	8(%esp),%xmm0		# xmm0 dwords 0..3 = a1, a0, b1, b0
	shufps	$177,%xmm0,%xmm0	# swap within each half: a0, a1, b0, b1
.byte	102,15,58,68,192,1		# pclmulqdq $1,%xmm0,%xmm0: high qword times low qword
	movl	4(%esp),%eax		# eax = r
	movups	%xmm0,(%eax)		# store the 128-bit product
	ret
.align	16
.L001mmx:
	pushl	%ebp			# _mul_1x1_mmx overwrites ebp, ebx, esi, edi
	pushl	%ebx
	pushl	%esi
	pushl	%edi			# arguments now sit 16 bytes further from %esp
	movl	24(%esp),%eax		# a1
	movl	32(%esp),%ebx		# b1
	call	_mul_1x1_mmx
	movq	%mm0,%mm7		# mm7 = hi = a1*b1 (helper leaves mm6/mm7 alone)
	movl	28(%esp),%eax		# a0
	movl	36(%esp),%ebx		# b0
	call	_mul_1x1_mmx
	movq	%mm0,%mm6		# mm6 = lo = a0*b0
	movl	24(%esp),%eax
	movl	32(%esp),%ebx
	xorl	28(%esp),%eax		# eax = a1^a0
	xorl	36(%esp),%ebx		# ebx = b1^b0
	call	_mul_1x1_mmx		# mm0 = (a1^a0)*(b1^b0)
	pxor	%mm7,%mm0
	movl	20(%esp),%eax		# eax = r, fetched before the pops move %esp
	pxor	%mm6,%mm0		# mm0 = mid
	movq	%mm0,%mm2
	psllq	$32,%mm0		# low half of mid into bits 32..63
	popl	%edi
	psrlq	$32,%mm2		# high half of mid into bits 0..31
	popl	%esi
	pxor	%mm6,%mm0		# mm0 = result bits 0..63
	popl	%ebx
	pxor	%mm7,%mm2		# mm2 = result bits 64..127
	movq	%mm0,(%eax)
	popl	%ebp
	movq	%mm2,8(%eax)
	emms				# empty the MMX state before returning
	ret
.align	16
.L000ialu:
	pushl	%ebp			# _mul_1x1_ialu overwrites ebp, ebx, esi, edi
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$20,%esp		# locals; arguments are now 36 bytes further up
	movl	44(%esp),%eax		# a1
	movl	52(%esp),%ebx		# b1
	call	_mul_1x1_ialu
	movl	%eax,8(%esp)		# hi, low word
	movl	%edx,12(%esp)		# hi, high word
	movl	48(%esp),%eax		# a0
	movl	56(%esp),%ebx		# b0
	call	_mul_1x1_ialu
	movl	%eax,(%esp)		# lo, low word
	movl	%edx,4(%esp)		# lo, high word
	movl	44(%esp),%eax
	movl	52(%esp),%ebx
	xorl	48(%esp),%eax		# eax = a1^a0
	xorl	56(%esp),%ebx		# ebx = b1^b0
	call	_mul_1x1_ialu		# edx:eax = m = (a1^a0)*(b1^b0)
	movl	40(%esp),%ebp		# ebp = r
	movl	(%esp),%ebx		# ebx = lo low word
	movl	4(%esp),%ecx		# ecx = lo high word
	movl	8(%esp),%edi		# edi = hi low word
	movl	12(%esp),%esi		# esi = hi high word
	xorl	%edx,%eax
	xorl	%ecx,%edx
	xorl	%ebx,%eax
	movl	%ebx,(%ebp)		# r[0] = lo low word
	xorl	%edi,%edx
	movl	%esi,12(%ebp)		# r[3] = hi high word
	xorl	%esi,%eax
	addl	$20,%esp
	xorl	%esi,%edx		# edx = m high ^ lo high ^ hi low ^ hi high
	popl	%edi
	xorl	%edx,%eax		# eax = m low ^ lo low ^ lo high ^ hi low
	popl	%esi
	movl	%edx,8(%ebp)		# r[2]
	popl	%ebx
	movl	%eax,4(%ebp)		# r[1]
	popl	%ebp
	ret
.size	bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin
# NUL-terminated identification string. The bytes spell:
#   "GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
# No section directive precedes it, so it is emitted into whichever section
# is current at this point.
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
.byte	99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
.byte	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte	62,0
# Common symbol: 8 bytes, 4-byte alignment. bn_GF2m_mul_2x2 reads it as two
# 32-bit words to choose an implementation.
.comm	OPENSSL_ia32cap_P,8,4