x86-mont.S revision 306195
1/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/x86-mont.S 306195 2016-09-22 14:57:48Z jkim $ */
2/* Do not modify. This file is auto-generated from x86-mont.pl. */
3#ifdef PIC
/*
 * bn_mul_mont -- PIC build (the capability word is located relative to the
 * current instruction pointer instead of by absolute address).
 *
 * Six 32-bit stack arguments are read.  The names used in the comments below
 * are shorthand; they presumably correspond to the OpenSSL prototype
 *   int bn_mul_mont(rp, ap, bp, np, n0, num)
 * -- TODO confirm against the C declaration:
 *   arg1 "rp"  result vector          arg4 "np"  modulus vector
 *   arg2 "ap"  first operand vector   arg5 "n0"  pointer to one word
 *   arg3 "bp"  second operand vector  arg6 "num" word count (signed)
 *
 * Returns %eax = 0 without touching memory when num < 4, otherwise %eax = 1.
 *
 * Private frame layout once the frame is set up (offsets from the new %esp):
 *    0  scratch (the squaring path keeps num-1 here)
 *    4  rp          8  ap
 *   12  bp pointer, advanced per outer pass by the integer multiply path;
 *       the squaring path reuses the slot as its outer index
 *   16  np         20  the word loaded from *n0
 *   24  caller's %esp, reloaded just before the register pops
 *   28  &bp[num], end marker for the integer multiply path
 *   32  tp[], the temporary accumulator vector
 */
4.file	"x86-mont.S"
5.text
6.globl	bn_mul_mont
7.type	bn_mul_mont,@function
8.align	16
9bn_mul_mont:
10.L_bn_mul_mont_begin:
/* Save %ebp/%ebx/%esi/%edi; they are popped again at the exit label. */
11	pushl	%ebp
12	pushl	%ebx
13	pushl	%esi
14	pushl	%edi
/* Return value 0 is preloaded; num (arg6) below 4 (signed compare) leaves at once. */
15	xorl	%eax,%eax
16	movl	40(%esp),%edi
17	cmpl	$4,%edi
18	jl	.L000just_leave
/* %esi = address of arg1, %edx = address of arg2. */
19	leal	20(%esp),%esi
20	leal	24(%esp),%edx
/* %ebp = %esp - 32 - 4*(num+2): candidate frame base; %edi ends up as num+2. */
21	addl	$2,%edi
22	negl	%edi
23	leal	-32(%esp,%edi,4),%ebp
24	negl	%edi
/* Lower %ebp so that its low 11 bits equal those of the arg2 address ... */
25	movl	%ebp,%eax
26	subl	%edx,%eax
27	andl	$2047,%eax
28	subl	%eax,%ebp
/* ... then subtract 2048 more if bit 11 also matches, so that bit 11 differs. */
29	xorl	%ebp,%edx
30	andl	$2048,%edx
31	xorl	$2048,%edx
32	subl	%edx,%ebp
/* Round the frame base down to a 64-byte boundary. */
33	andl	$-64,%ebp
/*
 * Move %esp down to the frame base one 4096-byte step at a time, reading a
 * word at each step so every intervening page is touched in order.
 * %edx keeps the caller's %esp.
 */
34	movl	%esp,%eax
35	subl	%ebp,%eax
36	andl	$-4096,%eax
37	movl	%esp,%edx
38	leal	(%ebp,%eax,1),%esp
39	movl	(%esp),%eax
40	cmpl	%ebp,%esp
41	ja	.L001page_walk
42	jmp	.L002page_walk_done
43.align	16
44.L001page_walk:
45	leal	-4096(%esp),%esp
46	movl	(%esp),%eax
47	cmpl	%ebp,%esp
48	ja	.L001page_walk
49.L002page_walk_done:
/* Copy the arguments into the private frame; arg5 is dereferenced once here. */
50	movl	(%esi),%eax
51	movl	4(%esi),%ebx
52	movl	8(%esi),%ecx
53	movl	12(%esi),%ebp
54	movl	16(%esi),%esi
55	movl	(%esi),%esi
56	movl	%eax,4(%esp)
57	movl	%ebx,8(%esp)
58	movl	%ecx,12(%esp)
59	movl	%ebp,16(%esp)
60	movl	%esi,20(%esp)
/* %ebx = (num+2)-3 = num-1, the index of the last word. */
61	leal	-3(%edi),%ebx
62	movl	%edx,24(%esp)
/*
 * call/pop yields the address of the label below; adding the link-time
 * offset gives the address of OPENSSL_ia32cap_P.  Bit 26 of its first word
 * selects between the MMX-register path and the integer path.
 */
63	call	.L003PIC_me_up
64.L003PIC_me_up:
65	popl	%eax
66	leal	OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
67	btl	$26,(%eax)
68	jnc	.L004non_sse2
/*
 * ---- MMX-register path ----
 * %mm7 = 0x00000000ffffffff, used to isolate the low 32 bits of a sum.
 * %esi = ap, %edi = bp, %ebp = np, %edx = outer index i, %ecx = inner index j.
 * %mm4 = bp[i], %mm5 = reduction multiplier, %mm2/%mm3 = the two running sums.
 */
69	movl	$-1,%eax
70	movd	%eax,%mm7
71	movl	8(%esp),%esi
72	movl	12(%esp),%edi
73	movl	16(%esp),%ebp
74	xorl	%edx,%edx
75	xorl	%ecx,%ecx
/* First pass (i = 0): word 0.  multiplier = low32(ap[0]*bp[0]) * n0. */
76	movd	(%edi),%mm4
77	movd	(%esi),%mm5
78	movd	(%ebp),%mm3
79	pmuludq	%mm4,%mm5
80	movq	%mm5,%mm2
81	movq	%mm5,%mm0
82	pand	%mm7,%mm0
83	pmuludq	20(%esp),%mm5
84	pmuludq	%mm5,%mm3
85	paddq	%mm0,%mm3
86	movd	4(%ebp),%mm1
87	movd	4(%esi),%mm0
88	psrlq	$32,%mm2
89	psrlq	$32,%mm3
90	incl	%ecx
91.align	16
/*
 * First-pass inner loop: accumulate ap[j]*bp[0] and np[j]*multiplier, store
 * the low word one slot down (tp[j-1]) and keep the high halves as carries.
 * The operands for j+1 are loaded before the loop test.
 */
92.L0051st:
93	pmuludq	%mm4,%mm0
94	pmuludq	%mm5,%mm1
95	paddq	%mm0,%mm2
96	paddq	%mm1,%mm3
97	movq	%mm2,%mm0
98	pand	%mm7,%mm0
99	movd	4(%ebp,%ecx,4),%mm1
100	paddq	%mm0,%mm3
101	movd	4(%esi,%ecx,4),%mm0
102	psrlq	$32,%mm2
103	movd	%mm3,28(%esp,%ecx,4)
104	psrlq	$32,%mm3
105	leal	1(%ecx),%ecx
106	cmpl	%ebx,%ecx
107	jl	.L0051st
/* Last word of the first pass; the combined carries go to tp[num-1..num] as one quadword. */
108	pmuludq	%mm4,%mm0
109	pmuludq	%mm5,%mm1
110	paddq	%mm0,%mm2
111	paddq	%mm1,%mm3
112	movq	%mm2,%mm0
113	pand	%mm7,%mm0
114	paddq	%mm0,%mm3
115	movd	%mm3,28(%esp,%ecx,4)
116	psrlq	$32,%mm2
117	psrlq	$32,%mm3
118	paddq	%mm2,%mm3
119	movq	%mm3,32(%esp,%ebx,4)
120	incl	%edx
/*
 * Outer loop over bp[i], i >= 1: same shape as the first pass, but each step
 * also adds the existing tp word (carried in %mm6).
 */
121.L006outer:
122	xorl	%ecx,%ecx
123	movd	(%edi,%edx,4),%mm4
124	movd	(%esi),%mm5
125	movd	32(%esp),%mm6
126	movd	(%ebp),%mm3
127	pmuludq	%mm4,%mm5
128	paddq	%mm6,%mm5
129	movq	%mm5,%mm0
130	movq	%mm5,%mm2
131	pand	%mm7,%mm0
132	pmuludq	20(%esp),%mm5
133	pmuludq	%mm5,%mm3
134	paddq	%mm0,%mm3
135	movd	36(%esp),%mm6
136	movd	4(%ebp),%mm1
137	movd	4(%esi),%mm0
138	psrlq	$32,%mm2
139	psrlq	$32,%mm3
140	paddq	%mm6,%mm2
141	incl	%ecx
/* %ebx doubles as the down-counter for the inner loop; it is rebuilt from %ecx afterwards. */
142	decl	%ebx
143.L007inner:
144	pmuludq	%mm4,%mm0
145	pmuludq	%mm5,%mm1
146	paddq	%mm0,%mm2
147	paddq	%mm1,%mm3
148	movq	%mm2,%mm0
149	movd	36(%esp,%ecx,4),%mm6
150	pand	%mm7,%mm0
151	movd	4(%ebp,%ecx,4),%mm1
152	paddq	%mm0,%mm3
153	movd	4(%esi,%ecx,4),%mm0
154	psrlq	$32,%mm2
155	movd	%mm3,28(%esp,%ecx,4)
156	psrlq	$32,%mm3
157	paddq	%mm6,%mm2
/* leal leaves the flags from decl intact for the jnz. */
158	decl	%ebx
159	leal	1(%ecx),%ecx
160	jnz	.L007inner
161	movl	%ecx,%ebx
162	pmuludq	%mm4,%mm0
163	pmuludq	%mm5,%mm1
164	paddq	%mm0,%mm2
165	paddq	%mm1,%mm3
166	movq	%mm2,%mm0
167	pand	%mm7,%mm0
168	paddq	%mm0,%mm3
169	movd	%mm3,28(%esp,%ecx,4)
170	psrlq	$32,%mm2
171	psrlq	$32,%mm3
172	movd	36(%esp,%ebx,4),%mm6
173	paddq	%mm2,%mm3
174	paddq	%mm6,%mm3
175	movq	%mm3,32(%esp,%ebx,4)
176	leal	1(%edx),%edx
177	cmpl	%ebx,%edx
178	jle	.L006outer
/* Leave MMX state before returning to integer code. */
179	emms
180	jmp	.L008common_tail
181.align	16
/*
 * ---- Integer path ----
 * %ebp = (num & 1) | (ap - bp): zero only when num is even and ap == bp, in
 * which case the dedicated squaring code is used.  %eax = &bp[num].
 * The movl that loads bp[0] into %edi does not disturb the flags from orl.
 */
182.L004non_sse2:
183	movl	8(%esp),%esi
184	leal	1(%ebx),%ebp
185	movl	12(%esp),%edi
186	xorl	%ecx,%ecx
187	movl	%esi,%edx
188	andl	$1,%ebp
189	subl	%edi,%edx
190	leal	4(%edi,%ebx,4),%eax
191	orl	%edx,%ebp
192	movl	(%edi),%edi
193	jz	.L009bn_sqr_mont
194	movl	%eax,28(%esp)
195	movl	(%esi),%eax
196	xorl	%edx,%edx
197.align	16
/* tp[j] = low(ap[j]*bp[0] + carry); %edx carries the high word forward. */
198.L010mull:
199	movl	%edx,%ebp
200	mull	%edi
201	addl	%eax,%ebp
202	leal	1(%ecx),%ecx
203	adcl	$0,%edx
204	movl	(%esi,%ecx,4),%eax
205	cmpl	%ebx,%ecx
206	movl	%ebp,28(%esp,%ecx,4)
207	jl	.L010mull
/*
 * Last word of the first product, then set up the reduction:
 * %edi = n0 * tp[0] (low 32 bits), %esi = np, tp[num+1] = 0.
 */
208	movl	%edx,%ebp
209	mull	%edi
210	movl	20(%esp),%edi
211	addl	%ebp,%eax
212	movl	16(%esp),%esi
213	adcl	$0,%edx
214	imull	32(%esp),%edi
215	movl	%eax,32(%esp,%ebx,4)
216	xorl	%ecx,%ecx
217	movl	%edx,36(%esp,%ebx,4)
218	movl	%ecx,40(%esp,%ebx,4)
219	movl	(%esi),%eax
220	mull	%edi
/* Only the carry of this add is wanted; %eax is overwritten by the next load. */
221	addl	32(%esp),%eax
222	movl	4(%esi),%eax
223	adcl	$0,%edx
224	incl	%ecx
225	jmp	.L0112ndmadd
226.align	16
/* Multiply-accumulate pass: tp[j] += ap[j]*bp[i] with carry, in place. */
227.L0121stmadd:
228	movl	%edx,%ebp
229	mull	%edi
230	addl	32(%esp,%ecx,4),%ebp
231	leal	1(%ecx),%ecx
232	adcl	$0,%edx
233	addl	%eax,%ebp
234	movl	(%esi,%ecx,4),%eax
235	adcl	$0,%edx
236	cmpl	%ebx,%ecx
237	movl	%ebp,28(%esp,%ecx,4)
238	jl	.L0121stmadd
/* Last word of the pass, fold the carry into tp[num]/tp[num+1], then start the reduction. */
239	movl	%edx,%ebp
240	mull	%edi
241	addl	32(%esp,%ebx,4),%eax
242	movl	20(%esp),%edi
243	adcl	$0,%edx
244	movl	16(%esp),%esi
245	addl	%eax,%ebp
246	adcl	$0,%edx
247	imull	32(%esp),%edi
248	xorl	%ecx,%ecx
249	addl	36(%esp,%ebx,4),%edx
250	movl	%ebp,32(%esp,%ebx,4)
251	adcl	$0,%ecx
252	movl	(%esi),%eax
253	movl	%edx,36(%esp,%ebx,4)
254	movl	%ecx,40(%esp,%ebx,4)
255	mull	%edi
/* As above: the low word of the sum is discarded, only the carry is kept. */
256	addl	32(%esp),%eax
257	movl	4(%esi),%eax
258	adcl	$0,%edx
259	movl	$1,%ecx
260.align	16
/* Reduction pass: reads tp[j], adds np[j]*%edi, writes the result one slot down (tp[j-1]). */
261.L0112ndmadd:
262	movl	%edx,%ebp
263	mull	%edi
264	addl	32(%esp,%ecx,4),%ebp
265	leal	1(%ecx),%ecx
266	adcl	$0,%edx
267	addl	%eax,%ebp
268	movl	(%esi,%ecx,4),%eax
269	adcl	$0,%edx
270	cmpl	%ebx,%ecx
271	movl	%ebp,24(%esp,%ecx,4)
272	jl	.L0112ndmadd
273	movl	%edx,%ebp
274	mull	%edi
275	addl	32(%esp,%ebx,4),%ebp
276	adcl	$0,%edx
277	addl	%eax,%ebp
278	adcl	$0,%edx
279	movl	%ebp,28(%esp,%ebx,4)
/* Shift the two top words down, advance the bp pointer, stop when it reaches &bp[num]. */
280	xorl	%eax,%eax
281	movl	12(%esp),%ecx
282	addl	36(%esp,%ebx,4),%edx
283	adcl	40(%esp,%ebx,4),%eax
284	leal	4(%ecx),%ecx
285	movl	%edx,32(%esp,%ebx,4)
286	cmpl	28(%esp),%ecx
287	movl	%eax,36(%esp,%ebx,4)
288	je	.L008common_tail
/* Next outer pass: %edi = next bp word, %esi = ap, carry and index reset. */
289	movl	(%ecx),%edi
290	movl	8(%esp),%esi
291	movl	%ecx,12(%esp)
292	xorl	%ecx,%ecx
293	xorl	%edx,%edx
294	movl	(%esi),%eax
295	jmp	.L0121stmadd
296.align	16
/*
 * ---- Squaring path (ap == bp, num even) ----
 * 0(%esp) = num-1, 12(%esp) = outer index i = 0 (%ecx is zero on entry).
 * tp[0] = low(ap[0]^2); the high word is split into (value >> 1) in %edx and
 * its low bit in %ebx so that the cross products below can be doubled.
 */
297.L009bn_sqr_mont:
298	movl	%ebx,(%esp)
299	movl	%ecx,12(%esp)
300	movl	%edi,%eax
301	mull	%edi
302	movl	%eax,32(%esp)
303	movl	%edx,%ebx
304	shrl	$1,%edx
305	andl	$1,%ebx
306	incl	%ecx
307.align	16
/* tp[j] = 2*low(ap[j]*ap[0] + carry) + previous top bit; %ebx keeps the bit shifted out. */
308.L013sqr:
309	movl	(%esi,%ecx,4),%eax
310	movl	%edx,%ebp
311	mull	%edi
312	addl	%ebp,%eax
313	leal	1(%ecx),%ecx
314	adcl	$0,%edx
315	leal	(%ebx,%eax,2),%ebp
316	shrl	$31,%eax
317	cmpl	(%esp),%ecx
318	movl	%eax,%ebx
319	movl	%ebp,28(%esp,%ecx,4)
320	jl	.L013sqr
/* Last word, top words of tp, then reduction setup: %edi = n0 * tp[0], %esi = np. */
321	movl	(%esi,%ecx,4),%eax
322	movl	%edx,%ebp
323	mull	%edi
324	addl	%ebp,%eax
325	movl	20(%esp),%edi
326	adcl	$0,%edx
327	movl	16(%esp),%esi
328	leal	(%ebx,%eax,2),%ebp
329	imull	32(%esp),%edi
330	shrl	$31,%eax
331	movl	%ebp,32(%esp,%ecx,4)
332	leal	(%eax,%edx,2),%ebp
333	movl	(%esi),%eax
334	shrl	$31,%edx
335	movl	%ebp,36(%esp,%ecx,4)
336	movl	%edx,40(%esp,%ecx,4)
337	mull	%edi
/* Only the carry of this add is used; %eax is reloaded below. */
338	addl	32(%esp),%eax
339	movl	%ecx,%ebx
340	adcl	$0,%edx
341	movl	4(%esi),%eax
342	movl	$1,%ecx
343.align	16
/* Reduction pass for the squaring path, two words per iteration, result shifted one slot down. */
344.L0143rdmadd:
345	movl	%edx,%ebp
346	mull	%edi
347	addl	32(%esp,%ecx,4),%ebp
348	adcl	$0,%edx
349	addl	%eax,%ebp
350	movl	4(%esi,%ecx,4),%eax
351	adcl	$0,%edx
352	movl	%ebp,28(%esp,%ecx,4)
353	movl	%edx,%ebp
354	mull	%edi
355	addl	36(%esp,%ecx,4),%ebp
356	leal	2(%ecx),%ecx
357	adcl	$0,%edx
358	addl	%eax,%ebp
359	movl	(%esi,%ecx,4),%eax
360	adcl	$0,%edx
361	cmpl	%ebx,%ecx
362	movl	%ebp,24(%esp,%ecx,4)
363	jl	.L0143rdmadd
364	movl	%edx,%ebp
365	mull	%edi
366	addl	32(%esp,%ebx,4),%ebp
367	adcl	$0,%edx
368	addl	%eax,%ebp
369	adcl	$0,%edx
370	movl	%ebp,28(%esp,%ebx,4)
/* Shift the top words down; finished once the outer index equals %ebx. */
371	movl	12(%esp),%ecx
372	xorl	%eax,%eax
373	movl	8(%esp),%esi
374	addl	36(%esp,%ebx,4),%edx
375	adcl	40(%esp,%ebx,4),%eax
376	movl	%edx,32(%esp,%ebx,4)
377	cmpl	%ebx,%ecx
378	movl	%eax,36(%esp,%ebx,4)
379	je	.L008common_tail
/* Next outer pass: %edi = ap[i+1]; add its square to tp at the new index. */
380	movl	4(%esi,%ecx,4),%edi
381	leal	1(%ecx),%ecx
382	movl	%edi,%eax
383	movl	%ecx,12(%esp)
384	mull	%edi
385	addl	32(%esp,%ecx,4),%eax
386	adcl	$0,%edx
387	movl	%eax,32(%esp,%ecx,4)
388	xorl	%ebp,%ebp
/* leal keeps the cmpl flags: when i is already the last index there are no cross products. */
389	cmpl	%ebx,%ecx
390	leal	1(%ecx),%ecx
391	je	.L015sqrlast
392	movl	%edx,%ebx
393	shrl	$1,%edx
394	andl	$1,%ebx
395.align	16
/* tp[j] += 2*low(ap[j]*ap[i] + carry) + carried bits; %ebx collects the overflow. */
396.L016sqradd:
397	movl	(%esi,%ecx,4),%eax
398	movl	%edx,%ebp
399	mull	%edi
400	addl	%ebp,%eax
401	leal	(%eax,%eax,1),%ebp
402	adcl	$0,%edx
403	shrl	$31,%eax
404	addl	32(%esp,%ecx,4),%ebp
405	leal	1(%ecx),%ecx
406	adcl	$0,%eax
407	addl	%ebx,%ebp
408	adcl	$0,%eax
409	cmpl	(%esp),%ecx
410	movl	%ebp,28(%esp,%ecx,4)
411	movl	%eax,%ebx
412	jle	.L016sqradd
/* Double the final carry word, keeping the bit shifted out in %ebp. */
413	movl	%edx,%ebp
414	addl	%edx,%edx
415	shrl	$31,%ebp
416	addl	%ebx,%edx
417	adcl	$0,%ebp
/* Store the top words, set up the reduction (%edi = n0 * tp[0], %ebx = %ecx-1) and rejoin it. */
418.L015sqrlast:
419	movl	20(%esp),%edi
420	movl	16(%esp),%esi
421	imull	32(%esp),%edi
422	addl	32(%esp,%ecx,4),%edx
423	movl	(%esi),%eax
424	adcl	$0,%ebp
425	movl	%edx,32(%esp,%ecx,4)
426	movl	%ebp,36(%esp,%ecx,4)
427	mull	%edi
428	addl	32(%esp),%eax
429	leal	-1(%ecx),%ebx
430	adcl	$0,%edx
431	movl	$1,%ecx
432	movl	4(%esi),%eax
433	jmp	.L0143rdmadd
434.align	16
/*
 * ---- Common tail ----
 * %ebp = np, %edi = rp, %esi = tp, %ecx = %ebx = index of the last word.
 * xorl zeroes the index and clears CF, so the first sbbl has no borrow in.
 */
435.L008common_tail:
436	movl	16(%esp),%ebp
437	movl	4(%esp),%edi
438	leal	32(%esp),%esi
439	movl	(%esi),%eax
440	movl	%ebx,%ecx
441	xorl	%edx,%edx
442.align	16
/*
 * rp[j] = tp[j] - np[j] - borrow.  decl, movl and leal leave CF untouched,
 * so the borrow survives to the next sbbl; jge tests the decl result.
 */
443.L017sub:
444	sbbl	(%ebp,%edx,4),%eax
445	movl	%eax,(%edi,%edx,4)
446	decl	%ecx
447	movl	4(%esi,%edx,4),%eax
448	leal	1(%edx),%edx
449	jge	.L017sub
/*
 * %eax = top tp word minus the final borrow, used as a mask (presumably
 * always 0 or all-ones here -- TODO confirm).  %esi becomes
 * (tp & mask) | (rp & ~mask): the copy source is chosen without a branch.
 */
450	sbbl	$0,%eax
451	andl	%eax,%esi
452	notl	%eax
453	movl	%edi,%ebp
454	andl	%eax,%ebp
455	orl	%ebp,%esi
456.align	16
/* Copy the selected vector to rp from the top word down; each tp word is overwritten with %ecx. */
457.L018copy:
458	movl	(%esi,%ebx,4),%eax
459	movl	%eax,(%edi,%ebx,4)
460	movl	%ecx,32(%esp,%ebx,4)
461	decl	%ebx
462	jge	.L018copy
/* Restore the caller's stack pointer and report success. */
463	movl	24(%esp),%esp
464	movl	$1,%eax
/* Shared exit: %eax is 0 when reached from the num < 4 check, 1 otherwise. */
465.L000just_leave:
466	popl	%edi
467	popl	%esi
468	popl	%ebx
469	popl	%ebp
470	ret
471.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/*
 * NUL-terminated ASCII identification string:
 * "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
 */
472.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
473.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
474.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
475.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
476.byte	111,114,103,62,0
/* Common symbol for the capability words tested in bn_mul_mont: size 16, third operand 4 (alignment on ELF targets). */
477.comm	OPENSSL_ia32cap_P,16,4
478#else
/*
 * bn_mul_mont -- non-PIC build (OPENSSL_ia32cap_P is addressed absolutely).
 *
 * Six 32-bit stack arguments are read.  The names used in the comments below
 * are shorthand; they presumably correspond to the OpenSSL prototype
 *   int bn_mul_mont(rp, ap, bp, np, n0, num)
 * -- TODO confirm against the C declaration:
 *   arg1 "rp"  result vector          arg4 "np"  modulus vector
 *   arg2 "ap"  first operand vector   arg5 "n0"  pointer to one word
 *   arg3 "bp"  second operand vector  arg6 "num" word count (signed)
 *
 * Returns %eax = 0 without touching memory when num < 4, otherwise %eax = 1.
 *
 * Private frame layout once the frame is set up (offsets from the new %esp):
 *    0  scratch (the squaring path keeps num-1 here)
 *    4  rp          8  ap
 *   12  bp pointer, advanced per outer pass by the integer multiply path;
 *       the squaring path reuses the slot as its outer index
 *   16  np         20  the word loaded from *n0
 *   24  caller's %esp, reloaded just before the register pops
 *   28  &bp[num], end marker for the integer multiply path
 *   32  tp[], the temporary accumulator vector
 */
479.file	"x86-mont.S"
480.text
481.globl	bn_mul_mont
482.type	bn_mul_mont,@function
483.align	16
484bn_mul_mont:
485.L_bn_mul_mont_begin:
/* Save %ebp/%ebx/%esi/%edi; they are popped again at the exit label. */
486	pushl	%ebp
487	pushl	%ebx
488	pushl	%esi
489	pushl	%edi
/* Return value 0 is preloaded; num (arg6) below 4 (signed compare) leaves at once. */
490	xorl	%eax,%eax
491	movl	40(%esp),%edi
492	cmpl	$4,%edi
493	jl	.L000just_leave
/* %esi = address of arg1, %edx = address of arg2. */
494	leal	20(%esp),%esi
495	leal	24(%esp),%edx
/* %ebp = %esp - 32 - 4*(num+2): candidate frame base; %edi ends up as num+2. */
496	addl	$2,%edi
497	negl	%edi
498	leal	-32(%esp,%edi,4),%ebp
499	negl	%edi
/* Lower %ebp so that its low 11 bits equal those of the arg2 address ... */
500	movl	%ebp,%eax
501	subl	%edx,%eax
502	andl	$2047,%eax
503	subl	%eax,%ebp
/* ... then subtract 2048 more if bit 11 also matches, so that bit 11 differs. */
504	xorl	%ebp,%edx
505	andl	$2048,%edx
506	xorl	$2048,%edx
507	subl	%edx,%ebp
/* Round the frame base down to a 64-byte boundary. */
508	andl	$-64,%ebp
/*
 * Move %esp down to the frame base one 4096-byte step at a time, reading a
 * word at each step so every intervening page is touched in order.
 * %edx keeps the caller's %esp.
 */
509	movl	%esp,%eax
510	subl	%ebp,%eax
511	andl	$-4096,%eax
512	movl	%esp,%edx
513	leal	(%ebp,%eax,1),%esp
514	movl	(%esp),%eax
515	cmpl	%ebp,%esp
516	ja	.L001page_walk
517	jmp	.L002page_walk_done
518.align	16
519.L001page_walk:
520	leal	-4096(%esp),%esp
521	movl	(%esp),%eax
522	cmpl	%ebp,%esp
523	ja	.L001page_walk
524.L002page_walk_done:
/* Copy the arguments into the private frame; arg5 is dereferenced once here. */
525	movl	(%esi),%eax
526	movl	4(%esi),%ebx
527	movl	8(%esi),%ecx
528	movl	12(%esi),%ebp
529	movl	16(%esi),%esi
530	movl	(%esi),%esi
531	movl	%eax,4(%esp)
532	movl	%ebx,8(%esp)
533	movl	%ecx,12(%esp)
534	movl	%ebp,16(%esp)
535	movl	%esi,20(%esp)
/* %ebx = (num+2)-3 = num-1, the index of the last word. */
536	leal	-3(%edi),%ebx
537	movl	%edx,24(%esp)
/*
 * Bit 26 of the first word of OPENSSL_ia32cap_P selects between the
 * MMX-register path and the integer path.
 */
538	leal	OPENSSL_ia32cap_P,%eax
539	btl	$26,(%eax)
540	jnc	.L003non_sse2
/*
 * ---- MMX-register path ----
 * %mm7 = 0x00000000ffffffff, used to isolate the low 32 bits of a sum.
 * %esi = ap, %edi = bp, %ebp = np, %edx = outer index i, %ecx = inner index j.
 * %mm4 = bp[i], %mm5 = reduction multiplier, %mm2/%mm3 = the two running sums.
 */
541	movl	$-1,%eax
542	movd	%eax,%mm7
543	movl	8(%esp),%esi
544	movl	12(%esp),%edi
545	movl	16(%esp),%ebp
546	xorl	%edx,%edx
547	xorl	%ecx,%ecx
/* First pass (i = 0): word 0.  multiplier = low32(ap[0]*bp[0]) * n0. */
548	movd	(%edi),%mm4
549	movd	(%esi),%mm5
550	movd	(%ebp),%mm3
551	pmuludq	%mm4,%mm5
552	movq	%mm5,%mm2
553	movq	%mm5,%mm0
554	pand	%mm7,%mm0
555	pmuludq	20(%esp),%mm5
556	pmuludq	%mm5,%mm3
557	paddq	%mm0,%mm3
558	movd	4(%ebp),%mm1
559	movd	4(%esi),%mm0
560	psrlq	$32,%mm2
561	psrlq	$32,%mm3
562	incl	%ecx
563.align	16
/*
 * First-pass inner loop: accumulate ap[j]*bp[0] and np[j]*multiplier, store
 * the low word one slot down (tp[j-1]) and keep the high halves as carries.
 * The operands for j+1 are loaded before the loop test.
 */
564.L0041st:
565	pmuludq	%mm4,%mm0
566	pmuludq	%mm5,%mm1
567	paddq	%mm0,%mm2
568	paddq	%mm1,%mm3
569	movq	%mm2,%mm0
570	pand	%mm7,%mm0
571	movd	4(%ebp,%ecx,4),%mm1
572	paddq	%mm0,%mm3
573	movd	4(%esi,%ecx,4),%mm0
574	psrlq	$32,%mm2
575	movd	%mm3,28(%esp,%ecx,4)
576	psrlq	$32,%mm3
577	leal	1(%ecx),%ecx
578	cmpl	%ebx,%ecx
579	jl	.L0041st
/* Last word of the first pass; the combined carries go to tp[num-1..num] as one quadword. */
580	pmuludq	%mm4,%mm0
581	pmuludq	%mm5,%mm1
582	paddq	%mm0,%mm2
583	paddq	%mm1,%mm3
584	movq	%mm2,%mm0
585	pand	%mm7,%mm0
586	paddq	%mm0,%mm3
587	movd	%mm3,28(%esp,%ecx,4)
588	psrlq	$32,%mm2
589	psrlq	$32,%mm3
590	paddq	%mm2,%mm3
591	movq	%mm3,32(%esp,%ebx,4)
592	incl	%edx
/*
 * Outer loop over bp[i], i >= 1: same shape as the first pass, but each step
 * also adds the existing tp word (carried in %mm6).
 */
593.L005outer:
594	xorl	%ecx,%ecx
595	movd	(%edi,%edx,4),%mm4
596	movd	(%esi),%mm5
597	movd	32(%esp),%mm6
598	movd	(%ebp),%mm3
599	pmuludq	%mm4,%mm5
600	paddq	%mm6,%mm5
601	movq	%mm5,%mm0
602	movq	%mm5,%mm2
603	pand	%mm7,%mm0
604	pmuludq	20(%esp),%mm5
605	pmuludq	%mm5,%mm3
606	paddq	%mm0,%mm3
607	movd	36(%esp),%mm6
608	movd	4(%ebp),%mm1
609	movd	4(%esi),%mm0
610	psrlq	$32,%mm2
611	psrlq	$32,%mm3
612	paddq	%mm6,%mm2
613	incl	%ecx
/* %ebx doubles as the down-counter for the inner loop; it is rebuilt from %ecx afterwards. */
614	decl	%ebx
615.L006inner:
616	pmuludq	%mm4,%mm0
617	pmuludq	%mm5,%mm1
618	paddq	%mm0,%mm2
619	paddq	%mm1,%mm3
620	movq	%mm2,%mm0
621	movd	36(%esp,%ecx,4),%mm6
622	pand	%mm7,%mm0
623	movd	4(%ebp,%ecx,4),%mm1
624	paddq	%mm0,%mm3
625	movd	4(%esi,%ecx,4),%mm0
626	psrlq	$32,%mm2
627	movd	%mm3,28(%esp,%ecx,4)
628	psrlq	$32,%mm3
629	paddq	%mm6,%mm2
/* leal leaves the flags from decl intact for the jnz. */
630	decl	%ebx
631	leal	1(%ecx),%ecx
632	jnz	.L006inner
633	movl	%ecx,%ebx
634	pmuludq	%mm4,%mm0
635	pmuludq	%mm5,%mm1
636	paddq	%mm0,%mm2
637	paddq	%mm1,%mm3
638	movq	%mm2,%mm0
639	pand	%mm7,%mm0
640	paddq	%mm0,%mm3
641	movd	%mm3,28(%esp,%ecx,4)
642	psrlq	$32,%mm2
643	psrlq	$32,%mm3
644	movd	36(%esp,%ebx,4),%mm6
645	paddq	%mm2,%mm3
646	paddq	%mm6,%mm3
647	movq	%mm3,32(%esp,%ebx,4)
648	leal	1(%edx),%edx
649	cmpl	%ebx,%edx
650	jle	.L005outer
/* Leave MMX state before returning to integer code. */
651	emms
652	jmp	.L007common_tail
653.align	16
/*
 * ---- Integer path ----
 * %ebp = (num & 1) | (ap - bp): zero only when num is even and ap == bp, in
 * which case the dedicated squaring code is used.  %eax = &bp[num].
 * The movl that loads bp[0] into %edi does not disturb the flags from orl.
 */
654.L003non_sse2:
655	movl	8(%esp),%esi
656	leal	1(%ebx),%ebp
657	movl	12(%esp),%edi
658	xorl	%ecx,%ecx
659	movl	%esi,%edx
660	andl	$1,%ebp
661	subl	%edi,%edx
662	leal	4(%edi,%ebx,4),%eax
663	orl	%edx,%ebp
664	movl	(%edi),%edi
665	jz	.L008bn_sqr_mont
666	movl	%eax,28(%esp)
667	movl	(%esi),%eax
668	xorl	%edx,%edx
669.align	16
/* tp[j] = low(ap[j]*bp[0] + carry); %edx carries the high word forward. */
670.L009mull:
671	movl	%edx,%ebp
672	mull	%edi
673	addl	%eax,%ebp
674	leal	1(%ecx),%ecx
675	adcl	$0,%edx
676	movl	(%esi,%ecx,4),%eax
677	cmpl	%ebx,%ecx
678	movl	%ebp,28(%esp,%ecx,4)
679	jl	.L009mull
/*
 * Last word of the first product, then set up the reduction:
 * %edi = n0 * tp[0] (low 32 bits), %esi = np, tp[num+1] = 0.
 */
680	movl	%edx,%ebp
681	mull	%edi
682	movl	20(%esp),%edi
683	addl	%ebp,%eax
684	movl	16(%esp),%esi
685	adcl	$0,%edx
686	imull	32(%esp),%edi
687	movl	%eax,32(%esp,%ebx,4)
688	xorl	%ecx,%ecx
689	movl	%edx,36(%esp,%ebx,4)
690	movl	%ecx,40(%esp,%ebx,4)
691	movl	(%esi),%eax
692	mull	%edi
/* Only the carry of this add is wanted; %eax is overwritten by the next load. */
693	addl	32(%esp),%eax
694	movl	4(%esi),%eax
695	adcl	$0,%edx
696	incl	%ecx
697	jmp	.L0102ndmadd
698.align	16
/* Multiply-accumulate pass: tp[j] += ap[j]*bp[i] with carry, in place. */
699.L0111stmadd:
700	movl	%edx,%ebp
701	mull	%edi
702	addl	32(%esp,%ecx,4),%ebp
703	leal	1(%ecx),%ecx
704	adcl	$0,%edx
705	addl	%eax,%ebp
706	movl	(%esi,%ecx,4),%eax
707	adcl	$0,%edx
708	cmpl	%ebx,%ecx
709	movl	%ebp,28(%esp,%ecx,4)
710	jl	.L0111stmadd
/* Last word of the pass, fold the carry into tp[num]/tp[num+1], then start the reduction. */
711	movl	%edx,%ebp
712	mull	%edi
713	addl	32(%esp,%ebx,4),%eax
714	movl	20(%esp),%edi
715	adcl	$0,%edx
716	movl	16(%esp),%esi
717	addl	%eax,%ebp
718	adcl	$0,%edx
719	imull	32(%esp),%edi
720	xorl	%ecx,%ecx
721	addl	36(%esp,%ebx,4),%edx
722	movl	%ebp,32(%esp,%ebx,4)
723	adcl	$0,%ecx
724	movl	(%esi),%eax
725	movl	%edx,36(%esp,%ebx,4)
726	movl	%ecx,40(%esp,%ebx,4)
727	mull	%edi
/* As above: the low word of the sum is discarded, only the carry is kept. */
728	addl	32(%esp),%eax
729	movl	4(%esi),%eax
730	adcl	$0,%edx
731	movl	$1,%ecx
732.align	16
/* Reduction pass: reads tp[j], adds np[j]*%edi, writes the result one slot down (tp[j-1]). */
733.L0102ndmadd:
734	movl	%edx,%ebp
735	mull	%edi
736	addl	32(%esp,%ecx,4),%ebp
737	leal	1(%ecx),%ecx
738	adcl	$0,%edx
739	addl	%eax,%ebp
740	movl	(%esi,%ecx,4),%eax
741	adcl	$0,%edx
742	cmpl	%ebx,%ecx
743	movl	%ebp,24(%esp,%ecx,4)
744	jl	.L0102ndmadd
745	movl	%edx,%ebp
746	mull	%edi
747	addl	32(%esp,%ebx,4),%ebp
748	adcl	$0,%edx
749	addl	%eax,%ebp
750	adcl	$0,%edx
751	movl	%ebp,28(%esp,%ebx,4)
/* Shift the two top words down, advance the bp pointer, stop when it reaches &bp[num]. */
752	xorl	%eax,%eax
753	movl	12(%esp),%ecx
754	addl	36(%esp,%ebx,4),%edx
755	adcl	40(%esp,%ebx,4),%eax
756	leal	4(%ecx),%ecx
757	movl	%edx,32(%esp,%ebx,4)
758	cmpl	28(%esp),%ecx
759	movl	%eax,36(%esp,%ebx,4)
760	je	.L007common_tail
/* Next outer pass: %edi = next bp word, %esi = ap, carry and index reset. */
761	movl	(%ecx),%edi
762	movl	8(%esp),%esi
763	movl	%ecx,12(%esp)
764	xorl	%ecx,%ecx
765	xorl	%edx,%edx
766	movl	(%esi),%eax
767	jmp	.L0111stmadd
768.align	16
/*
 * ---- Squaring path (ap == bp, num even) ----
 * 0(%esp) = num-1, 12(%esp) = outer index i = 0 (%ecx is zero on entry).
 * tp[0] = low(ap[0]^2); the high word is split into (value >> 1) in %edx and
 * its low bit in %ebx so that the cross products below can be doubled.
 */
769.L008bn_sqr_mont:
770	movl	%ebx,(%esp)
771	movl	%ecx,12(%esp)
772	movl	%edi,%eax
773	mull	%edi
774	movl	%eax,32(%esp)
775	movl	%edx,%ebx
776	shrl	$1,%edx
777	andl	$1,%ebx
778	incl	%ecx
779.align	16
/* tp[j] = 2*low(ap[j]*ap[0] + carry) + previous top bit; %ebx keeps the bit shifted out. */
780.L012sqr:
781	movl	(%esi,%ecx,4),%eax
782	movl	%edx,%ebp
783	mull	%edi
784	addl	%ebp,%eax
785	leal	1(%ecx),%ecx
786	adcl	$0,%edx
787	leal	(%ebx,%eax,2),%ebp
788	shrl	$31,%eax
789	cmpl	(%esp),%ecx
790	movl	%eax,%ebx
791	movl	%ebp,28(%esp,%ecx,4)
792	jl	.L012sqr
/* Last word, top words of tp, then reduction setup: %edi = n0 * tp[0], %esi = np. */
793	movl	(%esi,%ecx,4),%eax
794	movl	%edx,%ebp
795	mull	%edi
796	addl	%ebp,%eax
797	movl	20(%esp),%edi
798	adcl	$0,%edx
799	movl	16(%esp),%esi
800	leal	(%ebx,%eax,2),%ebp
801	imull	32(%esp),%edi
802	shrl	$31,%eax
803	movl	%ebp,32(%esp,%ecx,4)
804	leal	(%eax,%edx,2),%ebp
805	movl	(%esi),%eax
806	shrl	$31,%edx
807	movl	%ebp,36(%esp,%ecx,4)
808	movl	%edx,40(%esp,%ecx,4)
809	mull	%edi
/* Only the carry of this add is used; %eax is reloaded below. */
810	addl	32(%esp),%eax
811	movl	%ecx,%ebx
812	adcl	$0,%edx
813	movl	4(%esi),%eax
814	movl	$1,%ecx
815.align	16
/* Reduction pass for the squaring path, two words per iteration, result shifted one slot down. */
816.L0133rdmadd:
817	movl	%edx,%ebp
818	mull	%edi
819	addl	32(%esp,%ecx,4),%ebp
820	adcl	$0,%edx
821	addl	%eax,%ebp
822	movl	4(%esi,%ecx,4),%eax
823	adcl	$0,%edx
824	movl	%ebp,28(%esp,%ecx,4)
825	movl	%edx,%ebp
826	mull	%edi
827	addl	36(%esp,%ecx,4),%ebp
828	leal	2(%ecx),%ecx
829	adcl	$0,%edx
830	addl	%eax,%ebp
831	movl	(%esi,%ecx,4),%eax
832	adcl	$0,%edx
833	cmpl	%ebx,%ecx
834	movl	%ebp,24(%esp,%ecx,4)
835	jl	.L0133rdmadd
836	movl	%edx,%ebp
837	mull	%edi
838	addl	32(%esp,%ebx,4),%ebp
839	adcl	$0,%edx
840	addl	%eax,%ebp
841	adcl	$0,%edx
842	movl	%ebp,28(%esp,%ebx,4)
/* Shift the top words down; finished once the outer index equals %ebx. */
843	movl	12(%esp),%ecx
844	xorl	%eax,%eax
845	movl	8(%esp),%esi
846	addl	36(%esp,%ebx,4),%edx
847	adcl	40(%esp,%ebx,4),%eax
848	movl	%edx,32(%esp,%ebx,4)
849	cmpl	%ebx,%ecx
850	movl	%eax,36(%esp,%ebx,4)
851	je	.L007common_tail
/* Next outer pass: %edi = ap[i+1]; add its square to tp at the new index. */
852	movl	4(%esi,%ecx,4),%edi
853	leal	1(%ecx),%ecx
854	movl	%edi,%eax
855	movl	%ecx,12(%esp)
856	mull	%edi
857	addl	32(%esp,%ecx,4),%eax
858	adcl	$0,%edx
859	movl	%eax,32(%esp,%ecx,4)
860	xorl	%ebp,%ebp
/* leal keeps the cmpl flags: when i is already the last index there are no cross products. */
861	cmpl	%ebx,%ecx
862	leal	1(%ecx),%ecx
863	je	.L014sqrlast
864	movl	%edx,%ebx
865	shrl	$1,%edx
866	andl	$1,%ebx
867.align	16
/* tp[j] += 2*low(ap[j]*ap[i] + carry) + carried bits; %ebx collects the overflow. */
868.L015sqradd:
869	movl	(%esi,%ecx,4),%eax
870	movl	%edx,%ebp
871	mull	%edi
872	addl	%ebp,%eax
873	leal	(%eax,%eax,1),%ebp
874	adcl	$0,%edx
875	shrl	$31,%eax
876	addl	32(%esp,%ecx,4),%ebp
877	leal	1(%ecx),%ecx
878	adcl	$0,%eax
879	addl	%ebx,%ebp
880	adcl	$0,%eax
881	cmpl	(%esp),%ecx
882	movl	%ebp,28(%esp,%ecx,4)
883	movl	%eax,%ebx
884	jle	.L015sqradd
/* Double the final carry word, keeping the bit shifted out in %ebp. */
885	movl	%edx,%ebp
886	addl	%edx,%edx
887	shrl	$31,%ebp
888	addl	%ebx,%edx
889	adcl	$0,%ebp
/* Store the top words, set up the reduction (%edi = n0 * tp[0], %ebx = %ecx-1) and rejoin it. */
890.L014sqrlast:
891	movl	20(%esp),%edi
892	movl	16(%esp),%esi
893	imull	32(%esp),%edi
894	addl	32(%esp,%ecx,4),%edx
895	movl	(%esi),%eax
896	adcl	$0,%ebp
897	movl	%edx,32(%esp,%ecx,4)
898	movl	%ebp,36(%esp,%ecx,4)
899	mull	%edi
900	addl	32(%esp),%eax
901	leal	-1(%ecx),%ebx
902	adcl	$0,%edx
903	movl	$1,%ecx
904	movl	4(%esi),%eax
905	jmp	.L0133rdmadd
906.align	16
/*
 * ---- Common tail ----
 * %ebp = np, %edi = rp, %esi = tp, %ecx = %ebx = index of the last word.
 * xorl zeroes the index and clears CF, so the first sbbl has no borrow in.
 */
907.L007common_tail:
908	movl	16(%esp),%ebp
909	movl	4(%esp),%edi
910	leal	32(%esp),%esi
911	movl	(%esi),%eax
912	movl	%ebx,%ecx
913	xorl	%edx,%edx
914.align	16
/*
 * rp[j] = tp[j] - np[j] - borrow.  decl, movl and leal leave CF untouched,
 * so the borrow survives to the next sbbl; jge tests the decl result.
 */
915.L016sub:
916	sbbl	(%ebp,%edx,4),%eax
917	movl	%eax,(%edi,%edx,4)
918	decl	%ecx
919	movl	4(%esi,%edx,4),%eax
920	leal	1(%edx),%edx
921	jge	.L016sub
/*
 * %eax = top tp word minus the final borrow, used as a mask (presumably
 * always 0 or all-ones here -- TODO confirm).  %esi becomes
 * (tp & mask) | (rp & ~mask): the copy source is chosen without a branch.
 */
922	sbbl	$0,%eax
923	andl	%eax,%esi
924	notl	%eax
925	movl	%edi,%ebp
926	andl	%eax,%ebp
927	orl	%ebp,%esi
928.align	16
/* Copy the selected vector to rp from the top word down; each tp word is overwritten with %ecx. */
929.L017copy:
930	movl	(%esi,%ebx,4),%eax
931	movl	%eax,(%edi,%ebx,4)
932	movl	%ecx,32(%esp,%ebx,4)
933	decl	%ebx
934	jge	.L017copy
/* Restore the caller's stack pointer and report success. */
935	movl	24(%esp),%esp
936	movl	$1,%eax
/* Shared exit: %eax is 0 when reached from the num < 4 check, 1 otherwise. */
937.L000just_leave:
938	popl	%edi
939	popl	%esi
940	popl	%ebx
941	popl	%ebp
942	ret
943.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/*
 * NUL-terminated ASCII identification string:
 * "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
 */
944.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
945.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
946.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
947.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
948.byte	111,114,103,62,0
/* Common symbol for the capability words tested in bn_mul_mont: size 16, third operand 4 (alignment on ELF targets). */
949.comm	OPENSSL_ia32cap_P,16,4
950#endif
951