x86-mont.S revision 337982
1/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/x86-mont.S 337982 2018-08-17 18:32:53Z jkim $ */
2/* Do not modify. This file is auto-generated from x86-mont.pl. */
3#ifdef PIC
/* bn_mul_mont, PIC build. Word-serial Montgomery multiplication for 32-bit x86 (see the identification string after the routine). */
/* Arguments are read from the stack. Offsets below are relative to %esp after the four pushes at entry: */
/*   20(%esp) arg0 "rp"  - destination vector, written only in the common tail */
/*   24(%esp) arg1 "ap"  - first multiplicand vector */
/*   28(%esp) arg2 "bp"  - second multiplicand vector, one word is consumed per outer pass */
/*   32(%esp) arg3 "np"  - vector that is multiplied by m in every pass and subtracted in the common tail */
/*   36(%esp) arg4       - pointer to a word "n0", m is formed as n0 times tp[0] */
/*   40(%esp) arg5 "num" - number of 32-bit words in each vector */
/* NOTE(review): the short names are shorthand for these comments and follow the usual OpenSSL prototype, which is not visible in this file - confirm. */
/* Returns 1 in %eax when the multiplication was carried out, 0 (nothing written) when num is below 4. */
/* Local frame once set up: 0(%esp) num-1 (squaring path only), 4 rp, 8 ap, 12 bp pointer (squaring path: index i), 16 np, 20 n0, */
/* 24 caller %esp, 28 address of bp[num] (general multiply path only), 32 onwards the temporary vector tp[0..num+1]. */
4.file	"x86-mont.S"
5.text
6.globl	bn_mul_mont
7.type	bn_mul_mont,@function
8.align	16
9bn_mul_mont:
10.L_bn_mul_mont_begin:
/* Save the four registers that are popped again at .L000just_leave. */
11	pushl	%ebp
12	pushl	%ebx
13	pushl	%esi
14	pushl	%edi
/* %eax = 0 is the return value if the size check bails out. %edi = num. */
15	xorl	%eax,%eax
16	movl	40(%esp),%edi
17	cmpl	$4,%edi
18	jl	.L000just_leave
/* %esi = address of the arg0 slot, %edx = address of the arg1 slot. */
19	leal	20(%esp),%esi
20	leal	24(%esp),%edx
/* %ebp = %esp - 32 - 4*(num+2): candidate frame base with a 32-byte header and num+2 words of tp. %edi is left holding num+2. */
21	addl	$2,%edi
22	negl	%edi
23	leal	-32(%esp,%edi,4),%ebp
24	negl	%edi
/* Lower %ebp until (%ebp - %edx) is a multiple of 2048. */
25	movl	%ebp,%eax
26	subl	%edx,%eax
27	andl	$2047,%eax
28	subl	%eax,%ebp
/* Lower %ebp by a further 2048 when bit 11 of %ebp and %edx agree, so that the two addresses differ in bit 11. */
/* NOTE(review): presumably this placement is meant to reduce cache aliasing between the frame and the inputs - confirm against x86-mont.pl. */
29	xorl	%ebp,%edx
30	andl	$2048,%edx
31	xorl	$2048,%edx
32	subl	%edx,%ebp
/* Round the frame base down to a multiple of 64. */
33	andl	$-64,%ebp
/* %edx = caller %esp (stored at 24(%esp) further down). %esp first moves to %ebp plus the 4096-multiple part of the distance, */
/* then steps down 4096 bytes at a time, reading one word at every stop, until it equals %ebp. */
34	movl	%esp,%eax
35	subl	%ebp,%eax
36	andl	$-4096,%eax
37	movl	%esp,%edx
38	leal	(%ebp,%eax,1),%esp
39	movl	(%esp),%eax
40	cmpl	%ebp,%esp
41	ja	.L001page_walk
42	jmp	.L002page_walk_done
43.align	16
44.L001page_walk:
45	leal	-4096(%esp),%esp
46	movl	(%esp),%eax
47	cmpl	%ebp,%esp
48	ja	.L001page_walk
49.L002page_walk_done:
/* Copy the arguments into the new frame through %esi, which still points at the arg0 slot of the caller frame. */
/* arg4 is dereferenced here, so 20(%esp) holds the word n0 itself rather than a pointer. */
50	movl	(%esi),%eax
51	movl	4(%esi),%ebx
52	movl	8(%esi),%ecx
53	movl	12(%esi),%ebp
54	movl	16(%esi),%esi
55	movl	(%esi),%esi
56	movl	%eax,4(%esp)
57	movl	%ebx,8(%esp)
58	movl	%ecx,12(%esp)
59	movl	%ebp,16(%esp)
60	movl	%esi,20(%esp)
/* %ebx = (num+2) - 3 = num-1, the index of the last word. 24(%esp) = caller %esp. */
61	leal	-3(%edi),%ebx
62	movl	%edx,24(%esp)
/* PIC addressing: call and pop yield the run-time address of .L003PIC_me_up, and OPENSSL_ia32cap_P is reached relative to it. */
63	call	.L003PIC_me_up
64.L003PIC_me_up:
65	popl	%eax
66	leal	OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
/* Bit 26 of the first capability word selects the pmuludq code below. When it is clear, take the integer-only path. */
67	btl	$26,(%eax)
68	jnc	.L004non_sse2
/* ===== SSE2 path: 32x32 to 64-bit multiplies with pmuludq on MMX registers ===== */
/* %mm7 = 0x00000000ffffffff, the mask that extracts the low word of a 64-bit sum. */
69	movl	$-1,%eax
70	movd	%eax,%mm7
/* %esi = ap, %edi = bp, %ebp = np, %edx = i = 0 (outer index), %ecx = j = 0 (inner index). */
71	movl	8(%esp),%esi
72	movl	12(%esp),%edi
73	movl	16(%esp),%ebp
74	xorl	%edx,%edx
75	xorl	%ecx,%ecx
/* %mm4 = bp[0]. %mm2 = ap[0]*bp[0]. %mm5 = m = low word of that product times n0 (only its low dword feeds later pmuludq). */
/* %mm3 = np[0]*m + low word of ap[0]*bp[0]. */
76	movd	(%edi),%mm4
77	movd	(%esi),%mm5
78	movd	(%ebp),%mm3
79	pmuludq	%mm4,%mm5
80	movq	%mm5,%mm2
81	movq	%mm5,%mm0
82	pand	%mm7,%mm0
83	pmuludq	20(%esp),%mm5
84	pmuludq	%mm5,%mm3
85	paddq	%mm0,%mm3
/* Preload np[1] and ap[1], reduce both accumulators to their carries (high words), j = 1. */
86	movd	4(%ebp),%mm1
87	movd	4(%esi),%mm0
88	psrlq	$32,%mm2
89	psrlq	$32,%mm3
90	incl	%ecx
91.align	16
/* First pass (i = 0), j = 1..num-2: %mm2 accumulates ap[j]*bp[0], %mm3 accumulates np[j]*m plus the low word of %mm2. */
/* The low word of %mm3 is stored one slot lower, as tp[j-1], and both accumulators keep only their carries. */
92.L0051st:
93	pmuludq	%mm4,%mm0
94	pmuludq	%mm5,%mm1
95	paddq	%mm0,%mm2
96	paddq	%mm1,%mm3
97	movq	%mm2,%mm0
98	pand	%mm7,%mm0
99	movd	4(%ebp,%ecx,4),%mm1
100	paddq	%mm0,%mm3
101	movd	4(%esi,%ecx,4),%mm0
102	psrlq	$32,%mm2
103	movd	%mm3,28(%esp,%ecx,4)
104	psrlq	$32,%mm3
105	leal	1(%ecx),%ecx
106	cmpl	%ebx,%ecx
107	jl	.L0051st
/* Last word of the first pass (j = num-1): store tp[num-2], then write the sum of both carries as the 64-bit pair tp[num-1], tp[num]. */
108	pmuludq	%mm4,%mm0
109	pmuludq	%mm5,%mm1
110	paddq	%mm0,%mm2
111	paddq	%mm1,%mm3
112	movq	%mm2,%mm0
113	pand	%mm7,%mm0
114	paddq	%mm0,%mm3
115	movd	%mm3,28(%esp,%ecx,4)
116	psrlq	$32,%mm2
117	psrlq	$32,%mm3
118	paddq	%mm2,%mm3
119	movq	%mm3,32(%esp,%ebx,4)
120	incl	%edx
/* Outer loop, i = 1..num-1. %mm4 = bp[i]. %mm5 = ap[0]*bp[i] + tp[0], then m = low word of that sum times n0. */
/* %mm3 = np[0]*m + low word of the sum. %mm2 = carry of the sum + tp[1]. */
121.L006outer:
122	xorl	%ecx,%ecx
123	movd	(%edi,%edx,4),%mm4
124	movd	(%esi),%mm5
125	movd	32(%esp),%mm6
126	movd	(%ebp),%mm3
127	pmuludq	%mm4,%mm5
128	paddq	%mm6,%mm5
129	movq	%mm5,%mm0
130	movq	%mm5,%mm2
131	pand	%mm7,%mm0
132	pmuludq	20(%esp),%mm5
133	pmuludq	%mm5,%mm3
134	paddq	%mm0,%mm3
135	movd	36(%esp),%mm6
136	movd	4(%ebp),%mm1
137	movd	4(%esi),%mm0
138	psrlq	$32,%mm2
139	psrlq	$32,%mm3
140	paddq	%mm6,%mm2
/* j = 1. %ebx drops to num-2 and serves as the trip count of the inner loop. */
141	incl	%ecx
142	decl	%ebx
/* Inner loop, j = 1..num-2: %mm2 += ap[j]*bp[i] (tp[j] was already added), %mm3 += np[j]*m + low word of %mm2, store as tp[j-1]. */
/* %mm6 prefetches tp[j+1], which is folded into the carry kept in %mm2. The jnz tests the flags of decl, leal does not change them. */
143.L007inner:
144	pmuludq	%mm4,%mm0
145	pmuludq	%mm5,%mm1
146	paddq	%mm0,%mm2
147	paddq	%mm1,%mm3
148	movq	%mm2,%mm0
149	movd	36(%esp,%ecx,4),%mm6
150	pand	%mm7,%mm0
151	movd	4(%ebp,%ecx,4),%mm1
152	paddq	%mm0,%mm3
153	movd	4(%esi,%ecx,4),%mm0
154	psrlq	$32,%mm2
155	movd	%mm3,28(%esp,%ecx,4)
156	psrlq	$32,%mm3
157	paddq	%mm6,%mm2
158	decl	%ebx
159	leal	1(%ecx),%ecx
160	jnz	.L007inner
/* %ecx is num-1 here, restore %ebx to it. Last word (j = num-1): store tp[num-2], */
/* then write both carries plus the old tp[num] as the 64-bit pair tp[num-1], tp[num]. */
161	movl	%ecx,%ebx
162	pmuludq	%mm4,%mm0
163	pmuludq	%mm5,%mm1
164	paddq	%mm0,%mm2
165	paddq	%mm1,%mm3
166	movq	%mm2,%mm0
167	pand	%mm7,%mm0
168	paddq	%mm0,%mm3
169	movd	%mm3,28(%esp,%ecx,4)
170	psrlq	$32,%mm2
171	psrlq	$32,%mm3
172	movd	36(%esp,%ebx,4),%mm6
173	paddq	%mm2,%mm3
174	paddq	%mm6,%mm3
175	movq	%mm3,32(%esp,%ebx,4)
/* i++ and repeat while i is at most num-1. */
176	leal	1(%edx),%edx
177	cmpl	%ebx,%edx
178	jle	.L006outer
/* Leave MMX state before the integer tail. %ebx = num-1 as the tail expects. */
179	emms
180	jmp	.L008common_tail
181.align	16
/* ===== Integer-only path ===== */
/* %esi = ap, %edi = bp, %ecx = 0. %ebp = (num AND 1) OR (ap - bp), which is zero only when num is even and ap equals bp. */
/* %eax = address of bp[num]. Loading bp[0] into %edi leaves the flags of the orl intact for the jz. */
182.L004non_sse2:
183	movl	8(%esp),%esi
184	leal	1(%ebx),%ebp
185	movl	12(%esp),%edi
186	xorl	%ecx,%ecx
187	movl	%esi,%edx
188	andl	$1,%ebp
189	subl	%edi,%edx
190	leal	4(%edi,%ebx,4),%eax
191	orl	%edx,%ebp
192	movl	(%edi),%edi
193	jz	.L009bn_sqr_mont
/* General multiply. 28(%esp) = address of bp[num], the end marker for the outer loop. %eax = ap[0], %edx = carry = 0. */
194	movl	%eax,28(%esp)
195	movl	(%esi),%eax
196	xorl	%edx,%edx
197.align	16
/* First pass, j = 0..num-2: tp[j] = low word of ap[j]*bp[0] + carry, the new carry stays in %edx. */
/* The store sits between cmpl and jl, movl does not change the flags. */
198.L010mull:
199	movl	%edx,%ebp
200	mull	%edi
201	addl	%eax,%ebp
202	leal	1(%ecx),%ecx
203	adcl	$0,%edx
204	movl	(%esi,%ecx,4),%eax
205	cmpl	%ebx,%ecx
206	movl	%ebp,28(%esp,%ecx,4)
207	jl	.L010mull
/* Last word (j = num-1): tp[num-1] = low word, tp[num] = high word, tp[num+1] = 0. */
/* Interleaved with that: %esi = np and %edi = m = n0 times tp[0] (low 32 bits). */
208	movl	%edx,%ebp
209	mull	%edi
210	movl	20(%esp),%edi
211	addl	%ebp,%eax
212	movl	16(%esp),%esi
213	adcl	$0,%edx
214	imull	32(%esp),%edi
215	movl	%eax,32(%esp,%ebx,4)
216	xorl	%ecx,%ecx
217	movl	%edx,36(%esp,%ebx,4)
218	movl	%ecx,40(%esp,%ebx,4)
/* np[0]*m + tp[0]: only the carry in %edx is kept, %eax is overwritten with np[1]. Enter the reduction loop at j = 1. */
219	movl	(%esi),%eax
220	mull	%edi
221	addl	32(%esp),%eax
222	movl	4(%esi),%eax
223	adcl	$0,%edx
224	incl	%ecx
225	jmp	.L0112ndmadd
226.align	16
/* Multiply-accumulate pass for bp[i], j = 0..num-2: tp[j] = low word of tp[j] + ap[j]*bp[i] + carry. */
227.L0121stmadd:
228	movl	%edx,%ebp
229	mull	%edi
230	addl	32(%esp,%ecx,4),%ebp
231	leal	1(%ecx),%ecx
232	adcl	$0,%edx
233	addl	%eax,%ebp
234	movl	(%esi,%ecx,4),%eax
235	adcl	$0,%edx
236	cmpl	%ebx,%ecx
237	movl	%ebp,28(%esp,%ecx,4)
238	jl	.L0121stmadd
/* Last word (j = num-1): tp[num-1] = low word, tp[num] += high word, tp[num+1] = carry out of that addition. */
/* Interleaved with that: %esi = np and %edi = m = n0 times the updated tp[0]. */
239	movl	%edx,%ebp
240	mull	%edi
241	addl	32(%esp,%ebx,4),%eax
242	movl	20(%esp),%edi
243	adcl	$0,%edx
244	movl	16(%esp),%esi
245	addl	%eax,%ebp
246	adcl	$0,%edx
247	imull	32(%esp),%edi
248	xorl	%ecx,%ecx
249	addl	36(%esp,%ebx,4),%edx
250	movl	%ebp,32(%esp,%ebx,4)
251	adcl	$0,%ecx
252	movl	(%esi),%eax
253	movl	%edx,36(%esp,%ebx,4)
254	movl	%ecx,40(%esp,%ebx,4)
/* np[0]*m + tp[0]: keep only the carry, preload np[1], j = 1. */
255	mull	%edi
256	addl	32(%esp),%eax
257	movl	4(%esi),%eax
258	adcl	$0,%edx
259	movl	$1,%ecx
260.align	16
/* Reduction pass, j = 1..num-2: tp[j-1] = low word of tp[j] + np[j]*m + carry. The store offset 24 moves the result one word down. */
261.L0112ndmadd:
262	movl	%edx,%ebp
263	mull	%edi
264	addl	32(%esp,%ecx,4),%ebp
265	leal	1(%ecx),%ecx
266	adcl	$0,%edx
267	addl	%eax,%ebp
268	movl	(%esi,%ecx,4),%eax
269	adcl	$0,%edx
270	cmpl	%ebx,%ecx
271	movl	%ebp,24(%esp,%ecx,4)
272	jl	.L0112ndmadd
/* Last word (j = num-1): tp[num-2] = low word, tp[num-1] = tp[num] + carry, tp[num] = tp[num+1] + carry out. */
/* %ecx = bp pointer from 12(%esp) advanced by one word. The pass loop ends when it equals the end marker at 28(%esp). */
273	movl	%edx,%ebp
274	mull	%edi
275	addl	32(%esp,%ebx,4),%ebp
276	adcl	$0,%edx
277	addl	%eax,%ebp
278	adcl	$0,%edx
279	movl	%ebp,28(%esp,%ebx,4)
280	xorl	%eax,%eax
281	movl	12(%esp),%ecx
282	addl	36(%esp,%ebx,4),%edx
283	adcl	40(%esp,%ebx,4),%eax
284	leal	4(%ecx),%ecx
285	movl	%edx,32(%esp,%ebx,4)
286	cmpl	28(%esp),%ecx
287	movl	%eax,36(%esp,%ebx,4)
288	je	.L008common_tail
/* Next pass: %edi = next word of bp, save the advanced pointer, %esi = ap, j = 0, carry = 0, %eax = ap[0]. */
289	movl	(%ecx),%edi
290	movl	8(%esp),%esi
291	movl	%ecx,12(%esp)
292	xorl	%ecx,%ecx
293	xorl	%edx,%edx
294	movl	(%esi),%eax
295	jmp	.L0121stmadd
296.align	16
/* ===== Squaring path, taken when ap equals bp and num is even ===== */
/* 0(%esp) = num-1, 12(%esp) = i = 0. tp[0] = low word of ap[0] squared. The high word is split into */
/* %edx = high word shifted right by one and %ebx = its bit 0, because the cross products below are doubled. j = 1. */
297.L009bn_sqr_mont:
298	movl	%ebx,(%esp)
299	movl	%ecx,12(%esp)
300	movl	%edi,%eax
301	mull	%edi
302	movl	%eax,32(%esp)
303	movl	%edx,%ebx
304	shrl	$1,%edx
305	andl	$1,%ebx
306	incl	%ecx
307.align	16
/* First pass, j = 1..num-2: %eax = low word of ap[j]*ap[0] + carry. tp[j] = 2 times %eax + %ebx via lea, */
/* then %ebx receives bit 31 of the undoubled word for the next position. */
308.L013sqr:
309	movl	(%esi,%ecx,4),%eax
310	movl	%edx,%ebp
311	mull	%edi
312	addl	%ebp,%eax
313	leal	1(%ecx),%ecx
314	adcl	$0,%edx
315	leal	(%ebx,%eax,2),%ebp
316	shrl	$31,%eax
317	cmpl	(%esp),%ecx
318	movl	%eax,%ebx
319	movl	%ebp,28(%esp,%ecx,4)
320	jl	.L013sqr
/* Last word (j = num-1): tp[num-1] and tp[num] get the doubled low and high words, tp[num+1] gets bit 31 of the high word. */
/* Interleaved with that: %esi = np, %edi = m = n0 times tp[0], %eax = np[0]. */
321	movl	(%esi,%ecx,4),%eax
322	movl	%edx,%ebp
323	mull	%edi
324	addl	%ebp,%eax
325	movl	20(%esp),%edi
326	adcl	$0,%edx
327	movl	16(%esp),%esi
328	leal	(%ebx,%eax,2),%ebp
329	imull	32(%esp),%edi
330	shrl	$31,%eax
331	movl	%ebp,32(%esp,%ecx,4)
332	leal	(%eax,%edx,2),%ebp
333	movl	(%esi),%eax
334	shrl	$31,%edx
335	movl	%ebp,36(%esp,%ecx,4)
336	movl	%edx,40(%esp,%ecx,4)
/* np[0]*m + tp[0]: keep only the carry. %ebx = num-1, preload np[1], j = 1. */
337	mull	%edi
338	addl	32(%esp),%eax
339	movl	%ecx,%ebx
340	adcl	$0,%edx
341	movl	4(%esi),%eax
342	movl	$1,%ecx
343.align	16
/* Reduction pass unrolled by two: each trip handles words j and j+1, storing tp[j-1] and tp[j], then j += 2. */
/* It ends with %ecx equal to num-1, which is reached exactly because num is even on this path. */
344.L0143rdmadd:
345	movl	%edx,%ebp
346	mull	%edi
347	addl	32(%esp,%ecx,4),%ebp
348	adcl	$0,%edx
349	addl	%eax,%ebp
350	movl	4(%esi,%ecx,4),%eax
351	adcl	$0,%edx
352	movl	%ebp,28(%esp,%ecx,4)
353	movl	%edx,%ebp
354	mull	%edi
355	addl	36(%esp,%ecx,4),%ebp
356	leal	2(%ecx),%ecx
357	adcl	$0,%edx
358	addl	%eax,%ebp
359	movl	(%esi,%ecx,4),%eax
360	adcl	$0,%edx
361	cmpl	%ebx,%ecx
362	movl	%ebp,24(%esp,%ecx,4)
363	jl	.L0143rdmadd
/* Last word (j = num-1): tp[num-2] = low word, tp[num-1] = tp[num] + carry, tp[num] = tp[num+1] + carry out. */
/* %ecx = i from 12(%esp), %esi = ap. Finished when i equals num-1. */
364	movl	%edx,%ebp
365	mull	%edi
366	addl	32(%esp,%ebx,4),%ebp
367	adcl	$0,%edx
368	addl	%eax,%ebp
369	adcl	$0,%edx
370	movl	%ebp,28(%esp,%ebx,4)
371	movl	12(%esp),%ecx
372	xorl	%eax,%eax
373	movl	8(%esp),%esi
374	addl	36(%esp,%ebx,4),%edx
375	adcl	40(%esp,%ebx,4),%eax
376	movl	%edx,32(%esp,%ebx,4)
377	cmpl	%ebx,%ecx
378	movl	%eax,36(%esp,%ebx,4)
379	je	.L008common_tail
/* Next pass: i++, %edi = ap[i], tp[i] += low word of ap[i] squared, the high word plus carry stays in %edx. */
/* %ebp = 0. j = i+1. The je uses the flags of the cmpl (leal does not change them) and skips the cross products when i equals num-1. */
380	movl	4(%esi,%ecx,4),%edi
381	leal	1(%ecx),%ecx
382	movl	%edi,%eax
383	movl	%ecx,12(%esp)
384	mull	%edi
385	addl	32(%esp,%ecx,4),%eax
386	adcl	$0,%edx
387	movl	%eax,32(%esp,%ecx,4)
388	xorl	%ebp,%ebp
389	cmpl	%ebx,%ecx
390	leal	1(%ecx),%ecx
391	je	.L015sqrlast
/* Split the carry as in the first pass: %edx = carry shifted right by one, %ebx = its bit 0. */
392	movl	%edx,%ebx
393	shrl	$1,%edx
394	andl	$1,%ebx
395.align	16
/* Cross products, j = i+1..num-1: %eax = low word of ap[j]*ap[i] + carry. tp[j] += 2 times %eax + %ebx. */
/* %ebx then becomes bit 31 of the undoubled word plus the carries of the two additions. */
/* The adcl on %edx consumes the carry of the addl above it, the leal in between does not change the flags. */
396.L016sqradd:
397	movl	(%esi,%ecx,4),%eax
398	movl	%edx,%ebp
399	mull	%edi
400	addl	%ebp,%eax
401	leal	(%eax,%eax,1),%ebp
402	adcl	$0,%edx
403	shrl	$31,%eax
404	addl	32(%esp,%ecx,4),%ebp
405	leal	1(%ecx),%ecx
406	adcl	$0,%eax
407	addl	%ebx,%ebp
408	adcl	$0,%eax
409	cmpl	(%esp),%ecx
410	movl	%ebp,28(%esp,%ecx,4)
411	movl	%eax,%ebx
412	jle	.L016sqradd
/* Double the final carry: %edx = 2 times %edx + %ebx, %ebp = bit shifted out plus the carry of that addition. */
413	movl	%edx,%ebp
414	addl	%edx,%edx
415	shrl	$31,%ebp
416	addl	%ebx,%edx
417	adcl	$0,%ebp
/* %ecx = num on both ways in. tp[num] += %edx, tp[num+1] = %ebp + carry. */
/* Then set up the reduction: %esi = np, %edi = m = n0 times tp[0], keep the carry of np[0]*m + tp[0], %ebx = num-1, j = 1, %eax = np[1]. */
418.L015sqrlast:
419	movl	20(%esp),%edi
420	movl	16(%esp),%esi
421	imull	32(%esp),%edi
422	addl	32(%esp,%ecx,4),%edx
423	movl	(%esi),%eax
424	adcl	$0,%ebp
425	movl	%edx,32(%esp,%ecx,4)
426	movl	%ebp,36(%esp,%ecx,4)
427	mull	%edi
428	addl	32(%esp),%eax
429	leal	-1(%ecx),%ebx
430	adcl	$0,%edx
431	movl	$1,%ecx
432	movl	4(%esi),%eax
433	jmp	.L0143rdmadd
434.align	16
/* ===== Common tail: conditional final subtraction and copy-out. Expects %ebx = num-1 ===== */
/* %ebp = np, %edi = rp, %esi = address of tp[0], %eax = tp[0], %ecx = num-1 (countdown), %edx = j = 0. The xorl also clears the carry flag. */
435.L008common_tail:
436	movl	16(%esp),%ebp
437	movl	4(%esp),%edi
438	leal	32(%esp),%esi
439	movl	(%esi),%eax
440	movl	%ebx,%ecx
441	xorl	%edx,%edx
442.align	16
/* rp[j] = tp[j] - np[j] - borrow for j = 0..num-1. The borrow lives in the carry flag across trips: decl, movl and leal leave it alone. */
443.L017sub:
444	sbbl	(%ebp,%edx,4),%eax
445	movl	%eax,(%edi,%edx,4)
446	decl	%ecx
447	movl	4(%esi,%edx,4),%eax
448	leal	1(%edx),%edx
449	jge	.L017sub
/* %eax = tp[num] - borrow, used as the mask that keeps tp. %edx = its complement, the mask that keeps the difference already in rp. */
/* NOTE(review): the selection relies on %eax being either zero or all ones here - confirm the bound on tp[num]. */
450	sbbl	$0,%eax
451	movl	$-1,%edx
452	xorl	%eax,%edx
/* The jump only skips the alignment padding in front of the loop. */
453	jmp	.L018copy
454.align	16
/* For j = num-1 down to 0: rp[j] = (tp[j] AND %eax) OR (rp[j] AND %edx), selected without a branch. */
/* Each tp[j] is overwritten with %ecx, which is -1 after the countdown above. */
455.L018copy:
456	movl	32(%esp,%ebx,4),%esi
457	movl	(%edi,%ebx,4),%ebp
458	movl	%ecx,32(%esp,%ebx,4)
459	andl	%eax,%esi
460	andl	%edx,%ebp
461	orl	%esi,%ebp
462	movl	%ebp,(%edi,%ebx,4)
463	decl	%ebx
464	jge	.L018copy
/* Restore the caller stack pointer saved at 24(%esp) and return 1. */
465	movl	24(%esp),%esp
466	movl	$1,%eax
/* Shared exit. The early exit for num below 4 arrives here with %eax = 0 and the original %esp. */
467.L000just_leave:
468	popl	%edi
469	popl	%esi
470	popl	%ebx
471	popl	%ebp
472	ret
473.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/* NUL-terminated ASCII identification string: "Montgomery Multiplication for x86, CRYPTOGAMS by <appro(at)openssl.org>". */
474.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
475.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
476.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
477.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
478.byte	111,114,103,62,0
/* Capability vector tested above, declared as a 16-byte common symbol with 4-byte alignment. */
479.comm	OPENSSL_ia32cap_P,16,4
480#else
/* bn_mul_mont, non-PIC build. Word-serial Montgomery multiplication for 32-bit x86 (see the identification string after the routine). */
/* Arguments are read from the stack. Offsets below are relative to %esp after the four pushes at entry: */
/*   20(%esp) arg0 "rp"  - destination vector, written only in the common tail */
/*   24(%esp) arg1 "ap"  - first multiplicand vector */
/*   28(%esp) arg2 "bp"  - second multiplicand vector, one word is consumed per outer pass */
/*   32(%esp) arg3 "np"  - vector that is multiplied by m in every pass and subtracted in the common tail */
/*   36(%esp) arg4       - pointer to a word "n0", m is formed as n0 times tp[0] */
/*   40(%esp) arg5 "num" - number of 32-bit words in each vector */
/* NOTE(review): the short names are shorthand for these comments and follow the usual OpenSSL prototype, which is not visible in this file - confirm. */
/* Returns 1 in %eax when the multiplication was carried out, 0 (nothing written) when num is below 4. */
/* Local frame once set up: 0(%esp) num-1 (squaring path only), 4 rp, 8 ap, 12 bp pointer (squaring path: index i), 16 np, 20 n0, */
/* 24 caller %esp, 28 address of bp[num] (general multiply path only), 32 onwards the temporary vector tp[0..num+1]. */
481.file	"x86-mont.S"
482.text
483.globl	bn_mul_mont
484.type	bn_mul_mont,@function
485.align	16
486bn_mul_mont:
487.L_bn_mul_mont_begin:
/* Save the four registers that are popped again at .L000just_leave. */
488	pushl	%ebp
489	pushl	%ebx
490	pushl	%esi
491	pushl	%edi
/* %eax = 0 is the return value if the size check bails out. %edi = num. */
492	xorl	%eax,%eax
493	movl	40(%esp),%edi
494	cmpl	$4,%edi
495	jl	.L000just_leave
/* %esi = address of the arg0 slot, %edx = address of the arg1 slot. */
496	leal	20(%esp),%esi
497	leal	24(%esp),%edx
/* %ebp = %esp - 32 - 4*(num+2): candidate frame base with a 32-byte header and num+2 words of tp. %edi is left holding num+2. */
498	addl	$2,%edi
499	negl	%edi
500	leal	-32(%esp,%edi,4),%ebp
501	negl	%edi
/* Lower %ebp until (%ebp - %edx) is a multiple of 2048. */
502	movl	%ebp,%eax
503	subl	%edx,%eax
504	andl	$2047,%eax
505	subl	%eax,%ebp
/* Lower %ebp by a further 2048 when bit 11 of %ebp and %edx agree, so that the two addresses differ in bit 11. */
/* NOTE(review): presumably this placement is meant to reduce cache aliasing between the frame and the inputs - confirm against x86-mont.pl. */
506	xorl	%ebp,%edx
507	andl	$2048,%edx
508	xorl	$2048,%edx
509	subl	%edx,%ebp
/* Round the frame base down to a multiple of 64. */
510	andl	$-64,%ebp
/* %edx = caller %esp (stored at 24(%esp) further down). %esp first moves to %ebp plus the 4096-multiple part of the distance, */
/* then steps down 4096 bytes at a time, reading one word at every stop, until it equals %ebp. */
511	movl	%esp,%eax
512	subl	%ebp,%eax
513	andl	$-4096,%eax
514	movl	%esp,%edx
515	leal	(%ebp,%eax,1),%esp
516	movl	(%esp),%eax
517	cmpl	%ebp,%esp
518	ja	.L001page_walk
519	jmp	.L002page_walk_done
520.align	16
521.L001page_walk:
522	leal	-4096(%esp),%esp
523	movl	(%esp),%eax
524	cmpl	%ebp,%esp
525	ja	.L001page_walk
526.L002page_walk_done:
/* Copy the arguments into the new frame through %esi, which still points at the arg0 slot of the caller frame. */
/* arg4 is dereferenced here, so 20(%esp) holds the word n0 itself rather than a pointer. */
527	movl	(%esi),%eax
528	movl	4(%esi),%ebx
529	movl	8(%esi),%ecx
530	movl	12(%esi),%ebp
531	movl	16(%esi),%esi
532	movl	(%esi),%esi
533	movl	%eax,4(%esp)
534	movl	%ebx,8(%esp)
535	movl	%ecx,12(%esp)
536	movl	%ebp,16(%esp)
537	movl	%esi,20(%esp)
/* %ebx = (num+2) - 3 = num-1, the index of the last word. 24(%esp) = caller %esp. */
538	leal	-3(%edi),%ebx
539	movl	%edx,24(%esp)
/* Non-PIC addressing: OPENSSL_ia32cap_P is referenced by its absolute address. */
/* Bit 26 of the first capability word selects the pmuludq code below. When it is clear, take the integer-only path. */
540	leal	OPENSSL_ia32cap_P,%eax
541	btl	$26,(%eax)
542	jnc	.L003non_sse2
/* ===== SSE2 path: 32x32 to 64-bit multiplies with pmuludq on MMX registers ===== */
/* %mm7 = 0x00000000ffffffff, the mask that extracts the low word of a 64-bit sum. */
543	movl	$-1,%eax
544	movd	%eax,%mm7
/* %esi = ap, %edi = bp, %ebp = np, %edx = i = 0 (outer index), %ecx = j = 0 (inner index). */
545	movl	8(%esp),%esi
546	movl	12(%esp),%edi
547	movl	16(%esp),%ebp
548	xorl	%edx,%edx
549	xorl	%ecx,%ecx
/* %mm4 = bp[0]. %mm2 = ap[0]*bp[0]. %mm5 = m = low word of that product times n0 (only its low dword feeds later pmuludq). */
/* %mm3 = np[0]*m + low word of ap[0]*bp[0]. */
550	movd	(%edi),%mm4
551	movd	(%esi),%mm5
552	movd	(%ebp),%mm3
553	pmuludq	%mm4,%mm5
554	movq	%mm5,%mm2
555	movq	%mm5,%mm0
556	pand	%mm7,%mm0
557	pmuludq	20(%esp),%mm5
558	pmuludq	%mm5,%mm3
559	paddq	%mm0,%mm3
/* Preload np[1] and ap[1], reduce both accumulators to their carries (high words), j = 1. */
560	movd	4(%ebp),%mm1
561	movd	4(%esi),%mm0
562	psrlq	$32,%mm2
563	psrlq	$32,%mm3
564	incl	%ecx
565.align	16
/* First pass (i = 0), j = 1..num-2: %mm2 accumulates ap[j]*bp[0], %mm3 accumulates np[j]*m plus the low word of %mm2. */
/* The low word of %mm3 is stored one slot lower, as tp[j-1], and both accumulators keep only their carries. */
566.L0041st:
567	pmuludq	%mm4,%mm0
568	pmuludq	%mm5,%mm1
569	paddq	%mm0,%mm2
570	paddq	%mm1,%mm3
571	movq	%mm2,%mm0
572	pand	%mm7,%mm0
573	movd	4(%ebp,%ecx,4),%mm1
574	paddq	%mm0,%mm3
575	movd	4(%esi,%ecx,4),%mm0
576	psrlq	$32,%mm2
577	movd	%mm3,28(%esp,%ecx,4)
578	psrlq	$32,%mm3
579	leal	1(%ecx),%ecx
580	cmpl	%ebx,%ecx
581	jl	.L0041st
/* Last word of the first pass (j = num-1): store tp[num-2], then write the sum of both carries as the 64-bit pair tp[num-1], tp[num]. */
582	pmuludq	%mm4,%mm0
583	pmuludq	%mm5,%mm1
584	paddq	%mm0,%mm2
585	paddq	%mm1,%mm3
586	movq	%mm2,%mm0
587	pand	%mm7,%mm0
588	paddq	%mm0,%mm3
589	movd	%mm3,28(%esp,%ecx,4)
590	psrlq	$32,%mm2
591	psrlq	$32,%mm3
592	paddq	%mm2,%mm3
593	movq	%mm3,32(%esp,%ebx,4)
594	incl	%edx
/* Outer loop, i = 1..num-1. %mm4 = bp[i]. %mm5 = ap[0]*bp[i] + tp[0], then m = low word of that sum times n0. */
/* %mm3 = np[0]*m + low word of the sum. %mm2 = carry of the sum + tp[1]. */
595.L005outer:
596	xorl	%ecx,%ecx
597	movd	(%edi,%edx,4),%mm4
598	movd	(%esi),%mm5
599	movd	32(%esp),%mm6
600	movd	(%ebp),%mm3
601	pmuludq	%mm4,%mm5
602	paddq	%mm6,%mm5
603	movq	%mm5,%mm0
604	movq	%mm5,%mm2
605	pand	%mm7,%mm0
606	pmuludq	20(%esp),%mm5
607	pmuludq	%mm5,%mm3
608	paddq	%mm0,%mm3
609	movd	36(%esp),%mm6
610	movd	4(%ebp),%mm1
611	movd	4(%esi),%mm0
612	psrlq	$32,%mm2
613	psrlq	$32,%mm3
614	paddq	%mm6,%mm2
/* j = 1. %ebx drops to num-2 and serves as the trip count of the inner loop. */
615	incl	%ecx
616	decl	%ebx
/* Inner loop, j = 1..num-2: %mm2 += ap[j]*bp[i] (tp[j] was already added), %mm3 += np[j]*m + low word of %mm2, store as tp[j-1]. */
/* %mm6 prefetches tp[j+1], which is folded into the carry kept in %mm2. The jnz tests the flags of decl, leal does not change them. */
617.L006inner:
618	pmuludq	%mm4,%mm0
619	pmuludq	%mm5,%mm1
620	paddq	%mm0,%mm2
621	paddq	%mm1,%mm3
622	movq	%mm2,%mm0
623	movd	36(%esp,%ecx,4),%mm6
624	pand	%mm7,%mm0
625	movd	4(%ebp,%ecx,4),%mm1
626	paddq	%mm0,%mm3
627	movd	4(%esi,%ecx,4),%mm0
628	psrlq	$32,%mm2
629	movd	%mm3,28(%esp,%ecx,4)
630	psrlq	$32,%mm3
631	paddq	%mm6,%mm2
632	decl	%ebx
633	leal	1(%ecx),%ecx
634	jnz	.L006inner
/* %ecx is num-1 here, restore %ebx to it. Last word (j = num-1): store tp[num-2], */
/* then write both carries plus the old tp[num] as the 64-bit pair tp[num-1], tp[num]. */
635	movl	%ecx,%ebx
636	pmuludq	%mm4,%mm0
637	pmuludq	%mm5,%mm1
638	paddq	%mm0,%mm2
639	paddq	%mm1,%mm3
640	movq	%mm2,%mm0
641	pand	%mm7,%mm0
642	paddq	%mm0,%mm3
643	movd	%mm3,28(%esp,%ecx,4)
644	psrlq	$32,%mm2
645	psrlq	$32,%mm3
646	movd	36(%esp,%ebx,4),%mm6
647	paddq	%mm2,%mm3
648	paddq	%mm6,%mm3
649	movq	%mm3,32(%esp,%ebx,4)
/* i++ and repeat while i is at most num-1. */
650	leal	1(%edx),%edx
651	cmpl	%ebx,%edx
652	jle	.L005outer
/* Leave MMX state before the integer tail. %ebx = num-1 as the tail expects. */
653	emms
654	jmp	.L007common_tail
655.align	16
/* ===== Integer-only path ===== */
/* %esi = ap, %edi = bp, %ecx = 0. %ebp = (num AND 1) OR (ap - bp), which is zero only when num is even and ap equals bp. */
/* %eax = address of bp[num]. Loading bp[0] into %edi leaves the flags of the orl intact for the jz. */
656.L003non_sse2:
657	movl	8(%esp),%esi
658	leal	1(%ebx),%ebp
659	movl	12(%esp),%edi
660	xorl	%ecx,%ecx
661	movl	%esi,%edx
662	andl	$1,%ebp
663	subl	%edi,%edx
664	leal	4(%edi,%ebx,4),%eax
665	orl	%edx,%ebp
666	movl	(%edi),%edi
667	jz	.L008bn_sqr_mont
/* General multiply. 28(%esp) = address of bp[num], the end marker for the outer loop. %eax = ap[0], %edx = carry = 0. */
668	movl	%eax,28(%esp)
669	movl	(%esi),%eax
670	xorl	%edx,%edx
671.align	16
/* First pass, j = 0..num-2: tp[j] = low word of ap[j]*bp[0] + carry, the new carry stays in %edx. */
/* The store sits between cmpl and jl, movl does not change the flags. */
672.L009mull:
673	movl	%edx,%ebp
674	mull	%edi
675	addl	%eax,%ebp
676	leal	1(%ecx),%ecx
677	adcl	$0,%edx
678	movl	(%esi,%ecx,4),%eax
679	cmpl	%ebx,%ecx
680	movl	%ebp,28(%esp,%ecx,4)
681	jl	.L009mull
/* Last word (j = num-1): tp[num-1] = low word, tp[num] = high word, tp[num+1] = 0. */
/* Interleaved with that: %esi = np and %edi = m = n0 times tp[0] (low 32 bits). */
682	movl	%edx,%ebp
683	mull	%edi
684	movl	20(%esp),%edi
685	addl	%ebp,%eax
686	movl	16(%esp),%esi
687	adcl	$0,%edx
688	imull	32(%esp),%edi
689	movl	%eax,32(%esp,%ebx,4)
690	xorl	%ecx,%ecx
691	movl	%edx,36(%esp,%ebx,4)
692	movl	%ecx,40(%esp,%ebx,4)
/* np[0]*m + tp[0]: only the carry in %edx is kept, %eax is overwritten with np[1]. Enter the reduction loop at j = 1. */
693	movl	(%esi),%eax
694	mull	%edi
695	addl	32(%esp),%eax
696	movl	4(%esi),%eax
697	adcl	$0,%edx
698	incl	%ecx
699	jmp	.L0102ndmadd
700.align	16
/* Multiply-accumulate pass for bp[i], j = 0..num-2: tp[j] = low word of tp[j] + ap[j]*bp[i] + carry. */
701.L0111stmadd:
702	movl	%edx,%ebp
703	mull	%edi
704	addl	32(%esp,%ecx,4),%ebp
705	leal	1(%ecx),%ecx
706	adcl	$0,%edx
707	addl	%eax,%ebp
708	movl	(%esi,%ecx,4),%eax
709	adcl	$0,%edx
710	cmpl	%ebx,%ecx
711	movl	%ebp,28(%esp,%ecx,4)
712	jl	.L0111stmadd
/* Last word (j = num-1): tp[num-1] = low word, tp[num] += high word, tp[num+1] = carry out of that addition. */
/* Interleaved with that: %esi = np and %edi = m = n0 times the updated tp[0]. */
713	movl	%edx,%ebp
714	mull	%edi
715	addl	32(%esp,%ebx,4),%eax
716	movl	20(%esp),%edi
717	adcl	$0,%edx
718	movl	16(%esp),%esi
719	addl	%eax,%ebp
720	adcl	$0,%edx
721	imull	32(%esp),%edi
722	xorl	%ecx,%ecx
723	addl	36(%esp,%ebx,4),%edx
724	movl	%ebp,32(%esp,%ebx,4)
725	adcl	$0,%ecx
726	movl	(%esi),%eax
727	movl	%edx,36(%esp,%ebx,4)
728	movl	%ecx,40(%esp,%ebx,4)
/* np[0]*m + tp[0]: keep only the carry, preload np[1], j = 1. */
729	mull	%edi
730	addl	32(%esp),%eax
731	movl	4(%esi),%eax
732	adcl	$0,%edx
733	movl	$1,%ecx
734.align	16
/* Reduction pass, j = 1..num-2: tp[j-1] = low word of tp[j] + np[j]*m + carry. The store offset 24 moves the result one word down. */
735.L0102ndmadd:
736	movl	%edx,%ebp
737	mull	%edi
738	addl	32(%esp,%ecx,4),%ebp
739	leal	1(%ecx),%ecx
740	adcl	$0,%edx
741	addl	%eax,%ebp
742	movl	(%esi,%ecx,4),%eax
743	adcl	$0,%edx
744	cmpl	%ebx,%ecx
745	movl	%ebp,24(%esp,%ecx,4)
746	jl	.L0102ndmadd
/* Last word (j = num-1): tp[num-2] = low word, tp[num-1] = tp[num] + carry, tp[num] = tp[num+1] + carry out. */
/* %ecx = bp pointer from 12(%esp) advanced by one word. The pass loop ends when it equals the end marker at 28(%esp). */
747	movl	%edx,%ebp
748	mull	%edi
749	addl	32(%esp,%ebx,4),%ebp
750	adcl	$0,%edx
751	addl	%eax,%ebp
752	adcl	$0,%edx
753	movl	%ebp,28(%esp,%ebx,4)
754	xorl	%eax,%eax
755	movl	12(%esp),%ecx
756	addl	36(%esp,%ebx,4),%edx
757	adcl	40(%esp,%ebx,4),%eax
758	leal	4(%ecx),%ecx
759	movl	%edx,32(%esp,%ebx,4)
760	cmpl	28(%esp),%ecx
761	movl	%eax,36(%esp,%ebx,4)
762	je	.L007common_tail
/* Next pass: %edi = next word of bp, save the advanced pointer, %esi = ap, j = 0, carry = 0, %eax = ap[0]. */
763	movl	(%ecx),%edi
764	movl	8(%esp),%esi
765	movl	%ecx,12(%esp)
766	xorl	%ecx,%ecx
767	xorl	%edx,%edx
768	movl	(%esi),%eax
769	jmp	.L0111stmadd
770.align	16
/* ===== Squaring path, taken when ap equals bp and num is even ===== */
/* 0(%esp) = num-1, 12(%esp) = i = 0. tp[0] = low word of ap[0] squared. The high word is split into */
/* %edx = high word shifted right by one and %ebx = its bit 0, because the cross products below are doubled. j = 1. */
771.L008bn_sqr_mont:
772	movl	%ebx,(%esp)
773	movl	%ecx,12(%esp)
774	movl	%edi,%eax
775	mull	%edi
776	movl	%eax,32(%esp)
777	movl	%edx,%ebx
778	shrl	$1,%edx
779	andl	$1,%ebx
780	incl	%ecx
781.align	16
/* First pass, j = 1..num-2: %eax = low word of ap[j]*ap[0] + carry. tp[j] = 2 times %eax + %ebx via lea, */
/* then %ebx receives bit 31 of the undoubled word for the next position. */
782.L012sqr:
783	movl	(%esi,%ecx,4),%eax
784	movl	%edx,%ebp
785	mull	%edi
786	addl	%ebp,%eax
787	leal	1(%ecx),%ecx
788	adcl	$0,%edx
789	leal	(%ebx,%eax,2),%ebp
790	shrl	$31,%eax
791	cmpl	(%esp),%ecx
792	movl	%eax,%ebx
793	movl	%ebp,28(%esp,%ecx,4)
794	jl	.L012sqr
/* Last word (j = num-1): tp[num-1] and tp[num] get the doubled low and high words, tp[num+1] gets bit 31 of the high word. */
/* Interleaved with that: %esi = np, %edi = m = n0 times tp[0], %eax = np[0]. */
795	movl	(%esi,%ecx,4),%eax
796	movl	%edx,%ebp
797	mull	%edi
798	addl	%ebp,%eax
799	movl	20(%esp),%edi
800	adcl	$0,%edx
801	movl	16(%esp),%esi
802	leal	(%ebx,%eax,2),%ebp
803	imull	32(%esp),%edi
804	shrl	$31,%eax
805	movl	%ebp,32(%esp,%ecx,4)
806	leal	(%eax,%edx,2),%ebp
807	movl	(%esi),%eax
808	shrl	$31,%edx
809	movl	%ebp,36(%esp,%ecx,4)
810	movl	%edx,40(%esp,%ecx,4)
/* np[0]*m + tp[0]: keep only the carry. %ebx = num-1, preload np[1], j = 1. */
811	mull	%edi
812	addl	32(%esp),%eax
813	movl	%ecx,%ebx
814	adcl	$0,%edx
815	movl	4(%esi),%eax
816	movl	$1,%ecx
817.align	16
/* Reduction pass unrolled by two: each trip handles words j and j+1, storing tp[j-1] and tp[j], then j += 2. */
/* It ends with %ecx equal to num-1, which is reached exactly because num is even on this path. */
818.L0133rdmadd:
819	movl	%edx,%ebp
820	mull	%edi
821	addl	32(%esp,%ecx,4),%ebp
822	adcl	$0,%edx
823	addl	%eax,%ebp
824	movl	4(%esi,%ecx,4),%eax
825	adcl	$0,%edx
826	movl	%ebp,28(%esp,%ecx,4)
827	movl	%edx,%ebp
828	mull	%edi
829	addl	36(%esp,%ecx,4),%ebp
830	leal	2(%ecx),%ecx
831	adcl	$0,%edx
832	addl	%eax,%ebp
833	movl	(%esi,%ecx,4),%eax
834	adcl	$0,%edx
835	cmpl	%ebx,%ecx
836	movl	%ebp,24(%esp,%ecx,4)
837	jl	.L0133rdmadd
/* Last word (j = num-1): tp[num-2] = low word, tp[num-1] = tp[num] + carry, tp[num] = tp[num+1] + carry out. */
/* %ecx = i from 12(%esp), %esi = ap. Finished when i equals num-1. */
838	movl	%edx,%ebp
839	mull	%edi
840	addl	32(%esp,%ebx,4),%ebp
841	adcl	$0,%edx
842	addl	%eax,%ebp
843	adcl	$0,%edx
844	movl	%ebp,28(%esp,%ebx,4)
845	movl	12(%esp),%ecx
846	xorl	%eax,%eax
847	movl	8(%esp),%esi
848	addl	36(%esp,%ebx,4),%edx
849	adcl	40(%esp,%ebx,4),%eax
850	movl	%edx,32(%esp,%ebx,4)
851	cmpl	%ebx,%ecx
852	movl	%eax,36(%esp,%ebx,4)
853	je	.L007common_tail
/* Next pass: i++, %edi = ap[i], tp[i] += low word of ap[i] squared, the high word plus carry stays in %edx. */
/* %ebp = 0. j = i+1. The je uses the flags of the cmpl (leal does not change them) and skips the cross products when i equals num-1. */
854	movl	4(%esi,%ecx,4),%edi
855	leal	1(%ecx),%ecx
856	movl	%edi,%eax
857	movl	%ecx,12(%esp)
858	mull	%edi
859	addl	32(%esp,%ecx,4),%eax
860	adcl	$0,%edx
861	movl	%eax,32(%esp,%ecx,4)
862	xorl	%ebp,%ebp
863	cmpl	%ebx,%ecx
864	leal	1(%ecx),%ecx
865	je	.L014sqrlast
/* Split the carry as in the first pass: %edx = carry shifted right by one, %ebx = its bit 0. */
866	movl	%edx,%ebx
867	shrl	$1,%edx
868	andl	$1,%ebx
869.align	16
/* Cross products, j = i+1..num-1: %eax = low word of ap[j]*ap[i] + carry. tp[j] += 2 times %eax + %ebx. */
/* %ebx then becomes bit 31 of the undoubled word plus the carries of the two additions. */
/* The adcl on %edx consumes the carry of the addl above it, the leal in between does not change the flags. */
870.L015sqradd:
871	movl	(%esi,%ecx,4),%eax
872	movl	%edx,%ebp
873	mull	%edi
874	addl	%ebp,%eax
875	leal	(%eax,%eax,1),%ebp
876	adcl	$0,%edx
877	shrl	$31,%eax
878	addl	32(%esp,%ecx,4),%ebp
879	leal	1(%ecx),%ecx
880	adcl	$0,%eax
881	addl	%ebx,%ebp
882	adcl	$0,%eax
883	cmpl	(%esp),%ecx
884	movl	%ebp,28(%esp,%ecx,4)
885	movl	%eax,%ebx
886	jle	.L015sqradd
/* Double the final carry: %edx = 2 times %edx + %ebx, %ebp = bit shifted out plus the carry of that addition. */
887	movl	%edx,%ebp
888	addl	%edx,%edx
889	shrl	$31,%ebp
890	addl	%ebx,%edx
891	adcl	$0,%ebp
/* %ecx = num on both ways in. tp[num] += %edx, tp[num+1] = %ebp + carry. */
/* Then set up the reduction: %esi = np, %edi = m = n0 times tp[0], keep the carry of np[0]*m + tp[0], %ebx = num-1, j = 1, %eax = np[1]. */
892.L014sqrlast:
893	movl	20(%esp),%edi
894	movl	16(%esp),%esi
895	imull	32(%esp),%edi
896	addl	32(%esp,%ecx,4),%edx
897	movl	(%esi),%eax
898	adcl	$0,%ebp
899	movl	%edx,32(%esp,%ecx,4)
900	movl	%ebp,36(%esp,%ecx,4)
901	mull	%edi
902	addl	32(%esp),%eax
903	leal	-1(%ecx),%ebx
904	adcl	$0,%edx
905	movl	$1,%ecx
906	movl	4(%esi),%eax
907	jmp	.L0133rdmadd
908.align	16
/* ===== Common tail: conditional final subtraction and copy-out. Expects %ebx = num-1 ===== */
/* %ebp = np, %edi = rp, %esi = address of tp[0], %eax = tp[0], %ecx = num-1 (countdown), %edx = j = 0. The xorl also clears the carry flag. */
909.L007common_tail:
910	movl	16(%esp),%ebp
911	movl	4(%esp),%edi
912	leal	32(%esp),%esi
913	movl	(%esi),%eax
914	movl	%ebx,%ecx
915	xorl	%edx,%edx
916.align	16
/* rp[j] = tp[j] - np[j] - borrow for j = 0..num-1. The borrow lives in the carry flag across trips: decl, movl and leal leave it alone. */
917.L016sub:
918	sbbl	(%ebp,%edx,4),%eax
919	movl	%eax,(%edi,%edx,4)
920	decl	%ecx
921	movl	4(%esi,%edx,4),%eax
922	leal	1(%edx),%edx
923	jge	.L016sub
/* %eax = tp[num] - borrow, used as the mask that keeps tp. %edx = its complement, the mask that keeps the difference already in rp. */
/* NOTE(review): the selection relies on %eax being either zero or all ones here - confirm the bound on tp[num]. */
924	sbbl	$0,%eax
925	movl	$-1,%edx
926	xorl	%eax,%edx
/* The jump only skips the alignment padding in front of the loop. */
927	jmp	.L017copy
928.align	16
/* For j = num-1 down to 0: rp[j] = (tp[j] AND %eax) OR (rp[j] AND %edx), selected without a branch. */
/* Each tp[j] is overwritten with %ecx, which is -1 after the countdown above. */
929.L017copy:
930	movl	32(%esp,%ebx,4),%esi
931	movl	(%edi,%ebx,4),%ebp
932	movl	%ecx,32(%esp,%ebx,4)
933	andl	%eax,%esi
934	andl	%edx,%ebp
935	orl	%esi,%ebp
936	movl	%ebp,(%edi,%ebx,4)
937	decl	%ebx
938	jge	.L017copy
/* Restore the caller stack pointer saved at 24(%esp) and return 1. */
939	movl	24(%esp),%esp
940	movl	$1,%eax
/* Shared exit. The early exit for num below 4 arrives here with %eax = 0 and the original %esp. */
941.L000just_leave:
942	popl	%edi
943	popl	%esi
944	popl	%ebx
945	popl	%ebp
946	ret
947.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/* NUL-terminated ASCII identification string: "Montgomery Multiplication for x86, CRYPTOGAMS by <appro(at)openssl.org>". */
948.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
949.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
950.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
951.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
952.byte	111,114,103,62,0
/* Capability vector tested above, declared as a 16-byte common symbol with 4-byte alignment. */
953.comm	OPENSSL_ia32cap_P,16,4
954#endif
955