x86-mont.S revision 305153
/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/x86-mont.S 305153 2016-08-31 20:33:59Z jkim $ */
/* Do not modify. This file is auto-generated from x86-mont.pl. */
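/*
 * bn_mul_mont computes r = a*b*2^(-32*num) mod n (word-based Montgomery
 * multiplication).  Per the OpenSSL convention the C prototype is
 *   int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
 *                   const BN_ULONG *np, const BN_ULONG *n0, int num);
 * it returns 1 on success and 0 when num < 4, in which case the caller
 * falls back to the generic C implementation.
 */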
#ifdef PIC
.file	"x86-mont.S"
.text
.globl	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.L_bn_mul_mont_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax
	movl	40(%esp),%edi
	cmpl	$4,%edi
	jl	.L000just_leave
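/*
 * Carve out a scratch frame of num+2 words below the caller's frame,
 * then nudge it so that it does not alias the argument block modulo
 * 2048 (apparently to sidestep partial-address aliasing penalties on
 * older Intel cores) and finally align it to a 64-byte boundary.
 */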
	leal	20(%esp),%esi
	leal	24(%esp),%edx
	movl	%esp,%ebp
	addl	$2,%edi
	negl	%edi
	leal	-32(%esp,%edi,4),%esp
	negl	%edi
	movl	%esp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%esp
	xorl	%esp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%esp
	andl	$-64,%esp
	movl	%ebp,%eax
	subl	%esp,%eax
	andl	$-4096,%eax
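/*
 * Touch every page of the newly reserved stack area so the guard page
 * is grown one page at a time; the stray 0x2e (.byte 46) prefix on the
 * jnc below comes from the generator, apparently as a branch-not-taken
 * hint.
 */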
.L001page_walk:
	movl	(%esp,%eax,1),%edx
	subl	$4096,%eax
.byte	46
	jnc	.L001page_walk
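/*
 * Stash the arguments at the bottom of the frame: 4(%esp)=rp,
 * 8(%esp)=ap, 12(%esp)=bp, 16(%esp)=np, 20(%esp)=n0[0] (the word is
 * dereferenced here), 24(%esp)=saved %esp; %ebx becomes num-1.
 */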
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	16(%esi),%esi
	movl	(%esi),%esi
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%edx,16(%esp)
	movl	%esi,20(%esp)
	leal	-3(%edi),%ebx
	movl	%ebp,24(%esp)
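/*
 * Pick up OPENSSL_ia32cap_P PC-relatively (call/pop trick) and test
 * bit 26 (SSE2) to choose between the SSE2 and the integer path.
 */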
	call	.L002PIC_me_up
.L002PIC_me_up:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.L002PIC_me_up(%eax),%eax
	btl	$26,(%eax)
	jnc	.L003non_sse2
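/*
 * SSE2 path: 32x32->64-bit products are formed with pmuludq; %mm4
 * holds the current word of bp, %mm5 the Montgomery factor for the
 * current pass, %mm7 a mask for the low 32 bits, and %mm2/%mm3 the
 * running carries.
 */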
	movl	$-1,%eax
	movd	%eax,%mm7
	movl	8(%esp),%esi
	movl	12(%esp),%edi
	movl	16(%esp),%ebp
	xorl	%edx,%edx
	xorl	%ecx,%ecx
	movd	(%edi),%mm4
	movd	(%esi),%mm5
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	movq	%mm5,%mm2
	movq	%mm5,%mm0
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	incl	%ecx
.align	16
.L0041st:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	leal	1(%ecx),%ecx
	cmpl	%ebx,%ecx
	jl	.L0041st
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm2,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	incl	%edx
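/*
 * Outer loop over the remaining words of bp: each pass accumulates
 * bp[i]*ap[] on top of the previous partial result, interleaved with
 * one Montgomery reduction step by np[].
 */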
.L005outer:
	xorl	%ecx,%ecx
	movd	(%edi,%edx,4),%mm4
	movd	(%esi),%mm5
	movd	32(%esp),%mm6
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	paddq	%mm6,%mm5
	movq	%mm5,%mm0
	movq	%mm5,%mm2
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	36(%esp),%mm6
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	incl	%ecx
	decl	%ebx
.L006inner:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	movd	36(%esp,%ecx,4),%mm6
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	decl	%ebx
	leal	1(%ecx),%ecx
	jnz	.L006inner
	movl	%ecx,%ebx
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	movd	36(%esp,%ebx,4),%mm6
	paddq	%mm2,%mm3
	paddq	%mm6,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	leal	1(%edx),%edx
	cmpl	%ebx,%edx
	jle	.L005outer
	emms
	jmp	.L007common_tail
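/*
 * Integer (non-SSE2) path: the same algorithm with mull/adcl.  When
 * ap == bp and num is even, the dedicated squaring code at
 * .L008bn_sqr_mont is used instead.
 */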
.align	16
.L003non_sse2:
	movl	8(%esp),%esi
	leal	1(%ebx),%ebp
	movl	12(%esp),%edi
	xorl	%ecx,%ecx
	movl	%esi,%edx
	andl	$1,%ebp
	subl	%edi,%edx
	leal	4(%edi,%ebx,4),%eax
	orl	%edx,%ebp
	movl	(%edi),%edi
	jz	.L008bn_sqr_mont
	movl	%eax,28(%esp)
	movl	(%esi),%eax
	xorl	%edx,%edx
.align	16
.L009mull:
	movl	%edx,%ebp
	mull	%edi
	addl	%eax,%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L009mull
	movl	%edx,%ebp
	mull	%edi
	movl	20(%esp),%edi
	addl	%ebp,%eax
	movl	16(%esp),%esi
	adcl	$0,%edx
	imull	32(%esp),%edi
	movl	%eax,32(%esp,%ebx,4)
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	movl	(%esi),%eax
	mull	%edi
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	incl	%ecx
	jmp	.L0102ndmadd
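/*
 * .L0111stmadd multiplies the next word of bp into the accumulated
 * vector; .L0102ndmadd then adds the multiple of np[] chosen via n0
 * (the Montgomery reduction step), shifting the vector down one word
 * as it goes.
 */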
.align	16
.L0111stmadd:
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L0111stmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx
	movl	%ebp,32(%esp,%ebx,4)
	adcl	$0,%ecx
	movl	(%esi),%eax
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	movl	$1,%ecx
.align	16
.L0102ndmadd:
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0102ndmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	xorl	%eax,%eax
	movl	12(%esp),%ecx
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	leal	4(%ecx),%ecx
	movl	%edx,32(%esp,%ebx,4)
	cmpl	28(%esp),%ecx
	movl	%eax,36(%esp,%ebx,4)
	je	.L007common_tail
	movl	(%ecx),%edi
	movl	8(%esp),%esi
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx
	xorl	%edx,%edx
	movl	(%esi),%eax
	jmp	.L0111stmadd
.align	16
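/*
 * Squaring path (ap == bp, num even): each off-diagonal product is
 * computed once and doubled on the fly; the reduction is handled by
 * .L0133rdmadd.
 */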
.L008bn_sqr_mont:
	movl	%ebx,(%esp)
	movl	%ecx,12(%esp)
	movl	%edi,%eax
	mull	%edi
	movl	%eax,32(%esp)
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
	incl	%ecx
.align	16
.L012sqr:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp
	shrl	$31,%eax
	cmpl	(%esp),%ecx
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L012sqr
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)
	movl	%edx,40(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	%ecx,%ebx
	adcl	$0,%edx
	movl	4(%esi),%eax
	movl	$1,%ecx
.align	16
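/*
 * Reduction loop for the squaring path: adds the n0-derived multiple
 * of np[] to the accumulator, two words per iteration.
 */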
.L0133rdmadd:
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%edx,%ebp
	mull	%edi
	addl	36(%esp,%ecx,4),%ebp
	leal	2(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0133rdmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	movl	8(%esp),%esi
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	movl	%edx,32(%esp,%ebx,4)
	cmpl	%ebx,%ecx
	movl	%eax,36(%esp,%ebx,4)
	je	.L007common_tail
	movl	4(%esi,%ecx,4),%edi
	leal	1(%ecx),%ecx
	movl	%edi,%eax
	movl	%ecx,12(%esp)
	mull	%edi
	addl	32(%esp,%ecx,4),%eax
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)
	xorl	%ebp,%ebp
	cmpl	%ebx,%ecx
	leal	1(%ecx),%ecx
	je	.L014sqrlast
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
.align	16
.L015sqradd:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp
	adcl	$0,%edx
	shrl	$31,%eax
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%eax,%ebx
	jle	.L015sqradd
	movl	%edx,%ebp
	addl	%edx,%edx
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
.L014sqrlast:
	movl	20(%esp),%edi
	movl	16(%esp),%esi
	imull	32(%esp),%edi
	addl	32(%esp,%ecx,4),%edx
	movl	(%esi),%eax
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)
	movl	%ebp,36(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	leal	-1(%ecx),%ebx
	adcl	$0,%edx
	movl	$1,%ecx
	movl	4(%esi),%eax
	jmp	.L0133rdmadd
.align	16
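/*
 * Common tail: conditionally subtract the modulus np from the
 * accumulated result, store the selected value to rp, and wipe the
 * temporary vector on the stack while copying.
 */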
.L007common_tail:
	movl	16(%esp),%ebp
	movl	4(%esp),%edi
	leal	32(%esp),%esi
	movl	(%esi),%eax
	movl	%ebx,%ecx
	xorl	%edx,%edx
.align	16
.L016sub:
	sbbl	(%ebp,%edx,4),%eax
	movl	%eax,(%edi,%edx,4)
	decl	%ecx
	movl	4(%esi,%edx,4),%eax
	leal	1(%edx),%edx
	jge	.L016sub
	sbbl	$0,%eax
	andl	%eax,%esi
	notl	%eax
	movl	%edi,%ebp
	andl	%eax,%ebp
	orl	%ebp,%esi
.align	16
.L017copy:
	movl	(%esi,%ebx,4),%eax
	movl	%eax,(%edi,%ebx,4)
	movl	%ecx,32(%esp,%ebx,4)
	decl	%ebx
	jge	.L017copy
	movl	24(%esp),%esp
	movl	$1,%eax
.L000just_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_mont,.-.L_bn_mul_mont_begin
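/*
 * The .byte data below encodes the ASCII string "Montgomery
 * Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>".
 */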
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0
.comm	OPENSSL_ia32cap_P,16,4
#else
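/*
 * Non-PIC build: identical to the code above except that
 * OPENSSL_ia32cap_P is addressed directly instead of PC-relatively
 * (and the local labels are renumbered accordingly).
 */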
.file	"x86-mont.S"
.text
.globl	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.L_bn_mul_mont_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax
	movl	40(%esp),%edi
	cmpl	$4,%edi
	jl	.L000just_leave
	leal	20(%esp),%esi
	leal	24(%esp),%edx
	movl	%esp,%ebp
	addl	$2,%edi
	negl	%edi
	leal	-32(%esp,%edi,4),%esp
	negl	%edi
	movl	%esp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%esp
	xorl	%esp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%esp
	andl	$-64,%esp
	movl	%ebp,%eax
	subl	%esp,%eax
	andl	$-4096,%eax
.L001page_walk:
	movl	(%esp,%eax,1),%edx
	subl	$4096,%eax
.byte	46
	jnc	.L001page_walk
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	16(%esi),%esi
	movl	(%esi),%esi
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%edx,16(%esp)
	movl	%esi,20(%esp)
	leal	-3(%edi),%ebx
	movl	%ebp,24(%esp)
	leal	OPENSSL_ia32cap_P,%eax
	btl	$26,(%eax)
	jnc	.L002non_sse2
	movl	$-1,%eax
	movd	%eax,%mm7
	movl	8(%esp),%esi
	movl	12(%esp),%edi
	movl	16(%esp),%ebp
	xorl	%edx,%edx
	xorl	%ecx,%ecx
	movd	(%edi),%mm4
	movd	(%esi),%mm5
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	movq	%mm5,%mm2
	movq	%mm5,%mm0
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	incl	%ecx
.align	16
.L0031st:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	leal	1(%ecx),%ecx
	cmpl	%ebx,%ecx
	jl	.L0031st
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm2,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	incl	%edx
.L004outer:
	xorl	%ecx,%ecx
	movd	(%edi,%edx,4),%mm4
	movd	(%esi),%mm5
	movd	32(%esp),%mm6
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	paddq	%mm6,%mm5
	movq	%mm5,%mm0
	movq	%mm5,%mm2
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	36(%esp),%mm6
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	incl	%ecx
	decl	%ebx
.L005inner:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	movd	36(%esp,%ecx,4),%mm6
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	decl	%ebx
	leal	1(%ecx),%ecx
	jnz	.L005inner
	movl	%ecx,%ebx
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	movd	36(%esp,%ebx,4),%mm6
	paddq	%mm2,%mm3
	paddq	%mm6,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	leal	1(%edx),%edx
	cmpl	%ebx,%edx
	jle	.L004outer
	emms
	jmp	.L006common_tail
.align	16
.L002non_sse2:
	movl	8(%esp),%esi
	leal	1(%ebx),%ebp
	movl	12(%esp),%edi
	xorl	%ecx,%ecx
	movl	%esi,%edx
	andl	$1,%ebp
	subl	%edi,%edx
	leal	4(%edi,%ebx,4),%eax
	orl	%edx,%ebp
	movl	(%edi),%edi
	jz	.L007bn_sqr_mont
	movl	%eax,28(%esp)
	movl	(%esi),%eax
	xorl	%edx,%edx
.align	16
.L008mull:
	movl	%edx,%ebp
	mull	%edi
	addl	%eax,%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L008mull
	movl	%edx,%ebp
	mull	%edi
	movl	20(%esp),%edi
	addl	%ebp,%eax
	movl	16(%esp),%esi
	adcl	$0,%edx
	imull	32(%esp),%edi
	movl	%eax,32(%esp,%ebx,4)
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	movl	(%esi),%eax
	mull	%edi
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	incl	%ecx
	jmp	.L0092ndmadd
.align	16
.L0101stmadd:
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L0101stmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx
	movl	%ebp,32(%esp,%ebx,4)
	adcl	$0,%ecx
	movl	(%esi),%eax
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	movl	$1,%ecx
.align	16
.L0092ndmadd:
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0092ndmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	xorl	%eax,%eax
	movl	12(%esp),%ecx
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	leal	4(%ecx),%ecx
	movl	%edx,32(%esp,%ebx,4)
	cmpl	28(%esp),%ecx
	movl	%eax,36(%esp,%ebx,4)
	je	.L006common_tail
	movl	(%ecx),%edi
	movl	8(%esp),%esi
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx
	xorl	%edx,%edx
	movl	(%esi),%eax
	jmp	.L0101stmadd
.align	16
.L007bn_sqr_mont:
	movl	%ebx,(%esp)
	movl	%ecx,12(%esp)
	movl	%edi,%eax
	mull	%edi
	movl	%eax,32(%esp)
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
	incl	%ecx
.align	16
.L011sqr:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp
	shrl	$31,%eax
	cmpl	(%esp),%ecx
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L011sqr
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)
	movl	%edx,40(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	%ecx,%ebx
	adcl	$0,%edx
	movl	4(%esi),%eax
	movl	$1,%ecx
.align	16
.L0123rdmadd:
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%edx,%ebp
	mull	%edi
	addl	36(%esp,%ecx,4),%ebp
	leal	2(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0123rdmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	movl	8(%esp),%esi
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	movl	%edx,32(%esp,%ebx,4)
	cmpl	%ebx,%ecx
	movl	%eax,36(%esp,%ebx,4)
	je	.L006common_tail
	movl	4(%esi,%ecx,4),%edi
	leal	1(%ecx),%ecx
	movl	%edi,%eax
	movl	%ecx,12(%esp)
	mull	%edi
	addl	32(%esp,%ecx,4),%eax
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)
	xorl	%ebp,%ebp
	cmpl	%ebx,%ecx
	leal	1(%ecx),%ecx
	je	.L013sqrlast
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
.align	16
.L014sqradd:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp
	adcl	$0,%edx
	shrl	$31,%eax
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%eax,%ebx
	jle	.L014sqradd
	movl	%edx,%ebp
	addl	%edx,%edx
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
.L013sqrlast:
	movl	20(%esp),%edi
	movl	16(%esp),%esi
	imull	32(%esp),%edi
	addl	32(%esp,%ecx,4),%edx
	movl	(%esi),%eax
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)
	movl	%ebp,36(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	leal	-1(%ecx),%ebx
	adcl	$0,%edx
	movl	$1,%ecx
	movl	4(%esi),%eax
	jmp	.L0123rdmadd
.align	16
.L006common_tail:
	movl	16(%esp),%ebp
	movl	4(%esp),%edi
	leal	32(%esp),%esi
	movl	(%esi),%eax
	movl	%ebx,%ecx
	xorl	%edx,%edx
.align	16
.L015sub:
	sbbl	(%ebp,%edx,4),%eax
	movl	%eax,(%edi,%edx,4)
	decl	%ecx
	movl	4(%esi,%edx,4),%eax
	leal	1(%edx),%edx
	jge	.L015sub
	sbbl	$0,%eax
	andl	%eax,%esi
	notl	%eax
	movl	%edi,%ebp
	andl	%eax,%ebp
	orl	%ebp,%esi
.align	16
.L016copy:
	movl	(%esi,%ebx,4),%eax
	movl	%eax,(%edi,%ebx,4)
	movl	%ecx,32(%esp,%ebx,4)
	decl	%ebx
	jge	.L016copy
	movl	24(%esp),%esp
	movl	$1,%eax
.L000just_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_mont,.-.L_bn_mul_mont_begin
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0
.comm	OPENSSL_ia32cap_P,16,4
#endif