/*
 * bn-586.S revision 1.7
 *
 * Word-level bignum primitives for 32-bit x86.
 * Syntax: AT&T (GNU as), run through the C preprocessor.
 * Every routine in this file reads its arguments from the stack
 * (first argument at 4(%esp) on entry) and returns its result, if any,
 * in %eax.  %ebx, %esi, %edi and %ebp are saved and restored by the
 * routines that use them.
 */
#include <machine/asm.h>
.text
/*
 * bn_mul_add_words -- r[i] = r[i] + a[i] * w + carry, for i in [0, num).
 *
 * Stack arguments on entry:
 *    4(%esp)  r    32-bit words, read, accumulated into and written back
 *    8(%esp)  a    32-bit source words (read only)
 *   12(%esp)  num  number of words to process
 *   16(%esp)  w    32-bit multiplier
 * Returns the carry out of the last word in %eax.
 * NOTE(review): presumably the OpenSSL-style prototype
 *   BN_ULONG bn_mul_add_words(BN_ULONG *r, const BN_ULONG *a, int num,
 *                             BN_ULONG w) -- confirm against the C header.
 *
 * Bit 26 of the first word of OPENSSL_ia32cap_P selects the MMX-register
 * pmuludq/paddq body; otherwise the integer mull body runs.
 *
 * num == 0 returns 0 on both paths.  The count is otherwise treated as an
 * unsigned quantity; negative values are not rejected.
 *
 * Clobbers: %eax, %ecx, %edx, flags; the SSE2 path also uses %mm0-%mm7 and
 * executes emms before returning.
 */

/*
 * One integer round at byte offset "off":
 *   %edx:%eax = a[off] * w + carry(%esi) + r[off]
 *   r[off] = low word, %esi = high word (next carry).
 * Neither adcl can overflow %edx: the 64-bit sum always fits.
 */
.macro BN586_MAW_LIMB off
	movl	\off(%ebx),%eax
	mull	%ebp
	addl	%esi,%eax
	adcl	$0,%edx
	addl	\off(%edi),%eax
	adcl	$0,%edx
	movl	%eax,\off(%edi)
	movl	%edx,%esi
.endm

.globl	bn_mul_add_words
.type	bn_mul_add_words,@function
.align	16
bn_mul_add_words:
.L_bn_mul_add_words_begin:
	/* call/pop yields the current PC so the capability word can be */
	/* addressed relative to it. */
	call	.L000PIC_me_up
.L000PIC_me_up:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.L000PIC_me_up(%eax),%eax
	btl	$26,(%eax)
	jnc	.L001maw_non_sse2

	/* ---- SSE2 path: %eax = r, %edx = a, %ecx = num, ---- */
	/* ---- %mm0 = w, %mm1 = running 64-bit carry/sum.  ---- */
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	movl	12(%esp),%ecx
	movd	16(%esp),%mm0
	pxor	%mm1,%mm1
	/* Fix: without this guard a zero count fell into the one-word loop, */
	/* whose "subl $1 / jnz" would wrap and run ~2^32 iterations.  The   */
	/* integer path below already returns 0 for num == 0.                */
	testl	%ecx,%ecx
	jz	.L004maw_sse2_exit
	jmp	.L002maw_sse2_entry
.align	16
.L003maw_sse2_unrolled:
	/* Eight words per iteration.  Products and r[] loads are issued */
	/* ahead of the serial carry chain that runs through %mm1.       */
	movd	(%eax),%mm3
	paddq	%mm3,%mm1		/* mm1 = carry + r[0] */
	movd	(%edx),%mm2
	pmuludq	%mm0,%mm2		/* mm2 = a[0]*w */
	movd	4(%edx),%mm4
	pmuludq	%mm0,%mm4		/* mm4 = a[1]*w */
	movd	8(%edx),%mm6
	pmuludq	%mm0,%mm6		/* mm6 = a[2]*w */
	movd	12(%edx),%mm7
	pmuludq	%mm0,%mm7		/* mm7 = a[3]*w */
	paddq	%mm2,%mm1
	movd	4(%eax),%mm3
	paddq	%mm4,%mm3		/* mm3 = r[1] + a[1]*w */
	movd	8(%eax),%mm5
	paddq	%mm6,%mm5		/* mm5 = r[2] + a[2]*w */
	movd	12(%eax),%mm4
	paddq	%mm4,%mm7		/* mm7 = r[3] + a[3]*w */
	movd	%mm1,(%eax)		/* store r[0] */
	movd	16(%edx),%mm2
	pmuludq	%mm0,%mm2		/* mm2 = a[4]*w */
	psrlq	$32,%mm1
	movd	20(%edx),%mm4
	pmuludq	%mm0,%mm4		/* mm4 = a[5]*w */
	paddq	%mm3,%mm1
	movd	24(%edx),%mm6
	pmuludq	%mm0,%mm6		/* mm6 = a[6]*w */
	movd	%mm1,4(%eax)		/* store r[1] */
	psrlq	$32,%mm1
	movd	28(%edx),%mm3
	addl	$32,%edx
	pmuludq	%mm0,%mm3		/* mm3 = a[7]*w */
	paddq	%mm5,%mm1
	movd	16(%eax),%mm5
	paddq	%mm5,%mm2		/* mm2 = r[4] + a[4]*w */
	movd	%mm1,8(%eax)		/* store r[2] */
	psrlq	$32,%mm1
	paddq	%mm7,%mm1
	movd	20(%eax),%mm5
	paddq	%mm5,%mm4		/* mm4 = r[5] + a[5]*w */
	movd	%mm1,12(%eax)		/* store r[3] */
	psrlq	$32,%mm1
	paddq	%mm2,%mm1
	movd	24(%eax),%mm5
	paddq	%mm5,%mm6		/* mm6 = r[6] + a[6]*w */
	movd	%mm1,16(%eax)		/* store r[4] */
	psrlq	$32,%mm1
	paddq	%mm4,%mm1
	movd	28(%eax),%mm5
	paddq	%mm5,%mm3		/* mm3 = r[7] + a[7]*w */
	movd	%mm1,20(%eax)		/* store r[5] */
	psrlq	$32,%mm1
	paddq	%mm6,%mm1
	movd	%mm1,24(%eax)		/* store r[6] */
	psrlq	$32,%mm1
	paddq	%mm3,%mm1
	movd	%mm1,28(%eax)		/* store r[7] */
	leal	32(%eax),%eax		/* leal keeps flags untouched */
	psrlq	$32,%mm1
	subl	$8,%ecx
	jz	.L004maw_sse2_exit
.L002maw_sse2_entry:
	/* At least eight words left?  (0xfffffff8 == ~7) */
	testl	$0xfffffff8,%ecx
	jnz	.L003maw_sse2_unrolled
.align	4
.L005maw_sse2_loop:
	/* One word per iteration; here 1 <= %ecx <= 7. */
	movd	(%edx),%mm2
	movd	(%eax),%mm3
	pmuludq	%mm0,%mm2
	leal	4(%edx),%edx
	paddq	%mm3,%mm1
	paddq	%mm2,%mm1
	movd	%mm1,(%eax)
	subl	$1,%ecx
	psrlq	$32,%mm1		/* MMX ops and leal do not touch flags, */
	leal	4(%eax),%eax		/* so jnz still sees the subl result.   */
	jnz	.L005maw_sse2_loop
.L004maw_sse2_exit:
	movd	%mm1,%eax		/* return the final carry */
	emms
	ret

.align	16
.L001maw_non_sse2:
	/* ---- Integer path: %edi = r, %ebx = a, %ebp = w, ---- */
	/* ---- %esi = carry, %ecx = remaining word count.   ---- */
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	xorl	%esi,%esi
	movl	20(%esp),%edi
	movl	28(%esp),%ecx
	movl	24(%esp),%ebx
	andl	$0xfffffff8,%ecx	/* words handled by the unrolled loop */
	movl	32(%esp),%ebp
	/* The pushed value is never read back; it only shifts the argument */
	/* offsets by 4 (num is at 32(%esp) below).  movl/pushl leave the   */
	/* flags from andl intact for the jz.                               */
	pushl	%ecx
	jz	.L006maw_finish
.align	16
.L007maw_loop:
	.irp	off,0,4,8,12,16,20,24,28
	BN586_MAW_LIMB \off
	.endr

	subl	$8,%ecx
	leal	32(%ebx),%ebx		/* leal preserves ZF from subl */
	leal	32(%edi),%edi
	jnz	.L007maw_loop
.L006maw_finish:
	movl	32(%esp),%ecx		/* reload num */
	andl	$7,%ecx			/* 0..7 leftover words */
	jz	.L009maw_end

	/* Up to seven leftover words; leave as soon as the count hits zero. */
	.irp	off,0,4,8,12,16,20
	BN586_MAW_LIMB \off
	decl	%ecx
	jz	.L009maw_end
	.endr
	BN586_MAW_LIMB 24
.L009maw_end:
	movl	%esi,%eax		/* return the final carry */
	popl	%ecx			/* discard the spill slot */
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_add_words,.-.L_bn_mul_add_words_begin
/*
 * bn_mul_words -- r[i] = low32(a[i] * w + carry), for i in [0, num).
 *
 * Stack arguments on entry:
 *    4(%esp)  r    destination words (written only)
 *    8(%esp)  a    source words (read only)
 *   12(%esp)  num  number of 32-bit words to process
 *   16(%esp)  w    32-bit multiplier
 * Returns the carry out of the last word in %eax.
 *
 * Bit 26 of the first word of OPENSSL_ia32cap_P selects the MMX-register
 * pmuludq body; otherwise the integer mull body runs.
 *
 * num == 0 returns 0 on both paths.  Negative counts are not rejected.
 *
 * Clobbers: %eax, %ecx, %edx, flags; the SSE2 path also uses %mm0-%mm2 and
 * executes emms before returning.
 */

/*
 * One integer round at byte offset "off":
 *   %edx:%eax = a[off] * w + carry(%esi); r[off] = low word; %esi = high.
 */
.macro BN586_MW_LIMB off
	movl	\off(%ebx),%eax
	mull	%ecx
	addl	%esi,%eax
	adcl	$0,%edx
	movl	%eax,\off(%edi)
	movl	%edx,%esi
.endm

.globl	bn_mul_words
.type	bn_mul_words,@function
.align	16
bn_mul_words:
.L_bn_mul_words_begin:
	/* call/pop yields the current PC for PC-relative addressing. */
	call	.L010PIC_me_up
.L010PIC_me_up:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.L010PIC_me_up(%eax),%eax
	btl	$26,(%eax)
	jnc	.L011mw_non_sse2

	/* ---- SSE2 path: %eax = r, %edx = a, %ecx = num, ---- */
	/* ---- %mm0 = w, %mm1 = running 64-bit carry.       ---- */
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	movl	12(%esp),%ecx
	movd	16(%esp),%mm0
	pxor	%mm1,%mm1
	/* Fix: the loop below is bottom-tested, so a zero count used to wrap */
	/* the counter and run ~2^32 iterations.  The integer path already   */
	/* returns 0 for num == 0; do the same here.                          */
	testl	%ecx,%ecx
	jz	.Lmw_sse2_exit
.align	16
.L012mw_sse2_loop:
	movd	(%edx),%mm2
	pmuludq	%mm0,%mm2		/* mm2 = a[i] * w */
	leal	4(%edx),%edx
	paddq	%mm2,%mm1
	movd	%mm1,(%eax)		/* r[i] = low word */
	subl	$1,%ecx
	psrlq	$32,%mm1		/* keep the high word as carry; MMX ops */
	leal	4(%eax),%eax		/* and leal leave the subl flags alone  */
	jnz	.L012mw_sse2_loop
.Lmw_sse2_exit:
	movd	%mm1,%eax
	emms
	ret

.align	16
.L011mw_non_sse2:
	/* ---- Integer path: %edi = r, %ebx = a, %ecx = w, ---- */
	/* ---- %esi = carry, %ebp = remaining word count.   ---- */
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	xorl	%esi,%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	movl	28(%esp),%ebp
	movl	32(%esp),%ecx
	andl	$0xfffffff8,%ebp	/* words handled by the unrolled loop */
	jz	.L013mw_finish
.L014mw_loop:
	.irp	off,0,4,8,12,16,20,24,28
	BN586_MW_LIMB \off
	.endr

	addl	$32,%ebx
	addl	$32,%edi
	subl	$8,%ebp
	jnz	.L014mw_loop
.L013mw_finish:
	movl	28(%esp),%ebp		/* reload num */
	andl	$7,%ebp			/* 0..7 leftover words */
	jz	.L016mw_end

	/* Up to seven leftover words; leave as soon as the count hits zero. */
	.irp	off,0,4,8,12,16,20
	BN586_MW_LIMB \off
	decl	%ebp
	jz	.L016mw_end
	.endr
	BN586_MW_LIMB 24
.L016mw_end:
	movl	%esi,%eax		/* return the final carry */
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_words,.-.L_bn_mul_words_begin
/*
 * bn_sqr_words -- r[2*i], r[2*i+1] = low, high words of a[i] * a[i],
 * for i in [0, num).  No carries cross between the 64-bit results.
 *
 * Stack arguments on entry:
 *    4(%esp)  r    destination, 2 * num 32-bit words (written only)
 *    8(%esp)  a    source, num 32-bit words (read only)
 *   12(%esp)  num  number of source words
 * No meaningful return value is placed in %eax.
 *
 * Bit 26 of the first word of OPENSSL_ia32cap_P selects the MMX-register
 * pmuludq body; otherwise the integer mull body runs.
 *
 * num == 0 stores nothing on either path.  Negative counts are not
 * rejected.
 *
 * Clobbers: %eax, %ecx, %edx, flags; the SSE2 path also uses %mm0 and
 * executes emms before returning.
 */

/* Square source word number "i" into destination words 2*i and 2*i+1. */
.macro BN586_SQR_LIMB i
	movl	4*\i(%edi),%eax
	mull	%eax
	movl	%eax,8*\i(%esi)
	movl	%edx,8*\i+4(%esi)
.endm

.globl	bn_sqr_words
.type	bn_sqr_words,@function
.align	16
bn_sqr_words:
.L_bn_sqr_words_begin:
	/* call/pop yields the current PC for PC-relative addressing. */
	call	.L017PIC_me_up
.L017PIC_me_up:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.L017PIC_me_up(%eax),%eax
	btl	$26,(%eax)
	jnc	.L018sqr_non_sse2

	/* ---- SSE2 path: %eax = r, %edx = a, %ecx = num ---- */
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	movl	12(%esp),%ecx
	/* Fix: the loop below is bottom-tested, so a zero count used to wrap */
	/* the counter and run ~2^32 iterations.  The integer path already   */
	/* does nothing for num == 0; do the same here.                       */
	testl	%ecx,%ecx
	jz	.Lsqr_sse2_exit
.align	16
.L019sqr_sse2_loop:
	movd	(%edx),%mm0
	pmuludq	%mm0,%mm0		/* mm0 = a[i]^2 (64 bits) */
	leal	4(%edx),%edx
	movq	%mm0,(%eax)		/* store both result words at once */
	subl	$1,%ecx
	leal	8(%eax),%eax		/* leal preserves ZF from subl */
	jnz	.L019sqr_sse2_loop
.Lsqr_sse2_exit:
	emms
	ret

.align	16
.L018sqr_non_sse2:
	/* ---- Integer path: %esi = r, %edi = a, %ebx = remaining count ---- */
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%ebx
	andl	$0xfffffff8,%ebx	/* words handled by the unrolled loop */
	jz	.L020sw_finish
.L021sw_loop:
	.irp	i,0,1,2,3,4,5,6,7
	BN586_SQR_LIMB \i
	.endr

	addl	$32,%edi		/* 8 source words      */
	addl	$64,%esi		/* 16 destination words */
	subl	$8,%ebx
	jnz	.L021sw_loop
.L020sw_finish:
	movl	28(%esp),%ebx		/* reload num */
	andl	$7,%ebx			/* 0..7 leftover words */
	jz	.L022sw_end

	/* Up to seven leftover words; leave as soon as the count hits zero. */
	.irp	i,0,1,2,3,4,5
	BN586_SQR_LIMB \i
	decl	%ebx
	jz	.L022sw_end
	.endr
	BN586_SQR_LIMB 6
.L022sw_end:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_sqr_words,.-.L_bn_sqr_words_begin
/*
 * bn_div_words -- 64-by-32 bit unsigned division.
 *
 * Stack arguments on entry:
 *    4(%esp)  h  high 32 bits of the dividend
 *    8(%esp)  l  low 32 bits of the dividend
 *   12(%esp)  d  32-bit divisor
 * Returns the 32-bit quotient of (h:l) / d in %eax.  The remainder is left
 * in %edx and is not part of the result.
 *
 * divl raises a divide error when d == 0 or when the quotient does not fit
 * in 32 bits (that is, when h >= d); no check is made here, so the caller
 * must guarantee h < d.
 *
 * Clobbers: %eax, %ecx, %edx, flags.
 */
.globl	bn_div_words
.type	bn_div_words,@function
.align	16
bn_div_words:
.L_bn_div_words_begin:
	movl	4(%esp),%edx		/* %edx:%eax = h:l */
	movl	8(%esp),%eax
	movl	12(%esp),%ecx
	divl	%ecx			/* %eax = quotient, %edx = remainder */
	ret
.size	bn_div_words,.-.L_bn_div_words_begin
/*
 * bn_add_words -- r[i] = a[i] + b[i] + carry, for i in [0, n).
 *
 * Stack arguments on entry:
 *    4(%esp)  r  destination words (written only)
 *    8(%esp)  a  first source (read only)
 *   12(%esp)  b  second source (read only)
 *   16(%esp)  n  number of 32-bit words
 * Returns the carry out of the last word (0 or 1) in %eax; n == 0
 * returns 0.  Negative counts are not rejected.
 *
 * Clobbers: %eax, %ecx, %edx, flags.
 */

/*
 * One round at byte offset "off".  %eax holds the carry in and out.
 * "movl $0" is used rather than xorl because the carry flag from the
 * preceding addl must survive until the adcl.  At most one of the two
 * additions can carry, so %eax stays 0 or 1.
 */
.macro BN586_ADD_LIMB off
	movl	\off(%esi),%ecx
	movl	\off(%edi),%edx
	addl	%eax,%ecx		/* a[i] + carry_in */
	movl	$0,%eax
	adcl	%eax,%eax		/* %eax = carry of that add */
	addl	%edx,%ecx		/* + b[i] */
	adcl	$0,%eax
	movl	%ecx,\off(%ebx)
.endm

.globl	bn_add_words
.type	bn_add_words,@function
.align	16
bn_add_words:
.L_bn_add_words_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	/* %ebx = r, %esi = a, %edi = b, %ebp = remaining count, %eax = carry */
	movl	20(%esp),%ebx
	movl	24(%esp),%esi
	movl	28(%esp),%edi
	movl	32(%esp),%ebp
	xorl	%eax,%eax
	andl	$0xfffffff8,%ebp	/* words handled by the unrolled loop */
	jz	.L023aw_finish
.L024aw_loop:
	.irp	off,0,4,8,12,16,20,24,28
	BN586_ADD_LIMB \off
	.endr

	addl	$32,%esi
	addl	$32,%edi
	addl	$32,%ebx
	subl	$8,%ebp
	jnz	.L024aw_loop
.L023aw_finish:
	movl	32(%esp),%ebp		/* reload n */
	andl	$7,%ebp			/* 0..7 leftover words */
	jz	.L025aw_end

	/* Up to seven leftover words; leave as soon as the count hits zero. */
	.irp	off,0,4,8,12,16,20
	BN586_ADD_LIMB \off
	decl	%ebp
	jz	.L025aw_end
	.endr
	BN586_ADD_LIMB 24
.L025aw_end:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_add_words,.-.L_bn_add_words_begin
/*
 * bn_sub_words -- r[i] = a[i] - b[i] - borrow, for i in [0, n).
 *
 * Stack arguments on entry:
 *    4(%esp)  r  destination words (written only)
 *    8(%esp)  a  minuend words (read only)
 *   12(%esp)  b  subtrahend words (read only)
 *   16(%esp)  n  number of 32-bit words
 * Returns the borrow out of the last word (0 or 1) in %eax; n == 0
 * returns 0.  Negative counts are not rejected.
 *
 * Clobbers: %eax, %ecx, %edx, flags.
 */

/*
 * One round at byte offset "off".  %eax holds the borrow in and out.
 * "movl $0" is used rather than xorl because the carry flag from the
 * preceding subl must survive until the adcl.  At most one of the two
 * subtractions can borrow, so %eax stays 0 or 1.
 */
.macro BN586_SUB_LIMB off
	movl	\off(%esi),%ecx
	movl	\off(%edi),%edx
	subl	%eax,%ecx		/* a[i] - borrow_in */
	movl	$0,%eax
	adcl	%eax,%eax		/* %eax = borrow of that subtract */
	subl	%edx,%ecx		/* - b[i] */
	adcl	$0,%eax
	movl	%ecx,\off(%ebx)
.endm

.globl	bn_sub_words
.type	bn_sub_words,@function
.align	16
bn_sub_words:
.L_bn_sub_words_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	/* %ebx = r, %esi = a, %edi = b, %ebp = remaining count, %eax = borrow */
	movl	20(%esp),%ebx
	movl	24(%esp),%esi
	movl	28(%esp),%edi
	movl	32(%esp),%ebp
	xorl	%eax,%eax
	andl	$0xfffffff8,%ebp	/* words handled by the unrolled loop */
	jz	.L026aw_finish
.L027aw_loop:
	.irp	off,0,4,8,12,16,20,24,28
	BN586_SUB_LIMB \off
	.endr

	addl	$32,%esi
	addl	$32,%edi
	addl	$32,%ebx
	subl	$8,%ebp
	jnz	.L027aw_loop
.L026aw_finish:
	movl	32(%esp),%ebp		/* reload n */
	andl	$7,%ebp			/* 0..7 leftover words */
	jz	.L028aw_end

	/* Up to seven leftover words; leave as soon as the count hits zero. */
	.irp	off,0,4,8,12,16,20
	BN586_SUB_LIMB \off
	decl	%ebp
	jz	.L028aw_end
	.endr
	BN586_SUB_LIMB 24
.L028aw_end:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_sub_words,.-.L_bn_sub_words_begin
/*
 * bn_sub_part_words -- subtract two word arrays of different lengths.
 *
 * Stack arguments on entry:
 *    4(%esp)  r   destination words
 *    8(%esp)  a   minuend words
 *   12(%esp)  b   subtrahend words
 *   16(%esp)  cl  number of words both operands have in common
 *   20(%esp)  dl  signed length difference
 *
 * Phase 1: r[i] = a[i] - b[i] - borrow for the cl common words.
 * Phase 2, continuing at word cl with the same borrow:
 *   dl == 0: nothing more to do.
 *   dl <  0: -dl more words, r[i] = 0 - b[i] - borrow.
 *   dl >  0:  dl more words, r[i] = a[i] - borrow; once the borrow has
 *             cleared, the remaining words of a are copied unchanged.
 * Returns the final borrow (0 or 1) in %eax.
 *
 * Registers throughout: %ebx = r, %esi = a, %edi = b, %ebp = word counter,
 * %eax = borrow, %ecx/%edx = scratch.
 * Clobbers: %eax, %ecx, %edx, flags.
 */

/*
 * r[off] = a[off] - b[off] - borrow.  "movl $0" (not xorl) keeps the carry
 * flag of the first subl alive for the adcl.
 */
.macro BN586_SUBP_LIMB off
	movl	\off(%esi),%ecx
	movl	\off(%edi),%edx
	subl	%eax,%ecx
	movl	$0,%eax
	adcl	%eax,%eax
	subl	%edx,%ecx
	adcl	$0,%eax
	movl	%ecx,\off(%ebx)
.endm

/*
 * Same round at offset 0, then advance all three pointers by one word.
 * The leftover part of phase 1 bumps the pointers so that phase 2 starts
 * exactly at word cl.
 */
.macro BN586_SUBP_STEP
	BN586_SUBP_LIMB 0
	addl	$4,%esi
	addl	$4,%edi
	addl	$4,%ebx
.endm

/* r[off] = 0 - b[off] - borrow (dl < 0: a has run out). */
.macro BN586_SUBP_NEG off
	movl	$0,%ecx
	movl	\off(%edi),%edx
	subl	%eax,%ecx
	movl	$0,%eax
	adcl	%eax,%eax
	subl	%edx,%ecx
	adcl	$0,%eax
	movl	%ecx,\off(%ebx)
.endm

/*
 * r[off] = a[off] - borrow (dl > 0: b has run out).  %eax is left alone:
 * if the subtract borrows, the borrow stays 1; otherwise jump to "nc",
 * the matching entry in the plain-copy code.
 */
.macro BN586_SUBP_POS off, nc
	movl	\off(%esi),%ecx
	subl	%eax,%ecx
	movl	%ecx,\off(%ebx)
	jnc	\nc
.endm

/* r[off] = a[off] (no borrow pending). */
.macro BN586_SUBP_COPY off
	movl	\off(%esi),%ecx
	movl	%ecx,\off(%ebx)
.endm

.globl	bn_sub_part_words
.type	bn_sub_part_words,@function
.align	16
bn_sub_part_words:
.L_bn_sub_part_words_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	movl	20(%esp),%ebx
	movl	24(%esp),%esi
	movl	28(%esp),%edi
	movl	32(%esp),%ebp
	xorl	%eax,%eax
	andl	$0xfffffff8,%ebp	/* common words handled 8 at a time */
	jz	.L029aw_finish
.L030aw_loop:
	.irp	off,0,4,8,12,16,20,24,28
	BN586_SUBP_LIMB \off
	.endr

	addl	$32,%esi
	addl	$32,%edi
	addl	$32,%ebx
	subl	$8,%ebp
	jnz	.L030aw_loop
.L029aw_finish:
	movl	32(%esp),%ebp		/* reload cl */
	andl	$7,%ebp			/* 0..7 leftover common words */
	jz	.L031aw_end

	.rept	6
	BN586_SUBP_STEP
	decl	%ebp
	jz	.L031aw_end
	.endr
	BN586_SUBP_STEP

.L031aw_end:
	/* Phase 2: dispatch on the sign of dl.  testl clears OF, so after */
	/* the je, jge is taken exactly when dl > 0.                        */
	movl	36(%esp),%ebp
	testl	%ebp,%ebp
	je	.L032pw_end
	jge	.L033pw_pos

	/* ---- dl < 0: -dl extra words come from b only ---- */
	negl	%ebp
	andl	$0xfffffff8,%ebp
	jz	.L034pw_neg_finish
.L035pw_neg_loop:
	.irp	off,0,4,8,12,16,20,24,28
	BN586_SUBP_NEG \off
	.endr

	addl	$32,%edi
	addl	$32,%ebx
	subl	$8,%ebp
	jnz	.L035pw_neg_loop
.L034pw_neg_finish:
	movl	36(%esp),%ebp
	negl	%ebp
	andl	$7,%ebp			/* 0..7 leftover words of -dl */
	jz	.L032pw_end

	.irp	off,0,4,8,12,16,20
	BN586_SUBP_NEG \off
	decl	%ebp
	jz	.L032pw_end
	.endr
	BN586_SUBP_NEG 24
	jmp	.L032pw_end

	/* ---- dl > 0: dl extra words come from a only ---- */
.L033pw_pos:
	andl	$0xfffffff8,%ebp
	jz	.L036pw_pos_finish
.L037pw_pos_loop:
	/* Borrow still pending; each round that clears it jumps into the */
	/* copy code at the following word.                                */
	BN586_SUBP_POS 0,.L038pw_nc0
	BN586_SUBP_POS 4,.L039pw_nc1
	BN586_SUBP_POS 8,.L040pw_nc2
	BN586_SUBP_POS 12,.L041pw_nc3
	BN586_SUBP_POS 16,.L042pw_nc4
	BN586_SUBP_POS 20,.L043pw_nc5
	BN586_SUBP_POS 24,.L044pw_nc6
	BN586_SUBP_POS 28,.L045pw_nc7

	addl	$32,%esi
	addl	$32,%ebx
	subl	$8,%ebp
	jnz	.L037pw_pos_loop
.L036pw_pos_finish:
	movl	36(%esp),%ebp
	andl	$7,%ebp			/* 0..7 leftover words of dl */
	jz	.L032pw_end

	BN586_SUBP_POS 0,.L046pw_tail_nc0
	decl	%ebp
	jz	.L032pw_end
	BN586_SUBP_POS 4,.L047pw_tail_nc1
	decl	%ebp
	jz	.L032pw_end
	BN586_SUBP_POS 8,.L048pw_tail_nc2
	decl	%ebp
	jz	.L032pw_end
	BN586_SUBP_POS 12,.L049pw_tail_nc3
	decl	%ebp
	jz	.L032pw_end
	BN586_SUBP_POS 16,.L050pw_tail_nc4
	decl	%ebp
	jz	.L032pw_end
	BN586_SUBP_POS 20,.L051pw_tail_nc5
	decl	%ebp
	jz	.L032pw_end
	BN586_SUBP_POS 24,.L052pw_tail_nc6
	movl	$1,%eax			/* borrow ran off the end */
	jmp	.L032pw_end

	/* ---- dl > 0, borrow cleared: plain copy of the rest of a. ---- */
	/* The pw_ncN labels are entry points from round N above.        */
.L053pw_nc_loop:
	BN586_SUBP_COPY 0
.L038pw_nc0:
	BN586_SUBP_COPY 4
.L039pw_nc1:
	BN586_SUBP_COPY 8
.L040pw_nc2:
	BN586_SUBP_COPY 12
.L041pw_nc3:
	BN586_SUBP_COPY 16
.L042pw_nc4:
	BN586_SUBP_COPY 20
.L043pw_nc5:
	BN586_SUBP_COPY 24
.L044pw_nc6:
	BN586_SUBP_COPY 28
.L045pw_nc7:

	addl	$32,%esi
	addl	$32,%ebx
	subl	$8,%ebp
	jnz	.L053pw_nc_loop
	movl	36(%esp),%ebp
	andl	$7,%ebp
	jz	.L054pw_nc_end
	BN586_SUBP_COPY 0
.L046pw_tail_nc0:
	decl	%ebp
	jz	.L054pw_nc_end
	BN586_SUBP_COPY 4
.L047pw_tail_nc1:
	decl	%ebp
	jz	.L054pw_nc_end
	BN586_SUBP_COPY 8
.L048pw_tail_nc2:
	decl	%ebp
	jz	.L054pw_nc_end
	BN586_SUBP_COPY 12
.L049pw_tail_nc3:
	decl	%ebp
	jz	.L054pw_nc_end
	BN586_SUBP_COPY 16
.L050pw_tail_nc4:
	decl	%ebp
	jz	.L054pw_nc_end
	BN586_SUBP_COPY 20
.L051pw_tail_nc5:
	decl	%ebp
	jz	.L054pw_nc_end
	BN586_SUBP_COPY 24
.L052pw_tail_nc6:
.L054pw_nc_end:
	xorl	%eax,%eax		/* no borrow left */
.L032pw_end:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_sub_part_words,.-.L_bn_sub_part_words_begin
/*
 * Capability words tested by the routines above (bit 26 of the first
 * word selects their SSE2 paths).  Declared as a 16-byte, 4-byte-aligned
 * common symbol.
 */
.comm	OPENSSL_ia32cap_P,16,4