1#include <machine/asm.h>
2.text
3.align	64
.globl	poly1305_init
.type	poly1305_init,@function
.align	16
/*
 * poly1305_init -- cdecl, three stack arguments (offsets are relative to
 * %esp after the four register pushes below):
 *   20(%esp)  pointer to the state block.  Its first six dwords are
 *             cleared; dwords at offsets 24..36 receive the masked key.
 *   24(%esp)  pointer to 16 bytes of key material, or NULL.
 *   28(%esp)  pointer to a two-dword table that receives the addresses
 *             of the block routine and the emit routine to be used.
 * Returns %eax = 0 when the key pointer is NULL (only the clearing is
 * done), %eax = 1 otherwise.
 */
poly1305_init:
.L_poly1305_init_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ebp

	/* Zero the first 24 bytes of the state. */
	xorl	%eax,%eax
	movl	%eax,(%edi)
	movl	%eax,4(%edi)
	movl	%eax,8(%edi)
	movl	%eax,12(%edi)
	movl	%eax,16(%edi)
	movl	%eax,20(%edi)

	/* NULL key: return with %eax still zero. */
	testl	%esi,%esi
	jz	.Linit_return

	/* call/pop leaves the address of .Linit_pic in %ebx so that the */
	/* symbols below can be addressed position-independently.       */
	call	.Linit_pic
.Linit_pic:
	popl	%ebx
	leal	poly1305_blocks-.Linit_pic(%ebx),%eax
	leal	poly1305_emit-.Linit_pic(%ebx),%edx
	leal	OPENSSL_ia32cap_P-.Linit_pic(%ebx),%edi
	movl	(%edi),%ecx
	/* Bits 24 and 26 of the first capability word must both be set  */
	/* (presumably the FXSR and SSE2 feature flags) to pick the SSE2 */
	/* routines; otherwise the scalar ones stay selected.            */
	andl	$0x5000000,%ecx
	cmpl	$0x5000000,%ecx
	jne	.Linit_publish
	leal	_poly1305_blocks_sse2-.Linit_pic(%ebx),%eax
	leal	_poly1305_emit_sse2-.Linit_pic(%ebx),%edx
.Linit_publish:
	movl	20(%esp),%edi
	movl	%eax,(%ebp)
	movl	%edx,4(%ebp)

	/* Load the four key words, apply the 0x0fffffff / 0x0ffffffc */
	/* masks and store them at state offsets 24..36.              */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	andl	$0x0fffffff,%eax
	andl	$0x0ffffffc,%ebx
	andl	$0x0ffffffc,%ecx
	andl	$0x0ffffffc,%edx
	movl	%eax,24(%edi)
	movl	%ebx,28(%edi)
	movl	%ecx,32(%edi)
	movl	%edx,36(%edi)
	movl	$1,%eax
.Linit_return:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_init,.-.L_poly1305_init_begin
.globl	poly1305_blocks
.type	poly1305_blocks,@function
.align	16
/*
 * poly1305_blocks -- cdecl, four stack arguments (offsets relative to
 * %esp after the four register pushes):
 *   20(%esp)  pointer to the state block: accumulator words at offsets
 *             0..16, key words at offsets 24..36.
 *   24(%esp)  pointer to the input.
 *   28(%esp)  input length in bytes; only whole 16-byte blocks are
 *             consumed, any remainder is ignored.
 *   32(%esp)  value added (with carry) to the fifth accumulator word
 *             for every block.
 *
 * .Lenter_blocks is also jumped to from _poly1305_blocks_sse2 with
 * %edi/%esi/%ecx already loaded and the same four registers pushed.
 *
 * Frame after "subl $64,%esp":
 *     0,12,16(%esp)  spilled accumulator words 0, 3 and 4
 *    20..28(%esp)    product words 0..2
 *    36..48(%esp)    key words r0..r3
 *    52..60(%esp)    r1..r3 each plus itself shifted right by 2
 *    84(%esp)        state pointer (argument 1)
 *    92(%esp)        argument 3 slot, reused for the end-of-input pointer
 *    96(%esp)        argument 4
 */
poly1305_blocks:
.L_poly1305_blocks_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
.Lenter_blocks:
	/* Round the length down to a multiple of 16.  The mask used to  */
	/* be -15, which kept bit 0: an odd length then produced an end  */
	/* pointer the 16-byte-stepping cursor could never equal, so the */
	/* loop ran past the input.  -16 matches the SSE2 entry point.   */
	andl	$-16,%ecx
	jz	.L003nodata
	subl	$64,%esp
	movl	24(%edi),%eax
	movl	28(%edi),%ebx
	leal	(%esi,%ecx,1),%ebp
	movl	32(%edi),%ecx
	movl	36(%edi),%edx
	movl	%ebp,92(%esp)
	movl	%esi,%ebp
	/* Stash r0..r3 and the derived values r + (r >> 2) for r1..r3. */
	movl	%eax,36(%esp)
	movl	%ebx,%eax
	shrl	$2,%eax
	movl	%ebx,40(%esp)
	addl	%ebx,%eax
	movl	%ecx,%ebx
	shrl	$2,%ebx
	movl	%ecx,44(%esp)
	addl	%ecx,%ebx
	movl	%edx,%ecx
	shrl	$2,%ecx
	movl	%edx,48(%esp)
	addl	%edx,%ecx
	movl	%eax,52(%esp)
	movl	%ebx,56(%esp)
	movl	%ecx,60(%esp)
	/* Accumulator words 0..4 live in %eax,%ebx,%ecx,%esi,%edi. */
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%esi
	movl	16(%edi),%edi
	jmp	.L004loop
.align	32
.L004loop:
	/* Add the next 16 input bytes and argument 4 to the accumulator. */
	addl	(%ebp),%eax
	adcl	4(%ebp),%ebx
	adcl	8(%ebp),%ecx
	adcl	12(%ebp),%esi
	leal	16(%ebp),%ebp
	adcl	96(%esp),%edi
	movl	%eax,(%esp)
	movl	%esi,12(%esp)
	/* Product word 0 -> 20(%esp), carry continues in %esi. */
	mull	36(%esp)
	movl	%edi,16(%esp)
	movl	%eax,%edi
	movl	%ebx,%eax
	movl	%edx,%esi
	mull	60(%esp)
	addl	%eax,%edi
	movl	%ecx,%eax
	adcl	%edx,%esi
	mull	56(%esp)
	addl	%eax,%edi
	movl	12(%esp),%eax
	adcl	%edx,%esi
	mull	52(%esp)
	addl	%eax,%edi
	movl	(%esp),%eax
	adcl	%edx,%esi
	/* Product word 1 -> 24(%esp), carry continues in %edi. */
	mull	40(%esp)
	movl	%edi,20(%esp)
	xorl	%edi,%edi
	addl	%eax,%esi
	movl	%ebx,%eax
	adcl	%edx,%edi
	mull	36(%esp)
	addl	%eax,%esi
	movl	%ecx,%eax
	adcl	%edx,%edi
	mull	60(%esp)
	addl	%eax,%esi
	movl	12(%esp),%eax
	adcl	%edx,%edi
	mull	56(%esp)
	addl	%eax,%esi
	movl	16(%esp),%eax
	adcl	%edx,%edi
	imull	52(%esp),%eax
	addl	%eax,%esi
	movl	(%esp),%eax
	adcl	$0,%edi
	/* Product word 2 -> 28(%esp), carry continues in %esi. */
	mull	44(%esp)
	movl	%esi,24(%esp)
	xorl	%esi,%esi
	addl	%eax,%edi
	movl	%ebx,%eax
	adcl	%edx,%esi
	mull	40(%esp)
	addl	%eax,%edi
	movl	%ecx,%eax
	adcl	%edx,%esi
	mull	36(%esp)
	addl	%eax,%edi
	movl	12(%esp),%eax
	adcl	%edx,%esi
	mull	60(%esp)
	addl	%eax,%edi
	movl	16(%esp),%eax
	adcl	%edx,%esi
	imull	56(%esp),%eax
	addl	%eax,%edi
	movl	(%esp),%eax
	adcl	$0,%esi
	/* Product word 3 stays in %esi, word 4 is built in %edx. */
	mull	48(%esp)
	movl	%edi,28(%esp)
	xorl	%edi,%edi
	addl	%eax,%esi
	movl	%ebx,%eax
	adcl	%edx,%edi
	mull	44(%esp)
	addl	%eax,%esi
	movl	%ecx,%eax
	adcl	%edx,%edi
	mull	40(%esp)
	addl	%eax,%esi
	movl	12(%esp),%eax
	adcl	%edx,%edi
	mull	36(%esp)
	addl	%eax,%esi
	movl	16(%esp),%ecx
	adcl	%edx,%edi
	movl	%ecx,%edx
	imull	60(%esp),%ecx
	addl	%ecx,%esi
	movl	20(%esp),%eax
	adcl	$0,%edi
	imull	36(%esp),%edx
	addl	%edi,%edx
	movl	24(%esp),%ebx
	movl	28(%esp),%ecx
	/* Keep the low two bits of word 4; fold the rest back into */
	/* word 0 multiplied by 5 and propagate the carry.          */
	movl	%edx,%edi
	shrl	$2,%edx
	andl	$3,%edi
	leal	(%edx,%edx,4),%edx
	addl	%edx,%eax
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%esi
	adcl	$0,%edi
	cmpl	92(%esp),%ebp
	jne	.L004loop
	/* Write the accumulator back to the state block. */
	movl	84(%esp),%edx
	addl	$64,%esp
	movl	%eax,(%edx)
	movl	%ebx,4(%edx)
	movl	%ecx,8(%edx)
	movl	%esi,12(%edx)
	movl	%edi,16(%edx)
.L003nodata:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_blocks,.-.L_poly1305_blocks_begin
.globl	poly1305_emit
.type	poly1305_emit,@function
.align	16
/*
 * poly1305_emit -- cdecl, three stack arguments (offsets relative to
 * %esp after the four register pushes):
 *   20(%esp)  pointer to the state block; the five accumulator words
 *             at offsets 0..16 are read.
 *   24(%esp)  pointer to the 16-byte output buffer.
 *   28(%esp)  pointer to a 16-byte value that is added to the result.
 *
 * .Lenter_emit is also jumped to from _poly1305_emit_sse2 with %ebp
 * already holding the state pointer and the same four registers pushed.
 */
poly1305_emit:
.L_poly1305_emit_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%ebp
.Lenter_emit:
	movl	24(%esp),%edi

	/* Candidate A: accumulator plus 5, carried through word 4. */
	movl	(%ebp),%eax
	movl	4(%ebp),%ebx
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	16(%ebp),%esi
	addl	$5,%eax
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%edx
	adcl	$0,%esi

	/* %esi = -(word4 >> 2): the selector for candidate A.  Park the */
	/* masked candidate in the output buffer.                        */
	shrl	$2,%esi
	negl	%esi
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)

	/* Candidate B: the unmodified accumulator under the inverted */
	/* selector, merged with what was parked above.               */
	notl	%esi
	movl	(%ebp),%eax
	movl	4(%ebp),%ebx
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	28(%esp),%ebp
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	orl	(%edi),%eax
	orl	4(%edi),%ebx
	orl	8(%edi),%ecx
	orl	12(%edi),%edx

	/* Add the 128-bit value at argument 3 and store the 16-byte result. */
	addl	(%ebp),%eax
	adcl	4(%ebp),%ebx
	adcl	8(%ebp),%ecx
	adcl	12(%ebp),%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_emit,.-.L_poly1305_emit_begin
.align	32
.type	_poly1305_init_sse2,@function
.align	16
/*
 * _poly1305_init_sse2 -- internal helper with register arguments.
 *   %edi  state pointer; the 16 key bytes at 24(%edi) are read and the
 *         144 bytes starting at 48(%edi) are written.  %edi is restored
 *         before returning.
 *   %ebx  base of the constant table; 64(%ebx) is used as the 26-bit
 *         limb mask.
 * Clobbers %ecx, %edx, %ebp (holds the caller %esp while a 16-byte
 * aligned scratch frame is in use) and %xmm0-%xmm7.
 *
 * The key is split into five 26-bit limbs, then two passes of a
 * five-limb multiply-and-reduce are run.  After the first pass each
 * result limb is paired with the limb it was computed from; after the
 * second pass the values are packed and stored, followed by four limbs
 * multiplied by 5.
 */
_poly1305_init_sse2:
	movdqu	24(%edi),%xmm4
	leal	48(%edi),%edi
	movl	%esp,%ebp
	subl	$224,%esp
	andl	$-16,%esp
	/* Split into limbs at bit offsets 0, 26, 52, 78 and 104. */
	movq	64(%ebx),%xmm7
	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	movdqa	%xmm4,%xmm2
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm1
	psrldq	$6,%xmm2
	pand	%xmm7,%xmm1
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	psrldq	$13,%xmm4
	leal	144(%esp),%edx
	movl	$2,%ecx
.Lsse2_square:
	/* Save the current limbs at 0..64(%esp). */
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	/* Limbs 1..4 times 5 go to 80..128(%esp). */
	movdqa	%xmm1,%xmm6
	movdqa	%xmm2,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm1,%xmm6
	paddd	%xmm2,%xmm5
	movdqa	%xmm6,80(%esp)
	movdqa	%xmm5,96(%esp)
	movdqa	%xmm3,%xmm6
	movdqa	%xmm4,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm3,%xmm6
	paddd	%xmm4,%xmm5
	movdqa	%xmm6,112(%esp)
	movdqa	%xmm5,128(%esp)
	/* Duplicate the low quadword of every limb into 0..64(%edx). */
	pshufd	$0x44,%xmm0,%xmm6
	movdqa	%xmm1,%xmm5
	pshufd	$0x44,%xmm1,%xmm1
	pshufd	$0x44,%xmm2,%xmm2
	pshufd	$0x44,%xmm3,%xmm3
	pshufd	$0x44,%xmm4,%xmm4
	movdqa	%xmm6,(%edx)
	movdqa	%xmm1,16(%edx)
	movdqa	%xmm2,32(%edx)
	movdqa	%xmm3,48(%edx)
	movdqa	%xmm4,64(%edx)
	/* Five-limb product accumulated in %xmm0..%xmm4. */
	pmuludq	%xmm0,%xmm4
	pmuludq	%xmm0,%xmm3
	pmuludq	%xmm0,%xmm2
	pmuludq	%xmm0,%xmm1
	pmuludq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	48(%edx),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	80(%esp),%xmm6
	pmuludq	(%edx),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%edx),%xmm6
	movdqa	32(%esp),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%edx),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%edx),%xmm5
	paddq	%xmm7,%xmm4
	movdqa	96(%esp),%xmm7
	pmuludq	(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%edx),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%edx),%xmm5
	movdqa	48(%esp),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%edx),%xmm6
	paddq	%xmm5,%xmm0
	movdqa	112(%esp),%xmm5
	pmuludq	(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%edx),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%edx),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%edx),%xmm7
	movdqa	64(%esp),%xmm5
	paddq	%xmm6,%xmm1
	movdqa	128(%esp),%xmm6
	pmuludq	(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%edx),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%edx),%xmm6
	movdqa	64(%ebx),%xmm7
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	/* Carry propagation back to 26-bit limbs; the carry out of */
	/* limb 4 re-enters limb 0 multiplied by 5.                 */
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
	decl	%ecx
	jz	.Lsse2_square_done
	/* First pass only: pair each new limb (low quadword) with the */
	/* saved input limb (high quadword) and go round again.        */
	punpcklqdq	(%esp),%xmm0
	punpcklqdq	16(%esp),%xmm1
	punpcklqdq	32(%esp),%xmm2
	punpcklqdq	48(%esp),%xmm3
	punpcklqdq	64(%esp),%xmm4
	jmp	.Lsse2_square
.Lsse2_square_done:
	/* Move the second-pass limbs to the odd dwords, merge with the */
	/* saved pair, reorder the dwords and store five limb vectors.  */
	psllq	$32,%xmm0
	psllq	$32,%xmm1
	psllq	$32,%xmm2
	psllq	$32,%xmm3
	psllq	$32,%xmm4
	por	(%esp),%xmm0
	por	16(%esp),%xmm1
	por	32(%esp),%xmm2
	por	48(%esp),%xmm3
	por	64(%esp),%xmm4
	pshufd	$0x8d,%xmm0,%xmm0
	pshufd	$0x8d,%xmm1,%xmm1
	pshufd	$0x8d,%xmm2,%xmm2
	pshufd	$0x8d,%xmm3,%xmm3
	pshufd	$0x8d,%xmm4,%xmm4
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	movdqu	%xmm4,64(%edi)
	/* Followed by limbs 1..4 multiplied by 5. */
	movdqa	%xmm1,%xmm6
	movdqa	%xmm2,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm1,%xmm6
	paddd	%xmm2,%xmm5
	movdqu	%xmm6,80(%edi)
	movdqu	%xmm5,96(%edi)
	movdqa	%xmm3,%xmm6
	movdqa	%xmm4,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm3,%xmm6
	paddd	%xmm4,%xmm5
	movdqu	%xmm6,112(%edi)
	movdqu	%xmm5,128(%edi)
	movl	%ebp,%esp
	leal	-48(%edi),%edi
	ret
.size	_poly1305_init_sse2,.-_poly1305_init_sse2
.align	32
.type	_poly1305_blocks_sse2,@function
.align	16
/*
 * _poly1305_blocks_sse2 -- cdecl, same four stack arguments as
 * poly1305_blocks (offsets relative to %esp after the four pushes):
 *   20(%esp)  state pointer.  Accumulator at offsets 0..16, a format
 *             flag at offset 20, key at 24..36, limb table from 48 on.
 *   24(%esp)  input pointer.
 *   28(%esp)  length in bytes, rounded down to a multiple of 16.
 *   32(%esp)  value that is shifted left by 24 and added to the top
 *             limb of a single leading block.
 *
 * Dispatch:
 *   - rounded length zero: return at once;
 *   - fewer than 64 bytes while the format flag is zero: continue in
 *     the scalar code at .Lenter_blocks;
 *   - format flag zero otherwise: build the limb table with
 *     _poly1305_init_sse2, convert the accumulator from 32-bit words to
 *     26-bit limbs and set the flag to 1;
 *   - format flag non-zero: the accumulator is read as five 26-bit
 *     limbs.
 *
 * %ebx addresses .Lconst_sse2 ((%ebx) is OR-ed into the top limb of
 * full blocks, 64(%ebx) is the 26-bit mask).  %ebp keeps the caller
 * %esp while a 16-byte aligned frame of at least 528 bytes is in use.
 */
_poly1305_blocks_sse2:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	20(%edi),%eax
	andl	$-16,%ecx
	jz	.Lsse2_nodata
	cmpl	$64,%ecx
	jae	.Lsse2_enter
	testl	%eax,%eax
	jz	.Lenter_blocks
.align	16
.Lsse2_enter:
	call	.Lsse2_pic
.Lsse2_pic:
	popl	%ebx
	leal	.Lconst_sse2-.Lsse2_pic(%ebx),%ebx
	testl	%eax,%eax
	jnz	.Lsse2_base2_26
	/* First SSE2 call for this state: build the table, then cut the */
	/* 32-bit accumulator words into 26-bit limbs with overlapping   */
	/* unaligned loads.                                              */
	call	_poly1305_init_sse2
	movl	(%edi),%eax
	movl	3(%edi),%ecx
	movl	6(%edi),%edx
	movl	9(%edi),%esi
	movl	13(%edi),%ebp
	movl	$1,20(%edi)
	shrl	$2,%ecx
	andl	$0x3ffffff,%eax
	shrl	$4,%edx
	andl	$0x3ffffff,%ecx
	shrl	$6,%esi
	andl	$0x3ffffff,%edx
	movd	%eax,%xmm0
	movd	%ecx,%xmm1
	movd	%edx,%xmm2
	movd	%esi,%xmm3
	movd	%ebp,%xmm4
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	jmp	.Lsse2_base2_32
.align	16
.Lsse2_base2_26:
	movd	(%edi),%xmm0
	movd	4(%edi),%xmm1
	movd	8(%edi),%xmm2
	movd	12(%edi),%xmm3
	movd	16(%edi),%xmm4
	movdqa	64(%ebx),%xmm7
.Lsse2_base2_32:
	movl	32(%esp),%eax
	movl	%esp,%ebp
	subl	$528,%esp
	andl	$-16,%esp
	leal	48(%edi),%edi
	shll	$24,%eax
	testl	$31,%ecx
	jz	.Lsse2_even
	/* Odd number of blocks: absorb one block on its own first. */
	movdqu	(%esi),%xmm6
	leal	16(%esi),%esi
	movdqa	%xmm6,%xmm5
	pand	%xmm7,%xmm6
	paddd	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	psrlq	$26,%xmm5
	psrldq	$6,%xmm6
	pand	%xmm7,%xmm5
	paddd	%xmm5,%xmm1
	movdqa	%xmm6,%xmm5
	psrlq	$4,%xmm6
	pand	%xmm7,%xmm6
	paddd	%xmm6,%xmm2
	movdqa	%xmm5,%xmm6
	psrlq	$30,%xmm5
	pand	%xmm7,%xmm5
	psrldq	$7,%xmm6
	paddd	%xmm5,%xmm3
	movd	%eax,%xmm5
	paddd	%xmm6,%xmm4
	movd	12(%edi),%xmm6
	paddd	%xmm5,%xmm4
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	/* Multiply by the table entries held in dword 3 of each row. */
	pmuludq	%xmm6,%xmm0
	pmuludq	%xmm6,%xmm1
	pmuludq	%xmm6,%xmm2
	movd	28(%edi),%xmm5
	pmuludq	%xmm6,%xmm3
	pmuludq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	48(%esp),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	movd	92(%edi),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%esp),%xmm6
	movd	44(%edi),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%esp),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%esp),%xmm5
	paddq	%xmm7,%xmm4
	movd	108(%edi),%xmm7
	pmuludq	(%esp),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%esp),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%esp),%xmm5
	movd	60(%edi),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%esp),%xmm6
	paddq	%xmm5,%xmm0
	movd	124(%edi),%xmm5
	pmuludq	(%esp),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%esp),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%esp),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%esp),%xmm7
	movd	76(%edi),%xmm5
	paddq	%xmm6,%xmm1
	movd	140(%edi),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%esp),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%esp),%xmm6
	movdqa	64(%ebx),%xmm7
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	/* Carry propagation back to 26-bit limbs. */
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
	subl	$16,%ecx
	jz	.Lsse2_done
.Lsse2_even:
	/* Expand the nine table rows onto the stack: low dword pairs  */
	/* duplicated at 0..128(%edx), high dword pairs duplicated at  */
	/* -144..-16(%edx).  The flags set by "subl $64,%ecx" are kept */
	/* alive across the SSE instructions for cmovb and jbe below.  */
	leal	384(%esp),%edx
	leal	-32(%esi),%eax
	subl	$64,%ecx
	movdqu	(%edi),%xmm5
	pshufd	$0x44,%xmm5,%xmm6
	cmovbl	%eax,%esi
	pshufd	$0xee,%xmm5,%xmm5
	movdqa	%xmm6,(%edx)
	leal	160(%esp),%eax
	movdqu	16(%edi),%xmm6
	movdqa	%xmm5,-144(%edx)
	pshufd	$0x44,%xmm6,%xmm5
	pshufd	$0xee,%xmm6,%xmm6
	movdqa	%xmm5,16(%edx)
	movdqu	32(%edi),%xmm5
	movdqa	%xmm6,-128(%edx)
	pshufd	$0x44,%xmm5,%xmm6
	pshufd	$0xee,%xmm5,%xmm5
	movdqa	%xmm6,32(%edx)
	movdqu	48(%edi),%xmm6
	movdqa	%xmm5,-112(%edx)
	pshufd	$0x44,%xmm6,%xmm5
	pshufd	$0xee,%xmm6,%xmm6
	movdqa	%xmm5,48(%edx)
	movdqu	64(%edi),%xmm5
	movdqa	%xmm6,-96(%edx)
	pshufd	$0x44,%xmm5,%xmm6
	pshufd	$0xee,%xmm5,%xmm5
	movdqa	%xmm6,64(%edx)
	movdqu	80(%edi),%xmm6
	movdqa	%xmm5,-80(%edx)
	pshufd	$0x44,%xmm6,%xmm5
	pshufd	$0xee,%xmm6,%xmm6
	movdqa	%xmm5,80(%edx)
	movdqu	96(%edi),%xmm5
	movdqa	%xmm6,-64(%edx)
	pshufd	$0x44,%xmm5,%xmm6
	pshufd	$0xee,%xmm5,%xmm5
	movdqa	%xmm6,96(%edx)
	movdqu	112(%edi),%xmm6
	movdqa	%xmm5,-48(%edx)
	pshufd	$0x44,%xmm6,%xmm5
	pshufd	$0xee,%xmm6,%xmm6
	movdqa	%xmm5,112(%edx)
	movdqu	128(%edi),%xmm5
	movdqa	%xmm6,-32(%edx)
	pshufd	$0x44,%xmm5,%xmm6
	pshufd	$0xee,%xmm5,%xmm5
	movdqa	%xmm6,128(%edx)
	movdqa	%xmm5,-16(%edx)
	/* Load two blocks and split them into limb vectors %xmm5,     */
	/* %xmm6, %xmm2, %xmm3, %xmm4; the running accumulator is      */
	/* parked at 80..144(%esp).                                    */
	movdqu	32(%esi),%xmm5
	movdqu	48(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,112(%esp)
	movdqa	%xmm3,128(%esp)
	movdqa	%xmm4,144(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	movdqa	%xmm0,80(%esp)
	movdqa	%xmm1,96(%esp)
	jbe	.Lsse2_skip_loop
	jmp	.Lsse2_loop
.align	32
.Lsse2_loop:
	/* Main loop, 64 bytes per iteration.  First half: multiply the */
	/* freshly loaded pair by the rows at negative %edx offsets.    */
	movdqa	-144(%edx),%xmm7
	movdqa	%xmm6,16(%eax)
	movdqa	%xmm2,32(%eax)
	movdqa	%xmm3,48(%eax)
	movdqa	%xmm4,64(%eax)
	movdqa	%xmm5,%xmm1
	pmuludq	%xmm7,%xmm5
	movdqa	%xmm6,%xmm0
	pmuludq	%xmm7,%xmm6
	pmuludq	%xmm7,%xmm2
	pmuludq	%xmm7,%xmm3
	pmuludq	%xmm7,%xmm4
	pmuludq	-16(%edx),%xmm0
	movdqa	%xmm1,%xmm7
	pmuludq	-128(%edx),%xmm1
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	-112(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	%xmm5,%xmm6
	pmuludq	-96(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	16(%eax),%xmm7
	pmuludq	-80(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	-128(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	-112(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	32(%eax),%xmm7
	pmuludq	-96(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	-32(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	-16(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	-128(%edx),%xmm6
	paddq	%xmm5,%xmm1
	movdqa	48(%eax),%xmm5
	pmuludq	-112(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	-48(%edx),%xmm5
	paddq	%xmm7,%xmm4
	movdqa	%xmm6,%xmm7
	pmuludq	-32(%edx),%xmm6
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	-16(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	64(%eax),%xmm6
	pmuludq	-128(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	%xmm6,%xmm7
	pmuludq	-16(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	-64(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	-48(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	64(%ebx),%xmm7
	pmuludq	-32(%edx),%xmm6
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	/* Second half: load the earlier pair, add the parked accumulator */
	/* and multiply by the rows at positive %edx offsets.             */
	movdqu	-32(%esi),%xmm5
	movdqu	-16(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	leal	-32(%esi),%eax
	subl	$64,%ecx
	paddd	80(%esp),%xmm5
	paddd	96(%esp),%xmm6
	paddd	112(%esp),%xmm2
	paddd	128(%esp),%xmm3
	paddd	144(%esp),%xmm4
	cmovbl	%eax,%esi
	leal	160(%esp),%eax
	movdqa	(%edx),%xmm7
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm6,16(%eax)
	movdqa	%xmm2,32(%eax)
	movdqa	%xmm3,48(%eax)
	movdqa	%xmm4,64(%eax)
	movdqa	%xmm5,%xmm1
	pmuludq	%xmm7,%xmm5
	paddq	%xmm0,%xmm5
	movdqa	%xmm6,%xmm0
	pmuludq	%xmm7,%xmm6
	pmuludq	%xmm7,%xmm2
	pmuludq	%xmm7,%xmm3
	pmuludq	%xmm7,%xmm4
	paddq	16(%esp),%xmm6
	paddq	32(%esp),%xmm2
	paddq	48(%esp),%xmm3
	paddq	64(%esp),%xmm4
	pmuludq	128(%edx),%xmm0
	movdqa	%xmm1,%xmm7
	pmuludq	16(%edx),%xmm1
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	32(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	%xmm5,%xmm6
	pmuludq	48(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	16(%eax),%xmm7
	pmuludq	64(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	32(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	32(%eax),%xmm7
	pmuludq	48(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	112(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	128(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	16(%edx),%xmm6
	paddq	%xmm5,%xmm1
	movdqa	48(%eax),%xmm5
	pmuludq	32(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	96(%edx),%xmm5
	paddq	%xmm7,%xmm4
	movdqa	%xmm6,%xmm7
	pmuludq	112(%edx),%xmm6
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	128(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	64(%eax),%xmm6
	pmuludq	16(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	%xmm6,%xmm7
	pmuludq	128(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	80(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	96(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	64(%ebx),%xmm7
	pmuludq	112(%edx),%xmm6
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	/* Carry propagation back to 26-bit limbs. */
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
	/* Pre-load and split the next pair; "ja" still tests the flags */
	/* of the "subl $64,%ecx" above.                                */
	movdqu	32(%esi),%xmm5
	movdqu	48(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,112(%esp)
	movdqa	%xmm3,128(%esp)
	movdqa	%xmm4,144(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	movdqa	%xmm0,80(%esp)
	movdqa	%xmm1,96(%esp)
	ja	.Lsse2_loop
.Lsse2_skip_loop:
	/* Tail.  After "addl $32,%ecx" a zero result means only the   */
	/* already-loaded pair is left, so the accumulator is added to */
	/* it here; otherwise a further pair is handled below.         */
	pshufd	$0x10,-144(%edx),%xmm7
	addl	$32,%ecx
	jnz	.Lsse2_long_tail
	paddd	%xmm0,%xmm5
	paddd	%xmm1,%xmm6
	paddd	112(%esp),%xmm2
	paddd	128(%esp),%xmm3
	paddd	144(%esp),%xmm4
.Lsse2_long_tail:
	movdqa	%xmm5,(%eax)
	movdqa	%xmm6,16(%eax)
	movdqa	%xmm2,32(%eax)
	movdqa	%xmm3,48(%eax)
	movdqa	%xmm4,64(%eax)
	pmuludq	%xmm7,%xmm5
	pmuludq	%xmm7,%xmm6
	pmuludq	%xmm7,%xmm2
	movdqa	%xmm5,%xmm0
	pshufd	$0x10,-128(%edx),%xmm5
	pmuludq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm1
	pmuludq	%xmm7,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	48(%eax),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%eax),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%eax),%xmm7
	paddq	%xmm6,%xmm3
	pshufd	$0x10,-64(%edx),%xmm6
	pmuludq	(%eax),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%eax),%xmm6
	pshufd	$0x10,-112(%edx),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%eax),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%eax),%xmm5
	paddq	%xmm7,%xmm4
	pshufd	$0x10,-48(%edx),%xmm7
	pmuludq	(%eax),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%eax),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%eax),%xmm5
	pshufd	$0x10,-96(%edx),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%eax),%xmm6
	paddq	%xmm5,%xmm0
	pshufd	$0x10,-32(%edx),%xmm5
	pmuludq	(%eax),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%eax),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%eax),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%eax),%xmm7
	pshufd	$0x10,-80(%edx),%xmm5
	paddq	%xmm6,%xmm1
	pshufd	$0x10,-16(%edx),%xmm6
	pmuludq	(%eax),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%eax),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%eax),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%eax),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%eax),%xmm6
	movdqa	64(%ebx),%xmm7
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	/* Still testing the flags of "addl $32,%ecx". */
	jz	.Lsse2_short_tail
	/* Long tail: one more pair plus the parked accumulator, */
	/* multiplied by the rows at positive %edx offsets.      */
	movdqu	-32(%esi),%xmm5
	movdqu	-16(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	pshufd	$0x10,(%edx),%xmm7
	paddd	80(%esp),%xmm5
	paddd	96(%esp),%xmm6
	paddd	112(%esp),%xmm2
	paddd	128(%esp),%xmm3
	paddd	144(%esp),%xmm4
	movdqa	%xmm5,(%esp)
	pmuludq	%xmm7,%xmm5
	movdqa	%xmm6,16(%esp)
	pmuludq	%xmm7,%xmm6
	paddq	%xmm5,%xmm0
	movdqa	%xmm2,%xmm5
	pmuludq	%xmm7,%xmm2
	paddq	%xmm6,%xmm1
	movdqa	%xmm3,%xmm6
	pmuludq	%xmm7,%xmm3
	paddq	32(%esp),%xmm2
	movdqa	%xmm5,32(%esp)
	pshufd	$0x10,16(%edx),%xmm5
	paddq	48(%esp),%xmm3
	movdqa	%xmm6,48(%esp)
	movdqa	%xmm4,%xmm6
	pmuludq	%xmm7,%xmm4
	paddq	64(%esp),%xmm4
	movdqa	%xmm6,64(%esp)
	movdqa	%xmm5,%xmm6
	pmuludq	48(%esp),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	pshufd	$0x10,80(%edx),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%esp),%xmm6
	pshufd	$0x10,32(%edx),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%esp),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%esp),%xmm5
	paddq	%xmm7,%xmm4
	pshufd	$0x10,96(%edx),%xmm7
	pmuludq	(%esp),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%esp),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%esp),%xmm5
	pshufd	$0x10,48(%edx),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%esp),%xmm6
	paddq	%xmm5,%xmm0
	pshufd	$0x10,112(%edx),%xmm5
	pmuludq	(%esp),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%esp),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%esp),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%esp),%xmm7
	pshufd	$0x10,64(%edx),%xmm5
	paddq	%xmm6,%xmm1
	pshufd	$0x10,128(%edx),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%esp),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%esp),%xmm6
	movdqa	64(%ebx),%xmm7
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
.Lsse2_short_tail:
	/* Add the two 64-bit lanes of every limb together, then do a */
	/* final carry propagation.                                   */
	pshufd	$0x4e,%xmm4,%xmm6
	pshufd	$0x4e,%xmm3,%xmm5
	paddq	%xmm6,%xmm4
	paddq	%xmm5,%xmm3
	pshufd	$0x4e,%xmm0,%xmm6
	pshufd	$0x4e,%xmm1,%xmm5
	paddq	%xmm6,%xmm0
	paddq	%xmm5,%xmm1
	pshufd	$0x4e,%xmm2,%xmm6
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm6,%xmm2
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
.Lsse2_done:
	/* %edi was advanced by 48, so these stores land on state */
	/* offsets 0..16: the five accumulator limbs.             */
	movd	%xmm0,-48(%edi)
	movd	%xmm1,-44(%edi)
	movd	%xmm2,-40(%edi)
	movd	%xmm3,-36(%edi)
	movd	%xmm4,-32(%edi)
	movl	%ebp,%esp
.Lsse2_nodata:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	_poly1305_blocks_sse2,.-_poly1305_blocks_sse2
.align	32
.type	_poly1305_emit_sse2,@function
.align	16
/*
 * _poly1305_emit_sse2 -- cdecl, same three stack arguments as
 * poly1305_emit (offsets relative to %esp after the four pushes):
 *   20(%esp)  state pointer
 *   24(%esp)  pointer to the 16-byte output buffer
 *   28(%esp)  pointer to a 16-byte value that is added to the result
 *
 * When the format flag at state offset 20 is zero, control continues
 * in poly1305_emit at .Lenter_emit.  Otherwise the five 26-bit limbs at
 * state offsets 0..16 are first recombined into 32-bit words.
 */
_poly1305_emit_sse2:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%ebp
	cmpl	$0,20(%ebp)
	je	.Lenter_emit

	/* Recombine limbs: each limb is split at the 32-bit boundary  */
	/* it straddles (shift pairs 26/6, 20/12, 14/18, 8/24) and the */
	/* parts are added with carry into %eax,%ebx,%ecx,%edx,%esi.   */
	movl	(%ebp),%eax
	movl	4(%ebp),%edi
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	16(%ebp),%esi
	movl	%edi,%ebx
	shll	$26,%edi
	shrl	$6,%ebx
	addl	%edi,%eax
	movl	%ecx,%edi
	adcl	$0,%ebx
	shll	$20,%edi
	shrl	$12,%ecx
	addl	%edi,%ebx
	movl	%edx,%edi
	adcl	$0,%ecx
	shll	$14,%edi
	shrl	$18,%edx
	addl	%edi,%ecx
	movl	%esi,%edi
	adcl	$0,%edx
	shll	$8,%edi
	shrl	$24,%esi
	addl	%edi,%edx
	adcl	$0,%esi

	/* Keep the low two bits of the top word; fold the rest back */
	/* into the low word multiplied by 5.                        */
	movl	%esi,%edi
	andl	$3,%esi
	shrl	$2,%edi
	leal	(%edi,%edi,4),%ebp
	movl	24(%esp),%edi
	addl	%ebp,%eax
	movl	28(%esp),%ebp
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%edx
	adcl	$0,%esi

	/* Save the value in %xmm0..%xmm3 and compute value + 5. */
	movd	%eax,%xmm0
	addl	$5,%eax
	movd	%ebx,%xmm1
	adcl	$0,%ebx
	movd	%ecx,%xmm2
	adcl	$0,%ecx
	movd	%edx,%xmm3
	adcl	$0,%edx
	adcl	$0,%esi

	/* %esi = -(top >> 2) selects value + 5; its complement selects */
	/* the saved value.  The output buffer is used as scratch.      */
	shrl	$2,%esi
	negl	%esi
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	movl	%eax,(%edi)
	movd	%xmm0,%eax
	movl	%ebx,4(%edi)
	movd	%xmm1,%ebx
	movl	%ecx,8(%edi)
	movd	%xmm2,%ecx
	movl	%edx,12(%edi)
	movd	%xmm3,%edx
	notl	%esi
	andl	%esi,%eax
	andl	%esi,%ebx
	orl	(%edi),%eax
	andl	%esi,%ecx
	orl	4(%edi),%ebx
	andl	%esi,%edx
	orl	8(%edi),%ecx
	orl	12(%edi),%edx

	/* Add the 128-bit value at argument 3 and store the result. */
	addl	(%ebp),%eax
	adcl	4(%ebp),%ebx
	movl	%eax,(%edi)
	adcl	8(%ebp),%ecx
	movl	%ebx,4(%edi)
	adcl	12(%ebp),%edx
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	_poly1305_emit_sse2,.-_poly1305_emit_sse2
.align	64
/*
 * Constant table addressed through %ebx by the SSE2 routines:
 *    +0   1<<24 in the low dword of each quadword (OR-ed into the top
 *         limb of every full block)
 *   +32   zeros
 *   +64   0x3ffffff in the low dword of each quadword (26-bit limb mask)
 *   +96   the four key masks also used by poly1305_init
 * followed by a NUL-terminated identification string.
 */
.Lconst_sse2:
.long	0x1000000,0,0x1000000,0,0x1000000,0,0x1000000,0
.long	0,0,0,0,0,0,0,0
.long	0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
.long	0x0fffffff,0x0ffffffc,0x0ffffffc,0x0ffffffc
.asciz	"Poly1305 for x86, CRYPTOGAMS by <appro@openssl.org>"
.align	4
.comm	OPENSSL_ia32cap_P,16,4
1357