/* ghash-x86.S revision 305153 */
1/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/ghash-x86.S 305153 2016-08-31 20:33:59Z jkim $ */
2/* Do not modify. This file is auto-generated from ghash-x86.pl. */
3#ifdef PIC
4.file	"ghash-x86.S"
5.text
/*
 * void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16])
 *
 * Pure-IA32 (no MMX/SSE) 4-bit table-driven GHASH multiplication:
 * Xi = Xi * H in GF(2^128), using the caller-precomputed Htable.
 * cdecl args after 4 pushes + 84-byte frame: 104(%esp)=Xi, 108(%esp)=Htable.
 * Frame layout: 0..15(%esp) = working copy of Xi (read one byte per
 * iteration); 16..79(%esp) = the 16-entry rem_4bit reduction table,
 * materialized on the stack with immediates (same values as .Lrem_4bit
 * below) so no PIC address load is needed.
 * Clobbers: eax, ebx, ecx, edx, esi, edi, ebp are all used but the
 * callee-saved ones are restored via push/pop; flags clobbered.
 */
6.globl	gcm_gmult_4bit_x86
7.type	gcm_gmult_4bit_x86,@function
8.align	16
9gcm_gmult_4bit_x86:
10.L_gcm_gmult_4bit_x86_begin:
11	pushl	%ebp
12	pushl	%ebx
13	pushl	%esi
14	pushl	%edi
15	subl	$84,%esp
/* edi = Xi, esi = Htable */
16	movl	104(%esp),%edi
17	movl	108(%esp),%esi
/* Load the 128-bit Xi into ebp:edx:ecx:ebx (big-endian word order). */
18	movl	(%edi),%ebp
19	movl	4(%edi),%edx
20	movl	8(%edi),%ecx
21	movl	12(%edi),%ebx
/* Build rem_4bit reduction table at 16(%esp): entry i = (i * 0x1C2) << 16. */
22	movl	$0,16(%esp)
23	movl	$471859200,20(%esp)
24	movl	$943718400,24(%esp)
25	movl	$610271232,28(%esp)
26	movl	$1887436800,32(%esp)
27	movl	$1822425088,36(%esp)
28	movl	$1220542464,40(%esp)
29	movl	$1423966208,44(%esp)
30	movl	$3774873600,48(%esp)
31	movl	$4246732800,52(%esp)
32	movl	$3644850176,56(%esp)
33	movl	$3311403008,60(%esp)
34	movl	$2441084928,64(%esp)
35	movl	$2376073216,68(%esp)
36	movl	$2847932416,72(%esp)
37	movl	$3051356160,76(%esp)
/* Stash the Xi copy at (%esp) for per-byte access inside the loop. */
38	movl	%ebp,(%esp)
39	movl	%edx,4(%esp)
40	movl	%ecx,8(%esp)
41	movl	%ebx,12(%esp)
/* First (highest) nibble: index Htable by (last byte & 0xf0). */
42	shrl	$20,%ebx
43	andl	$240,%ebx
44	movl	4(%esi,%ebx,1),%ebp
45	movl	(%esi,%ebx,1),%edx
46	movl	12(%esi,%ebx,1),%ecx
47	movl	8(%esi,%ebx,1),%ebx
48	xorl	%eax,%eax
49	movl	$15,%edi
50	jmp	.L000x86_loop
51.align	16
/*
 * Main loop: two 4-bit steps per iteration (low nibble then high
 * nibble of byte edi of the stashed Xi).  Each step shifts the
 * 128-bit accumulator (ebp:edx:ecx:ebx) right by 4, folds the
 * shifted-out nibble back in via the rem_4bit table, and XORs in the
 * selected Htable entry.  Table lookups are data-independent in
 * count/order (branch-free per-nibble), as required for GHASH.
 */
52.L000x86_loop:
53	movb	%bl,%al
54	shrdl	$4,%ecx,%ebx
55	andb	$15,%al
56	shrdl	$4,%edx,%ecx
57	shrdl	$4,%ebp,%edx
58	shrl	$4,%ebp
59	xorl	16(%esp,%eax,4),%ebp
60	movb	(%esp,%edi,1),%al
61	andb	$240,%al
62	xorl	8(%esi,%eax,1),%ebx
63	xorl	12(%esi,%eax,1),%ecx
64	xorl	(%esi,%eax,1),%edx
65	xorl	4(%esi,%eax,1),%ebp
66	decl	%edi
67	js	.L001x86_break
68	movb	%bl,%al
69	shrdl	$4,%ecx,%ebx
70	andb	$15,%al
71	shrdl	$4,%edx,%ecx
72	shrdl	$4,%ebp,%edx
73	shrl	$4,%ebp
74	xorl	16(%esp,%eax,4),%ebp
75	movb	(%esp,%edi,1),%al
76	shlb	$4,%al
77	xorl	8(%esi,%eax,1),%ebx
78	xorl	12(%esi,%eax,1),%ecx
79	xorl	(%esi,%eax,1),%edx
80	xorl	4(%esi,%eax,1),%ebp
81	jmp	.L000x86_loop
82.align	16
/* Done: byte-swap each word back to big-endian and store into Xi. */
83.L001x86_break:
84	bswap	%ebx
85	bswap	%ecx
86	bswap	%edx
87	bswap	%ebp
88	movl	104(%esp),%edi
89	movl	%ebx,12(%edi)
90	movl	%ecx,8(%edi)
91	movl	%edx,4(%edi)
92	movl	%ebp,(%edi)
93	addl	$84,%esp
94	popl	%edi
95	popl	%esi
96	popl	%ebx
97	popl	%ebp
98	ret
99.size	gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
/*
 * void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16],
 *                         const u8 *inp, size_t len)
 *
 * Pure-IA32 GHASH over `len` bytes (len is a multiple of 16): for each
 * 16-byte block, Xi = (Xi ^ block) * H, using the same stack-resident
 * rem_4bit table and nibble loop as gcm_gmult_4bit_x86 above.
 * cdecl args after 4 pushes + 84-byte frame:
 *   104(%esp)=Xi, 108(%esp)=Htable, 112(%esp)=inp, 116(%esp)=len.
 * 116(%esp) is rewritten to inp+len (end pointer) and 112(%esp) holds
 * the advancing input cursor.
 */
100.globl	gcm_ghash_4bit_x86
101.type	gcm_ghash_4bit_x86,@function
102.align	16
103gcm_ghash_4bit_x86:
104.L_gcm_ghash_4bit_x86_begin:
105	pushl	%ebp
106	pushl	%ebx
107	pushl	%esi
108	pushl	%edi
109	subl	$84,%esp
110	movl	104(%esp),%ebx
111	movl	108(%esp),%esi
112	movl	112(%esp),%edi
113	movl	116(%esp),%ecx
/* Convert len into an end pointer: 116(%esp) = inp + len. */
114	addl	%edi,%ecx
115	movl	%ecx,116(%esp)
/* Load Xi into ebp:edx:ecx:ebx. */
116	movl	(%ebx),%ebp
117	movl	4(%ebx),%edx
118	movl	8(%ebx),%ecx
119	movl	12(%ebx),%ebx
/* Stack-resident rem_4bit table (identical values to .Lrem_4bit). */
120	movl	$0,16(%esp)
121	movl	$471859200,20(%esp)
122	movl	$943718400,24(%esp)
123	movl	$610271232,28(%esp)
124	movl	$1887436800,32(%esp)
125	movl	$1822425088,36(%esp)
126	movl	$1220542464,40(%esp)
127	movl	$1423966208,44(%esp)
128	movl	$3774873600,48(%esp)
129	movl	$4246732800,52(%esp)
130	movl	$3644850176,56(%esp)
131	movl	$3311403008,60(%esp)
132	movl	$2441084928,64(%esp)
133	movl	$2376073216,68(%esp)
134	movl	$2847932416,72(%esp)
135	movl	$3051356160,76(%esp)
136.align	16
/* Per-block loop: fold next 16 input bytes into Xi, then multiply. */
137.L002x86_outer_loop:
138	xorl	12(%edi),%ebx
139	xorl	8(%edi),%ecx
140	xorl	4(%edi),%edx
141	xorl	(%edi),%ebp
142	movl	%ebx,12(%esp)
143	movl	%ecx,8(%esp)
144	movl	%edx,4(%esp)
145	movl	%ebp,(%esp)
/* First nibble lookup, then the standard 4-bit gmult loop. */
146	shrl	$20,%ebx
147	andl	$240,%ebx
148	movl	4(%esi,%ebx,1),%ebp
149	movl	(%esi,%ebx,1),%edx
150	movl	12(%esi,%ebx,1),%ecx
151	movl	8(%esi,%ebx,1),%ebx
152	xorl	%eax,%eax
153	movl	$15,%edi
154	jmp	.L003x86_loop
155.align	16
156.L003x86_loop:
157	movb	%bl,%al
158	shrdl	$4,%ecx,%ebx
159	andb	$15,%al
160	shrdl	$4,%edx,%ecx
161	shrdl	$4,%ebp,%edx
162	shrl	$4,%ebp
163	xorl	16(%esp,%eax,4),%ebp
164	movb	(%esp,%edi,1),%al
165	andb	$240,%al
166	xorl	8(%esi,%eax,1),%ebx
167	xorl	12(%esi,%eax,1),%ecx
168	xorl	(%esi,%eax,1),%edx
169	xorl	4(%esi,%eax,1),%ebp
170	decl	%edi
171	js	.L004x86_break
172	movb	%bl,%al
173	shrdl	$4,%ecx,%ebx
174	andb	$15,%al
175	shrdl	$4,%edx,%ecx
176	shrdl	$4,%ebp,%edx
177	shrl	$4,%ebp
178	xorl	16(%esp,%eax,4),%ebp
179	movb	(%esp,%edi,1),%al
180	shlb	$4,%al
181	xorl	8(%esi,%eax,1),%ebx
182	xorl	12(%esi,%eax,1),%ecx
183	xorl	(%esi,%eax,1),%edx
184	xorl	4(%esi,%eax,1),%ebp
185	jmp	.L003x86_loop
186.align	16
/* Block done: byte-swap, advance input cursor, loop while inp < end. */
187.L004x86_break:
188	bswap	%ebx
189	bswap	%ecx
190	bswap	%edx
191	bswap	%ebp
192	movl	112(%esp),%edi
193	leal	16(%edi),%edi
194	cmpl	116(%esp),%edi
195	movl	%edi,112(%esp)
196	jb	.L002x86_outer_loop
/* Store final Xi (big-endian) and unwind. */
197	movl	104(%esp),%edi
198	movl	%ebx,12(%edi)
199	movl	%ecx,8(%edi)
200	movl	%edx,4(%edi)
201	movl	%ebp,(%edi)
202	addl	$84,%esp
203	popl	%edi
204	popl	%esi
205	popl	%ebx
206	popl	%ebp
207	ret
208.size	gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
/*
 * void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16])
 *
 * MMX flavor of the 4-bit GHASH multiply: Xi = Xi * H.  The 128-bit
 * accumulator lives in mm0:mm1 (low:high 64-bit halves), shifted 4
 * bits per step with psrlq/psllq; eax points (PIC-relative) at the
 * .Lrem_4bit reduction table.  cdecl args after 4 pushes:
 * 20(%esp)=Xi, 24(%esp)=Htable.  Uses emms before returning so the
 * x87/MMX state is clean for the caller.
 */
209.globl	gcm_gmult_4bit_mmx
210.type	gcm_gmult_4bit_mmx,@function
211.align	16
212gcm_gmult_4bit_mmx:
213.L_gcm_gmult_4bit_mmx_begin:
214	pushl	%ebp
215	pushl	%ebx
216	pushl	%esi
217	pushl	%edi
218	movl	20(%esp),%edi
219	movl	24(%esp),%esi
/* Standard i386 PIC idiom: call/pop to discover our own address. */
220	call	.L005pic_point
221.L005pic_point:
222	popl	%eax
223	leal	.Lrem_4bit-.L005pic_point(%eax),%eax
/* Seed the accumulator from the last byte of Xi: low nibble in cl
 * (pre-shifted <<4 for direct Htable indexing), high nibble in edx. */
224	movzbl	15(%edi),%ebx
225	xorl	%ecx,%ecx
226	movl	%ebx,%edx
227	movb	%dl,%cl
228	movl	$14,%ebp
229	shlb	$4,%cl
230	andl	$240,%edx
231	movq	8(%esi,%ecx,1),%mm0
232	movq	(%esi,%ecx,1),%mm1
233	movd	%mm0,%ebx
234	jmp	.L006mmx_loop
235.align	16
/*
 * Per-nibble loop (two nibbles per iteration, bytes 14..0 of Xi):
 * shift mm1:mm0 right 4, fold the dropped nibble through .Lrem_4bit
 * (8-byte entries), XOR in the Htable entry for the next nibble.
 */
236.L006mmx_loop:
237	psrlq	$4,%mm0
238	andl	$15,%ebx
239	movq	%mm1,%mm2
240	psrlq	$4,%mm1
241	pxor	8(%esi,%edx,1),%mm0
242	movb	(%edi,%ebp,1),%cl
243	psllq	$60,%mm2
244	pxor	(%eax,%ebx,8),%mm1
245	decl	%ebp
246	movd	%mm0,%ebx
247	pxor	(%esi,%edx,1),%mm1
248	movl	%ecx,%edx
249	pxor	%mm2,%mm0
250	js	.L007mmx_break
251	shlb	$4,%cl
252	andl	$15,%ebx
253	psrlq	$4,%mm0
254	andl	$240,%edx
255	movq	%mm1,%mm2
256	psrlq	$4,%mm1
257	pxor	8(%esi,%ecx,1),%mm0
258	psllq	$60,%mm2
259	pxor	(%eax,%ebx,8),%mm1
260	movd	%mm0,%ebx
261	pxor	(%esi,%ecx,1),%mm1
262	pxor	%mm2,%mm0
263	jmp	.L006mmx_loop
264.align	16
/* Tail: finish the final low-nibble and high-nibble steps. */
265.L007mmx_break:
266	shlb	$4,%cl
267	andl	$15,%ebx
268	psrlq	$4,%mm0
269	andl	$240,%edx
270	movq	%mm1,%mm2
271	psrlq	$4,%mm1
272	pxor	8(%esi,%ecx,1),%mm0
273	psllq	$60,%mm2
274	pxor	(%eax,%ebx,8),%mm1
275	movd	%mm0,%ebx
276	pxor	(%esi,%ecx,1),%mm1
277	pxor	%mm2,%mm0
278	psrlq	$4,%mm0
279	andl	$15,%ebx
280	movq	%mm1,%mm2
281	psrlq	$4,%mm1
282	pxor	8(%esi,%edx,1),%mm0
283	psllq	$60,%mm2
284	pxor	(%eax,%ebx,8),%mm1
285	movd	%mm0,%ebx
286	pxor	(%esi,%edx,1),%mm1
287	pxor	%mm2,%mm0
/* Extract the four 32-bit words, byte-swap to big-endian, store Xi. */
288	psrlq	$32,%mm0
289	movd	%mm1,%edx
290	psrlq	$32,%mm1
291	movd	%mm0,%ecx
292	movd	%mm1,%ebp
293	bswap	%ebx
294	bswap	%edx
295	bswap	%ecx
296	bswap	%ebp
297	emms
298	movl	%ebx,12(%edi)
299	movl	%edx,4(%edi)
300	movl	%ecx,8(%edi)
301	movl	%ebp,(%edi)
302	popl	%edi
303	popl	%esi
304	popl	%ebx
305	popl	%ebp
306	ret
307.size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
/*
 * void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16],
 *                         const u8 *inp, size_t len)
 *
 * MMX GHASH with 8-bit-at-a-time reduction via the .Lrem_8bit table.
 * cdecl args after 4 pushes: 20(%esp)=Xi, 24(%esp)=Htable,
 * 28(%esp)=inp, 32(%esp)=len (multiple of 16).
 *
 * The function switches to a private 64-byte-aligned stack frame
 * (original %esp saved at 556(%esp)) laid out as:
 *   0..15(%esp)     16 bytes: low nibble of the high word of each
 *                   Htable entry, pre-shifted <<4 (carry-out bytes)
 *   16..143(%esp)   128 B: low halves of Htable, shifted right 4 bits
 *   144..271(%esp)  128 B: high halves of Htable, shifted right 4 bits
 *   272..399(%esp)  128 B: low halves, shifted right 8 bits (via the
 *                   4-bit tables; indexed by the high nibble)
 *   400..527(%esp)  128 B: high halves, shifted right 8 bits
 *   528..543(%esp)  Xi ^ input block (working copy)
 *   544(%esp)=Xi ptr, 548(%esp)=inp cursor, 552(%esp)=end ptr,
 *   556(%esp)=saved %esp
 * NOTE(review): the exact roles of the four 128-byte regions are
 * inferred from the shift pattern below -- confirm against
 * ghash-x86.pl ("4-bit*8-bit" MMX path) before relying on them.
 */
308.globl	gcm_ghash_4bit_mmx
309.type	gcm_ghash_4bit_mmx,@function
310.align	16
311gcm_ghash_4bit_mmx:
312.L_gcm_ghash_4bit_mmx_begin:
313	pushl	%ebp
314	pushl	%ebx
315	pushl	%esi
316	pushl	%edi
317	movl	20(%esp),%eax
318	movl	24(%esp),%ebx
319	movl	28(%esp),%ecx
320	movl	32(%esp),%edx
321	movl	%esp,%ebp
/* PIC: esi = &.Lrem_8bit. */
322	call	.L008pic_point
323.L008pic_point:
324	popl	%esi
325	leal	.Lrem_8bit-.L008pic_point(%esi),%esi
/* Carve out the 560-byte, 64-byte-aligned private frame. */
326	subl	$544,%esp
327	andl	$-64,%esp
328	subl	$16,%esp
329	addl	%ecx,%edx
330	movl	%eax,544(%esp)
331	movl	%edx,552(%esp)
332	movl	%ebp,556(%esp)
/*
 * Pre-shift all 16 Htable entries into the stack tables.  The loop is
 * fully unrolled (15 near-identical stanzas + tail); each stanza
 * copies one entry to the edi (>>4 area, base 144(%esp)) region while
 * computing its 4-bit right shift into the ebp (base 400(%esp))
 * region, and records the shifted-out nibble (<<4) at byte i(%esp).
 * Entries are visited in the interleaved order 8,1,9,2,10,... that
 * the negative/positive %ebx offsets below encode (ebx = Htable+128).
 */
333	addl	$128,%ebx
334	leal	144(%esp),%edi
335	leal	400(%esp),%ebp
336	movl	-120(%ebx),%edx
337	movq	-120(%ebx),%mm0
338	movq	-128(%ebx),%mm3
339	shll	$4,%edx
340	movb	%dl,(%esp)
341	movl	-104(%ebx),%edx
342	movq	-104(%ebx),%mm2
343	movq	-112(%ebx),%mm5
344	movq	%mm0,-128(%edi)
345	psrlq	$4,%mm0
346	movq	%mm3,(%edi)
347	movq	%mm3,%mm7
348	psrlq	$4,%mm3
349	shll	$4,%edx
350	movb	%dl,1(%esp)
351	movl	-88(%ebx),%edx
352	movq	-88(%ebx),%mm1
353	psllq	$60,%mm7
354	movq	-96(%ebx),%mm4
355	por	%mm7,%mm0
356	movq	%mm2,-120(%edi)
357	psrlq	$4,%mm2
358	movq	%mm5,8(%edi)
359	movq	%mm5,%mm6
360	movq	%mm0,-128(%ebp)
361	psrlq	$4,%mm5
362	movq	%mm3,(%ebp)
363	shll	$4,%edx
364	movb	%dl,2(%esp)
365	movl	-72(%ebx),%edx
366	movq	-72(%ebx),%mm0
367	psllq	$60,%mm6
368	movq	-80(%ebx),%mm3
369	por	%mm6,%mm2
370	movq	%mm1,-112(%edi)
371	psrlq	$4,%mm1
372	movq	%mm4,16(%edi)
373	movq	%mm4,%mm7
374	movq	%mm2,-120(%ebp)
375	psrlq	$4,%mm4
376	movq	%mm5,8(%ebp)
377	shll	$4,%edx
378	movb	%dl,3(%esp)
379	movl	-56(%ebx),%edx
380	movq	-56(%ebx),%mm2
381	psllq	$60,%mm7
382	movq	-64(%ebx),%mm5
383	por	%mm7,%mm1
384	movq	%mm0,-104(%edi)
385	psrlq	$4,%mm0
386	movq	%mm3,24(%edi)
387	movq	%mm3,%mm6
388	movq	%mm1,-112(%ebp)
389	psrlq	$4,%mm3
390	movq	%mm4,16(%ebp)
391	shll	$4,%edx
392	movb	%dl,4(%esp)
393	movl	-40(%ebx),%edx
394	movq	-40(%ebx),%mm1
395	psllq	$60,%mm6
396	movq	-48(%ebx),%mm4
397	por	%mm6,%mm0
398	movq	%mm2,-96(%edi)
399	psrlq	$4,%mm2
400	movq	%mm5,32(%edi)
401	movq	%mm5,%mm7
402	movq	%mm0,-104(%ebp)
403	psrlq	$4,%mm5
404	movq	%mm3,24(%ebp)
405	shll	$4,%edx
406	movb	%dl,5(%esp)
407	movl	-24(%ebx),%edx
408	movq	-24(%ebx),%mm0
409	psllq	$60,%mm7
410	movq	-32(%ebx),%mm3
411	por	%mm7,%mm2
412	movq	%mm1,-88(%edi)
413	psrlq	$4,%mm1
414	movq	%mm4,40(%edi)
415	movq	%mm4,%mm6
416	movq	%mm2,-96(%ebp)
417	psrlq	$4,%mm4
418	movq	%mm5,32(%ebp)
419	shll	$4,%edx
420	movb	%dl,6(%esp)
421	movl	-8(%ebx),%edx
422	movq	-8(%ebx),%mm2
423	psllq	$60,%mm6
424	movq	-16(%ebx),%mm5
425	por	%mm6,%mm1
426	movq	%mm0,-80(%edi)
427	psrlq	$4,%mm0
428	movq	%mm3,48(%edi)
429	movq	%mm3,%mm7
430	movq	%mm1,-88(%ebp)
431	psrlq	$4,%mm3
432	movq	%mm4,40(%ebp)
433	shll	$4,%edx
434	movb	%dl,7(%esp)
435	movl	8(%ebx),%edx
436	movq	8(%ebx),%mm1
437	psllq	$60,%mm7
438	movq	(%ebx),%mm4
439	por	%mm7,%mm0
440	movq	%mm2,-72(%edi)
441	psrlq	$4,%mm2
442	movq	%mm5,56(%edi)
443	movq	%mm5,%mm6
444	movq	%mm0,-80(%ebp)
445	psrlq	$4,%mm5
446	movq	%mm3,48(%ebp)
447	shll	$4,%edx
448	movb	%dl,8(%esp)
449	movl	24(%ebx),%edx
450	movq	24(%ebx),%mm0
451	psllq	$60,%mm6
452	movq	16(%ebx),%mm3
453	por	%mm6,%mm2
454	movq	%mm1,-64(%edi)
455	psrlq	$4,%mm1
456	movq	%mm4,64(%edi)
457	movq	%mm4,%mm7
458	movq	%mm2,-72(%ebp)
459	psrlq	$4,%mm4
460	movq	%mm5,56(%ebp)
461	shll	$4,%edx
462	movb	%dl,9(%esp)
463	movl	40(%ebx),%edx
464	movq	40(%ebx),%mm2
465	psllq	$60,%mm7
466	movq	32(%ebx),%mm5
467	por	%mm7,%mm1
468	movq	%mm0,-56(%edi)
469	psrlq	$4,%mm0
470	movq	%mm3,72(%edi)
471	movq	%mm3,%mm6
472	movq	%mm1,-64(%ebp)
473	psrlq	$4,%mm3
474	movq	%mm4,64(%ebp)
475	shll	$4,%edx
476	movb	%dl,10(%esp)
477	movl	56(%ebx),%edx
478	movq	56(%ebx),%mm1
479	psllq	$60,%mm6
480	movq	48(%ebx),%mm4
481	por	%mm6,%mm0
482	movq	%mm2,-48(%edi)
483	psrlq	$4,%mm2
484	movq	%mm5,80(%edi)
485	movq	%mm5,%mm7
486	movq	%mm0,-56(%ebp)
487	psrlq	$4,%mm5
488	movq	%mm3,72(%ebp)
489	shll	$4,%edx
490	movb	%dl,11(%esp)
491	movl	72(%ebx),%edx
492	movq	72(%ebx),%mm0
493	psllq	$60,%mm7
494	movq	64(%ebx),%mm3
495	por	%mm7,%mm2
496	movq	%mm1,-40(%edi)
497	psrlq	$4,%mm1
498	movq	%mm4,88(%edi)
499	movq	%mm4,%mm6
500	movq	%mm2,-48(%ebp)
501	psrlq	$4,%mm4
502	movq	%mm5,80(%ebp)
503	shll	$4,%edx
504	movb	%dl,12(%esp)
505	movl	88(%ebx),%edx
506	movq	88(%ebx),%mm2
507	psllq	$60,%mm6
508	movq	80(%ebx),%mm5
509	por	%mm6,%mm1
510	movq	%mm0,-32(%edi)
511	psrlq	$4,%mm0
512	movq	%mm3,96(%edi)
513	movq	%mm3,%mm7
514	movq	%mm1,-40(%ebp)
515	psrlq	$4,%mm3
516	movq	%mm4,88(%ebp)
517	shll	$4,%edx
518	movb	%dl,13(%esp)
519	movl	104(%ebx),%edx
520	movq	104(%ebx),%mm1
521	psllq	$60,%mm7
522	movq	96(%ebx),%mm4
523	por	%mm7,%mm0
524	movq	%mm2,-24(%edi)
525	psrlq	$4,%mm2
526	movq	%mm5,104(%edi)
527	movq	%mm5,%mm6
528	movq	%mm0,-32(%ebp)
529	psrlq	$4,%mm5
530	movq	%mm3,96(%ebp)
531	shll	$4,%edx
532	movb	%dl,14(%esp)
533	movl	120(%ebx),%edx
534	movq	120(%ebx),%mm0
535	psllq	$60,%mm6
536	movq	112(%ebx),%mm3
537	por	%mm6,%mm2
538	movq	%mm1,-16(%edi)
539	psrlq	$4,%mm1
540	movq	%mm4,112(%edi)
541	movq	%mm4,%mm7
542	movq	%mm2,-24(%ebp)
543	psrlq	$4,%mm4
544	movq	%mm5,104(%ebp)
545	shll	$4,%edx
546	movb	%dl,15(%esp)
/* Tail of the unrolled pre-shift (last two entries drain the pipeline). */
547	psllq	$60,%mm7
548	por	%mm7,%mm1
549	movq	%mm0,-8(%edi)
550	psrlq	$4,%mm0
551	movq	%mm3,120(%edi)
552	movq	%mm3,%mm6
553	movq	%mm1,-16(%ebp)
554	psrlq	$4,%mm3
555	movq	%mm4,112(%ebp)
556	psllq	$60,%mm6
557	por	%mm6,%mm0
558	movq	%mm0,-8(%ebp)
559	movq	%mm3,120(%ebp)
/* Load Xi: mm6 = low 8 bytes, ebx:edx = high 8 bytes. */
560	movq	(%eax),%mm6
561	movl	8(%eax),%ebx
562	movl	12(%eax),%edx
563.align	16
/*
 * Outer loop: one 16-byte input block per iteration.
 * Xi ^= block, then multiply by H one byte at a time (16 software-
 * pipelined stages).  mm7/mm6 accumulate the product low/high halves;
 * mm0-mm2 stage pinsrw-ed rem_8bit corrections; bl/cl carry the
 * shifted-out byte folded through the per-entry carry bytes at
 * 0..15(%esp) and looked up (scaled by 2) in .Lrem_8bit via %esi.
 */
564.L009outer:
565	xorl	12(%ecx),%edx
566	xorl	8(%ecx),%ebx
567	pxor	(%ecx),%mm6
568	leal	16(%ecx),%ecx
569	movl	%ebx,536(%esp)
570	movq	%mm6,528(%esp)
571	movl	%ecx,548(%esp)
572	xorl	%eax,%eax
573	roll	$8,%edx
574	movb	%dl,%al
575	movl	%eax,%ebp
576	andb	$15,%al
577	shrl	$4,%ebp
578	pxor	%mm0,%mm0
579	roll	$8,%edx
580	pxor	%mm1,%mm1
581	pxor	%mm2,%mm2
582	movq	16(%esp,%eax,8),%mm7
583	movq	144(%esp,%eax,8),%mm6
584	movb	%dl,%al
585	movd	%mm7,%ebx
586	psrlq	$8,%mm7
587	movq	%mm6,%mm3
588	movl	%eax,%edi
589	psrlq	$8,%mm6
590	pxor	272(%esp,%ebp,8),%mm7
591	andb	$15,%al
592	psllq	$56,%mm3
593	shrl	$4,%edi
594	pxor	16(%esp,%eax,8),%mm7
595	roll	$8,%edx
596	pxor	144(%esp,%eax,8),%mm6
597	pxor	%mm3,%mm7
598	pxor	400(%esp,%ebp,8),%mm6
599	xorb	(%esp,%ebp,1),%bl
600	movb	%dl,%al
601	movd	%mm7,%ecx
602	movzbl	%bl,%ebx
603	psrlq	$8,%mm7
604	movq	%mm6,%mm3
605	movl	%eax,%ebp
606	psrlq	$8,%mm6
607	pxor	272(%esp,%edi,8),%mm7
608	andb	$15,%al
609	psllq	$56,%mm3
610	shrl	$4,%ebp
611	pinsrw	$2,(%esi,%ebx,2),%mm2
612	pxor	16(%esp,%eax,8),%mm7
613	roll	$8,%edx
614	pxor	144(%esp,%eax,8),%mm6
615	pxor	%mm3,%mm7
616	pxor	400(%esp,%edi,8),%mm6
617	xorb	(%esp,%edi,1),%cl
618	movb	%dl,%al
/* Next word of (Xi ^ block) from the stash. */
619	movl	536(%esp),%edx
620	movd	%mm7,%ebx
621	movzbl	%cl,%ecx
622	psrlq	$8,%mm7
623	movq	%mm6,%mm3
624	movl	%eax,%edi
625	psrlq	$8,%mm6
626	pxor	272(%esp,%ebp,8),%mm7
627	andb	$15,%al
628	psllq	$56,%mm3
629	pxor	%mm2,%mm6
630	shrl	$4,%edi
631	pinsrw	$2,(%esi,%ecx,2),%mm1
632	pxor	16(%esp,%eax,8),%mm7
633	roll	$8,%edx
634	pxor	144(%esp,%eax,8),%mm6
635	pxor	%mm3,%mm7
636	pxor	400(%esp,%ebp,8),%mm6
637	xorb	(%esp,%ebp,1),%bl
638	movb	%dl,%al
639	movd	%mm7,%ecx
640	movzbl	%bl,%ebx
641	psrlq	$8,%mm7
642	movq	%mm6,%mm3
643	movl	%eax,%ebp
644	psrlq	$8,%mm6
645	pxor	272(%esp,%edi,8),%mm7
646	andb	$15,%al
647	psllq	$56,%mm3
648	pxor	%mm1,%mm6
649	shrl	$4,%ebp
650	pinsrw	$2,(%esi,%ebx,2),%mm0
651	pxor	16(%esp,%eax,8),%mm7
652	roll	$8,%edx
653	pxor	144(%esp,%eax,8),%mm6
654	pxor	%mm3,%mm7
655	pxor	400(%esp,%edi,8),%mm6
656	xorb	(%esp,%edi,1),%cl
657	movb	%dl,%al
658	movd	%mm7,%ebx
659	movzbl	%cl,%ecx
660	psrlq	$8,%mm7
661	movq	%mm6,%mm3
662	movl	%eax,%edi
663	psrlq	$8,%mm6
664	pxor	272(%esp,%ebp,8),%mm7
665	andb	$15,%al
666	psllq	$56,%mm3
667	pxor	%mm0,%mm6
668	shrl	$4,%edi
669	pinsrw	$2,(%esi,%ecx,2),%mm2
670	pxor	16(%esp,%eax,8),%mm7
671	roll	$8,%edx
672	pxor	144(%esp,%eax,8),%mm6
673	pxor	%mm3,%mm7
674	pxor	400(%esp,%ebp,8),%mm6
675	xorb	(%esp,%ebp,1),%bl
676	movb	%dl,%al
677	movd	%mm7,%ecx
678	movzbl	%bl,%ebx
679	psrlq	$8,%mm7
680	movq	%mm6,%mm3
681	movl	%eax,%ebp
682	psrlq	$8,%mm6
683	pxor	272(%esp,%edi,8),%mm7
684	andb	$15,%al
685	psllq	$56,%mm3
686	pxor	%mm2,%mm6
687	shrl	$4,%ebp
688	pinsrw	$2,(%esi,%ebx,2),%mm1
689	pxor	16(%esp,%eax,8),%mm7
690	roll	$8,%edx
691	pxor	144(%esp,%eax,8),%mm6
692	pxor	%mm3,%mm7
693	pxor	400(%esp,%edi,8),%mm6
694	xorb	(%esp,%edi,1),%cl
695	movb	%dl,%al
/* Third word of the stashed Xi ^ block. */
696	movl	532(%esp),%edx
697	movd	%mm7,%ebx
698	movzbl	%cl,%ecx
699	psrlq	$8,%mm7
700	movq	%mm6,%mm3
701	movl	%eax,%edi
702	psrlq	$8,%mm6
703	pxor	272(%esp,%ebp,8),%mm7
704	andb	$15,%al
705	psllq	$56,%mm3
706	pxor	%mm1,%mm6
707	shrl	$4,%edi
708	pinsrw	$2,(%esi,%ecx,2),%mm0
709	pxor	16(%esp,%eax,8),%mm7
710	roll	$8,%edx
711	pxor	144(%esp,%eax,8),%mm6
712	pxor	%mm3,%mm7
713	pxor	400(%esp,%ebp,8),%mm6
714	xorb	(%esp,%ebp,1),%bl
715	movb	%dl,%al
716	movd	%mm7,%ecx
717	movzbl	%bl,%ebx
718	psrlq	$8,%mm7
719	movq	%mm6,%mm3
720	movl	%eax,%ebp
721	psrlq	$8,%mm6
722	pxor	272(%esp,%edi,8),%mm7
723	andb	$15,%al
724	psllq	$56,%mm3
725	pxor	%mm0,%mm6
726	shrl	$4,%ebp
727	pinsrw	$2,(%esi,%ebx,2),%mm2
728	pxor	16(%esp,%eax,8),%mm7
729	roll	$8,%edx
730	pxor	144(%esp,%eax,8),%mm6
731	pxor	%mm3,%mm7
732	pxor	400(%esp,%edi,8),%mm6
733	xorb	(%esp,%edi,1),%cl
734	movb	%dl,%al
735	movd	%mm7,%ebx
736	movzbl	%cl,%ecx
737	psrlq	$8,%mm7
738	movq	%mm6,%mm3
739	movl	%eax,%edi
740	psrlq	$8,%mm6
741	pxor	272(%esp,%ebp,8),%mm7
742	andb	$15,%al
743	psllq	$56,%mm3
744	pxor	%mm2,%mm6
745	shrl	$4,%edi
746	pinsrw	$2,(%esi,%ecx,2),%mm1
747	pxor	16(%esp,%eax,8),%mm7
748	roll	$8,%edx
749	pxor	144(%esp,%eax,8),%mm6
750	pxor	%mm3,%mm7
751	pxor	400(%esp,%ebp,8),%mm6
752	xorb	(%esp,%ebp,1),%bl
753	movb	%dl,%al
754	movd	%mm7,%ecx
755	movzbl	%bl,%ebx
756	psrlq	$8,%mm7
757	movq	%mm6,%mm3
758	movl	%eax,%ebp
759	psrlq	$8,%mm6
760	pxor	272(%esp,%edi,8),%mm7
761	andb	$15,%al
762	psllq	$56,%mm3
763	pxor	%mm1,%mm6
764	shrl	$4,%ebp
765	pinsrw	$2,(%esi,%ebx,2),%mm0
766	pxor	16(%esp,%eax,8),%mm7
767	roll	$8,%edx
768	pxor	144(%esp,%eax,8),%mm6
769	pxor	%mm3,%mm7
770	pxor	400(%esp,%edi,8),%mm6
771	xorb	(%esp,%edi,1),%cl
772	movb	%dl,%al
/* Last word of the stashed Xi ^ block. */
773	movl	528(%esp),%edx
774	movd	%mm7,%ebx
775	movzbl	%cl,%ecx
776	psrlq	$8,%mm7
777	movq	%mm6,%mm3
778	movl	%eax,%edi
779	psrlq	$8,%mm6
780	pxor	272(%esp,%ebp,8),%mm7
781	andb	$15,%al
782	psllq	$56,%mm3
783	pxor	%mm0,%mm6
784	shrl	$4,%edi
785	pinsrw	$2,(%esi,%ecx,2),%mm2
786	pxor	16(%esp,%eax,8),%mm7
787	roll	$8,%edx
788	pxor	144(%esp,%eax,8),%mm6
789	pxor	%mm3,%mm7
790	pxor	400(%esp,%ebp,8),%mm6
791	xorb	(%esp,%ebp,1),%bl
792	movb	%dl,%al
793	movd	%mm7,%ecx
794	movzbl	%bl,%ebx
795	psrlq	$8,%mm7
796	movq	%mm6,%mm3
797	movl	%eax,%ebp
798	psrlq	$8,%mm6
799	pxor	272(%esp,%edi,8),%mm7
800	andb	$15,%al
801	psllq	$56,%mm3
802	pxor	%mm2,%mm6
803	shrl	$4,%ebp
804	pinsrw	$2,(%esi,%ebx,2),%mm1
805	pxor	16(%esp,%eax,8),%mm7
806	roll	$8,%edx
807	pxor	144(%esp,%eax,8),%mm6
808	pxor	%mm3,%mm7
809	pxor	400(%esp,%edi,8),%mm6
810	xorb	(%esp,%edi,1),%cl
811	movb	%dl,%al
812	movd	%mm7,%ebx
813	movzbl	%cl,%ecx
814	psrlq	$8,%mm7
815	movq	%mm6,%mm3
816	movl	%eax,%edi
817	psrlq	$8,%mm6
818	pxor	272(%esp,%ebp,8),%mm7
819	andb	$15,%al
820	psllq	$56,%mm3
821	pxor	%mm1,%mm6
822	shrl	$4,%edi
823	pinsrw	$2,(%esi,%ecx,2),%mm0
824	pxor	16(%esp,%eax,8),%mm7
825	roll	$8,%edx
826	pxor	144(%esp,%eax,8),%mm6
827	pxor	%mm3,%mm7
828	pxor	400(%esp,%ebp,8),%mm6
829	xorb	(%esp,%ebp,1),%bl
830	movb	%dl,%al
831	movd	%mm7,%ecx
832	movzbl	%bl,%ebx
833	psrlq	$8,%mm7
834	movq	%mm6,%mm3
835	movl	%eax,%ebp
836	psrlq	$8,%mm6
837	pxor	272(%esp,%edi,8),%mm7
838	andb	$15,%al
839	psllq	$56,%mm3
840	pxor	%mm0,%mm6
841	shrl	$4,%ebp
842	pinsrw	$2,(%esi,%ebx,2),%mm2
843	pxor	16(%esp,%eax,8),%mm7
844	roll	$8,%edx
845	pxor	144(%esp,%eax,8),%mm6
846	pxor	%mm3,%mm7
847	pxor	400(%esp,%edi,8),%mm6
848	xorb	(%esp,%edi,1),%cl
849	movb	%dl,%al
850	movl	524(%esp),%edx
851	movd	%mm7,%ebx
852	movzbl	%cl,%ecx
853	psrlq	$8,%mm7
854	movq	%mm6,%mm3
855	movl	%eax,%edi
856	psrlq	$8,%mm6
857	pxor	272(%esp,%ebp,8),%mm7
858	andb	$15,%al
859	psllq	$56,%mm3
860	pxor	%mm2,%mm6
861	shrl	$4,%edi
862	pinsrw	$2,(%esi,%ecx,2),%mm1
863	pxor	16(%esp,%eax,8),%mm7
864	pxor	144(%esp,%eax,8),%mm6
865	xorb	(%esp,%ebp,1),%bl
866	pxor	%mm3,%mm7
867	pxor	400(%esp,%ebp,8),%mm6
868	movzbl	%bl,%ebx
869	pxor	%mm2,%mm2
/* Final 4-bit step + rem_8bit corrections, then assemble the result. */
870	psllq	$4,%mm1
871	movd	%mm7,%ecx
872	psrlq	$4,%mm7
873	movq	%mm6,%mm3
874	psrlq	$4,%mm6
875	shll	$4,%ecx
876	pxor	16(%esp,%edi,8),%mm7
877	psllq	$60,%mm3
878	movzbl	%cl,%ecx
879	pxor	%mm3,%mm7
880	pxor	144(%esp,%edi,8),%mm6
881	pinsrw	$2,(%esi,%ebx,2),%mm0
882	pxor	%mm1,%mm6
883	movd	%mm7,%edx
884	pinsrw	$3,(%esi,%ecx,2),%mm2
885	psllq	$12,%mm0
886	pxor	%mm0,%mm6
887	psrlq	$32,%mm7
888	pxor	%mm2,%mm6
/* Byte-swap the 128-bit result (edx:ebx words + pshufw on mm6),
 * advance the input cursor and loop while cursor != end. */
889	movl	548(%esp),%ecx
890	movd	%mm7,%ebx
891	movq	%mm6,%mm3
892	psllw	$8,%mm6
893	psrlw	$8,%mm3
894	por	%mm3,%mm6
895	bswap	%edx
896	pshufw	$27,%mm6,%mm6
897	bswap	%ebx
898	cmpl	552(%esp),%ecx
899	jne	.L009outer
/* Store Xi, restore the caller's %esp, clear MMX state, return. */
900	movl	544(%esp),%eax
901	movl	%edx,12(%eax)
902	movl	%ebx,8(%eax)
903	movq	%mm6,(%eax)
904	movl	556(%esp),%esp
905	emms
906	popl	%edi
907	popl	%esi
908	popl	%ebx
909	popl	%ebp
910	ret
911.size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
/*
 * void gcm_init_clmul(u128 Htable[16], const u64 Xi[2])
 *
 * PCLMULQDQ key setup: 4(%esp)=Htable (out), 8(%esp)=H (in).
 * The .byte sequences are hand-encoded SSE4/PCLMUL instructions:
 *   102,15,58,68,x,imm = pclmulqdq; 102,15,58,15,x,imm = palignr.
 * Computes the "twisted" H (H<<1 reduced mod the GCM polynomial,
 * using the 0xc2... constant at .Lbswap+16), then H^2 by a Karatsuba
 * carry-less multiply + reduction, and stores H at (%edx), H^2 at
 * 16(%edx) and the combined Karatsuba helper (hi^lo halves of both)
 * at 32(%edx) for gcm_gmult_clmul/gcm_ghash_clmul.
 */
912.globl	gcm_init_clmul
913.type	gcm_init_clmul,@function
914.align	16
915gcm_init_clmul:
916.L_gcm_init_clmul_begin:
917	movl	4(%esp),%edx
918	movl	8(%esp),%eax
/* PIC: ecx = &.Lbswap (16(%ecx) is the 0xc2 reduction constant). */
919	call	.L010pic
920.L010pic:
921	popl	%ecx
922	leal	.Lbswap-.L010pic(%ecx),%ecx
/* H <<= 1 (128-bit), conditionally XOR the reduction constant if the
 * top bit was set (pcmpgtd produces the all-ones mask branch-free). */
923	movdqu	(%eax),%xmm2
924	pshufd	$78,%xmm2,%xmm2
925	pshufd	$255,%xmm2,%xmm4
926	movdqa	%xmm2,%xmm3
927	psllq	$1,%xmm2
928	pxor	%xmm5,%xmm5
929	psrlq	$63,%xmm3
930	pcmpgtd	%xmm4,%xmm5
931	pslldq	$8,%xmm3
932	por	%xmm3,%xmm2
933	pand	16(%ecx),%xmm5
934	pxor	%xmm5,%xmm2
/* Square H: Karatsuba 3-multiply (lo*lo, hi*hi, mid*mid). */
935	movdqa	%xmm2,%xmm0
936	movdqa	%xmm0,%xmm1
937	pshufd	$78,%xmm0,%xmm3
938	pshufd	$78,%xmm2,%xmm4
939	pxor	%xmm0,%xmm3
940	pxor	%xmm2,%xmm4
941.byte	102,15,58,68,194,0
942.byte	102,15,58,68,202,17
943.byte	102,15,58,68,220,0
944	xorps	%xmm0,%xmm3
945	xorps	%xmm1,%xmm3
946	movdqa	%xmm3,%xmm4
947	psrldq	$8,%xmm3
948	pslldq	$8,%xmm4
949	pxor	%xmm3,%xmm1
950	pxor	%xmm4,%xmm0
/* Montgomery-style reduction of the 256-bit product xmm1:xmm0. */
951	movdqa	%xmm0,%xmm4
952	movdqa	%xmm0,%xmm3
953	psllq	$5,%xmm0
954	pxor	%xmm0,%xmm3
955	psllq	$1,%xmm0
956	pxor	%xmm3,%xmm0
957	psllq	$57,%xmm0
958	movdqa	%xmm0,%xmm3
959	pslldq	$8,%xmm0
960	psrldq	$8,%xmm3
961	pxor	%xmm4,%xmm0
962	pxor	%xmm3,%xmm1
963	movdqa	%xmm0,%xmm4
964	psrlq	$1,%xmm0
965	pxor	%xmm4,%xmm1
966	pxor	%xmm0,%xmm4
967	psrlq	$5,%xmm0
968	pxor	%xmm4,%xmm0
969	psrlq	$1,%xmm0
970	pxor	%xmm1,%xmm0
/* Store H, H^2, and the palignr-combined Karatsuba constants. */
971	pshufd	$78,%xmm2,%xmm3
972	pshufd	$78,%xmm0,%xmm4
973	pxor	%xmm2,%xmm3
974	movdqu	%xmm2,(%edx)
975	pxor	%xmm0,%xmm4
976	movdqu	%xmm0,16(%edx)
977.byte	102,15,58,15,227,8
978	movdqu	%xmm4,32(%edx)
979	ret
980.size	gcm_init_clmul,.-.L_gcm_init_clmul_begin
/*
 * void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16])
 *
 * Single-block GHASH multiply via PCLMULQDQ: Xi = Xi * H.
 * 4(%esp)=Xi, 8(%esp)=Htable (layout produced by gcm_init_clmul).
 * .byte 102,15,56,0,197 = pshufb %xmm5,%xmm0 (byte-swap via the
 * .Lbswap mask); 102,15,58,68,x,imm = pclmulqdq.
 */
981.globl	gcm_gmult_clmul
982.type	gcm_gmult_clmul,@function
983.align	16
984gcm_gmult_clmul:
985.L_gcm_gmult_clmul_begin:
986	movl	4(%esp),%eax
987	movl	8(%esp),%edx
988	call	.L011pic
989.L011pic:
990	popl	%ecx
991	leal	.Lbswap-.L011pic(%ecx),%ecx
/* Load Xi, byte-swap to little-endian polynomial order. */
992	movdqu	(%eax),%xmm0
993	movdqa	(%ecx),%xmm5
994	movups	(%edx),%xmm2
995.byte	102,15,56,0,197
996	movups	32(%edx),%xmm4
/* Karatsuba carry-less multiply: lo, hi, and (xi_hi^xi_lo)*(h_hi^h_lo). */
997	movdqa	%xmm0,%xmm1
998	pshufd	$78,%xmm0,%xmm3
999	pxor	%xmm0,%xmm3
1000.byte	102,15,58,68,194,0
1001.byte	102,15,58,68,202,17
1002.byte	102,15,58,68,220,0
1003	xorps	%xmm0,%xmm3
1004	xorps	%xmm1,%xmm3
1005	movdqa	%xmm3,%xmm4
1006	psrldq	$8,%xmm3
1007	pslldq	$8,%xmm4
1008	pxor	%xmm3,%xmm1
1009	pxor	%xmm4,%xmm0
/* Reduce the 256-bit product xmm1:xmm0 mod the GCM polynomial. */
1010	movdqa	%xmm0,%xmm4
1011	movdqa	%xmm0,%xmm3
1012	psllq	$5,%xmm0
1013	pxor	%xmm0,%xmm3
1014	psllq	$1,%xmm0
1015	pxor	%xmm3,%xmm0
1016	psllq	$57,%xmm0
1017	movdqa	%xmm0,%xmm3
1018	pslldq	$8,%xmm0
1019	psrldq	$8,%xmm3
1020	pxor	%xmm4,%xmm0
1021	pxor	%xmm3,%xmm1
1022	movdqa	%xmm0,%xmm4
1023	psrlq	$1,%xmm0
1024	pxor	%xmm4,%xmm1
1025	pxor	%xmm0,%xmm4
1026	psrlq	$5,%xmm0
1027	pxor	%xmm4,%xmm0
1028	psrlq	$1,%xmm0
1029	pxor	%xmm1,%xmm0
/* Swap back to big-endian and store Xi. */
1030.byte	102,15,56,0,197
1031	movdqu	%xmm0,(%eax)
1032	ret
1033.size	gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
/*
 * void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16],
 *                      const u8 *inp, size_t len)
 *
 * PCLMULQDQ GHASH over `len` bytes (multiple of 16):
 * 20(%esp)=Xi, 24(%esp)=Htable, 28(%esp)=inp, 32(%esp)=len.
 * Processes two blocks per main-loop iteration using H and H^2
 * (aggregated reduction), with odd/even tails for the remainder.
 * .byte encodings: 102,15,56,0,x = pshufb (byte-swap via .Lbswap);
 * 102,15,58,68,x,imm = pclmulqdq.
 */
1034.globl	gcm_ghash_clmul
1035.type	gcm_ghash_clmul,@function
1036.align	16
1037gcm_ghash_clmul:
1038.L_gcm_ghash_clmul_begin:
1039	pushl	%ebp
1040	pushl	%ebx
1041	pushl	%esi
1042	pushl	%edi
1043	movl	20(%esp),%eax
1044	movl	24(%esp),%edx
1045	movl	28(%esp),%esi
1046	movl	32(%esp),%ebx
1047	call	.L012pic
1048.L012pic:
1049	popl	%ecx
1050	leal	.Lbswap-.L012pic(%ecx),%ecx
1051	movdqu	(%eax),%xmm0
1052	movdqa	(%ecx),%xmm5
1053	movdqu	(%edx),%xmm2
1054.byte	102,15,56,0,197
/* Single block only?  Skip the 2-blocks-at-a-time machinery. */
1055	subl	$16,%ebx
1056	jz	.L013odd_tail
/* Prime the pipeline: Xi ^= block0; start block1 * H. */
1057	movdqu	(%esi),%xmm3
1058	movdqu	16(%esi),%xmm6
1059.byte	102,15,56,0,221
1060.byte	102,15,56,0,245
1061	movdqu	32(%edx),%xmm5
1062	pxor	%xmm3,%xmm0
1063	pshufd	$78,%xmm6,%xmm3
1064	movdqa	%xmm6,%xmm7
1065	pxor	%xmm6,%xmm3
1066	leal	32(%esi),%esi
1067.byte	102,15,58,68,242,0
1068.byte	102,15,58,68,250,17
1069.byte	102,15,58,68,221,0
1070	movups	16(%edx),%xmm2
1071	nop
1072	subl	$32,%ebx
1073	jbe	.L014even_tail
1074	jmp	.L015mod_loop
1075.align	32
/*
 * Main loop: per iteration multiply the running Xi by H^2 while the
 * freshly loaded next block is multiplied by H, sum the products and
 * do one reduction (aggregated/deferred reduction over 2 blocks).
 */
1076.L015mod_loop:
1077	pshufd	$78,%xmm0,%xmm4
1078	movdqa	%xmm0,%xmm1
1079	pxor	%xmm0,%xmm4
1080	nop
1081.byte	102,15,58,68,194,0
1082.byte	102,15,58,68,202,17
1083.byte	102,15,58,68,229,16
1084	movups	(%edx),%xmm2
1085	xorps	%xmm6,%xmm0
1086	movdqa	(%ecx),%xmm5
1087	xorps	%xmm7,%xmm1
1088	movdqu	(%esi),%xmm7
1089	pxor	%xmm0,%xmm3
1090	movdqu	16(%esi),%xmm6
1091	pxor	%xmm1,%xmm3
1092.byte	102,15,56,0,253
1093	pxor	%xmm3,%xmm4
1094	movdqa	%xmm4,%xmm3
1095	psrldq	$8,%xmm4
1096	pslldq	$8,%xmm3
1097	pxor	%xmm4,%xmm1
1098	pxor	%xmm3,%xmm0
1099.byte	102,15,56,0,245
1100	pxor	%xmm7,%xmm1
1101	movdqa	%xmm6,%xmm7
/* Reduction interleaved with the next block's clmuls. */
1102	movdqa	%xmm0,%xmm4
1103	movdqa	%xmm0,%xmm3
1104	psllq	$5,%xmm0
1105	pxor	%xmm0,%xmm3
1106	psllq	$1,%xmm0
1107	pxor	%xmm3,%xmm0
1108.byte	102,15,58,68,242,0
1109	movups	32(%edx),%xmm5
1110	psllq	$57,%xmm0
1111	movdqa	%xmm0,%xmm3
1112	pslldq	$8,%xmm0
1113	psrldq	$8,%xmm3
1114	pxor	%xmm4,%xmm0
1115	pxor	%xmm3,%xmm1
1116	pshufd	$78,%xmm7,%xmm3
1117	movdqa	%xmm0,%xmm4
1118	psrlq	$1,%xmm0
1119	pxor	%xmm7,%xmm3
1120	pxor	%xmm4,%xmm1
1121.byte	102,15,58,68,250,17
1122	movups	16(%edx),%xmm2
1123	pxor	%xmm0,%xmm4
1124	psrlq	$5,%xmm0
1125	pxor	%xmm4,%xmm0
1126	psrlq	$1,%xmm0
1127	pxor	%xmm1,%xmm0
1128.byte	102,15,58,68,221,0
1129	leal	32(%esi),%esi
1130	subl	$32,%ebx
1131	ja	.L015mod_loop
/* Even tail: fold in the last in-flight pair and reduce. */
1132.L014even_tail:
1133	pshufd	$78,%xmm0,%xmm4
1134	movdqa	%xmm0,%xmm1
1135	pxor	%xmm0,%xmm4
1136.byte	102,15,58,68,194,0
1137.byte	102,15,58,68,202,17
1138.byte	102,15,58,68,229,16
1139	movdqa	(%ecx),%xmm5
1140	xorps	%xmm6,%xmm0
1141	xorps	%xmm7,%xmm1
1142	pxor	%xmm0,%xmm3
1143	pxor	%xmm1,%xmm3
1144	pxor	%xmm3,%xmm4
1145	movdqa	%xmm4,%xmm3
1146	psrldq	$8,%xmm4
1147	pslldq	$8,%xmm3
1148	pxor	%xmm4,%xmm1
1149	pxor	%xmm3,%xmm0
1150	movdqa	%xmm0,%xmm4
1151	movdqa	%xmm0,%xmm3
1152	psllq	$5,%xmm0
1153	pxor	%xmm0,%xmm3
1154	psllq	$1,%xmm0
1155	pxor	%xmm3,%xmm0
1156	psllq	$57,%xmm0
1157	movdqa	%xmm0,%xmm3
1158	pslldq	$8,%xmm0
1159	psrldq	$8,%xmm3
1160	pxor	%xmm4,%xmm0
1161	pxor	%xmm3,%xmm1
1162	movdqa	%xmm0,%xmm4
1163	psrlq	$1,%xmm0
1164	pxor	%xmm4,%xmm1
1165	pxor	%xmm0,%xmm4
1166	psrlq	$5,%xmm0
1167	pxor	%xmm4,%xmm0
1168	psrlq	$1,%xmm0
1169	pxor	%xmm1,%xmm0
1170	testl	%ebx,%ebx
1171	jnz	.L016done
1172	movups	(%edx),%xmm2
/* Odd tail: one final block * H (plain Karatsuba + reduction). */
1173.L013odd_tail:
1174	movdqu	(%esi),%xmm3
1175.byte	102,15,56,0,221
1176	pxor	%xmm3,%xmm0
1177	movdqa	%xmm0,%xmm1
1178	pshufd	$78,%xmm0,%xmm3
1179	pshufd	$78,%xmm2,%xmm4
1180	pxor	%xmm0,%xmm3
1181	pxor	%xmm2,%xmm4
1182.byte	102,15,58,68,194,0
1183.byte	102,15,58,68,202,17
1184.byte	102,15,58,68,220,0
1185	xorps	%xmm0,%xmm3
1186	xorps	%xmm1,%xmm3
1187	movdqa	%xmm3,%xmm4
1188	psrldq	$8,%xmm3
1189	pslldq	$8,%xmm4
1190	pxor	%xmm3,%xmm1
1191	pxor	%xmm4,%xmm0
1192	movdqa	%xmm0,%xmm4
1193	movdqa	%xmm0,%xmm3
1194	psllq	$5,%xmm0
1195	pxor	%xmm0,%xmm3
1196	psllq	$1,%xmm0
1197	pxor	%xmm3,%xmm0
1198	psllq	$57,%xmm0
1199	movdqa	%xmm0,%xmm3
1200	pslldq	$8,%xmm0
1201	psrldq	$8,%xmm3
1202	pxor	%xmm4,%xmm0
1203	pxor	%xmm3,%xmm1
1204	movdqa	%xmm0,%xmm4
1205	psrlq	$1,%xmm0
1206	pxor	%xmm4,%xmm1
1207	pxor	%xmm0,%xmm4
1208	psrlq	$5,%xmm0
1209	pxor	%xmm4,%xmm0
1210	psrlq	$1,%xmm0
1211	pxor	%xmm1,%xmm0
/* Byte-swap back and store the final Xi. */
1212.L016done:
1213.byte	102,15,56,0,197
1214	movdqu	%xmm0,(%eax)
1215	popl	%edi
1216	popl	%esi
1217	popl	%ebx
1218	popl	%ebp
1219	ret
1220.size	gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
/*
 * Constant data for the routines above.
 * .Lbswap:    16-byte pshufb mask that reverses byte order, followed
 *             by the 0xc2...01 GCM reduction constant (read as
 *             16(%ecx) by gcm_init_clmul).
 * .Lrem_8bit: 256 16-bit entries -- remainders for 8-bit-at-a-time
 *             reduction (entry i = i * 0x1C2 in carry-less
 *             arithmetic), used by gcm_ghash_4bit_mmx via pinsrw.
 * .Lrem_4bit: 16 8-byte entries for 4-bit reduction (same values the
 *             x86 routines materialize on their stack frames), used
 *             by gcm_gmult_4bit_mmx.
 * Trailing .byte string: ASCII credit
 *             "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>".
 */
1221.align	64
1222.Lbswap:
1223.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
1224.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
1225.align	64
1226.Lrem_8bit:
1227.value	0,450,900,582,1800,1738,1164,1358
1228.value	3600,4050,3476,3158,2328,2266,2716,2910
1229.value	7200,7650,8100,7782,6952,6890,6316,6510
1230.value	4656,5106,4532,4214,5432,5370,5820,6014
1231.value	14400,14722,15300,14854,16200,16010,15564,15630
1232.value	13904,14226,13780,13334,12632,12442,13020,13086
1233.value	9312,9634,10212,9766,9064,8874,8428,8494
1234.value	10864,11186,10740,10294,11640,11450,12028,12094
1235.value	28800,28994,29444,29382,30600,30282,29708,30158
1236.value	32400,32594,32020,31958,31128,30810,31260,31710
1237.value	27808,28002,28452,28390,27560,27242,26668,27118
1238.value	25264,25458,24884,24822,26040,25722,26172,26622
1239.value	18624,18690,19268,19078,20424,19978,19532,19854
1240.value	18128,18194,17748,17558,16856,16410,16988,17310
1241.value	21728,21794,22372,22182,21480,21034,20588,20910
1242.value	23280,23346,22900,22710,24056,23610,24188,24510
1243.value	57600,57538,57988,58182,58888,59338,58764,58446
1244.value	61200,61138,60564,60758,59416,59866,60316,59998
1245.value	64800,64738,65188,65382,64040,64490,63916,63598
1246.value	62256,62194,61620,61814,62520,62970,63420,63102
1247.value	55616,55426,56004,56070,56904,57226,56780,56334
1248.value	55120,54930,54484,54550,53336,53658,54236,53790
1249.value	50528,50338,50916,50982,49768,50090,49644,49198
1250.value	52080,51890,51444,51510,52344,52666,53244,52798
1251.value	37248,36930,37380,37830,38536,38730,38156,38094
1252.value	40848,40530,39956,40406,39064,39258,39708,39646
1253.value	36256,35938,36388,36838,35496,35690,35116,35054
1254.value	33712,33394,32820,33270,33976,34170,34620,34558
1255.value	43456,43010,43588,43910,44744,44810,44364,44174
1256.value	42960,42514,42068,42390,41176,41242,41820,41630
1257.value	46560,46114,46692,47014,45800,45866,45420,45230
1258.value	48112,47666,47220,47542,48376,48442,49020,48830
1259.align	64
1260.Lrem_4bit:
1261.long	0,0,0,471859200,0,943718400,0,610271232
1262.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
1263.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
1264.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
1265.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
1266.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
1267.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
1268.byte	0
1269#else
1270.file	"ghash-x86.S"
1271.text
/*
 * void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16])
 *
 * Non-PIC (#else branch) build of the same pure-IA32 4-bit GHASH
 * multiply emitted above in the PIC branch; the body is identical
 * because this function takes no absolute data references.
 * See the PIC copy for the detailed commentary: 104(%esp)=Xi,
 * 108(%esp)=Htable; 0..15(%esp) = Xi working copy, 16..79(%esp) =
 * stack-resident rem_4bit table.
 */
1272.globl	gcm_gmult_4bit_x86
1273.type	gcm_gmult_4bit_x86,@function
1274.align	16
1275gcm_gmult_4bit_x86:
1276.L_gcm_gmult_4bit_x86_begin:
1277	pushl	%ebp
1278	pushl	%ebx
1279	pushl	%esi
1280	pushl	%edi
1281	subl	$84,%esp
1282	movl	104(%esp),%edi
1283	movl	108(%esp),%esi
1284	movl	(%edi),%ebp
1285	movl	4(%edi),%edx
1286	movl	8(%edi),%ecx
1287	movl	12(%edi),%ebx
/* Inline rem_4bit reduction table at 16(%esp). */
1288	movl	$0,16(%esp)
1289	movl	$471859200,20(%esp)
1290	movl	$943718400,24(%esp)
1291	movl	$610271232,28(%esp)
1292	movl	$1887436800,32(%esp)
1293	movl	$1822425088,36(%esp)
1294	movl	$1220542464,40(%esp)
1295	movl	$1423966208,44(%esp)
1296	movl	$3774873600,48(%esp)
1297	movl	$4246732800,52(%esp)
1298	movl	$3644850176,56(%esp)
1299	movl	$3311403008,60(%esp)
1300	movl	$2441084928,64(%esp)
1301	movl	$2376073216,68(%esp)
1302	movl	$2847932416,72(%esp)
1303	movl	$3051356160,76(%esp)
1304	movl	%ebp,(%esp)
1305	movl	%edx,4(%esp)
1306	movl	%ecx,8(%esp)
1307	movl	%ebx,12(%esp)
/* First nibble lookup, then two 4-bit steps per loop iteration. */
1308	shrl	$20,%ebx
1309	andl	$240,%ebx
1310	movl	4(%esi,%ebx,1),%ebp
1311	movl	(%esi,%ebx,1),%edx
1312	movl	12(%esi,%ebx,1),%ecx
1313	movl	8(%esi,%ebx,1),%ebx
1314	xorl	%eax,%eax
1315	movl	$15,%edi
1316	jmp	.L000x86_loop
1317.align	16
1318.L000x86_loop:
1319	movb	%bl,%al
1320	shrdl	$4,%ecx,%ebx
1321	andb	$15,%al
1322	shrdl	$4,%edx,%ecx
1323	shrdl	$4,%ebp,%edx
1324	shrl	$4,%ebp
1325	xorl	16(%esp,%eax,4),%ebp
1326	movb	(%esp,%edi,1),%al
1327	andb	$240,%al
1328	xorl	8(%esi,%eax,1),%ebx
1329	xorl	12(%esi,%eax,1),%ecx
1330	xorl	(%esi,%eax,1),%edx
1331	xorl	4(%esi,%eax,1),%ebp
1332	decl	%edi
1333	js	.L001x86_break
1334	movb	%bl,%al
1335	shrdl	$4,%ecx,%ebx
1336	andb	$15,%al
1337	shrdl	$4,%edx,%ecx
1338	shrdl	$4,%ebp,%edx
1339	shrl	$4,%ebp
1340	xorl	16(%esp,%eax,4),%ebp
1341	movb	(%esp,%edi,1),%al
1342	shlb	$4,%al
1343	xorl	8(%esi,%eax,1),%ebx
1344	xorl	12(%esi,%eax,1),%ecx
1345	xorl	(%esi,%eax,1),%edx
1346	xorl	4(%esi,%eax,1),%ebp
1347	jmp	.L000x86_loop
1348.align	16
/* Byte-swap to big-endian and write the result back to Xi. */
1349.L001x86_break:
1350	bswap	%ebx
1351	bswap	%ecx
1352	bswap	%edx
1353	bswap	%ebp
1354	movl	104(%esp),%edi
1355	movl	%ebx,12(%edi)
1356	movl	%ecx,8(%edi)
1357	movl	%edx,4(%edi)
1358	movl	%ebp,(%edi)
1359	addl	$84,%esp
1360	popl	%edi
1361	popl	%esi
1362	popl	%ebx
1363	popl	%ebp
1364	ret
1365.size	gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
/*
 * void gcm_ghash_4bit_x86(Xi, Htable, inp, len) -- cdecl, pure x86 (no MMX/SSE).
 * Folds `len` bytes at `inp` (len assumed a multiple of 16) into the 16-byte
 * hash value at Xi, using the 16-entry 4-bit Htable.  Arguments are at
 * 104..116(%esp) after the 4 pushes + 84-byte frame below.
 * Frame layout: 0..15(%esp)  working copy of Xi^block,
 *               16..79(%esp) reduction constants (same values as .Lrem_4bit).
 */
.globl	gcm_ghash_4bit_x86
.type	gcm_ghash_4bit_x86,@function
.align	16
gcm_ghash_4bit_x86:
.L_gcm_ghash_4bit_x86_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$84,%esp
	movl	104(%esp),%ebx		/* %ebx = Xi */
	movl	108(%esp),%esi		/* %esi = Htable */
	movl	112(%esp),%edi		/* %edi = inp */
	movl	116(%esp),%ecx		/* %ecx = len */
	addl	%edi,%ecx
	movl	%ecx,116(%esp)		/* 116(%esp) = end-of-input pointer */
	movl	(%ebx),%ebp		/* load Xi into %ebp:%edx:%ecx:%ebx */
	movl	4(%ebx),%edx
	movl	8(%ebx),%ecx
	movl	12(%ebx),%ebx
	/* spill the 16 rem_4bit reduction constants into the frame */
	movl	$0,16(%esp)
	movl	$471859200,20(%esp)
	movl	$943718400,24(%esp)
	movl	$610271232,28(%esp)
	movl	$1887436800,32(%esp)
	movl	$1822425088,36(%esp)
	movl	$1220542464,40(%esp)
	movl	$1423966208,44(%esp)
	movl	$3774873600,48(%esp)
	movl	$4246732800,52(%esp)
	movl	$3644850176,56(%esp)
	movl	$3311403008,60(%esp)
	movl	$2441084928,64(%esp)
	movl	$2376073216,68(%esp)
	movl	$2847932416,72(%esp)
	movl	$3051356160,76(%esp)
.align	16
.L002x86_outer_loop:
	/* Xi ^= next 16-byte input block; keep a byte-addressable copy at (%esp) */
	xorl	12(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	4(%edi),%edx
	xorl	(%edi),%ebp
	movl	%ebx,12(%esp)
	movl	%ecx,8(%esp)
	movl	%edx,4(%esp)
	movl	%ebp,(%esp)
	/* seed the product from Htable[top nibble of last byte] */
	shrl	$20,%ebx
	andl	$240,%ebx		/* %ebx = nibble*16 = Htable byte offset */
	movl	4(%esi,%ebx,1),%ebp
	movl	(%esi,%ebx,1),%edx
	movl	12(%esi,%ebx,1),%ecx
	movl	8(%esi,%ebx,1),%ebx
	xorl	%eax,%eax
	movl	$15,%edi		/* %edi = byte index 15..0 of the Xi copy */
	jmp	.L003x86_loop
.align	16
.L003x86_loop:
	/* one iteration per nibble: shift 128-bit value right 4, reduce via
	 * the table at 16(%esp), then fold in Htable[next nibble] */
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	andb	$240,%al		/* high nibble of byte %edi */
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	decl	%edi
	js	.L001x86_break
	/* unrolled second half: same, but low nibble (shifted into position) */
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	shlb	$4,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	jmp	.L003x86_loop
.align	16
.L004x86_break:
	/* convert the accumulated product back to big-endian */
	bswap	%ebx
	bswap	%ecx
	bswap	%edx
	bswap	%ebp
	movl	112(%esp),%edi
	leal	16(%edi),%edi		/* advance input pointer */
	cmpl	116(%esp),%edi
	movl	%edi,112(%esp)
	jb	.L002x86_outer_loop	/* more blocks to process */
	movl	104(%esp),%edi
	movl	%ebx,12(%edi)		/* store updated Xi */
	movl	%ecx,8(%edi)
	movl	%edx,4(%edi)
	movl	%ebp,(%edi)
	addl	$84,%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
/*
 * void gcm_gmult_4bit_mmx(Xi, Htable) -- cdecl, MMX flavour.
 * Multiplies the 16-byte value at Xi by H in GF(2^128) in place, using the
 * 4-bit Htable and the .Lrem_4bit reduction table (located via call/pop).
 * Product is accumulated in %mm0 (low qword) : %mm1 (high qword).
 */
.globl	gcm_gmult_4bit_mmx
.type	gcm_gmult_4bit_mmx,@function
.align	16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi		/* %edi = Xi */
	movl	24(%esp),%esi		/* %esi = Htable */
	call	.L005pic_point		/* call/pop to get own address */
.L005pic_point:
	popl	%eax
	leal	.Lrem_4bit-.L005pic_point(%eax),%eax	/* %eax = &rem_4bit */
	movzbl	15(%edi),%ebx		/* start from the last byte of Xi */
	xorl	%ecx,%ecx
	movl	%ebx,%edx
	movb	%dl,%cl
	movl	$14,%ebp		/* %ebp = remaining byte index 14..0 */
	shlb	$4,%cl			/* %cl = low nibble * 16 */
	andl	$240,%edx		/* %edx = high nibble * 16 */
	movq	8(%esi,%ecx,1),%mm0	/* seed product = Htable[low nibble] */
	movq	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebx
	jmp	.L006mmx_loop
.align	16
.L006mmx_loop:
	/* shift product right 4 bits, reduce the dropped nibble via rem_4bit,
	 * then XOR in Htable[high nibble]; second unrolled half does the
	 * low nibble of the next byte */
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	(%edi,%ebp,1),%cl	/* fetch next Xi byte */
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1	/* fold in rem_4bit[dropped nibble] */
	decl	%ebp
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	pxor	%mm2,%mm0		/* carry bits from high into low qword */
	js	.L007mmx_break		/* all 16 bytes consumed */
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	jmp	.L006mmx_loop
.align	16
.L007mmx_break:
	/* final two nibble steps (low then high nibble of byte 0) */
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	pxor	%mm2,%mm0
	/* unpack %mm1:%mm0 into four dwords, byte-swap, store back to Xi */
	psrlq	$32,%mm0
	movd	%mm1,%edx
	psrlq	$32,%mm1
	movd	%mm0,%ecx
	movd	%mm1,%ebp
	bswap	%ebx
	bswap	%edx
	bswap	%ecx
	bswap	%ebp
	emms				/* leave MMX state clean for FPU users */
	movl	%ebx,12(%edi)
	movl	%edx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%ebp,(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
/*
 * void gcm_ghash_4bit_mmx(Xi, Htable, inp, len) -- cdecl, MMX flavour with
 * 8-bit remainder reduction (.Lrem_8bit).  Hashes `len` bytes at `inp`
 * (16 bytes per outer iteration) into Xi.
 * Stack frame (64-byte aligned, 544+16 bytes):
 *   0..15(%esp)     low-nibble bytes of Htable[i] high halves (dl<<4 spills)
 *   16..271(%esp)   256-byte copy of Htable, pre-shifted right 4 bits
 *   272..527(%esp)  second 256-byte shifted-Htable copy
 *   528..543(%esp)  working copy of Xi^block
 *   544(%esp) Xi ptr, 548(%esp) inp, 552(%esp) input end, 556(%esp) saved %esp
 */
.globl	gcm_ghash_4bit_mmx
.type	gcm_ghash_4bit_mmx,@function
.align	16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax		/* %eax = Xi */
	movl	24(%esp),%ebx		/* %ebx = Htable */
	movl	28(%esp),%ecx		/* %ecx = inp */
	movl	32(%esp),%edx		/* %edx = len */
	movl	%esp,%ebp		/* remember %esp before realignment */
	call	.L008pic_point
.L008pic_point:
	popl	%esi
	leal	.Lrem_8bit-.L008pic_point(%esi),%esi	/* %esi = &rem_8bit */
	subl	$544,%esp
	andl	$-64,%esp		/* 64-byte align the table area */
	subl	$16,%esp
	addl	%ecx,%edx
	movl	%eax,544(%esp)
	movl	%edx,552(%esp)
	movl	%ebp,556(%esp)
	addl	$128,%ebx		/* bias Htable ptr for signed offsets */
	leal	144(%esp),%edi		/* %edi: base of first shifted table */
	leal	400(%esp),%ebp		/* %ebp: base of second shifted table */
	/* 16 interleaved stanzas follow: each copies one Htable entry into
	 * both on-stack tables shifted right by 4 bits, saving the shifted-out
	 * low nibble (as dl<<4) at (%esp)..15(%esp). */
	movl	-120(%ebx),%edx
	movq	-120(%ebx),%mm0
	movq	-128(%ebx),%mm3
	shll	$4,%edx
	movb	%dl,(%esp)
	movl	-104(%ebx),%edx
	movq	-104(%ebx),%mm2
	movq	-112(%ebx),%mm5
	movq	%mm0,-128(%edi)
	psrlq	$4,%mm0
	movq	%mm3,(%edi)
	movq	%mm3,%mm7
	psrlq	$4,%mm3
	shll	$4,%edx
	movb	%dl,1(%esp)
	movl	-88(%ebx),%edx
	movq	-88(%ebx),%mm1
	psllq	$60,%mm7
	movq	-96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-120(%edi)
	psrlq	$4,%mm2
	movq	%mm5,8(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-128(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,(%ebp)
	shll	$4,%edx
	movb	%dl,2(%esp)
	movl	-72(%ebx),%edx
	movq	-72(%ebx),%mm0
	psllq	$60,%mm6
	movq	-80(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-112(%edi)
	psrlq	$4,%mm1
	movq	%mm4,16(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-120(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,8(%ebp)
	shll	$4,%edx
	movb	%dl,3(%esp)
	movl	-56(%ebx),%edx
	movq	-56(%ebx),%mm2
	psllq	$60,%mm7
	movq	-64(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-104(%edi)
	psrlq	$4,%mm0
	movq	%mm3,24(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-112(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,16(%ebp)
	shll	$4,%edx
	movb	%dl,4(%esp)
	movl	-40(%ebx),%edx
	movq	-40(%ebx),%mm1
	psllq	$60,%mm6
	movq	-48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-96(%edi)
	psrlq	$4,%mm2
	movq	%mm5,32(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-104(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,24(%ebp)
	shll	$4,%edx
	movb	%dl,5(%esp)
	movl	-24(%ebx),%edx
	movq	-24(%ebx),%mm0
	psllq	$60,%mm7
	movq	-32(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-88(%edi)
	psrlq	$4,%mm1
	movq	%mm4,40(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-96(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,32(%ebp)
	shll	$4,%edx
	movb	%dl,6(%esp)
	movl	-8(%ebx),%edx
	movq	-8(%ebx),%mm2
	psllq	$60,%mm6
	movq	-16(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-80(%edi)
	psrlq	$4,%mm0
	movq	%mm3,48(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-88(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,40(%ebp)
	shll	$4,%edx
	movb	%dl,7(%esp)
	movl	8(%ebx),%edx
	movq	8(%ebx),%mm1
	psllq	$60,%mm7
	movq	(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-72(%edi)
	psrlq	$4,%mm2
	movq	%mm5,56(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-80(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,48(%ebp)
	shll	$4,%edx
	movb	%dl,8(%esp)
	movl	24(%ebx),%edx
	movq	24(%ebx),%mm0
	psllq	$60,%mm6
	movq	16(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-64(%edi)
	psrlq	$4,%mm1
	movq	%mm4,64(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-72(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,56(%ebp)
	shll	$4,%edx
	movb	%dl,9(%esp)
	movl	40(%ebx),%edx
	movq	40(%ebx),%mm2
	psllq	$60,%mm7
	movq	32(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-56(%edi)
	psrlq	$4,%mm0
	movq	%mm3,72(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-64(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,64(%ebp)
	shll	$4,%edx
	movb	%dl,10(%esp)
	movl	56(%ebx),%edx
	movq	56(%ebx),%mm1
	psllq	$60,%mm6
	movq	48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-48(%edi)
	psrlq	$4,%mm2
	movq	%mm5,80(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-56(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,72(%ebp)
	shll	$4,%edx
	movb	%dl,11(%esp)
	movl	72(%ebx),%edx
	movq	72(%ebx),%mm0
	psllq	$60,%mm7
	movq	64(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-40(%edi)
	psrlq	$4,%mm1
	movq	%mm4,88(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-48(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,80(%ebp)
	shll	$4,%edx
	movb	%dl,12(%esp)
	movl	88(%ebx),%edx
	movq	88(%ebx),%mm2
	psllq	$60,%mm6
	movq	80(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-32(%edi)
	psrlq	$4,%mm0
	movq	%mm3,96(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-40(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,88(%ebp)
	shll	$4,%edx
	movb	%dl,13(%esp)
	movl	104(%ebx),%edx
	movq	104(%ebx),%mm1
	psllq	$60,%mm7
	movq	96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-24(%edi)
	psrlq	$4,%mm2
	movq	%mm5,104(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-32(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,96(%ebp)
	shll	$4,%edx
	movb	%dl,14(%esp)
	movl	120(%ebx),%edx
	movq	120(%ebx),%mm0
	psllq	$60,%mm6
	movq	112(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-16(%edi)
	psrlq	$4,%mm1
	movq	%mm4,112(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-24(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,104(%ebp)
	shll	$4,%edx
	movb	%dl,15(%esp)
	psllq	$60,%mm7
	por	%mm7,%mm1
	movq	%mm0,-8(%edi)
	psrlq	$4,%mm0
	movq	%mm3,120(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-16(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,112(%ebp)
	psllq	$60,%mm6
	por	%mm6,%mm0
	movq	%mm0,-8(%ebp)
	movq	%mm3,120(%ebp)
	/* load current Xi: low qword into %mm6, high dwords into %ebx/%edx */
	movq	(%eax),%mm6
	movl	8(%eax),%ebx
	movl	12(%eax),%edx
.align	16
.L009outer:
	/* per 16-byte block: Xi ^= block, then 32 nibble steps (heavily
	 * unrolled below), with 8-bit remainder reduction via rem_8bit
	 * (pinsrw from (%esi,reg,2)).  Product lives in %mm7 (low) / %mm6
	 * (high); %ebx/%ecx carry bytes for the byte-wise reduction. */
	xorl	12(%ecx),%edx
	xorl	8(%ecx),%ebx
	pxor	(%ecx),%mm6
	leal	16(%ecx),%ecx		/* advance input pointer */
	movl	%ebx,536(%esp)
	movq	%mm6,528(%esp)
	movl	%ecx,548(%esp)
	xorl	%eax,%eax
	roll	$8,%edx
	movb	%dl,%al
	movl	%eax,%ebp
	andb	$15,%al			/* %al = low nibble */
	shrl	$4,%ebp			/* %ebp = high nibble */
	pxor	%mm0,%mm0
	roll	$8,%edx
	pxor	%mm1,%mm1
	pxor	%mm2,%mm2
	movq	16(%esp,%eax,8),%mm7	/* seed from shifted Htable copies */
	movq	144(%esp,%eax,8),%mm6
	movb	%dl,%al
	movd	%mm7,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%edi
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2	/* rem_8bit[%ebx] -> reduction word */
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	536(%esp),%edx		/* next 4 input bytes */
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	532(%esp),%edx		/* next 4 input bytes */
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	528(%esp),%edx		/* next 4 input bytes */
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	524(%esp),%edx		/* last 4 input bytes */
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	pxor	144(%esp,%eax,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	movzbl	%bl,%ebx
	pxor	%mm2,%mm2
	psllq	$4,%mm1
	movd	%mm7,%ecx
	psrlq	$4,%mm7
	movq	%mm6,%mm3
	psrlq	$4,%mm6
	shll	$4,%ecx
	pxor	16(%esp,%edi,8),%mm7
	psllq	$60,%mm3
	movzbl	%cl,%ecx
	pxor	%mm3,%mm7
	pxor	144(%esp,%edi,8),%mm6
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	%mm1,%mm6
	movd	%mm7,%edx
	pinsrw	$3,(%esi,%ecx,2),%mm2
	psllq	$12,%mm0
	pxor	%mm0,%mm6		/* fold accumulated remainders */
	psrlq	$32,%mm7
	pxor	%mm2,%mm6
	movl	548(%esp),%ecx		/* reload input pointer */
	movd	%mm7,%ebx
	/* byte-swap the high qword (word swap + pshufw) for big-endian Xi */
	movq	%mm6,%mm3
	psllw	$8,%mm6
	psrlw	$8,%mm3
	por	%mm3,%mm6
	bswap	%edx
	pshufw	$27,%mm6,%mm6
	bswap	%ebx
	cmpl	552(%esp),%ecx		/* reached end of input? */
	jne	.L009outer
	movl	544(%esp),%eax
	movl	%edx,12(%eax)		/* store updated Xi */
	movl	%ebx,8(%eax)
	movq	%mm6,(%eax)
	movl	556(%esp),%esp		/* restore pre-alignment %esp */
	emms
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
/*
 * void gcm_init_clmul(Htable, Xi) -- cdecl, PCLMULQDQ flavour.
 * From the raw hash key at 8(%esp) computes H (bit-reflected, i.e. H<<1
 * reduced mod the GHASH polynomial), H^2, and the xor-ed halves used by
 * Karatsuba, storing them at 0, 16 and 32 bytes into the table at 4(%esp).
 * PCLMULQDQ/PALIGNR are emitted as .byte for old assemblers; the decoded
 * forms are noted on each line.
 */
.globl	gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
	movl	4(%esp),%edx		/* %edx = Htable */
	movl	8(%esp),%eax		/* %eax = Xi (raw key) */
	call	.L010pic
.L010pic:
	popl	%ecx
	leal	.Lbswap-.L010pic(%ecx),%ecx	/* %ecx = &.Lbswap constants */
	movdqu	(%eax),%xmm2
	pshufd	$78,%xmm2,%xmm2		/* swap the two qwords */
	/* H = key<<1, conditionally xor-ing the polynomial on carry-out */
	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5		/* broadcast sign of top dword */
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2		/* carry bit across qword boundary */
	pand	16(%ecx),%xmm5		/* 0xc2...01 poly constant at .Lbswap+16 */
	pxor	%xmm5,%xmm2		/* reduce if the top bit was set */
	/* compute H^2 = H*H by Karatsuba + reduction */
	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0: lo*lo */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1: hi*hi */
.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3: mid */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3		/* xmm3 = middle Karatsuba term */
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1		/* distribute middle term into hi:lo */
	pxor	%xmm4,%xmm0
	/* reduce the 256-bit product modulo the GHASH polynomial */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0		/* xmm0 = H^2 */
	/* store H, H^2 and packed (H.hi^H.lo | H^2.hi^H^2.lo) */
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,(%edx)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%edx)
.byte	102,15,58,15,227,8	/* palignr $8,%xmm3,%xmm4 */
	movdqu	%xmm4,32(%edx)
	ret
.size	gcm_init_clmul,.-.L_gcm_init_clmul_begin
/*
 * void gcm_gmult_clmul(Xi, Htable) -- cdecl, PCLMULQDQ flavour.
 * In-place Xi = Xi * H in GF(2^128): byte-swap Xi, one Karatsuba
 * carry-less multiply against H (at (%edx)), reduce, swap back.
 */
.globl	gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
	movl	4(%esp),%eax		/* %eax = Xi */
	movl	8(%esp),%edx		/* %edx = Htable */
	call	.L011pic
.L011pic:
	popl	%ecx
	leal	.Lbswap-.L011pic(%ecx),%ecx
	movdqu	(%eax),%xmm0		/* xmm0 = Xi */
	movdqa	(%ecx),%xmm5		/* xmm5 = byte-swap mask */
	movups	(%edx),%xmm2		/* xmm2 = H */
.byte	102,15,56,0,197		/* pshufb %xmm5,%xmm0: Xi to little-endian */
	movups	32(%edx),%xmm4		/* xmm4 = pre-xored halves for Karatsuba */
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0: lo*lo */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1: hi*hi */
.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3: mid */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1		/* 256-bit product in xmm1:xmm0 */
	pxor	%xmm4,%xmm0
	/* reduction modulo the GHASH polynomial (same sequence as init) */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197		/* pshufb %xmm5,%xmm0: back to big-endian */
	movdqu	%xmm0,(%eax)		/* store updated Xi */
	ret
.size	gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
/*
 * void gcm_ghash_clmul(Xi, Htable, inp, len) -- cdecl, PCLMULQDQ flavour.
 * Hashes `len` bytes at `inp` into Xi, two 16-byte blocks per main-loop
 * iteration using H and H^2 (from gcm_init_clmul), with single-block odd/
 * even tails.  xmm0/xmm1 = running product, xmm2 = current H power,
 * xmm5 = byte-swap mask (or Karatsuba halves while at 32(%edx)),
 * xmm6/xmm7 = the deferred per-block products.
 */
.globl	gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax		/* %eax = Xi */
	movl	24(%esp),%edx		/* %edx = Htable */
	movl	28(%esp),%esi		/* %esi = inp */
	movl	32(%esp),%ebx		/* %ebx = len */
	call	.L012pic
.L012pic:
	popl	%ecx
	leal	.Lbswap-.L012pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movdqu	(%edx),%xmm2
.byte	102,15,56,0,197		/* pshufb %xmm5,%xmm0 */
	subl	$16,%ebx
	jz	.L013odd_tail		/* exactly one block */
	/* load first two blocks; start (Xi^blk0)*H^2 and blk1*H in parallel */
	movdqu	(%esi),%xmm3
	movdqu	16(%esi),%xmm6
.byte	102,15,56,0,221		/* pshufb %xmm5,%xmm3 */
.byte	102,15,56,0,245		/* pshufb %xmm5,%xmm6 */
	movdqu	32(%edx),%xmm5		/* xmm5 = Karatsuba halves */
	pxor	%xmm3,%xmm0		/* Xi ^= block0 */
	pshufd	$78,%xmm6,%xmm3
	movdqa	%xmm6,%xmm7
	pxor	%xmm6,%xmm3
	leal	32(%esi),%esi
.byte	102,15,58,68,242,0	/* pclmulqdq $0x00,%xmm2,%xmm6 */
.byte	102,15,58,68,250,17	/* pclmulqdq $0x11,%xmm2,%xmm7 */
.byte	102,15,58,68,221,0	/* pclmulqdq $0x00,%xmm5,%xmm3 */
	movups	16(%edx),%xmm2		/* xmm2 = H^2 */
	nop
	subl	$32,%ebx
	jbe	.L014even_tail		/* no full double-block left */
	jmp	.L015mod_loop
.align	32
.L015mod_loop:
	/* multiply the accumulator by H^2, merge the deferred blk*H product,
	 * reduce, and overlap the loads/multiplies for the next two blocks */
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
	nop
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,229,16	/* pclmulqdq $0x10,%xmm5,%xmm4 */
	movups	(%edx),%xmm2		/* xmm2 = H */
	xorps	%xmm6,%xmm0		/* fold in previous blk*H (lo) */
	movdqa	(%ecx),%xmm5		/* reload byte-swap mask */
	xorps	%xmm7,%xmm1		/* fold in previous blk*H (hi) */
	movdqu	(%esi),%xmm7		/* prefetch next two blocks */
	pxor	%xmm0,%xmm3
	movdqu	16(%esi),%xmm6
	pxor	%xmm1,%xmm3
.byte	102,15,56,0,253		/* pshufb %xmm5,%xmm7 */
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
.byte	102,15,56,0,245		/* pshufb %xmm5,%xmm6 */
	pxor	%xmm7,%xmm1		/* xor next block0 into the high half */
	movdqa	%xmm6,%xmm7
	/* reduction interleaved with next block1 multiplies */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0	/* pclmulqdq $0x00,%xmm2,%xmm6 */
	movups	32(%edx),%xmm5		/* xmm5 = Karatsuba halves again */
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	pshufd	$78,%xmm7,%xmm3
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm7,%xmm3
	pxor	%xmm4,%xmm1
.byte	102,15,58,68,250,17	/* pclmulqdq $0x11,%xmm2,%xmm7 */
	movups	16(%edx),%xmm2		/* xmm2 = H^2 for next round */
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,58,68,221,0	/* pclmulqdq $0x00,%xmm5,%xmm3 */
	leal	32(%esi),%esi
	subl	$32,%ebx
	ja	.L015mod_loop
.L014even_tail:
	/* final (acc*H^2) ^ (blk*H), then a full reduction */
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,229,16	/* pclmulqdq $0x10,%xmm5,%xmm4 */
	movdqa	(%ecx),%xmm5
	xorps	%xmm6,%xmm0
	xorps	%xmm7,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testl	%ebx,%ebx
	jnz	.L016done		/* len was a multiple of 32 */
	movups	(%edx),%xmm2		/* one block left: fall into odd tail */
.L013odd_tail:
	/* single-block (Xi^blk)*H with Karatsuba and reduction */
	movdqu	(%esi),%xmm3
.byte	102,15,56,0,221		/* pshufb %xmm5,%xmm3 */
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.L016done:
.byte	102,15,56,0,197		/* pshufb %xmm5,%xmm0: back to big-endian */
	movdqu	%xmm0,(%eax)		/* store updated Xi */
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
/* Constant data shared by the routines above. */
.align	64
.Lbswap:
/* 16-byte big<->little endian shuffle mask for pshufb */
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
/* GHASH polynomial constant (0xc2...01) used via pand 16(%ecx) in init */
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align	64
.Lrem_8bit:
/* 256-entry 16-bit remainder table for the 8-bit reduction in
 * gcm_ghash_4bit_mmx (indexed via pinsrw (%esi,reg,2)) */
.value	0,450,900,582,1800,1738,1164,1358
.value	3600,4050,3476,3158,2328,2266,2716,2910
.value	7200,7650,8100,7782,6952,6890,6316,6510
.value	4656,5106,4532,4214,5432,5370,5820,6014
.value	14400,14722,15300,14854,16200,16010,15564,15630
.value	13904,14226,13780,13334,12632,12442,13020,13086
.value	9312,9634,10212,9766,9064,8874,8428,8494
.value	10864,11186,10740,10294,11640,11450,12028,12094
.value	28800,28994,29444,29382,30600,30282,29708,30158
.value	32400,32594,32020,31958,31128,30810,31260,31710
.value	27808,28002,28452,28390,27560,27242,26668,27118
.value	25264,25458,24884,24822,26040,25722,26172,26622
.value	18624,18690,19268,19078,20424,19978,19532,19854
.value	18128,18194,17748,17558,16856,16410,16988,17310
.value	21728,21794,22372,22182,21480,21034,20588,20910
.value	23280,23346,22900,22710,24056,23610,24188,24510
.value	57600,57538,57988,58182,58888,59338,58764,58446
.value	61200,61138,60564,60758,59416,59866,60316,59998
.value	64800,64738,65188,65382,64040,64490,63916,63598
.value	62256,62194,61620,61814,62520,62970,63420,63102
.value	55616,55426,56004,56070,56904,57226,56780,56334
.value	55120,54930,54484,54550,53336,53658,54236,53790
.value	50528,50338,50916,50982,49768,50090,49644,49198
.value	52080,51890,51444,51510,52344,52666,53244,52798
.value	37248,36930,37380,37830,38536,38730,38156,38094
.value	40848,40530,39956,40406,39064,39258,39708,39646
.value	36256,35938,36388,36838,35496,35690,35116,35054
.value	33712,33394,32820,33270,33976,34170,34620,34558
.value	43456,43010,43588,43910,44744,44810,44364,44174
.value	42960,42514,42068,42390,41176,41242,41820,41630
.value	46560,46114,46692,47014,45800,45866,45420,45230
.value	48112,47666,47220,47542,48376,48442,49020,48830
.align	64
.Lrem_4bit:
/* 16 qword entries for the 4-bit reduction in gcm_gmult_4bit_mmx; the
 * non-zero dwords match the constants spilled by the 4bit_x86 routines */
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
/* ASCII banner: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte	0
2535#endif
2536