1238405Sjkim	# $FreeBSD$
2238405Sjkim.file	"ghash-x86.s"
3238405Sjkim.text
4238405Sjkim.globl	gcm_gmult_4bit_x86
5238405Sjkim.type	gcm_gmult_4bit_x86,@function
6238405Sjkim.align	16
# ----------------------------------------------------------------------
# gcm_gmult_4bit_x86 -- 32-bit x86, integer registers only.
#
# Arguments are read from the stack:
#   arg1 -> %edi : pointer to a 16-byte block; it is read at entry and
#                  overwritten with the result at exit.
#   arg2 -> %esi : base of a lookup table addressed in 16-byte entries
#                  (index = nibble * 16, four dwords per entry).
# NOTE(review): presumably arg1 is the GHASH state Xi and arg2 the
# per-key Htable of a 4-bit table-driven GF(2^128) multiply (the symbol
# and .file names suggest this) -- confirm against the generator source.
#
# Registers: %ebp, %ebx, %esi, %edi are saved and restored; %eax, %ecx,
# %edx and EFLAGS are modified.
#
# Local frame (84 bytes below the saved registers):
#    0..15(%esp)  copy of the 16 input bytes
#   16..79(%esp)  sixteen 32-bit constants, indexed by a 4-bit value
#   80..83(%esp)  not accessed by this routine
# ----------------------------------------------------------------------
7238405Sjkimgcm_gmult_4bit_x86:
8238405Sjkim.L_gcm_gmult_4bit_x86_begin:
9238405Sjkim	pushl	%ebp
10238405Sjkim	pushl	%ebx
11238405Sjkim	pushl	%esi
12238405Sjkim	pushl	%edi
13238405Sjkim	subl	$84,%esp
# 84 bytes of locals + 16 bytes of saved registers + return address:
# the first argument is at 104(%esp), the second at 108(%esp).
14238405Sjkim	movl	104(%esp),%edi
15238405Sjkim	movl	108(%esp),%esi
# Load the 16-byte block as four dwords.
16238405Sjkim	movl	(%edi),%ebp
17238405Sjkim	movl	4(%edi),%edx
18238405Sjkim	movl	8(%edi),%ecx
19238405Sjkim	movl	12(%edi),%ebx
# Build the 16-entry constant table on the stack.  Every constant has its
# low 16 bits clear (e.g. 471859200 = 0x1C200000), i.e. each is a 16-bit
# value placed in the upper half of a dword.
20238405Sjkim	movl	$0,16(%esp)
21238405Sjkim	movl	$471859200,20(%esp)
22238405Sjkim	movl	$943718400,24(%esp)
23238405Sjkim	movl	$610271232,28(%esp)
24238405Sjkim	movl	$1887436800,32(%esp)
25238405Sjkim	movl	$1822425088,36(%esp)
26238405Sjkim	movl	$1220542464,40(%esp)
27238405Sjkim	movl	$1423966208,44(%esp)
28238405Sjkim	movl	$3774873600,48(%esp)
29238405Sjkim	movl	$4246732800,52(%esp)
30238405Sjkim	movl	$3644850176,56(%esp)
31238405Sjkim	movl	$3311403008,60(%esp)
32238405Sjkim	movl	$2441084928,64(%esp)
33238405Sjkim	movl	$2376073216,68(%esp)
34238405Sjkim	movl	$2847932416,72(%esp)
35238405Sjkim	movl	$3051356160,76(%esp)
# Keep a byte-addressable copy of the input block at 0..15(%esp); the loop
# below re-reads it one byte at a time.
36238405Sjkim	movl	%ebp,(%esp)
37238405Sjkim	movl	%edx,4(%esp)
38238405Sjkim	movl	%ecx,8(%esp)
39238405Sjkim	movl	%ebx,12(%esp)
# %ebx holds the dword at offset 12.  Shifting right by 20 and masking with
# 0xF0 leaves (low nibble of byte 15) * 16, the byte offset of the first
# table entry.
40238405Sjkim	shrl	$20,%ebx
41238405Sjkim	andl	$240,%ebx
# Seed the 128-bit accumulator %ebp:%edx:%ecx:%ebx (most significant dword
# first) from that entry.  %ebx is loaded last because it is the index.
42238405Sjkim	movl	4(%esi,%ebx,1),%ebp
43238405Sjkim	movl	(%esi,%ebx,1),%edx
44238405Sjkim	movl	12(%esi,%ebx,1),%ecx
45238405Sjkim	movl	8(%esi,%ebx,1),%ebx
# %eax is cleared once; from here on only %al is written, so %eax is
# always a zero-extended byte usable as an index.
46238405Sjkim	xorl	%eax,%eax
# %edi becomes the byte counter, running from 15 down to 0.
47238405Sjkim	movl	$15,%edi
48238405Sjkim	jmp	.L000x86_loop
49238405Sjkim.align	16
50238405Sjkim.L000x86_loop:
# First half: shift the accumulator right by 4 bits.  The nibble that falls
# off the low end is captured in %al beforehand and selects the stack
# constant XORed into the top dword.
51238405Sjkim	movb	%bl,%al
52238405Sjkim	shrdl	$4,%ecx,%ebx
53238405Sjkim	andb	$15,%al
54238405Sjkim	shrdl	$4,%edx,%ecx
55238405Sjkim	shrdl	$4,%ebp,%edx
56238405Sjkim	shrl	$4,%ebp
57238405Sjkim	xorl	16(%esp,%eax,4),%ebp
# XOR in the table entry selected by the HIGH nibble of input byte %edi
# (masking with 0xF0 leaves nibble * 16 directly).
58238405Sjkim	movb	(%esp,%edi,1),%al
59238405Sjkim	andb	$240,%al
60238405Sjkim	xorl	8(%esi,%eax,1),%ebx
61238405Sjkim	xorl	12(%esi,%eax,1),%ecx
62238405Sjkim	xorl	(%esi,%eax,1),%edx
63238405Sjkim	xorl	4(%esi,%eax,1),%ebp
# Move to the next lower byte; leave once byte 0 has been fully consumed.
64238405Sjkim	decl	%edi
65238405Sjkim	js	.L001x86_break
# Second half: same shift-and-fold step, then XOR in the entry selected by
# the LOW nibble of the new byte (shlb $4 gives nibble * 16).
66238405Sjkim	movb	%bl,%al
67238405Sjkim	shrdl	$4,%ecx,%ebx
68238405Sjkim	andb	$15,%al
69238405Sjkim	shrdl	$4,%edx,%ecx
70238405Sjkim	shrdl	$4,%ebp,%edx
71238405Sjkim	shrl	$4,%ebp
72238405Sjkim	xorl	16(%esp,%eax,4),%ebp
73238405Sjkim	movb	(%esp,%edi,1),%al
74238405Sjkim	shlb	$4,%al
75238405Sjkim	xorl	8(%esi,%eax,1),%ebx
76238405Sjkim	xorl	12(%esi,%eax,1),%ecx
77238405Sjkim	xorl	(%esi,%eax,1),%edx
78238405Sjkim	xorl	4(%esi,%eax,1),%ebp
79238405Sjkim	jmp	.L000x86_loop
80238405Sjkim.align	16
81238405Sjkim.L001x86_break:
# Byte-reverse each dword of the accumulator before storing it.
82238405Sjkim	bswap	%ebx
83238405Sjkim	bswap	%ecx
84238405Sjkim	bswap	%edx
85238405Sjkim	bswap	%ebp
# %edi was reused as the counter, so reload arg1 and write the result back
# over the input block.
86238405Sjkim	movl	104(%esp),%edi
87238405Sjkim	movl	%ebx,12(%edi)
88238405Sjkim	movl	%ecx,8(%edi)
89238405Sjkim	movl	%edx,4(%edi)
90238405Sjkim	movl	%ebp,(%edi)
# Release the frame and restore the saved registers in reverse push order.
91238405Sjkim	addl	$84,%esp
92238405Sjkim	popl	%edi
93238405Sjkim	popl	%esi
94238405Sjkim	popl	%ebx
95238405Sjkim	popl	%ebp
96238405Sjkim	ret
97238405Sjkim.size	gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
98238405Sjkim.globl	gcm_ghash_4bit_x86
99238405Sjkim.type	gcm_ghash_4bit_x86,@function
100238405Sjkim.align	16
# ----------------------------------------------------------------------
# gcm_ghash_4bit_x86 -- 32-bit x86, integer registers only.
#
# Arguments are read from the stack:
#   arg1 -> %ebx : pointer to a 16-byte state block (read at entry,
#                  overwritten with the result at exit)
#   arg2 -> %esi : base of a lookup table addressed in 16-byte entries
#   arg3 -> %edi : pointer to the input data, consumed 16 bytes at a time
#   arg4 -> %ecx : byte count; added to arg3 to form the end pointer
# NOTE(review): presumably (Xi, Htable, inp, len) of a 4-bit table-driven
# GHASH update -- confirm against the generator source.
#
# For each 16-byte input block the block is XORed into the state and the
# same nibble-by-nibble table walk as in gcm_gmult_4bit_x86 is performed.
#
# NOTE(review): the loop condition is tested only after a block has been
# processed, so at least one block is always read; callers presumably
# guarantee a non-zero byte count -- confirm.
#
# Registers: %ebp, %ebx, %esi, %edi are saved and restored; %eax, %ecx,
# %edx and EFLAGS are modified.  The arg3 and arg4 stack slots are
# overwritten (current input pointer / end pointer).
#
# Local frame (84 bytes):
#    0..15(%esp)  state XOR current input block, byte addressable
#   16..79(%esp)  sixteen 32-bit constants, indexed by a 4-bit value
# ----------------------------------------------------------------------
101238405Sjkimgcm_ghash_4bit_x86:
102238405Sjkim.L_gcm_ghash_4bit_x86_begin:
103238405Sjkim	pushl	%ebp
104238405Sjkim	pushl	%ebx
105238405Sjkim	pushl	%esi
106238405Sjkim	pushl	%edi
107238405Sjkim	subl	$84,%esp
108238405Sjkim	movl	104(%esp),%ebx
109238405Sjkim	movl	108(%esp),%esi
110238405Sjkim	movl	112(%esp),%edi
111238405Sjkim	movl	116(%esp),%ecx
# Turn the byte count into an end pointer and keep it in the arg4 slot.
112238405Sjkim	addl	%edi,%ecx
113238405Sjkim	movl	%ecx,116(%esp)
# Load the 16-byte state; %ebx is loaded last because it is the base.
114238405Sjkim	movl	(%ebx),%ebp
115238405Sjkim	movl	4(%ebx),%edx
116238405Sjkim	movl	8(%ebx),%ecx
117238405Sjkim	movl	12(%ebx),%ebx
# Build the 16-entry constant table on the stack (same values as in
# gcm_gmult_4bit_x86; every constant has its low 16 bits clear).
118238405Sjkim	movl	$0,16(%esp)
119238405Sjkim	movl	$471859200,20(%esp)
120238405Sjkim	movl	$943718400,24(%esp)
121238405Sjkim	movl	$610271232,28(%esp)
122238405Sjkim	movl	$1887436800,32(%esp)
123238405Sjkim	movl	$1822425088,36(%esp)
124238405Sjkim	movl	$1220542464,40(%esp)
125238405Sjkim	movl	$1423966208,44(%esp)
126238405Sjkim	movl	$3774873600,48(%esp)
127238405Sjkim	movl	$4246732800,52(%esp)
128238405Sjkim	movl	$3644850176,56(%esp)
129238405Sjkim	movl	$3311403008,60(%esp)
130238405Sjkim	movl	$2441084928,64(%esp)
131238405Sjkim	movl	$2376073216,68(%esp)
132238405Sjkim	movl	$2847932416,72(%esp)
133238405Sjkim	movl	$3051356160,76(%esp)
134238405Sjkim.align	16
135238405Sjkim.L002x86_outer_loop:
# XOR the next 16 input bytes into the state and spill the result to
# 0..15(%esp) so it can be re-read byte by byte.
136238405Sjkim	xorl	12(%edi),%ebx
137238405Sjkim	xorl	8(%edi),%ecx
138238405Sjkim	xorl	4(%edi),%edx
139238405Sjkim	xorl	(%edi),%ebp
140238405Sjkim	movl	%ebx,12(%esp)
141238405Sjkim	movl	%ecx,8(%esp)
142238405Sjkim	movl	%edx,4(%esp)
143238405Sjkim	movl	%ebp,(%esp)
# (low nibble of byte 15) * 16 = offset of the first table entry.
144238405Sjkim	shrl	$20,%ebx
145238405Sjkim	andl	$240,%ebx
# Seed the accumulator %ebp:%edx:%ecx:%ebx (most significant dword first).
146238405Sjkim	movl	4(%esi,%ebx,1),%ebp
147238405Sjkim	movl	(%esi,%ebx,1),%edx
148238405Sjkim	movl	12(%esi,%ebx,1),%ecx
149238405Sjkim	movl	8(%esi,%ebx,1),%ebx
# %eax stays a zero-extended byte index; %edi is reused as the byte
# counter (15 down to 0), the input pointer lives in the arg3 slot.
150238405Sjkim	xorl	%eax,%eax
151238405Sjkim	movl	$15,%edi
152238405Sjkim	jmp	.L003x86_loop
153238405Sjkim.align	16
154238405Sjkim.L003x86_loop:
# First half: shift the accumulator right by 4; the nibble shifted out
# selects the stack constant XORed into the top dword.
155238405Sjkim	movb	%bl,%al
156238405Sjkim	shrdl	$4,%ecx,%ebx
157238405Sjkim	andb	$15,%al
158238405Sjkim	shrdl	$4,%edx,%ecx
159238405Sjkim	shrdl	$4,%ebp,%edx
160238405Sjkim	shrl	$4,%ebp
161238405Sjkim	xorl	16(%esp,%eax,4),%ebp
# XOR in the table entry for the HIGH nibble of byte %edi.
162238405Sjkim	movb	(%esp,%edi,1),%al
163238405Sjkim	andb	$240,%al
164238405Sjkim	xorl	8(%esi,%eax,1),%ebx
165238405Sjkim	xorl	12(%esi,%eax,1),%ecx
166238405Sjkim	xorl	(%esi,%eax,1),%edx
167238405Sjkim	xorl	4(%esi,%eax,1),%ebp
168238405Sjkim	decl	%edi
169238405Sjkim	js	.L004x86_break
# Second half: shift again, then XOR in the entry for the LOW nibble of
# the next lower byte.
170238405Sjkim	movb	%bl,%al
171238405Sjkim	shrdl	$4,%ecx,%ebx
172238405Sjkim	andb	$15,%al
173238405Sjkim	shrdl	$4,%edx,%ecx
174238405Sjkim	shrdl	$4,%ebp,%edx
175238405Sjkim	shrl	$4,%ebp
176238405Sjkim	xorl	16(%esp,%eax,4),%ebp
177238405Sjkim	movb	(%esp,%edi,1),%al
178238405Sjkim	shlb	$4,%al
179238405Sjkim	xorl	8(%esi,%eax,1),%ebx
180238405Sjkim	xorl	12(%esi,%eax,1),%ecx
181238405Sjkim	xorl	(%esi,%eax,1),%edx
182238405Sjkim	xorl	4(%esi,%eax,1),%ebp
183238405Sjkim	jmp	.L003x86_loop
184238405Sjkim.align	16
185238405Sjkim.L004x86_break:
# Byte-reverse each dword; the registers now hold the updated state in the
# same layout in which it was loaded.
186238405Sjkim	bswap	%ebx
187238405Sjkim	bswap	%ecx
188238405Sjkim	bswap	%edx
189238405Sjkim	bswap	%ebp
# Advance the input pointer by one block and compare it with the end
# pointer.  The movl between cmpl and jb does not change EFLAGS, so jb
# (unsigned "below") still sees the result of the comparison.
190238405Sjkim	movl	112(%esp),%edi
191238405Sjkim	leal	16(%edi),%edi
192238405Sjkim	cmpl	116(%esp),%edi
193238405Sjkim	movl	%edi,112(%esp)
194238405Sjkim	jb	.L002x86_outer_loop
# All blocks consumed: write the state back through arg1.
195238405Sjkim	movl	104(%esp),%edi
196238405Sjkim	movl	%ebx,12(%edi)
197238405Sjkim	movl	%ecx,8(%edi)
198238405Sjkim	movl	%edx,4(%edi)
199238405Sjkim	movl	%ebp,(%edi)
200238405Sjkim	addl	$84,%esp
201238405Sjkim	popl	%edi
202238405Sjkim	popl	%esi
203238405Sjkim	popl	%ebx
204238405Sjkim	popl	%ebp
205238405Sjkim	ret
206238405Sjkim.size	gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
207238405Sjkim.globl	gcm_gmult_4bit_mmx
208238405Sjkim.type	gcm_gmult_4bit_mmx,@function
209238405Sjkim.align	16
# ----------------------------------------------------------------------
# gcm_gmult_4bit_mmx -- 32-bit x86 using MMX registers.
#
# Arguments are read from the stack:
#   arg1 -> %edi : pointer to a 16-byte block; read byte by byte and
#                  overwritten with the result at exit
#   arg2 -> %esi : base of a lookup table addressed in 16-byte entries
#                  (two qwords per entry)
# NOTE(review): presumably (Xi, Htable) of a 4-bit table-driven GHASH
# multiply -- confirm against the generator source.
#
# Working state: the 128-bit accumulator is %mm1 (upper qword, loaded from
# entry offset 0) and %mm0 (lower qword, entry offset 8).  %eax points at
# .Lrem_4bit, a table of 8-byte entries defined outside this routine.
#
# Registers: %ebp, %ebx, %esi, %edi are saved and restored; %eax, %ecx,
# %edx, %mm0-%mm2 and EFLAGS are modified.  emms is executed before
# returning.  No local stack frame is used.
# ----------------------------------------------------------------------
210238405Sjkimgcm_gmult_4bit_mmx:
211238405Sjkim.L_gcm_gmult_4bit_mmx_begin:
212238405Sjkim	pushl	%ebp
213238405Sjkim	pushl	%ebx
214238405Sjkim	pushl	%esi
215238405Sjkim	pushl	%edi
216238405Sjkim	movl	20(%esp),%edi
217238405Sjkim	movl	24(%esp),%esi
# Position-independent address of .Lrem_4bit: call pushes the address of
# the next instruction, which is popped and adjusted by a link-time offset.
218238405Sjkim	call	.L005pic_point
219238405Sjkim.L005pic_point:
220238405Sjkim	popl	%eax
221238405Sjkim	leal	.Lrem_4bit-.L005pic_point(%eax),%eax
# Split byte 15 of the block: %ecx = low nibble * 16, %edx = high
# nibble * 16.  %ecx is zeroed first so that writes to %cl leave a clean
# index.  %ebp = 14 is the index of the next byte to fetch.
222238405Sjkim	movzbl	15(%edi),%ebx
223238405Sjkim	xorl	%ecx,%ecx
224238405Sjkim	movl	%ebx,%edx
225238405Sjkim	movb	%dl,%cl
226238405Sjkim	movl	$14,%ebp
227238405Sjkim	shlb	$4,%cl
228238405Sjkim	andl	$240,%edx
# Seed the accumulator from the entry selected by the low nibble and keep
# the low 32 bits of %mm0 in %ebx (its low 4 bits are shifted out next).
229238405Sjkim	movq	8(%esi,%ecx,1),%mm0
230238405Sjkim	movq	(%esi,%ecx,1),%mm1
231238405Sjkim	movd	%mm0,%ebx
232238405Sjkim	jmp	.L006mmx_loop
233238405Sjkim.align	16
234238405Sjkim.L006mmx_loop:
# First half: shift %mm1:%mm0 right by 4 bits (%mm2 carries the 4 bits
# that cross from %mm1 into the top of %mm0), XOR in the .Lrem_4bit entry
# indexed by the nibble shifted out of %mm0, and XOR in the table entry
# selected by %edx (high nibble of the current byte).  Interleaved with
# that, the next input byte is fetched into %cl and copied to %edx.
235238405Sjkim	psrlq	$4,%mm0
236238405Sjkim	andl	$15,%ebx
237238405Sjkim	movq	%mm1,%mm2
238238405Sjkim	psrlq	$4,%mm1
239238405Sjkim	pxor	8(%esi,%edx,1),%mm0
240238405Sjkim	movb	(%edi,%ebp,1),%cl
241238405Sjkim	psllq	$60,%mm2
242238405Sjkim	pxor	(%eax,%ebx,8),%mm1
243238405Sjkim	decl	%ebp
# %ebx is sampled before the carry in %mm2 is merged; %mm2 only has its top
# 4 bits set, so the low 32 bits of %mm0 are not affected by that merge.
244238405Sjkim	movd	%mm0,%ebx
245238405Sjkim	pxor	(%esi,%edx,1),%mm1
246238405Sjkim	movl	%ecx,%edx
247238405Sjkim	pxor	%mm2,%mm0
# js still sees the flags of the decl above: the movd/pxor/movl
# instructions in between do not modify EFLAGS.
248238405Sjkim	js	.L007mmx_break
# Second half: same step using the entry selected by %ecx (low nibble of
# the byte just fetched); %edx is reduced to high nibble * 16 for the
# next first half.
249238405Sjkim	shlb	$4,%cl
250238405Sjkim	andl	$15,%ebx
251238405Sjkim	psrlq	$4,%mm0
252238405Sjkim	andl	$240,%edx
253238405Sjkim	movq	%mm1,%mm2
254238405Sjkim	psrlq	$4,%mm1
255238405Sjkim	pxor	8(%esi,%ecx,1),%mm0
256238405Sjkim	psllq	$60,%mm2
257238405Sjkim	pxor	(%eax,%ebx,8),%mm1
258238405Sjkim	movd	%mm0,%ebx
259238405Sjkim	pxor	(%esi,%ecx,1),%mm1
260238405Sjkim	pxor	%mm2,%mm0
261238405Sjkim	jmp	.L006mmx_loop
262238405Sjkim.align	16
263238405Sjkim.L007mmx_break:
# Byte 0 has been fetched (the counter went negative): process its low
# nibble (%ecx) ...
264238405Sjkim	shlb	$4,%cl
265238405Sjkim	andl	$15,%ebx
266238405Sjkim	psrlq	$4,%mm0
267238405Sjkim	andl	$240,%edx
268238405Sjkim	movq	%mm1,%mm2
269238405Sjkim	psrlq	$4,%mm1
270238405Sjkim	pxor	8(%esi,%ecx,1),%mm0
271238405Sjkim	psllq	$60,%mm2
272238405Sjkim	pxor	(%eax,%ebx,8),%mm1
273238405Sjkim	movd	%mm0,%ebx
274238405Sjkim	pxor	(%esi,%ecx,1),%mm1
275238405Sjkim	pxor	%mm2,%mm0
# ... and then its high nibble (%edx), which completes the walk.
276238405Sjkim	psrlq	$4,%mm0
277238405Sjkim	andl	$15,%ebx
278238405Sjkim	movq	%mm1,%mm2
279238405Sjkim	psrlq	$4,%mm1
280238405Sjkim	pxor	8(%esi,%edx,1),%mm0
281238405Sjkim	psllq	$60,%mm2
282238405Sjkim	pxor	(%eax,%ebx,8),%mm1
283238405Sjkim	movd	%mm0,%ebx
284238405Sjkim	pxor	(%esi,%edx,1),%mm1
285238405Sjkim	pxor	%mm2,%mm0
# Unpack the accumulator into four dwords:
#   %ebx = low  32 bits of %mm0 (sampled above)
#   %ecx = high 32 bits of %mm0
#   %edx = low  32 bits of %mm1
#   %ebp = high 32 bits of %mm1
286238405Sjkim	psrlq	$32,%mm0
287238405Sjkim	movd	%mm1,%edx
288238405Sjkim	psrlq	$32,%mm1
289238405Sjkim	movd	%mm0,%ecx
290238405Sjkim	movd	%mm1,%ebp
# Byte-reverse each dword, leave MMX state, and store the result over the
# input block.
291238405Sjkim	bswap	%ebx
292238405Sjkim	bswap	%edx
293238405Sjkim	bswap	%ecx
294238405Sjkim	bswap	%ebp
295238405Sjkim	emms
296238405Sjkim	movl	%ebx,12(%edi)
297238405Sjkim	movl	%edx,4(%edi)
298238405Sjkim	movl	%ecx,8(%edi)
299238405Sjkim	movl	%ebp,(%edi)
300238405Sjkim	popl	%edi
301238405Sjkim	popl	%esi
302238405Sjkim	popl	%ebx
303238405Sjkim	popl	%ebp
304238405Sjkim	ret
305238405Sjkim.size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
306238405Sjkim.globl	gcm_ghash_4bit_mmx
307238405Sjkim.type	gcm_ghash_4bit_mmx,@function
308238405Sjkim.align	16
# ----------------------------------------------------------------------
# gcm_ghash_4bit_mmx -- 32-bit x86 using MMX registers plus pinsrw/pshufw.
#
# Arguments are read from the stack:
#   arg1 -> %eax : pointer to a 16-byte state block (read before the loop,
#                  written after it)
#   arg2 -> %ebx : base of a table of sixteen 16-byte entries
#   arg3 -> %ecx : pointer to the input data, consumed 16 bytes at a time
#   arg4 -> %edx : byte count; added to arg3 to form the end pointer
# NOTE(review): presumably (Xi, Htable, inp, len) of a table-driven GHASH
# update -- confirm against the generator source.
#
# NOTE(review): the outer loop exits on pointer EQUALITY (jne) and is
# tested only after a block has been processed, so the byte count
# presumably must be a non-zero multiple of 16 -- confirm with callers.
#
# Aligned local frame (offsets from the realigned %esp):
#     0.. 15  byte table: (low byte of each entry's second qword) << 4
#    16..143  second qword (entry offset 8) of each of the 16 entries
#   144..271  first qword (entry offset 0) of each of the 16 entries
#   272..399  low  qword of each entry shifted right by 4 bits
#   400..527  high qword of each entry shifted right by 4 bits
#   528..539  spill of the current state (first qword at 528, dword at 536)
#   544       arg1 (state pointer)
#   548       current input pointer
#   552       end pointer
#   556       %esp as it was after the four pushes (restored on exit)
#
# %esi points at .Lrem_8bit, a table of 16-bit entries defined outside
# this routine.
#
# Registers: %ebp, %ebx, %esi, %edi are saved and restored; %eax, %ecx,
# %edx, %mm0-%mm7 and EFLAGS are modified.  emms is executed before
# returning.
# ----------------------------------------------------------------------
309238405Sjkimgcm_ghash_4bit_mmx:
310238405Sjkim.L_gcm_ghash_4bit_mmx_begin:
311238405Sjkim	pushl	%ebp
312238405Sjkim	pushl	%ebx
313238405Sjkim	pushl	%esi
314238405Sjkim	pushl	%edi
315238405Sjkim	movl	20(%esp),%eax
316238405Sjkim	movl	24(%esp),%ebx
317238405Sjkim	movl	28(%esp),%ecx
318238405Sjkim	movl	32(%esp),%edx
# Remember the current %esp; it is stored in the frame below and reloaded
# in the epilogue because the frame is realigned.
319238405Sjkim	movl	%esp,%ebp
# Position-independent address of .Lrem_8bit.
320238405Sjkim	call	.L008pic_point
321238405Sjkim.L008pic_point:
322238405Sjkim	popl	%esi
323238405Sjkim	leal	.Lrem_8bit-.L008pic_point(%esi),%esi
# Carve out the frame: reserve 544 bytes, round %esp down to a multiple of
# 64, then reserve 16 more bytes.
324238405Sjkim	subl	$544,%esp
325238405Sjkim	andl	$-64,%esp
326238405Sjkim	subl	$16,%esp
# %edx = end pointer (input pointer + byte count).
327238405Sjkim	addl	%ecx,%edx
328238405Sjkim	movl	%eax,544(%esp)
329238405Sjkim	movl	%edx,552(%esp)
330238405Sjkim	movl	%ebp,556(%esp)
# Bias the table base by 128 so that all 256 bytes of the source table are
# reachable with 8-bit displacements (-128..+120).  %edi and %ebp are
# biased the same way over the destination tables: -128(%edi) is frame
# offset 16, (%edi) is 144, -128(%ebp) is 272 and (%ebp) is 400.
331238405Sjkim	addl	$128,%ebx
332238405Sjkim	leal	144(%esp),%edi
333238405Sjkim	leal	400(%esp),%ebp
# ---- Table expansion, fully unrolled and software-pipelined over the 16
# source entries.  For entry i (two qwords: "first" at offset 0, "second"
# at offset 8) the code
#   * stores ((low dword of second) << 4) & 0xFF at byte i of the frame,
#   * copies second to 16+8*i and first to 144+8*i,
#   * stores (second >> 4) | (first << 60) at 272+8*i and
#     first >> 4 at 400+8*i.
# Work for consecutive entries is interleaved across %mm0-%mm7.
334238405Sjkim	movl	-120(%ebx),%edx
335238405Sjkim	movq	-120(%ebx),%mm0
336238405Sjkim	movq	-128(%ebx),%mm3
337238405Sjkim	shll	$4,%edx
338238405Sjkim	movb	%dl,(%esp)
339238405Sjkim	movl	-104(%ebx),%edx
340238405Sjkim	movq	-104(%ebx),%mm2
341238405Sjkim	movq	-112(%ebx),%mm5
342238405Sjkim	movq	%mm0,-128(%edi)
343238405Sjkim	psrlq	$4,%mm0
344238405Sjkim	movq	%mm3,(%edi)
345238405Sjkim	movq	%mm3,%mm7
346238405Sjkim	psrlq	$4,%mm3
347238405Sjkim	shll	$4,%edx
348238405Sjkim	movb	%dl,1(%esp)
349238405Sjkim	movl	-88(%ebx),%edx
350238405Sjkim	movq	-88(%ebx),%mm1
351238405Sjkim	psllq	$60,%mm7
352238405Sjkim	movq	-96(%ebx),%mm4
353238405Sjkim	por	%mm7,%mm0
354238405Sjkim	movq	%mm2,-120(%edi)
355238405Sjkim	psrlq	$4,%mm2
356238405Sjkim	movq	%mm5,8(%edi)
357238405Sjkim	movq	%mm5,%mm6
358238405Sjkim	movq	%mm0,-128(%ebp)
359238405Sjkim	psrlq	$4,%mm5
360238405Sjkim	movq	%mm3,(%ebp)
361238405Sjkim	shll	$4,%edx
362238405Sjkim	movb	%dl,2(%esp)
363238405Sjkim	movl	-72(%ebx),%edx
364238405Sjkim	movq	-72(%ebx),%mm0
365238405Sjkim	psllq	$60,%mm6
366238405Sjkim	movq	-80(%ebx),%mm3
367238405Sjkim	por	%mm6,%mm2
368238405Sjkim	movq	%mm1,-112(%edi)
369238405Sjkim	psrlq	$4,%mm1
370238405Sjkim	movq	%mm4,16(%edi)
371238405Sjkim	movq	%mm4,%mm7
372238405Sjkim	movq	%mm2,-120(%ebp)
373238405Sjkim	psrlq	$4,%mm4
374238405Sjkim	movq	%mm5,8(%ebp)
375238405Sjkim	shll	$4,%edx
376238405Sjkim	movb	%dl,3(%esp)
377238405Sjkim	movl	-56(%ebx),%edx
378238405Sjkim	movq	-56(%ebx),%mm2
379238405Sjkim	psllq	$60,%mm7
380238405Sjkim	movq	-64(%ebx),%mm5
381238405Sjkim	por	%mm7,%mm1
382238405Sjkim	movq	%mm0,-104(%edi)
383238405Sjkim	psrlq	$4,%mm0
384238405Sjkim	movq	%mm3,24(%edi)
385238405Sjkim	movq	%mm3,%mm6
386238405Sjkim	movq	%mm1,-112(%ebp)
387238405Sjkim	psrlq	$4,%mm3
388238405Sjkim	movq	%mm4,16(%ebp)
389238405Sjkim	shll	$4,%edx
390238405Sjkim	movb	%dl,4(%esp)
391238405Sjkim	movl	-40(%ebx),%edx
392238405Sjkim	movq	-40(%ebx),%mm1
393238405Sjkim	psllq	$60,%mm6
394238405Sjkim	movq	-48(%ebx),%mm4
395238405Sjkim	por	%mm6,%mm0
396238405Sjkim	movq	%mm2,-96(%edi)
397238405Sjkim	psrlq	$4,%mm2
398238405Sjkim	movq	%mm5,32(%edi)
399238405Sjkim	movq	%mm5,%mm7
400238405Sjkim	movq	%mm0,-104(%ebp)
401238405Sjkim	psrlq	$4,%mm5
402238405Sjkim	movq	%mm3,24(%ebp)
403238405Sjkim	shll	$4,%edx
404238405Sjkim	movb	%dl,5(%esp)
405238405Sjkim	movl	-24(%ebx),%edx
406238405Sjkim	movq	-24(%ebx),%mm0
407238405Sjkim	psllq	$60,%mm7
408238405Sjkim	movq	-32(%ebx),%mm3
409238405Sjkim	por	%mm7,%mm2
410238405Sjkim	movq	%mm1,-88(%edi)
411238405Sjkim	psrlq	$4,%mm1
412238405Sjkim	movq	%mm4,40(%edi)
413238405Sjkim	movq	%mm4,%mm6
414238405Sjkim	movq	%mm2,-96(%ebp)
415238405Sjkim	psrlq	$4,%mm4
416238405Sjkim	movq	%mm5,32(%ebp)
417238405Sjkim	shll	$4,%edx
418238405Sjkim	movb	%dl,6(%esp)
419238405Sjkim	movl	-8(%ebx),%edx
420238405Sjkim	movq	-8(%ebx),%mm2
421238405Sjkim	psllq	$60,%mm6
422238405Sjkim	movq	-16(%ebx),%mm5
423238405Sjkim	por	%mm6,%mm1
424238405Sjkim	movq	%mm0,-80(%edi)
425238405Sjkim	psrlq	$4,%mm0
426238405Sjkim	movq	%mm3,48(%edi)
427238405Sjkim	movq	%mm3,%mm7
428238405Sjkim	movq	%mm1,-88(%ebp)
429238405Sjkim	psrlq	$4,%mm3
430238405Sjkim	movq	%mm4,40(%ebp)
431238405Sjkim	shll	$4,%edx
432238405Sjkim	movb	%dl,7(%esp)
433238405Sjkim	movl	8(%ebx),%edx
434238405Sjkim	movq	8(%ebx),%mm1
435238405Sjkim	psllq	$60,%mm7
436238405Sjkim	movq	(%ebx),%mm4
437238405Sjkim	por	%mm7,%mm0
438238405Sjkim	movq	%mm2,-72(%edi)
439238405Sjkim	psrlq	$4,%mm2
440238405Sjkim	movq	%mm5,56(%edi)
441238405Sjkim	movq	%mm5,%mm6
442238405Sjkim	movq	%mm0,-80(%ebp)
443238405Sjkim	psrlq	$4,%mm5
444238405Sjkim	movq	%mm3,48(%ebp)
445238405Sjkim	shll	$4,%edx
446238405Sjkim	movb	%dl,8(%esp)
447238405Sjkim	movl	24(%ebx),%edx
448238405Sjkim	movq	24(%ebx),%mm0
449238405Sjkim	psllq	$60,%mm6
450238405Sjkim	movq	16(%ebx),%mm3
451238405Sjkim	por	%mm6,%mm2
452238405Sjkim	movq	%mm1,-64(%edi)
453238405Sjkim	psrlq	$4,%mm1
454238405Sjkim	movq	%mm4,64(%edi)
455238405Sjkim	movq	%mm4,%mm7
456238405Sjkim	movq	%mm2,-72(%ebp)
457238405Sjkim	psrlq	$4,%mm4
458238405Sjkim	movq	%mm5,56(%ebp)
459238405Sjkim	shll	$4,%edx
460238405Sjkim	movb	%dl,9(%esp)
461238405Sjkim	movl	40(%ebx),%edx
462238405Sjkim	movq	40(%ebx),%mm2
463238405Sjkim	psllq	$60,%mm7
464238405Sjkim	movq	32(%ebx),%mm5
465238405Sjkim	por	%mm7,%mm1
466238405Sjkim	movq	%mm0,-56(%edi)
467238405Sjkim	psrlq	$4,%mm0
468238405Sjkim	movq	%mm3,72(%edi)
469238405Sjkim	movq	%mm3,%mm6
470238405Sjkim	movq	%mm1,-64(%ebp)
471238405Sjkim	psrlq	$4,%mm3
472238405Sjkim	movq	%mm4,64(%ebp)
473238405Sjkim	shll	$4,%edx
474238405Sjkim	movb	%dl,10(%esp)
475238405Sjkim	movl	56(%ebx),%edx
476238405Sjkim	movq	56(%ebx),%mm1
477238405Sjkim	psllq	$60,%mm6
478238405Sjkim	movq	48(%ebx),%mm4
479238405Sjkim	por	%mm6,%mm0
480238405Sjkim	movq	%mm2,-48(%edi)
481238405Sjkim	psrlq	$4,%mm2
482238405Sjkim	movq	%mm5,80(%edi)
483238405Sjkim	movq	%mm5,%mm7
484238405Sjkim	movq	%mm0,-56(%ebp)
485238405Sjkim	psrlq	$4,%mm5
486238405Sjkim	movq	%mm3,72(%ebp)
487238405Sjkim	shll	$4,%edx
488238405Sjkim	movb	%dl,11(%esp)
489238405Sjkim	movl	72(%ebx),%edx
490238405Sjkim	movq	72(%ebx),%mm0
491238405Sjkim	psllq	$60,%mm7
492238405Sjkim	movq	64(%ebx),%mm3
493238405Sjkim	por	%mm7,%mm2
494238405Sjkim	movq	%mm1,-40(%edi)
495238405Sjkim	psrlq	$4,%mm1
496238405Sjkim	movq	%mm4,88(%edi)
497238405Sjkim	movq	%mm4,%mm6
498238405Sjkim	movq	%mm2,-48(%ebp)
499238405Sjkim	psrlq	$4,%mm4
500238405Sjkim	movq	%mm5,80(%ebp)
501238405Sjkim	shll	$4,%edx
502238405Sjkim	movb	%dl,12(%esp)
503238405Sjkim	movl	88(%ebx),%edx
504238405Sjkim	movq	88(%ebx),%mm2
505238405Sjkim	psllq	$60,%mm6
506238405Sjkim	movq	80(%ebx),%mm5
507238405Sjkim	por	%mm6,%mm1
508238405Sjkim	movq	%mm0,-32(%edi)
509238405Sjkim	psrlq	$4,%mm0
510238405Sjkim	movq	%mm3,96(%edi)
511238405Sjkim	movq	%mm3,%mm7
512238405Sjkim	movq	%mm1,-40(%ebp)
513238405Sjkim	psrlq	$4,%mm3
514238405Sjkim	movq	%mm4,88(%ebp)
515238405Sjkim	shll	$4,%edx
516238405Sjkim	movb	%dl,13(%esp)
517238405Sjkim	movl	104(%ebx),%edx
518238405Sjkim	movq	104(%ebx),%mm1
519238405Sjkim	psllq	$60,%mm7
520238405Sjkim	movq	96(%ebx),%mm4
521238405Sjkim	por	%mm7,%mm0
522238405Sjkim	movq	%mm2,-24(%edi)
523238405Sjkim	psrlq	$4,%mm2
524238405Sjkim	movq	%mm5,104(%edi)
525238405Sjkim	movq	%mm5,%mm6
526238405Sjkim	movq	%mm0,-32(%ebp)
527238405Sjkim	psrlq	$4,%mm5
528238405Sjkim	movq	%mm3,96(%ebp)
529238405Sjkim	shll	$4,%edx
530238405Sjkim	movb	%dl,14(%esp)
531238405Sjkim	movl	120(%ebx),%edx
532238405Sjkim	movq	120(%ebx),%mm0
533238405Sjkim	psllq	$60,%mm6
534238405Sjkim	movq	112(%ebx),%mm3
535238405Sjkim	por	%mm6,%mm2
536238405Sjkim	movq	%mm1,-16(%edi)
537238405Sjkim	psrlq	$4,%mm1
538238405Sjkim	movq	%mm4,112(%edi)
539238405Sjkim	movq	%mm4,%mm7
540238405Sjkim	movq	%mm2,-24(%ebp)
541238405Sjkim	psrlq	$4,%mm4
542238405Sjkim	movq	%mm5,104(%ebp)
543238405Sjkim	shll	$4,%edx
544238405Sjkim	movb	%dl,15(%esp)
# Pipeline drain: finish the last two entries.
545238405Sjkim	psllq	$60,%mm7
546238405Sjkim	por	%mm7,%mm1
547238405Sjkim	movq	%mm0,-8(%edi)
548238405Sjkim	psrlq	$4,%mm0
549238405Sjkim	movq	%mm3,120(%edi)
550238405Sjkim	movq	%mm3,%mm6
551238405Sjkim	movq	%mm1,-16(%ebp)
552238405Sjkim	psrlq	$4,%mm3
553238405Sjkim	movq	%mm4,112(%ebp)
554238405Sjkim	psllq	$60,%mm6
555238405Sjkim	por	%mm6,%mm0
556238405Sjkim	movq	%mm0,-8(%ebp)
557238405Sjkim	movq	%mm3,120(%ebp)
# Load the 16-byte state: bytes 0..7 in %mm6, dword 8 in %ebx, dword 12 in
# %edx.  This is the loop-carried representation of the state.
558238405Sjkim	movq	(%eax),%mm6
559238405Sjkim	movl	8(%eax),%ebx
560238405Sjkim	movl	12(%eax),%edx
561238405Sjkim.align	16
562238405Sjkim.L009outer:
# XOR the next input block into the state, advance the input pointer, and
# spill everything except the top dword (%edx, consumed first) to the
# frame.  The input pointer is spilled because %ecx is reused below.
563238405Sjkim	xorl	12(%ecx),%edx
564238405Sjkim	xorl	8(%ecx),%ebx
565238405Sjkim	pxor	(%ecx),%mm6
566238405Sjkim	leal	16(%ecx),%ecx
567238405Sjkim	movl	%ebx,536(%esp)
568238405Sjkim	movq	%mm6,528(%esp)
569238405Sjkim	movl	%ecx,548(%esp)
# ---- Unrolled byte walk.  Each "roll $8,%edx" rotates the next byte of
# the current state dword into %dl.  For every byte, %eax becomes its low
# nibble and %ebp or %edi (alternating) its high nibble.  The accumulator
# is %mm6:%mm7 (%mm7 is the lower qword).  One step shifts it right by
# 8 bits (%mm3 carries the byte crossing from %mm6 into %mm7) and XORs in
# the frame-table entries selected by the two nibbles.  The byte shifted
# out of %mm7, XORed with a byte-table entry, indexes .Lrem_8bit; that
# 16-bit value is inserted into %mm0, %mm1 or %mm2 (rotating) and XORed
# into %mm6 one step later.  %ebx and %ecx alternate as holders of the
# shifted-out byte.
570238405Sjkim	xorl	%eax,%eax
571238405Sjkim	roll	$8,%edx
572238405Sjkim	movb	%dl,%al
573238405Sjkim	movl	%eax,%ebp
574238405Sjkim	andb	$15,%al
575238405Sjkim	shrl	$4,%ebp
576238405Sjkim	pxor	%mm0,%mm0
577238405Sjkim	roll	$8,%edx
578238405Sjkim	pxor	%mm1,%mm1
579238405Sjkim	pxor	%mm2,%mm2
# Seed the accumulator from the unshifted tables using the low nibble of
# the first byte.
580238405Sjkim	movq	16(%esp,%eax,8),%mm7
581238405Sjkim	movq	144(%esp,%eax,8),%mm6
582238405Sjkim	movb	%dl,%al
583238405Sjkim	movd	%mm7,%ebx
584238405Sjkim	psrlq	$8,%mm7
585238405Sjkim	movq	%mm6,%mm3
586238405Sjkim	movl	%eax,%edi
587238405Sjkim	psrlq	$8,%mm6
588238405Sjkim	pxor	272(%esp,%ebp,8),%mm7
589238405Sjkim	andb	$15,%al
590238405Sjkim	psllq	$56,%mm3
591238405Sjkim	shrl	$4,%edi
592238405Sjkim	pxor	16(%esp,%eax,8),%mm7
593238405Sjkim	roll	$8,%edx
594238405Sjkim	pxor	144(%esp,%eax,8),%mm6
595238405Sjkim	pxor	%mm3,%mm7
596238405Sjkim	pxor	400(%esp,%ebp,8),%mm6
597238405Sjkim	xorb	(%esp,%ebp,1),%bl
598238405Sjkim	movb	%dl,%al
599238405Sjkim	movd	%mm7,%ecx
600238405Sjkim	movzbl	%bl,%ebx
601238405Sjkim	psrlq	$8,%mm7
602238405Sjkim	movq	%mm6,%mm3
603238405Sjkim	movl	%eax,%ebp
604238405Sjkim	psrlq	$8,%mm6
605238405Sjkim	pxor	272(%esp,%edi,8),%mm7
606238405Sjkim	andb	$15,%al
607238405Sjkim	psllq	$56,%mm3
608238405Sjkim	shrl	$4,%ebp
# Fetch the 16-bit .Lrem_8bit entry for the byte shifted out (word 2 of
# the MMX register, i.e. bits 32..47).
609238405Sjkim	pinsrw	$2,(%esi,%ebx,2),%mm2
610238405Sjkim	pxor	16(%esp,%eax,8),%mm7
611238405Sjkim	roll	$8,%edx
612238405Sjkim	pxor	144(%esp,%eax,8),%mm6
613238405Sjkim	pxor	%mm3,%mm7
614238405Sjkim	pxor	400(%esp,%edi,8),%mm6
615238405Sjkim	xorb	(%esp,%edi,1),%cl
616238405Sjkim	movb	%dl,%al
# All four bytes of the top dword have been rotated through %dl; continue
# with the spilled dword at state offset 8.
617238405Sjkim	movl	536(%esp),%edx
618238405Sjkim	movd	%mm7,%ebx
619238405Sjkim	movzbl	%cl,%ecx
620238405Sjkim	psrlq	$8,%mm7
621238405Sjkim	movq	%mm6,%mm3
622238405Sjkim	movl	%eax,%edi
623238405Sjkim	psrlq	$8,%mm6
624238405Sjkim	pxor	272(%esp,%ebp,8),%mm7
625238405Sjkim	andb	$15,%al
626238405Sjkim	psllq	$56,%mm3
627238405Sjkim	pxor	%mm2,%mm6
628238405Sjkim	shrl	$4,%edi
629238405Sjkim	pinsrw	$2,(%esi,%ecx,2),%mm1
630238405Sjkim	pxor	16(%esp,%eax,8),%mm7
631238405Sjkim	roll	$8,%edx
632238405Sjkim	pxor	144(%esp,%eax,8),%mm6
633238405Sjkim	pxor	%mm3,%mm7
634238405Sjkim	pxor	400(%esp,%ebp,8),%mm6
635238405Sjkim	xorb	(%esp,%ebp,1),%bl
636238405Sjkim	movb	%dl,%al
637238405Sjkim	movd	%mm7,%ecx
638238405Sjkim	movzbl	%bl,%ebx
639238405Sjkim	psrlq	$8,%mm7
640238405Sjkim	movq	%mm6,%mm3
641238405Sjkim	movl	%eax,%ebp
642238405Sjkim	psrlq	$8,%mm6
643238405Sjkim	pxor	272(%esp,%edi,8),%mm7
644238405Sjkim	andb	$15,%al
645238405Sjkim	psllq	$56,%mm3
646238405Sjkim	pxor	%mm1,%mm6
647238405Sjkim	shrl	$4,%ebp
648238405Sjkim	pinsrw	$2,(%esi,%ebx,2),%mm0
649238405Sjkim	pxor	16(%esp,%eax,8),%mm7
650238405Sjkim	roll	$8,%edx
651238405Sjkim	pxor	144(%esp,%eax,8),%mm6
652238405Sjkim	pxor	%mm3,%mm7
653238405Sjkim	pxor	400(%esp,%edi,8),%mm6
654238405Sjkim	xorb	(%esp,%edi,1),%cl
655238405Sjkim	movb	%dl,%al
656238405Sjkim	movd	%mm7,%ebx
657238405Sjkim	movzbl	%cl,%ecx
658238405Sjkim	psrlq	$8,%mm7
659238405Sjkim	movq	%mm6,%mm3
660238405Sjkim	movl	%eax,%edi
661238405Sjkim	psrlq	$8,%mm6
662238405Sjkim	pxor	272(%esp,%ebp,8),%mm7
663238405Sjkim	andb	$15,%al
664238405Sjkim	psllq	$56,%mm3
665238405Sjkim	pxor	%mm0,%mm6
666238405Sjkim	shrl	$4,%edi
667238405Sjkim	pinsrw	$2,(%esi,%ecx,2),%mm2
668238405Sjkim	pxor	16(%esp,%eax,8),%mm7
669238405Sjkim	roll	$8,%edx
670238405Sjkim	pxor	144(%esp,%eax,8),%mm6
671238405Sjkim	pxor	%mm3,%mm7
672238405Sjkim	pxor	400(%esp,%ebp,8),%mm6
673238405Sjkim	xorb	(%esp,%ebp,1),%bl
674238405Sjkim	movb	%dl,%al
675238405Sjkim	movd	%mm7,%ecx
676238405Sjkim	movzbl	%bl,%ebx
677238405Sjkim	psrlq	$8,%mm7
678238405Sjkim	movq	%mm6,%mm3
679238405Sjkim	movl	%eax,%ebp
680238405Sjkim	psrlq	$8,%mm6
681238405Sjkim	pxor	272(%esp,%edi,8),%mm7
682238405Sjkim	andb	$15,%al
683238405Sjkim	psllq	$56,%mm3
684238405Sjkim	pxor	%mm2,%mm6
685238405Sjkim	shrl	$4,%ebp
686238405Sjkim	pinsrw	$2,(%esi,%ebx,2),%mm1
687238405Sjkim	pxor	16(%esp,%eax,8),%mm7
688238405Sjkim	roll	$8,%edx
689238405Sjkim	pxor	144(%esp,%eax,8),%mm6
690238405Sjkim	pxor	%mm3,%mm7
691238405Sjkim	pxor	400(%esp,%edi,8),%mm6
692238405Sjkim	xorb	(%esp,%edi,1),%cl
693238405Sjkim	movb	%dl,%al
# Next state dword: offset 4 (upper half of the qword spilled at 528).
694238405Sjkim	movl	532(%esp),%edx
695238405Sjkim	movd	%mm7,%ebx
696238405Sjkim	movzbl	%cl,%ecx
697238405Sjkim	psrlq	$8,%mm7
698238405Sjkim	movq	%mm6,%mm3
699238405Sjkim	movl	%eax,%edi
700238405Sjkim	psrlq	$8,%mm6
701238405Sjkim	pxor	272(%esp,%ebp,8),%mm7
702238405Sjkim	andb	$15,%al
703238405Sjkim	psllq	$56,%mm3
704238405Sjkim	pxor	%mm1,%mm6
705238405Sjkim	shrl	$4,%edi
706238405Sjkim	pinsrw	$2,(%esi,%ecx,2),%mm0
707238405Sjkim	pxor	16(%esp,%eax,8),%mm7
708238405Sjkim	roll	$8,%edx
709238405Sjkim	pxor	144(%esp,%eax,8),%mm6
710238405Sjkim	pxor	%mm3,%mm7
711238405Sjkim	pxor	400(%esp,%ebp,8),%mm6
712238405Sjkim	xorb	(%esp,%ebp,1),%bl
713238405Sjkim	movb	%dl,%al
714238405Sjkim	movd	%mm7,%ecx
715238405Sjkim	movzbl	%bl,%ebx
716238405Sjkim	psrlq	$8,%mm7
717238405Sjkim	movq	%mm6,%mm3
718238405Sjkim	movl	%eax,%ebp
719238405Sjkim	psrlq	$8,%mm6
720238405Sjkim	pxor	272(%esp,%edi,8),%mm7
721238405Sjkim	andb	$15,%al
722238405Sjkim	psllq	$56,%mm3
723238405Sjkim	pxor	%mm0,%mm6
724238405Sjkim	shrl	$4,%ebp
725238405Sjkim	pinsrw	$2,(%esi,%ebx,2),%mm2
726238405Sjkim	pxor	16(%esp,%eax,8),%mm7
727238405Sjkim	roll	$8,%edx
728238405Sjkim	pxor	144(%esp,%eax,8),%mm6
729238405Sjkim	pxor	%mm3,%mm7
730238405Sjkim	pxor	400(%esp,%edi,8),%mm6
731238405Sjkim	xorb	(%esp,%edi,1),%cl
732238405Sjkim	movb	%dl,%al
733238405Sjkim	movd	%mm7,%ebx
734238405Sjkim	movzbl	%cl,%ecx
735238405Sjkim	psrlq	$8,%mm7
736238405Sjkim	movq	%mm6,%mm3
737238405Sjkim	movl	%eax,%edi
738238405Sjkim	psrlq	$8,%mm6
739238405Sjkim	pxor	272(%esp,%ebp,8),%mm7
740238405Sjkim	andb	$15,%al
741238405Sjkim	psllq	$56,%mm3
742238405Sjkim	pxor	%mm2,%mm6
743238405Sjkim	shrl	$4,%edi
744238405Sjkim	pinsrw	$2,(%esi,%ecx,2),%mm1
745238405Sjkim	pxor	16(%esp,%eax,8),%mm7
746238405Sjkim	roll	$8,%edx
747238405Sjkim	pxor	144(%esp,%eax,8),%mm6
748238405Sjkim	pxor	%mm3,%mm7
749238405Sjkim	pxor	400(%esp,%ebp,8),%mm6
750238405Sjkim	xorb	(%esp,%ebp,1),%bl
751238405Sjkim	movb	%dl,%al
752238405Sjkim	movd	%mm7,%ecx
753238405Sjkim	movzbl	%bl,%ebx
754238405Sjkim	psrlq	$8,%mm7
755238405Sjkim	movq	%mm6,%mm3
756238405Sjkim	movl	%eax,%ebp
757238405Sjkim	psrlq	$8,%mm6
758238405Sjkim	pxor	272(%esp,%edi,8),%mm7
759238405Sjkim	andb	$15,%al
760238405Sjkim	psllq	$56,%mm3
761238405Sjkim	pxor	%mm1,%mm6
762238405Sjkim	shrl	$4,%ebp
763238405Sjkim	pinsrw	$2,(%esi,%ebx,2),%mm0
764238405Sjkim	pxor	16(%esp,%eax,8),%mm7
765238405Sjkim	roll	$8,%edx
766238405Sjkim	pxor	144(%esp,%eax,8),%mm6
767238405Sjkim	pxor	%mm3,%mm7
768238405Sjkim	pxor	400(%esp,%edi,8),%mm6
769238405Sjkim	xorb	(%esp,%edi,1),%cl
770238405Sjkim	movb	%dl,%al
# Last state dword: offset 0 (lower half of the qword spilled at 528).
771238405Sjkim	movl	528(%esp),%edx
772238405Sjkim	movd	%mm7,%ebx
773238405Sjkim	movzbl	%cl,%ecx
774238405Sjkim	psrlq	$8,%mm7
775238405Sjkim	movq	%mm6,%mm3
776238405Sjkim	movl	%eax,%edi
777238405Sjkim	psrlq	$8,%mm6
778238405Sjkim	pxor	272(%esp,%ebp,8),%mm7
779238405Sjkim	andb	$15,%al
780238405Sjkim	psllq	$56,%mm3
781238405Sjkim	pxor	%mm0,%mm6
782238405Sjkim	shrl	$4,%edi
783238405Sjkim	pinsrw	$2,(%esi,%ecx,2),%mm2
784238405Sjkim	pxor	16(%esp,%eax,8),%mm7
785238405Sjkim	roll	$8,%edx
786238405Sjkim	pxor	144(%esp,%eax,8),%mm6
787238405Sjkim	pxor	%mm3,%mm7
788238405Sjkim	pxor	400(%esp,%ebp,8),%mm6
789238405Sjkim	xorb	(%esp,%ebp,1),%bl
790238405Sjkim	movb	%dl,%al
791238405Sjkim	movd	%mm7,%ecx
792238405Sjkim	movzbl	%bl,%ebx
793238405Sjkim	psrlq	$8,%mm7
794238405Sjkim	movq	%mm6,%mm3
795238405Sjkim	movl	%eax,%ebp
796238405Sjkim	psrlq	$8,%mm6
797238405Sjkim	pxor	272(%esp,%edi,8),%mm7
798238405Sjkim	andb	$15,%al
799238405Sjkim	psllq	$56,%mm3
800238405Sjkim	pxor	%mm2,%mm6
801238405Sjkim	shrl	$4,%ebp
802238405Sjkim	pinsrw	$2,(%esi,%ebx,2),%mm1
803238405Sjkim	pxor	16(%esp,%eax,8),%mm7
804238405Sjkim	roll	$8,%edx
805238405Sjkim	pxor	144(%esp,%eax,8),%mm6
806238405Sjkim	pxor	%mm3,%mm7
807238405Sjkim	pxor	400(%esp,%edi,8),%mm6
808238405Sjkim	xorb	(%esp,%edi,1),%cl
809238405Sjkim	movb	%dl,%al
810238405Sjkim	movd	%mm7,%ebx
811238405Sjkim	movzbl	%cl,%ecx
812238405Sjkim	psrlq	$8,%mm7
813238405Sjkim	movq	%mm6,%mm3
814238405Sjkim	movl	%eax,%edi
815238405Sjkim	psrlq	$8,%mm6
816238405Sjkim	pxor	272(%esp,%ebp,8),%mm7
817238405Sjkim	andb	$15,%al
818238405Sjkim	psllq	$56,%mm3
819238405Sjkim	pxor	%mm1,%mm6
820238405Sjkim	shrl	$4,%edi
821238405Sjkim	pinsrw	$2,(%esi,%ecx,2),%mm0
822238405Sjkim	pxor	16(%esp,%eax,8),%mm7
823238405Sjkim	roll	$8,%edx
824238405Sjkim	pxor	144(%esp,%eax,8),%mm6
825238405Sjkim	pxor	%mm3,%mm7
826238405Sjkim	pxor	400(%esp,%ebp,8),%mm6
827238405Sjkim	xorb	(%esp,%ebp,1),%bl
828238405Sjkim	movb	%dl,%al
829238405Sjkim	movd	%mm7,%ecx
830238405Sjkim	movzbl	%bl,%ebx
831238405Sjkim	psrlq	$8,%mm7
832238405Sjkim	movq	%mm6,%mm3
833238405Sjkim	movl	%eax,%ebp
834238405Sjkim	psrlq	$8,%mm6
835238405Sjkim	pxor	272(%esp,%edi,8),%mm7
836238405Sjkim	andb	$15,%al
837238405Sjkim	psllq	$56,%mm3
838238405Sjkim	pxor	%mm0,%mm6
839238405Sjkim	shrl	$4,%ebp
840238405Sjkim	pinsrw	$2,(%esi,%ebx,2),%mm2
841238405Sjkim	pxor	16(%esp,%eax,8),%mm7
842238405Sjkim	roll	$8,%edx
843238405Sjkim	pxor	144(%esp,%eax,8),%mm6
844238405Sjkim	pxor	%mm3,%mm7
845238405Sjkim	pxor	400(%esp,%edi,8),%mm6
846238405Sjkim	xorb	(%esp,%edi,1),%cl
847238405Sjkim	movb	%dl,%al
# NOTE(review): the value loaded into %edx here is not read again; %edx is
# overwritten by "movd %mm7,%edx" further down before any use.  The address
# lies inside the frame (upper half of the last 400-table entry), so the
# load is harmless.
848238405Sjkim	movl	524(%esp),%edx
849238405Sjkim	movd	%mm7,%ebx
850238405Sjkim	movzbl	%cl,%ecx
851238405Sjkim	psrlq	$8,%mm7
852238405Sjkim	movq	%mm6,%mm3
853238405Sjkim	movl	%eax,%edi
854238405Sjkim	psrlq	$8,%mm6
855238405Sjkim	pxor	272(%esp,%ebp,8),%mm7
856238405Sjkim	andb	$15,%al
857238405Sjkim	psllq	$56,%mm3
858238405Sjkim	pxor	%mm2,%mm6
859238405Sjkim	shrl	$4,%edi
860238405Sjkim	pinsrw	$2,(%esi,%ecx,2),%mm1
# ---- Tail of the walk: the last byte's low nibble is folded with the
# usual 8-bit step, its high nibble (%edi) with a final 4-bit shift using
# the unshifted tables.  The pending reduction words are aligned with
# psllq (4 and 12 bits) or inserted as word 3 (bits 48..63) so that they
# line up with that last 4-bit shift.
861238405Sjkim	pxor	16(%esp,%eax,8),%mm7
862238405Sjkim	pxor	144(%esp,%eax,8),%mm6
863238405Sjkim	xorb	(%esp,%ebp,1),%bl
864238405Sjkim	pxor	%mm3,%mm7
865238405Sjkim	pxor	400(%esp,%ebp,8),%mm6
866238405Sjkim	movzbl	%bl,%ebx
867238405Sjkim	pxor	%mm2,%mm2
868238405Sjkim	psllq	$4,%mm1
869238405Sjkim	movd	%mm7,%ecx
870238405Sjkim	psrlq	$4,%mm7
871238405Sjkim	movq	%mm6,%mm3
872238405Sjkim	psrlq	$4,%mm6
873238405Sjkim	shll	$4,%ecx
874238405Sjkim	pxor	16(%esp,%edi,8),%mm7
875238405Sjkim	psllq	$60,%mm3
876238405Sjkim	movzbl	%cl,%ecx
877238405Sjkim	pxor	%mm3,%mm7
878238405Sjkim	pxor	144(%esp,%edi,8),%mm6
879238405Sjkim	pinsrw	$2,(%esi,%ebx,2),%mm0
880238405Sjkim	pxor	%mm1,%mm6
# Move the lower qword of the accumulator out to %edx (low 32 bits) and
# %ebx (high 32 bits, fetched after the psrlq $32 below).
881238405Sjkim	movd	%mm7,%edx
882238405Sjkim	pinsrw	$3,(%esi,%ecx,2),%mm2
883238405Sjkim	psllq	$12,%mm0
884238405Sjkim	pxor	%mm0,%mm6
885238405Sjkim	psrlq	$32,%mm7
886238405Sjkim	pxor	%mm2,%mm6
# Reload the input pointer that was spilled at the top of the iteration.
887238405Sjkim	movl	548(%esp),%ecx
888238405Sjkim	movd	%mm7,%ebx
# Byte-reverse the 64-bit value in %mm6: swap the two bytes inside every
# 16-bit word, then reverse the order of the four words (pshufw $27).
# %edx and %ebx are byte-reversed with bswap.
889238405Sjkim	movq	%mm6,%mm3
890238405Sjkim	psllw	$8,%mm6
891238405Sjkim	psrlw	$8,%mm3
892238405Sjkim	por	%mm3,%mm6
893238405Sjkim	bswap	%edx
894238405Sjkim	pshufw	$27,%mm6,%mm6
895238405Sjkim	bswap	%ebx
# Continue until the input pointer equals the end pointer.
896238405Sjkim	cmpl	552(%esp),%ecx
897238405Sjkim	jne	.L009outer
# Write the state back through arg1 in the layout it was loaded in.
898238405Sjkim	movl	544(%esp),%eax
899238405Sjkim	movl	%edx,12(%eax)
900238405Sjkim	movl	%ebx,8(%eax)
901238405Sjkim	movq	%mm6,(%eax)
# Drop the aligned frame by restoring the saved %esp, leave MMX state, and
# restore the saved registers.
902238405Sjkim	movl	556(%esp),%esp
903238405Sjkim	emms
904238405Sjkim	popl	%edi
905238405Sjkim	popl	%esi
906238405Sjkim	popl	%ebx
907238405Sjkim	popl	%ebp
908238405Sjkim	ret
909238405Sjkim.size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
910238405Sjkim.globl	gcm_init_clmul
911238405Sjkim.type	gcm_init_clmul,@function
912238405Sjkim.align	16
# ----------------------------------------------------------------------
# gcm_init_clmul -- 32-bit x86 using SSE2 and carry-less multiply.
#
# Arguments are read from the stack:
#   arg1 -> %edx : output pointer; 32 bytes are written (two unaligned
#                  16-byte stores at offsets 0 and 16)
#   arg2 -> %eax : input pointer; 16 bytes are read (unaligned load)
# NOTE(review): presumably arg1 is the key-dependent table and arg2 the
# hash key H; the second output would then be H squared in the GHASH
# field -- confirm against the generator source.
#
# Registers: only %eax, %ecx, %edx and %xmm0-%xmm5 are used; no
# callee-saved general-purpose register is touched and no stack frame is
# created.
#
# The ".byte 102,15,58,68,..." sequences are hand-encoded pclmulqdq
# instructions (66 0F 3A 44 /r ib).
# ----------------------------------------------------------------------
913238405Sjkimgcm_init_clmul:
914238405Sjkim.L_gcm_init_clmul_begin:
915238405Sjkim	movl	4(%esp),%edx
916238405Sjkim	movl	8(%esp),%eax
# Position-independent address of .Lbswap (defined outside this routine).
917238405Sjkim	call	.L010pic
918238405Sjkim.L010pic:
919238405Sjkim	popl	%ecx
920238405Sjkim	leal	.Lbswap-.L010pic(%ecx),%ecx
# Load the input and swap its two 64-bit halves (pshufd $78).
921238405Sjkim	movdqu	(%eax),%xmm2
922238405Sjkim	pshufd	$78,%xmm2,%xmm2
# Shift the 128-bit value left by one bit: psllq shifts each half, the bit
# leaving the low half is moved into the high half via psrlq $63 +
# pslldq $8.  In parallel, %xmm5 becomes an all-ones mask when the top bit
# of the original value was set (broadcast top dword, signed compare
# against zero), and all-zeros otherwise.
923238405Sjkim	pshufd	$255,%xmm2,%xmm4
924238405Sjkim	movdqa	%xmm2,%xmm3
925238405Sjkim	psllq	$1,%xmm2
926238405Sjkim	pxor	%xmm5,%xmm5
927238405Sjkim	psrlq	$63,%xmm3
928238405Sjkim	pcmpgtd	%xmm4,%xmm5
929238405Sjkim	pslldq	$8,%xmm3
930238405Sjkim	por	%xmm3,%xmm2
# Conditionally XOR in the 16-byte constant stored at .Lbswap+16.  The
# memory operand of pand must be 16-byte aligned.
931238405Sjkim	pand	16(%ecx),%xmm5
932238405Sjkim	pxor	%xmm5,%xmm2
# Multiply %xmm2 by itself.  %xmm3/%xmm4 receive (low half XOR high half)
# of each operand in their low qword for the middle product.
933238405Sjkim	movdqa	%xmm2,%xmm0
934238405Sjkim	movdqa	%xmm0,%xmm1
935238405Sjkim	pshufd	$78,%xmm0,%xmm3
936238405Sjkim	pshufd	$78,%xmm2,%xmm4
937238405Sjkim	pxor	%xmm0,%xmm3
938238405Sjkim	pxor	%xmm2,%xmm4
# pclmulqdq $0x00,%xmm2,%xmm0 : low  qword x low  qword
939238405Sjkim.byte	102,15,58,68,194,0
# pclmulqdq $0x11,%xmm2,%xmm1 : high qword x high qword
940238405Sjkim.byte	102,15,58,68,202,17
# pclmulqdq $0x00,%xmm4,%xmm3 : (lo^hi) x (lo^hi)
941238405Sjkim.byte	102,15,58,68,220,0
# Middle term = %xmm3 ^ low product ^ high product; split it across the
# 256-bit result held in %xmm1 (upper half) : %xmm0 (lower half).
942238405Sjkim	xorps	%xmm0,%xmm3
943238405Sjkim	xorps	%xmm1,%xmm3
944238405Sjkim	movdqa	%xmm3,%xmm4
945238405Sjkim	psrldq	$8,%xmm3
946238405Sjkim	pslldq	$8,%xmm4
947238405Sjkim	pxor	%xmm3,%xmm1
948238405Sjkim	pxor	%xmm4,%xmm0
# Fold the 256-bit product down to 128 bits in %xmm0 with two shift/XOR
# phases.  NOTE(review): presumably reduction modulo the GHASH polynomial
# -- confirm against the generator source.
949238405Sjkim	movdqa	%xmm0,%xmm3
950238405Sjkim	psllq	$1,%xmm0
951238405Sjkim	pxor	%xmm3,%xmm0
952238405Sjkim	psllq	$5,%xmm0
953238405Sjkim	pxor	%xmm3,%xmm0
954238405Sjkim	psllq	$57,%xmm0
955238405Sjkim	movdqa	%xmm0,%xmm4
956238405Sjkim	pslldq	$8,%xmm0
957238405Sjkim	psrldq	$8,%xmm4
958238405Sjkim	pxor	%xmm3,%xmm0
959238405Sjkim	pxor	%xmm4,%xmm1
960238405Sjkim	movdqa	%xmm0,%xmm4
961238405Sjkim	psrlq	$5,%xmm0
962238405Sjkim	pxor	%xmm4,%xmm0
963238405Sjkim	psrlq	$1,%xmm0
964238405Sjkim	pxor	%xmm4,%xmm0
965238405Sjkim	pxor	%xmm1,%xmm4
966238405Sjkim	psrlq	$1,%xmm0
967238405Sjkim	pxor	%xmm4,%xmm0
# Store the adjusted input value and the folded product.
968238405Sjkim	movdqu	%xmm2,(%edx)
969238405Sjkim	movdqu	%xmm0,16(%edx)
970238405Sjkim	ret
971238405Sjkim.size	gcm_init_clmul,.-.L_gcm_init_clmul_begin
972238405Sjkim.globl	gcm_gmult_clmul
973238405Sjkim.type	gcm_gmult_clmul,@function
974238405Sjkim.align	16
# ----------------------------------------------------------------------
# gcm_gmult_clmul -- 32-bit x86 using SSE2, pshufb and carry-less multiply.
#
# Arguments are read from the stack:
#   arg1 -> %eax : pointer to a 16-byte block; read (unaligned) at entry
#                  and overwritten with the result at exit
#   arg2 -> %edx : pointer to a 16-byte multiplier (unaligned load)
# NOTE(review): presumably (Xi, Htable) with the multiplier being the
# first entry written by gcm_init_clmul -- confirm against callers.
#
# Registers: only %eax, %ecx, %edx and %xmm0-%xmm5 are used; no
# callee-saved general-purpose register is touched and no stack frame is
# created.
#
# ".byte 102,15,56,0,197" is a hand-encoded pshufb %xmm5,%xmm0;
# ".byte 102,15,58,68,..." sequences are hand-encoded pclmulqdq.
# ----------------------------------------------------------------------
975238405Sjkimgcm_gmult_clmul:
976238405Sjkim.L_gcm_gmult_clmul_begin:
977238405Sjkim	movl	4(%esp),%eax
978238405Sjkim	movl	8(%esp),%edx
# Position-independent address of .Lbswap (defined outside this routine).
979238405Sjkim	call	.L011pic
980238405Sjkim.L011pic:
981238405Sjkim	popl	%ecx
982238405Sjkim	leal	.Lbswap-.L011pic(%ecx),%ecx
# Load the block, the shuffle mask at .Lbswap (movdqa: the mask must be
# 16-byte aligned) and the multiplier.
983238405Sjkim	movdqu	(%eax),%xmm0
984238405Sjkim	movdqa	(%ecx),%xmm5
985238405Sjkim	movups	(%edx),%xmm2
# pshufb %xmm5,%xmm0 : permute the bytes of the block with the mask.
# NOTE(review): the label name suggests a full byte reversal -- confirm
# against the table definition.
986238405Sjkim.byte	102,15,56,0,197
# Multiply %xmm0 by %xmm2.  %xmm3/%xmm4 receive (low half XOR high half)
# of each operand in their low qword for the middle product.
987238405Sjkim	movdqa	%xmm0,%xmm1
988238405Sjkim	pshufd	$78,%xmm0,%xmm3
989238405Sjkim	pshufd	$78,%xmm2,%xmm4
990238405Sjkim	pxor	%xmm0,%xmm3
991238405Sjkim	pxor	%xmm2,%xmm4
# pclmulqdq $0x00,%xmm2,%xmm0 : low  qword x low  qword
992238405Sjkim.byte	102,15,58,68,194,0
# pclmulqdq $0x11,%xmm2,%xmm1 : high qword x high qword
993238405Sjkim.byte	102,15,58,68,202,17
# pclmulqdq $0x00,%xmm4,%xmm3 : (lo^hi) x (lo^hi)
994238405Sjkim.byte	102,15,58,68,220,0
# Middle term = %xmm3 ^ low product ^ high product; split it across the
# 256-bit result held in %xmm1 (upper half) : %xmm0 (lower half).
995238405Sjkim	xorps	%xmm0,%xmm3
996238405Sjkim	xorps	%xmm1,%xmm3
997238405Sjkim	movdqa	%xmm3,%xmm4
998238405Sjkim	psrldq	$8,%xmm3
999238405Sjkim	pslldq	$8,%xmm4
1000238405Sjkim	pxor	%xmm3,%xmm1
1001238405Sjkim	pxor	%xmm4,%xmm0
# Fold the 256-bit product down to 128 bits in %xmm0 with two shift/XOR
# phases.  NOTE(review): presumably reduction modulo the GHASH polynomial
# -- confirm against the generator source.
1002238405Sjkim	movdqa	%xmm0,%xmm3
1003238405Sjkim	psllq	$1,%xmm0
1004238405Sjkim	pxor	%xmm3,%xmm0
1005238405Sjkim	psllq	$5,%xmm0
1006238405Sjkim	pxor	%xmm3,%xmm0
1007238405Sjkim	psllq	$57,%xmm0
1008238405Sjkim	movdqa	%xmm0,%xmm4
1009238405Sjkim	pslldq	$8,%xmm0
1010238405Sjkim	psrldq	$8,%xmm4
1011238405Sjkim	pxor	%xmm3,%xmm0
1012238405Sjkim	pxor	%xmm4,%xmm1
1013238405Sjkim	movdqa	%xmm0,%xmm4
1014238405Sjkim	psrlq	$5,%xmm0
1015238405Sjkim	pxor	%xmm4,%xmm0
1016238405Sjkim	psrlq	$1,%xmm0
1017238405Sjkim	pxor	%xmm4,%xmm0
1018238405Sjkim	pxor	%xmm1,%xmm4
1019238405Sjkim	psrlq	$1,%xmm0
1020238405Sjkim	pxor	%xmm4,%xmm0
# pshufb %xmm5,%xmm0 : apply the same byte permutation again, then store
# the result over the input block.
1021238405Sjkim.byte	102,15,56,0,197
1022238405Sjkim	movdqu	%xmm0,(%eax)
1023238405Sjkim	ret
1024238405Sjkim.size	gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
# gcm_ghash_clmul: fold a run of 16-byte input blocks into a 128-bit
# accumulator using carry-less multiplication. PSHUFB and PCLMULQDQ are
# emitted as raw .byte opcodes.
#
# Stack arguments (offsets as seen after the four register pushes):
#   20(%esp) -> %eax : 16-byte accumulator, read at entry, written at exit
#   24(%esp) -> %edx : table; 16 bytes at (%edx) and 16 bytes at 16(%edx)
#                      are read
#   28(%esp) -> %esi : input pointer, advanced as blocks are consumed
#   32(%esp) -> %ebx : byte count
# NOTE(review): the byte count is assumed to be a non-zero multiple of 16;
# there is no path for a zero length or a partial block -- confirm callers.
# NOTE(review): the table is presumably the one written by gcm_init_clmul,
# which stores a value at offset 0 and its reduced square at offset 16.
# Saves and restores %ebp, %ebx, %esi, %edi; writes %xmm0-%xmm7.
1025238405Sjkim.globl	gcm_ghash_clmul
1026238405Sjkim.type	gcm_ghash_clmul,@function
1027238405Sjkim.align	16
1028238405Sjkimgcm_ghash_clmul:
1029238405Sjkim.L_gcm_ghash_clmul_begin:
1030238405Sjkim	pushl	%ebp
1031238405Sjkim	pushl	%ebx
1032238405Sjkim	pushl	%esi
1033238405Sjkim	pushl	%edi
1034238405Sjkim	movl	20(%esp),%eax
1035238405Sjkim	movl	24(%esp),%edx
1036238405Sjkim	movl	28(%esp),%esi
1037238405Sjkim	movl	32(%esp),%ebx
	# call/pop fetches the current PC so that .Lbswap can be addressed
	# position-independently; %ecx ends up pointing at .Lbswap.
1038238405Sjkim	call	.L012pic
1039238405Sjkim.L012pic:
1040238405Sjkim	popl	%ecx
1041238405Sjkim	leal	.Lbswap-.L012pic(%ecx),%ecx
	# xmm0 = accumulator, xmm5 = byte-reversal mask, xmm2 = table entry 0.
1042238405Sjkim	movdqu	(%eax),%xmm0
1043238405Sjkim	movdqa	(%ecx),%xmm5
1044238405Sjkim	movdqu	(%edx),%xmm2
	# Raw opcode: pshufb %xmm5,%xmm0 -- byte-reverse the accumulator.
1045238405Sjkim.byte	102,15,56,0,197
	# Exactly one block (count == 16): handle it in the single-block tail.
1046238405Sjkim	subl	$16,%ebx
1047238405Sjkim	jz	.L013odd_tail
	# Otherwise start with two blocks: xmm3 = first, xmm6 = second.
1048238405Sjkim	movdqu	(%esi),%xmm3
1049238405Sjkim	movdqu	16(%esi),%xmm6
	# Raw opcodes: pshufb %xmm5,%xmm3 ; pshufb %xmm5,%xmm6
1050238405Sjkim.byte	102,15,56,0,221
1051238405Sjkim.byte	102,15,56,0,245
	# Accumulator ^= first block. The second block is multiplied by table
	# entry 0 (xmm2) with the Karatsuba three-multiply scheme; its
	# unreduced 256-bit product is kept pending in xmm7 (high) : xmm6 (low).
1052238405Sjkim	pxor	%xmm3,%xmm0
1053238405Sjkim	movdqa	%xmm6,%xmm7
1054238405Sjkim	pshufd	$78,%xmm6,%xmm3
1055238405Sjkim	pshufd	$78,%xmm2,%xmm4
1056238405Sjkim	pxor	%xmm6,%xmm3
1057238405Sjkim	pxor	%xmm2,%xmm4
	# Raw opcodes:
	#   pclmulqdq $0x00,%xmm2,%xmm6   low  * low
	#   pclmulqdq $0x11,%xmm2,%xmm7   high * high
	#   pclmulqdq $0x00,%xmm4,%xmm3   (hi^lo) * (hi^lo)
1058238405Sjkim.byte	102,15,58,68,242,0
1059238405Sjkim.byte	102,15,58,68,250,17
1060238405Sjkim.byte	102,15,58,68,220,0
1061238405Sjkim	xorps	%xmm6,%xmm3
1062238405Sjkim	xorps	%xmm7,%xmm3
1063238405Sjkim	movdqa	%xmm3,%xmm4
1064238405Sjkim	psrldq	$8,%xmm3
1065238405Sjkim	pslldq	$8,%xmm4
1066238405Sjkim	pxor	%xmm3,%xmm7
1067238405Sjkim	pxor	%xmm4,%xmm6
	# xmm2 = table entry at offset 16, the multiplier for the accumulator.
	# Two blocks consumed: advance the pointer and the remaining count.
	# If at most 32 bytes remained (unsigned compare), skip the main loop.
1068238405Sjkim	movups	16(%edx),%xmm2
1069238405Sjkim	leal	32(%esi),%esi
1070238405Sjkim	subl	$32,%ebx
1071238405Sjkim	jbe	.L014even_tail
	# Main loop, two blocks per iteration.
1072238405Sjkim.L015mod_loop:
	# Multiply the accumulator (xmm0) by xmm2 (table offset 16) into
	# xmm1 (high) : xmm0 (low), same three-multiply scheme as above.
1073238405Sjkim	movdqa	%xmm0,%xmm1
1074238405Sjkim	pshufd	$78,%xmm0,%xmm3
1075238405Sjkim	pshufd	$78,%xmm2,%xmm4
1076238405Sjkim	pxor	%xmm0,%xmm3
1077238405Sjkim	pxor	%xmm2,%xmm4
	# Raw opcodes:
	#   pclmulqdq $0x00,%xmm2,%xmm0
	#   pclmulqdq $0x11,%xmm2,%xmm1
	#   pclmulqdq $0x00,%xmm4,%xmm3
1078238405Sjkim.byte	102,15,58,68,194,0
1079238405Sjkim.byte	102,15,58,68,202,17
1080238405Sjkim.byte	102,15,58,68,220,0
1081238405Sjkim	xorps	%xmm0,%xmm3
1082238405Sjkim	xorps	%xmm1,%xmm3
1083238405Sjkim	movdqa	%xmm3,%xmm4
1084238405Sjkim	psrldq	$8,%xmm3
1085238405Sjkim	pslldq	$8,%xmm4
1086238405Sjkim	pxor	%xmm3,%xmm1
1087238405Sjkim	pxor	%xmm4,%xmm0
	# Load the next two blocks (xmm3, xmm6), reload xmm2 with table entry
	# 0, and fold the pending product xmm7:xmm6 into xmm1:xmm0. The fold of
	# xmm6 happens before xmm6 is overwritten by the load.
1088238405Sjkim	movdqu	(%esi),%xmm3
1089238405Sjkim	movups	(%edx),%xmm2
1090238405Sjkim	pxor	%xmm6,%xmm0
1091238405Sjkim	pxor	%xmm7,%xmm1
1092238405Sjkim	movdqu	16(%esi),%xmm6
	# Raw opcodes: pshufb %xmm5,%xmm3 ; pshufb %xmm5,%xmm6
1093238405Sjkim.byte	102,15,56,0,221
1094238405Sjkim.byte	102,15,56,0,245
	# From here xmm5 is reused as scratch (copy of the second new block);
	# the byte-reversal mask is reloaded at the bottom of the loop.
1095238405Sjkim	movdqa	%xmm6,%xmm5
1096238405Sjkim	movdqa	%xmm6,%xmm7
	# The first new block is XORed into the high half; the reduction below
	# XORs xmm1 into its result, so the block ends up in the accumulator.
1097238405Sjkim	pxor	%xmm3,%xmm1
	# Reduction of xmm1:xmm0 (same shift sequence as gcm_gmult_clmul),
	# interleaved with the multiply of the second new block by xmm2.
1098238405Sjkim	movdqa	%xmm0,%xmm3
1099238405Sjkim	psllq	$1,%xmm0
1100238405Sjkim	pxor	%xmm3,%xmm0
1101238405Sjkim	psllq	$5,%xmm0
1102238405Sjkim	pxor	%xmm3,%xmm0
	# Raw opcode: pclmulqdq $0x00,%xmm2,%xmm6 (second block, low * low)
1103238405Sjkim.byte	102,15,58,68,242,0
1104238405Sjkim	psllq	$57,%xmm0
1105238405Sjkim	movdqa	%xmm0,%xmm4
1106238405Sjkim	pslldq	$8,%xmm0
1107238405Sjkim	psrldq	$8,%xmm4
1108238405Sjkim	pxor	%xmm3,%xmm0
	# xmm3 = hi^lo of the second block, xmm5 = hi^lo of xmm2.
1109238405Sjkim	pshufd	$78,%xmm5,%xmm3
1110238405Sjkim	pxor	%xmm4,%xmm1
1111238405Sjkim	pxor	%xmm5,%xmm3
1112238405Sjkim	pshufd	$78,%xmm2,%xmm5
1113238405Sjkim	pxor	%xmm2,%xmm5
	# Raw opcode: pclmulqdq $0x11,%xmm2,%xmm7 (second block, high * high)
1114238405Sjkim.byte	102,15,58,68,250,17
1115238405Sjkim	movdqa	%xmm0,%xmm4
1116238405Sjkim	psrlq	$5,%xmm0
1117238405Sjkim	pxor	%xmm4,%xmm0
1118238405Sjkim	psrlq	$1,%xmm0
1119238405Sjkim	pxor	%xmm4,%xmm0
1120238405Sjkim	pxor	%xmm1,%xmm4
1121238405Sjkim	psrlq	$1,%xmm0
1122238405Sjkim	pxor	%xmm4,%xmm0
	# Raw opcode: pclmulqdq $0x00,%xmm5,%xmm3 (second block, middle term)
1123238405Sjkim.byte	102,15,58,68,221,0
	# xmm2 = table offset 16 again for the next accumulator multiply; then
	# finish the pending product in xmm7 (high) : xmm6 (low).
1124238405Sjkim	movups	16(%edx),%xmm2
1125238405Sjkim	xorps	%xmm6,%xmm3
1126238405Sjkim	xorps	%xmm7,%xmm3
1127238405Sjkim	movdqa	%xmm3,%xmm5
1128238405Sjkim	psrldq	$8,%xmm3
1129238405Sjkim	pslldq	$8,%xmm5
1130238405Sjkim	pxor	%xmm3,%xmm7
1131238405Sjkim	pxor	%xmm5,%xmm6
	# Restore the byte-reversal mask, advance by two blocks, and loop while
	# more than 32 bytes remained before the subtraction (unsigned).
1132238405Sjkim	movdqa	(%ecx),%xmm5
1133238405Sjkim	leal	32(%esi),%esi
1134238405Sjkim	subl	$32,%ebx
1135238405Sjkim	ja	.L015mod_loop
	# Finish the pair in flight: multiply the accumulator by xmm2 (table
	# offset 16), fold in the pending product xmm7:xmm6, then reduce.
1136238405Sjkim.L014even_tail:
1137238405Sjkim	movdqa	%xmm0,%xmm1
1138238405Sjkim	pshufd	$78,%xmm0,%xmm3
1139238405Sjkim	pshufd	$78,%xmm2,%xmm4
1140238405Sjkim	pxor	%xmm0,%xmm3
1141238405Sjkim	pxor	%xmm2,%xmm4
	# Raw opcodes:
	#   pclmulqdq $0x00,%xmm2,%xmm0
	#   pclmulqdq $0x11,%xmm2,%xmm1
	#   pclmulqdq $0x00,%xmm4,%xmm3
1142238405Sjkim.byte	102,15,58,68,194,0
1143238405Sjkim.byte	102,15,58,68,202,17
1144238405Sjkim.byte	102,15,58,68,220,0
1145238405Sjkim	xorps	%xmm0,%xmm3
1146238405Sjkim	xorps	%xmm1,%xmm3
1147238405Sjkim	movdqa	%xmm3,%xmm4
1148238405Sjkim	psrldq	$8,%xmm3
1149238405Sjkim	pslldq	$8,%xmm4
1150238405Sjkim	pxor	%xmm3,%xmm1
1151238405Sjkim	pxor	%xmm4,%xmm0
1152238405Sjkim	pxor	%xmm6,%xmm0
1153238405Sjkim	pxor	%xmm7,%xmm1
	# Two-phase reduction of xmm1:xmm0 into xmm0.
1154238405Sjkim	movdqa	%xmm0,%xmm3
1155238405Sjkim	psllq	$1,%xmm0
1156238405Sjkim	pxor	%xmm3,%xmm0
1157238405Sjkim	psllq	$5,%xmm0
1158238405Sjkim	pxor	%xmm3,%xmm0
1159238405Sjkim	psllq	$57,%xmm0
1160238405Sjkim	movdqa	%xmm0,%xmm4
1161238405Sjkim	pslldq	$8,%xmm0
1162238405Sjkim	psrldq	$8,%xmm4
1163238405Sjkim	pxor	%xmm3,%xmm0
1164238405Sjkim	pxor	%xmm4,%xmm1
1165238405Sjkim	movdqa	%xmm0,%xmm4
1166238405Sjkim	psrlq	$5,%xmm0
1167238405Sjkim	pxor	%xmm4,%xmm0
1168238405Sjkim	psrlq	$1,%xmm0
1169238405Sjkim	pxor	%xmm4,%xmm0
1170238405Sjkim	pxor	%xmm1,%xmm4
1171238405Sjkim	psrlq	$1,%xmm0
1172238405Sjkim	pxor	%xmm4,%xmm0
	# The counter is zero here when exactly one more block remains, and
	# non-zero (wrapped below zero) when all input has been consumed.
1173238405Sjkim	testl	%ebx,%ebx
1174238405Sjkim	jnz	.L016done
	# One block left: multiplier is table entry 0.
1175238405Sjkim	movups	(%edx),%xmm2
	# Single block: accumulator ^= byte-reversed block, multiply by xmm2,
	# reduce.
1176238405Sjkim.L013odd_tail:
1177238405Sjkim	movdqu	(%esi),%xmm3
	# Raw opcode: pshufb %xmm5,%xmm3
1178238405Sjkim.byte	102,15,56,0,221
1179238405Sjkim	pxor	%xmm3,%xmm0
1180238405Sjkim	movdqa	%xmm0,%xmm1
1181238405Sjkim	pshufd	$78,%xmm0,%xmm3
1182238405Sjkim	pshufd	$78,%xmm2,%xmm4
1183238405Sjkim	pxor	%xmm0,%xmm3
1184238405Sjkim	pxor	%xmm2,%xmm4
	# Raw opcodes:
	#   pclmulqdq $0x00,%xmm2,%xmm0
	#   pclmulqdq $0x11,%xmm2,%xmm1
	#   pclmulqdq $0x00,%xmm4,%xmm3
1185238405Sjkim.byte	102,15,58,68,194,0
1186238405Sjkim.byte	102,15,58,68,202,17
1187238405Sjkim.byte	102,15,58,68,220,0
1188238405Sjkim	xorps	%xmm0,%xmm3
1189238405Sjkim	xorps	%xmm1,%xmm3
1190238405Sjkim	movdqa	%xmm3,%xmm4
1191238405Sjkim	psrldq	$8,%xmm3
1192238405Sjkim	pslldq	$8,%xmm4
1193238405Sjkim	pxor	%xmm3,%xmm1
1194238405Sjkim	pxor	%xmm4,%xmm0
1195238405Sjkim	movdqa	%xmm0,%xmm3
1196238405Sjkim	psllq	$1,%xmm0
1197238405Sjkim	pxor	%xmm3,%xmm0
1198238405Sjkim	psllq	$5,%xmm0
1199238405Sjkim	pxor	%xmm3,%xmm0
1200238405Sjkim	psllq	$57,%xmm0
1201238405Sjkim	movdqa	%xmm0,%xmm4
1202238405Sjkim	pslldq	$8,%xmm0
1203238405Sjkim	psrldq	$8,%xmm4
1204238405Sjkim	pxor	%xmm3,%xmm0
1205238405Sjkim	pxor	%xmm4,%xmm1
1206238405Sjkim	movdqa	%xmm0,%xmm4
1207238405Sjkim	psrlq	$5,%xmm0
1208238405Sjkim	pxor	%xmm4,%xmm0
1209238405Sjkim	psrlq	$1,%xmm0
1210238405Sjkim	pxor	%xmm4,%xmm0
1211238405Sjkim	pxor	%xmm1,%xmm4
1212238405Sjkim	psrlq	$1,%xmm0
1213238405Sjkim	pxor	%xmm4,%xmm0
	# Byte-reverse the accumulator back (raw opcode: pshufb %xmm5,%xmm0),
	# store it, and restore the saved registers in reverse push order.
1214238405Sjkim.L016done:
1215238405Sjkim.byte	102,15,56,0,197
1216238405Sjkim	movdqu	%xmm0,(%eax)
1217238405Sjkim	popl	%edi
1218238405Sjkim	popl	%esi
1219238405Sjkim	popl	%ebx
1220238405Sjkim	popl	%ebp
1221238405Sjkim	ret
1222238405Sjkim.size	gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
# .Lbswap: two 16-byte constants, 64-byte aligned (so the movdqa loads from
# this label are aligned).
#   offset 0  : PSHUFB control bytes 15..0, which reverse the byte order of
#               a 16-byte register; loaded by gcm_gmult_clmul and
#               gcm_ghash_clmul.
#   offset 16 : bytes 01 00 .. 00 C2; used as a PAND mask via 16(%ecx) in
#               gcm_init_clmul.
# NOTE(review): offset 16 is presumably the GHASH reduction constant in
# bit-reflected form -- confirm against the generator source.
1223238405Sjkim.align	64
1224238405Sjkim.Lbswap:
1225238405Sjkim.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
1226238405Sjkim.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
# .Lrem_4bit: 16 entries of 8 bytes each, 64-byte aligned. Every entry is a
# zero low dword followed by a high dword; the sixteen high dwords are the
# same values, in the same order, that gcm_gmult_4bit_x86 stores into its
# on-stack table.
# NOTE(review): presumably the remainder table for 4-bit-at-a-time GHASH
# reduction; the code referencing this label is outside this section.
1227238405Sjkim.align	64
1228238405Sjkim.Lrem_4bit:
1229238405Sjkim.long	0,0,0,471859200,0,943718400,0,610271232
1230238405Sjkim.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
1231238405Sjkim.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
1232238405Sjkim.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
# .Lrem_8bit: 256 entries of 16 bits each (32 rows of 8 .value items),
# 64-byte aligned. Entry 0 is 0 and entry 1 is 450 (0x01C2).
# NOTE(review): presumably the remainder table for 8-bit-at-a-time GHASH
# reduction, indexed by a byte value; the code referencing this label is
# outside this section -- confirm there.
1233238405Sjkim.align	64
1234238405Sjkim.Lrem_8bit:
1235238405Sjkim.value	0,450,900,582,1800,1738,1164,1358
1236238405Sjkim.value	3600,4050,3476,3158,2328,2266,2716,2910
1237238405Sjkim.value	7200,7650,8100,7782,6952,6890,6316,6510
1238238405Sjkim.value	4656,5106,4532,4214,5432,5370,5820,6014
1239238405Sjkim.value	14400,14722,15300,14854,16200,16010,15564,15630
1240238405Sjkim.value	13904,14226,13780,13334,12632,12442,13020,13086
1241238405Sjkim.value	9312,9634,10212,9766,9064,8874,8428,8494
1242238405Sjkim.value	10864,11186,10740,10294,11640,11450,12028,12094
1243238405Sjkim.value	28800,28994,29444,29382,30600,30282,29708,30158
1244238405Sjkim.value	32400,32594,32020,31958,31128,30810,31260,31710
1245238405Sjkim.value	27808,28002,28452,28390,27560,27242,26668,27118
1246238405Sjkim.value	25264,25458,24884,24822,26040,25722,26172,26622
1247238405Sjkim.value	18624,18690,19268,19078,20424,19978,19532,19854
1248238405Sjkim.value	18128,18194,17748,17558,16856,16410,16988,17310
1249238405Sjkim.value	21728,21794,22372,22182,21480,21034,20588,20910
1250238405Sjkim.value	23280,23346,22900,22710,24056,23610,24188,24510
1251238405Sjkim.value	57600,57538,57988,58182,58888,59338,58764,58446
1252238405Sjkim.value	61200,61138,60564,60758,59416,59866,60316,59998
1253238405Sjkim.value	64800,64738,65188,65382,64040,64490,63916,63598
1254238405Sjkim.value	62256,62194,61620,61814,62520,62970,63420,63102
1255238405Sjkim.value	55616,55426,56004,56070,56904,57226,56780,56334
1256238405Sjkim.value	55120,54930,54484,54550,53336,53658,54236,53790
1257238405Sjkim.value	50528,50338,50916,50982,49768,50090,49644,49198
1258238405Sjkim.value	52080,51890,51444,51510,52344,52666,53244,52798
1259238405Sjkim.value	37248,36930,37380,37830,38536,38730,38156,38094
1260238405Sjkim.value	40848,40530,39956,40406,39064,39258,39708,39646
1261238405Sjkim.value	36256,35938,36388,36838,35496,35690,35116,35054
1262238405Sjkim.value	33712,33394,32820,33270,33976,34170,34620,34558
1263238405Sjkim.value	43456,43010,43588,43910,44744,44810,44364,44174
1264238405Sjkim.value	42960,42514,42068,42390,41176,41242,41820,41630
1265238405Sjkim.value	46560,46114,46692,47014,45800,45866,45420,45230
1266238405Sjkim.value	48112,47666,47220,47542,48376,48442,49020,48830
# NUL-terminated ASCII identification string embedded in the object:
# "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
1267238405Sjkim.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
1268238405Sjkim.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
1269238405Sjkim.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
1270238405Sjkim.byte	0
1271