/* aesni-x86.S, revision 1.10 */
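/*
 * AES for x86 using the AES-NI instruction set.  This listing matches the
 * perlasm output of OpenSSL's crypto/aes/asm/aesni-x86.pl (an assumption
 * based on the symbol and label naming); fixes belong in that script, not
 * in this generated file.
 *
 * The ".byte" runs below hand-encode AES-NI/SSE4.1 instructions for the
 * benefit of assemblers that predate them:
 *
 *	102,15,56,220,NN	aesenc		(0x66 0x0f 0x38 0xdc /r)
 *	102,15,56,221,NN	aesenclast	(0x66 0x0f 0x38 0xdd /r)
 *	102,15,56,222,NN	aesdec		(0x66 0x0f 0x38 0xde /r)
 *	102,15,56,223,NN	aesdeclast	(0x66 0x0f 0x38 0xdf /r)
 *	102,15,56,0,NN		pshufb		(0x66 0x0f 0x38 0x00 /r)
 *	102,15,58,22,NN,i	pextrd		(0x66 0x0f 0x3a 0x16 /r ib)
 *	102,15,58,34,NN,i	pinsrd		(0x66 0x0f 0x3a 0x22 /r ib)
 *	15,31,64,0		nopl 0(%eax)	(alignment padding only)
 *
 * All public entry points take cdecl stack arguments.  240(key) is the
 * rounds field of the AES_KEY structure, and the public routines scrub
 * the xmm registers with pxor before returning so that no key or
 * plaintext material is left behind.
 */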
#include <machine/asm.h>
.text
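/*
 * void aesni_encrypt(const unsigned char *in, unsigned char *out,
 *		      const AES_KEY *key);	(assumed C prototype)
 *
 * Encrypt one 16-byte block: xor with round key 0, then one aesenc per
 * loop iteration and a final aesenclast, with the count driven by the
 * rounds field at 240(key).
 */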
.globl	aesni_encrypt
.type	aesni_encrypt,@function
.align	16
aesni_encrypt:
.L_aesni_encrypt_begin:
	movl	4(%esp),%eax
	movl	12(%esp),%edx
	movups	(%eax),%xmm2
	movl	240(%edx),%ecx
	movl	8(%esp),%eax
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L000enc1_loop_1:
.byte	102,15,56,220,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L000enc1_loop_1
.byte	102,15,56,221,209
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%eax)
	pxor	%xmm2,%xmm2
	ret
.size	aesni_encrypt,.-.L_aesni_encrypt_begin
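/*
 * void aesni_decrypt(const unsigned char *in, unsigned char *out,
 *		      const AES_KEY *key);	(assumed C prototype)
 *
 * Single-block mirror of aesni_encrypt using aesdec/aesdeclast; it
 * expects a decryption (equivalent inverse cipher) key schedule.
 */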
.globl	aesni_decrypt
.type	aesni_decrypt,@function
.align	16
aesni_decrypt:
.L_aesni_decrypt_begin:
	movl	4(%esp),%eax
	movl	12(%esp),%edx
	movups	(%eax),%xmm2
	movl	240(%edx),%ecx
	movl	8(%esp),%eax
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L001dec1_loop_2:
.byte	102,15,56,222,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L001dec1_loop_2
.byte	102,15,56,223,209
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%eax)
	pxor	%xmm2,%xmm2
	ret
.size	aesni_decrypt,.-.L_aesni_decrypt_begin
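/*
 * _aesni_{en,de}crypt{2,3,4,6}: local helpers that interleave 2/3/4/6
 * blocks held in %xmm2..%xmm7 through the cipher to hide the aes*
 * instruction latency.  On entry %edx points at the key schedule and
 * %ecx holds the rounds field.  The prologue converts rounds to a byte
 * count (shll $4), points %edx past the end of the schedule and negates
 * %ecx, so the round loop can walk the keys forward via (%edx,%ecx,1)
 * while a single addl both advances the index and supplies the
 * termination flag for jnz.
 */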
.type	_aesni_encrypt2,@function
.align	16
_aesni_encrypt2:
	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
	addl	$16,%ecx
.L002enc2_loop:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L002enc2_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,221,208
.byte	102,15,56,221,216
	ret
.size	_aesni_encrypt2,.-_aesni_encrypt2
.type	_aesni_decrypt2,@function
.align	16
_aesni_decrypt2:
	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
	addl	$16,%ecx
.L003dec2_loop:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208
.byte	102,15,56,222,216
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L003dec2_loop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,223,208
.byte	102,15,56,223,216
	ret
.size	_aesni_decrypt2,.-_aesni_decrypt2
.type	_aesni_encrypt3,@function
.align	16
_aesni_encrypt3:
	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
	addl	$16,%ecx
.L004enc3_loop:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L004enc3_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
	ret
.size	_aesni_encrypt3,.-_aesni_encrypt3
.type	_aesni_decrypt3,@function
.align	16
_aesni_decrypt3:
	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
	addl	$16,%ecx
.L005dec3_loop:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L005dec3_loop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
	ret
.size	_aesni_decrypt3,.-_aesni_decrypt3
.type	_aesni_encrypt4,@function
.align	16
_aesni_encrypt4:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	shll	$4,%ecx
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	15,31,64,0
	addl	$16,%ecx
.L006enc4_loop:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L006enc4_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
	ret
.size	_aesni_encrypt4,.-_aesni_encrypt4
.type	_aesni_decrypt4,@function
.align	16
_aesni_decrypt4:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	shll	$4,%ecx
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	15,31,64,0
	addl	$16,%ecx
.L007dec4_loop:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L007dec4_loop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
	ret
.size	_aesni_decrypt4,.-_aesni_decrypt4
.type	_aesni_encrypt6,@function
.align	16
_aesni_encrypt6:
	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,217
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	102,15,56,220,225
	pxor	%xmm0,%xmm7
	movups	(%edx,%ecx,1),%xmm0
	addl	$16,%ecx
	jmp	.L008_aesni_encrypt6_inner
.align	16
.L009enc6_loop:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.L008_aesni_encrypt6_inner:
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.L_aesni_encrypt6_enter:
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L009enc6_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
	ret
.size	_aesni_encrypt6,.-_aesni_encrypt6
.type	_aesni_decrypt6,@function
.align	16
_aesni_decrypt6:
	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,209
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,217
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	102,15,56,222,225
	pxor	%xmm0,%xmm7
	movups	(%edx,%ecx,1),%xmm0
	addl	$16,%ecx
	jmp	.L010_aesni_decrypt6_inner
.align	16
.L011dec6_loop:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.L010_aesni_decrypt6_inner:
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.L_aesni_decrypt6_enter:
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L011dec6_loop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
.byte	102,15,56,223,240
.byte	102,15,56,223,248
	ret
.size	_aesni_decrypt6,.-_aesni_decrypt6
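/*
 * void aesni_ecb_encrypt(const unsigned char *in, unsigned char *out,
 *			  size_t length, const AES_KEY *key, int enc);
 * (assumed C prototype, following OpenSSL's usage)
 *
 * ECB over whole blocks: length is rounded down to a multiple of 16 and
 * enc selects the encrypt or decrypt half.  The main loop pushes six
 * blocks at a time through _aesni_{en,de}crypt6; a leftover of 1..5
 * blocks is dispatched to the narrower helpers.
 */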
.globl	aesni_ecb_encrypt
.type	aesni_ecb_encrypt,@function
.align	16
aesni_ecb_encrypt:
.L_aesni_ecb_encrypt_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	andl	$-16,%eax
	jz	.L012ecb_ret
	movl	240(%edx),%ecx
	testl	%ebx,%ebx
	jz	.L013ecb_decrypt
	movl	%edx,%ebp
	movl	%ecx,%ebx
	cmpl	$96,%eax
	jb	.L014ecb_enc_tail
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	subl	$96,%eax
	jmp	.L015ecb_enc_loop6_enter
.align	16
.L016ecb_enc_loop6:
	movups	%xmm2,(%edi)
	movdqu	(%esi),%xmm2
	movups	%xmm3,16(%edi)
	movdqu	16(%esi),%xmm3
	movups	%xmm4,32(%edi)
	movdqu	32(%esi),%xmm4
	movups	%xmm5,48(%edi)
	movdqu	48(%esi),%xmm5
	movups	%xmm6,64(%edi)
	movdqu	64(%esi),%xmm6
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
.L015ecb_enc_loop6_enter:
	call	_aesni_encrypt6
	movl	%ebp,%edx
	movl	%ebx,%ecx
	subl	$96,%eax
	jnc	.L016ecb_enc_loop6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	addl	$96,%eax
	jz	.L012ecb_ret
.L014ecb_enc_tail:
	movups	(%esi),%xmm2
	cmpl	$32,%eax
	jb	.L017ecb_enc_one
	movups	16(%esi),%xmm3
	je	.L018ecb_enc_two
	movups	32(%esi),%xmm4
	cmpl	$64,%eax
	jb	.L019ecb_enc_three
	movups	48(%esi),%xmm5
	je	.L020ecb_enc_four
	movups	64(%esi),%xmm6
	xorps	%xmm7,%xmm7
	call	_aesni_encrypt6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	.L012ecb_ret
.align	16
.L017ecb_enc_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L021enc1_loop_3:
.byte	102,15,56,220,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L021enc1_loop_3
.byte	102,15,56,221,209
	movups	%xmm2,(%edi)
	jmp	.L012ecb_ret
.align	16
.L018ecb_enc_two:
	call	_aesni_encrypt2
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	.L012ecb_ret
.align	16
.L019ecb_enc_three:
	call	_aesni_encrypt3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	.L012ecb_ret
.align	16
.L020ecb_enc_four:
	call	_aesni_encrypt4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	jmp	.L012ecb_ret
.align	16
.L013ecb_decrypt:
	movl	%edx,%ebp
	movl	%ecx,%ebx
	cmpl	$96,%eax
	jb	.L022ecb_dec_tail
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	subl	$96,%eax
	jmp	.L023ecb_dec_loop6_enter
.align	16
.L024ecb_dec_loop6:
	movups	%xmm2,(%edi)
	movdqu	(%esi),%xmm2
	movups	%xmm3,16(%edi)
	movdqu	16(%esi),%xmm3
	movups	%xmm4,32(%edi)
	movdqu	32(%esi),%xmm4
	movups	%xmm5,48(%edi)
	movdqu	48(%esi),%xmm5
	movups	%xmm6,64(%edi)
	movdqu	64(%esi),%xmm6
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
.L023ecb_dec_loop6_enter:
	call	_aesni_decrypt6
	movl	%ebp,%edx
	movl	%ebx,%ecx
	subl	$96,%eax
	jnc	.L024ecb_dec_loop6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	addl	$96,%eax
	jz	.L012ecb_ret
.L022ecb_dec_tail:
	movups	(%esi),%xmm2
	cmpl	$32,%eax
	jb	.L025ecb_dec_one
	movups	16(%esi),%xmm3
	je	.L026ecb_dec_two
	movups	32(%esi),%xmm4
	cmpl	$64,%eax
	jb	.L027ecb_dec_three
	movups	48(%esi),%xmm5
	je	.L028ecb_dec_four
	movups	64(%esi),%xmm6
	xorps	%xmm7,%xmm7
	call	_aesni_decrypt6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	.L012ecb_ret
.align	16
.L025ecb_dec_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L029dec1_loop_4:
.byte	102,15,56,222,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L029dec1_loop_4
.byte	102,15,56,223,209
	movups	%xmm2,(%edi)
	jmp	.L012ecb_ret
.align	16
.L026ecb_dec_two:
	call	_aesni_decrypt2
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	.L012ecb_ret
.align	16
.L027ecb_dec_three:
	call	_aesni_decrypt3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	.L012ecb_ret
.align	16
.L028ecb_dec_four:
	call	_aesni_decrypt4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
.L012ecb_ret:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin
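/*
 * void aesni_ccm64_encrypt_blocks(const unsigned char *in,
 *		unsigned char *out, size_t blocks, const void *key,
 *		const unsigned char ivec[16], unsigned char cmac[16]);
 * (assumed C prototype, following OpenSSL's usage)
 *
 * CCM with a 64-bit counter.  The constants 202182159/134810123/
 * 67438087/66051 spell out the 0x0f0e...0100 byte-swap mask used with
 * pshufb to keep the counter block (%xmm7) in big-endian form, and
 * paddq 16(%esp) bumps it by one.  Each outer iteration encrypts the
 * counter block and the running CBC-MAC (%xmm3, xored with the
 * plaintext) side by side in one two-stream round loop.
 */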
.globl	aesni_ccm64_encrypt_blocks
.type	aesni_ccm64_encrypt_blocks,@function
.align	16
aesni_ccm64_encrypt_blocks:
.L_aesni_ccm64_encrypt_blocks_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	40(%esp),%ecx
	movl	%esp,%ebp
	subl	$60,%esp
	andl	$-16,%esp
	movl	%ebp,48(%esp)
	movdqu	(%ebx),%xmm7
	movdqu	(%ecx),%xmm3
	movl	240(%edx),%ecx
	movl	$202182159,(%esp)
	movl	$134810123,4(%esp)
	movl	$67438087,8(%esp)
	movl	$66051,12(%esp)
	movl	$1,%ebx
	xorl	%ebp,%ebp
	movl	%ebx,16(%esp)
	movl	%ebp,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ebp,28(%esp)
	shll	$4,%ecx
	movl	$16,%ebx
	leal	(%edx),%ebp
	movdqa	(%esp),%xmm5
	movdqa	%xmm7,%xmm2
	leal	32(%edx,%ecx,1),%edx
	subl	%ecx,%ebx
.byte	102,15,56,0,253
.L030ccm64_enc_outer:
	movups	(%ebp),%xmm0
	movl	%ebx,%ecx
	movups	(%esi),%xmm6
	xorps	%xmm0,%xmm2
	movups	16(%ebp),%xmm1
	xorps	%xmm6,%xmm0
	xorps	%xmm0,%xmm3
	movups	32(%ebp),%xmm0
.L031ccm64_enc2_loop:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L031ccm64_enc2_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	paddq	16(%esp),%xmm7
	decl	%eax
.byte	102,15,56,221,208
.byte	102,15,56,221,216
	leal	16(%esi),%esi
	xorps	%xmm2,%xmm6
	movdqa	%xmm7,%xmm2
	movups	%xmm6,(%edi)
.byte	102,15,56,0,213
	leal	16(%edi),%edi
	jnz	.L030ccm64_enc_outer
	movl	48(%esp),%esp
	movl	40(%esp),%edi
	movups	%xmm3,(%edi)
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin
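/*
 * void aesni_ccm64_decrypt_blocks(...);	(same assumed signature as
 * the encrypt side)
 *
 * Decrypt counterpart: the first counter block is encrypted up front,
 * and in the steady state the recovered plaintext is folded into the
 * CBC-MAC (%xmm3) one iteration behind the keystream, so MAC and
 * keystream encryptions can still share the round loop.
 */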
.globl	aesni_ccm64_decrypt_blocks
.type	aesni_ccm64_decrypt_blocks,@function
.align	16
aesni_ccm64_decrypt_blocks:
.L_aesni_ccm64_decrypt_blocks_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	40(%esp),%ecx
	movl	%esp,%ebp
	subl	$60,%esp
	andl	$-16,%esp
	movl	%ebp,48(%esp)
	movdqu	(%ebx),%xmm7
	movdqu	(%ecx),%xmm3
	movl	240(%edx),%ecx
	movl	$202182159,(%esp)
	movl	$134810123,4(%esp)
	movl	$67438087,8(%esp)
	movl	$66051,12(%esp)
	movl	$1,%ebx
	xorl	%ebp,%ebp
	movl	%ebx,16(%esp)
	movl	%ebp,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ebp,28(%esp)
	movdqa	(%esp),%xmm5
	movdqa	%xmm7,%xmm2
	movl	%edx,%ebp
	movl	%ecx,%ebx
.byte	102,15,56,0,253
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L032enc1_loop_5:
.byte	102,15,56,220,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L032enc1_loop_5
.byte	102,15,56,221,209
	shll	$4,%ebx
	movl	$16,%ecx
	movups	(%esi),%xmm6
	paddq	16(%esp),%xmm7
	leal	16(%esi),%esi
	subl	%ebx,%ecx
	leal	32(%ebp,%ebx,1),%edx
	movl	%ecx,%ebx
	jmp	.L033ccm64_dec_outer
.align	16
.L033ccm64_dec_outer:
	xorps	%xmm2,%xmm6
	movdqa	%xmm7,%xmm2
	movups	%xmm6,(%edi)
	leal	16(%edi),%edi
.byte	102,15,56,0,213
	subl	$1,%eax
	jz	.L034ccm64_dec_break
	movups	(%ebp),%xmm0
	movl	%ebx,%ecx
	movups	16(%ebp),%xmm1
	xorps	%xmm0,%xmm6
	xorps	%xmm0,%xmm2
	xorps	%xmm6,%xmm3
	movups	32(%ebp),%xmm0
.L035ccm64_dec2_loop:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L035ccm64_dec2_loop
	movups	(%esi),%xmm6
	paddq	16(%esp),%xmm7
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,221,208
.byte	102,15,56,221,216
	leal	16(%esi),%esi
	jmp	.L033ccm64_dec_outer
.align	16
.L034ccm64_dec_break:
	movl	240(%ebp),%ecx
	movl	%ebp,%edx
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm6
	leal	32(%edx),%edx
	xorps	%xmm6,%xmm3
.L036enc1_loop_6:
.byte	102,15,56,220,217
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L036enc1_loop_6
.byte	102,15,56,221,217
	movl	48(%esp),%esp
	movl	40(%esp),%edi
	movups	%xmm3,(%edi)
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin
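/*
 * void aesni_ctr32_encrypt_blocks(const unsigned char *in,
 *		unsigned char *out, size_t blocks, const void *key,
 *		const unsigned char ivec[16]);	(assumed C prototype)
 *
 * CTR mode with a 32-bit big-endian counter in the last word of ivec.
 * pextrd/pinsrd (the "102,15,58,22/34,..." bytes) pull the counter in
 * and out of %xmm7; six counter blocks per trip are staged at 48(%esp)
 * and 64(%esp) and bumped with paddd by the {6,6,6,0} constant written
 * to 16(%esp), with pshufb and the byte-swap mask at (%esp) converting
 * between lane order and big-endian memory form.
 */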
.globl	aesni_ctr32_encrypt_blocks
.type	aesni_ctr32_encrypt_blocks,@function
.align	16
aesni_ctr32_encrypt_blocks:
.L_aesni_ctr32_encrypt_blocks_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	%esp,%ebp
	subl	$88,%esp
	andl	$-16,%esp
	movl	%ebp,80(%esp)
	cmpl	$1,%eax
	je	.L037ctr32_one_shortcut
	movdqu	(%ebx),%xmm7
	movl	$202182159,(%esp)
	movl	$134810123,4(%esp)
	movl	$67438087,8(%esp)
	movl	$66051,12(%esp)
	movl	$6,%ecx
	xorl	%ebp,%ebp
	movl	%ecx,16(%esp)
	movl	%ecx,20(%esp)
	movl	%ecx,24(%esp)
	movl	%ebp,28(%esp)
.byte	102,15,58,22,251,3
.byte	102,15,58,34,253,3
	movl	240(%edx),%ecx
	bswap	%ebx
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movdqa	(%esp),%xmm2
.byte	102,15,58,34,195,0
	leal	3(%ebx),%ebp
.byte	102,15,58,34,205,0
	incl	%ebx
.byte	102,15,58,34,195,1
	incl	%ebp
.byte	102,15,58,34,205,1
	incl	%ebx
.byte	102,15,58,34,195,2
	incl	%ebp
.byte	102,15,58,34,205,2
	movdqa	%xmm0,48(%esp)
.byte	102,15,56,0,194
	movdqu	(%edx),%xmm6
	movdqa	%xmm1,64(%esp)
.byte	102,15,56,0,202
	pshufd	$192,%xmm0,%xmm2
	pshufd	$128,%xmm0,%xmm3
	cmpl	$6,%eax
	jb	.L038ctr32_tail
	pxor	%xmm6,%xmm7
	shll	$4,%ecx
	movl	$16,%ebx
	movdqa	%xmm7,32(%esp)
	movl	%edx,%ebp
	subl	%ecx,%ebx
	leal	32(%edx,%ecx,1),%edx
	subl	$6,%eax
	jmp	.L039ctr32_loop6
.align	16
.L039ctr32_loop6:
	pshufd	$64,%xmm0,%xmm4
	movdqa	32(%esp),%xmm0
	pshufd	$192,%xmm1,%xmm5
	pxor	%xmm0,%xmm2
	pshufd	$128,%xmm1,%xmm6
	pxor	%xmm0,%xmm3
	pshufd	$64,%xmm1,%xmm7
	movups	16(%ebp),%xmm1
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
.byte	102,15,56,220,209
	pxor	%xmm0,%xmm6
	pxor	%xmm0,%xmm7
.byte	102,15,56,220,217
	movups	32(%ebp),%xmm0
	movl	%ebx,%ecx
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	call	.L_aesni_encrypt6_enter
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	%xmm1,%xmm2
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm3
	movups	%xmm2,(%edi)
	movdqa	16(%esp),%xmm0
	xorps	%xmm1,%xmm4
	movdqa	64(%esp),%xmm1
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	paddd	%xmm0,%xmm1
	paddd	48(%esp),%xmm0
	movdqa	(%esp),%xmm2
	movups	48(%esi),%xmm3
	movups	64(%esi),%xmm4
	xorps	%xmm3,%xmm5
	movups	80(%esi),%xmm3
	leal	96(%esi),%esi
	movdqa	%xmm0,48(%esp)
.byte	102,15,56,0,194
	xorps	%xmm4,%xmm6
	movups	%xmm5,48(%edi)
	xorps	%xmm3,%xmm7
	movdqa	%xmm1,64(%esp)
.byte	102,15,56,0,202
	movups	%xmm6,64(%edi)
	pshufd	$192,%xmm0,%xmm2
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	pshufd	$128,%xmm0,%xmm3
	subl	$6,%eax
	jnc	.L039ctr32_loop6
	addl	$6,%eax
	jz	.L040ctr32_ret
	movdqu	(%ebp),%xmm7
	movl	%ebp,%edx
	pxor	32(%esp),%xmm7
	movl	240(%ebp),%ecx
.L038ctr32_tail:
	por	%xmm7,%xmm2
	cmpl	$2,%eax
	jb	.L041ctr32_one
	pshufd	$64,%xmm0,%xmm4
	por	%xmm7,%xmm3
	je	.L042ctr32_two
	pshufd	$192,%xmm1,%xmm5
	por	%xmm7,%xmm4
	cmpl	$4,%eax
	jb	.L043ctr32_three
	pshufd	$128,%xmm1,%xmm6
	por	%xmm7,%xmm5
	je	.L044ctr32_four
	por	%xmm7,%xmm6
	call	_aesni_encrypt6
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	%xmm1,%xmm2
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm3
	movups	48(%esi),%xmm0
	xorps	%xmm1,%xmm4
	movups	64(%esi),%xmm1
	xorps	%xmm0,%xmm5
	movups	%xmm2,(%edi)
	xorps	%xmm1,%xmm6
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	.L040ctr32_ret
.align	16
.L037ctr32_one_shortcut:
	movups	(%ebx),%xmm2
	movl	240(%edx),%ecx
.L041ctr32_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L045enc1_loop_7:
.byte	102,15,56,220,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L045enc1_loop_7
.byte	102,15,56,221,209
	movups	(%esi),%xmm6
	xorps	%xmm2,%xmm6
	movups	%xmm6,(%edi)
	jmp	.L040ctr32_ret
.align	16
.L042ctr32_two:
	call	_aesni_encrypt2
	movups	(%esi),%xmm5
	movups	16(%esi),%xmm6
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	.L040ctr32_ret
.align	16
.L043ctr32_three:
	call	_aesni_encrypt3
	movups	(%esi),%xmm5
	movups	16(%esi),%xmm6
	xorps	%xmm5,%xmm2
	movups	32(%esi),%xmm7
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	xorps	%xmm7,%xmm4
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	.L040ctr32_ret
.align	16
.L044ctr32_four:
	call	_aesni_encrypt4
	movups	(%esi),%xmm6
	movups	16(%esi),%xmm7
	movups	32(%esi),%xmm1
	xorps	%xmm6,%xmm2
	movups	48(%esi),%xmm0
	xorps	%xmm7,%xmm3
	movups	%xmm2,(%edi)
	xorps	%xmm1,%xmm4
	movups	%xmm3,16(%edi)
	xorps	%xmm0,%xmm5
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
.L040ctr32_ret:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	movdqa	%xmm0,32(%esp)
	pxor	%xmm5,%xmm5
	movdqa	%xmm0,48(%esp)
	pxor	%xmm6,%xmm6
	movdqa	%xmm0,64(%esp)
	pxor	%xmm7,%xmm7
	movl	80(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin
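/*
 * void aesni_xts_encrypt(const unsigned char *in, unsigned char *out,
 *		size_t length, const AES_KEY *key1, const AES_KEY *key2,
 *		const unsigned char iv[16]);	(assumed C prototype)
 *
 * XTS: the iv is first encrypted under key2 to form the initial tweak.
 * Each tweak is multiplied by x in GF(2^128) with the pshufd $19 /
 * pcmpgtd / pand / paddq / pxor sequence, using the 0x87 reduction
 * constant stored at 96(%esp); six tweaks per trip through the 6-block
 * loop are cached at (%esp)..80(%esp).  A trailing partial block is
 * handled by ciphertext stealing (.L057xts_enc_steal).
 */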
.globl	aesni_xts_encrypt
.type	aesni_xts_encrypt,@function
.align	16
aesni_xts_encrypt:
.L_aesni_xts_encrypt_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	36(%esp),%edx
	movl	40(%esp),%esi
	movl	240(%edx),%ecx
	movups	(%esi),%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L046enc1_loop_8:
.byte	102,15,56,220,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L046enc1_loop_8
.byte	102,15,56,221,209
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movl	%esp,%ebp
	subl	$120,%esp
	movl	240(%edx),%ecx
	andl	$-16,%esp
	movl	$135,96(%esp)
	movl	$0,100(%esp)
	movl	$1,104(%esp)
	movl	$0,108(%esp)
	movl	%eax,112(%esp)
	movl	%ebp,116(%esp)
	movdqa	%xmm2,%xmm1
	pxor	%xmm0,%xmm0
	movdqa	96(%esp),%xmm3
	pcmpgtd	%xmm1,%xmm0
	andl	$-16,%eax
	movl	%edx,%ebp
	movl	%ecx,%ebx
	subl	$96,%eax
	jc	.L047xts_enc_short
	shll	$4,%ecx
	movl	$16,%ebx
	subl	%ecx,%ebx
	leal	32(%edx,%ecx,1),%edx
	jmp	.L048xts_enc_loop6
.align	16
.L048xts_enc_loop6:
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,16(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,32(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,48(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm7
	movdqa	%xmm1,64(%esp)
	paddq	%xmm1,%xmm1
	movups	(%ebp),%xmm0
	pand	%xmm3,%xmm7
	movups	(%esi),%xmm2
	pxor	%xmm1,%xmm7
	movl	%ebx,%ecx
	movdqu	16(%esi),%xmm3
	xorps	%xmm0,%xmm2
	movdqu	32(%esi),%xmm4
	pxor	%xmm0,%xmm3
	movdqu	48(%esi),%xmm5
	pxor	%xmm0,%xmm4
	movdqu	64(%esi),%xmm6
	pxor	%xmm0,%xmm5
	movdqu	80(%esi),%xmm1
	pxor	%xmm0,%xmm6
	leal	96(%esi),%esi
	pxor	(%esp),%xmm2
	movdqa	%xmm7,80(%esp)
	pxor	%xmm1,%xmm7
	movups	16(%ebp),%xmm1
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
.byte	102,15,56,220,209
	pxor	48(%esp),%xmm5
	pxor	64(%esp),%xmm6
.byte	102,15,56,220,217
	pxor	%xmm0,%xmm7
	movups	32(%ebp),%xmm0
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	call	.L_aesni_encrypt6_enter
	movdqa	80(%esp),%xmm1
	pxor	%xmm0,%xmm0
	xorps	(%esp),%xmm2
	pcmpgtd	%xmm1,%xmm0
	xorps	16(%esp),%xmm3
	movups	%xmm2,(%edi)
	xorps	32(%esp),%xmm4
	movups	%xmm3,16(%edi)
	xorps	48(%esp),%xmm5
	movups	%xmm4,32(%edi)
	xorps	64(%esp),%xmm6
	movups	%xmm5,48(%edi)
	xorps	%xmm1,%xmm7
	movups	%xmm6,64(%edi)
	pshufd	$19,%xmm0,%xmm2
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqa	96(%esp),%xmm3
	pxor	%xmm0,%xmm0
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	subl	$96,%eax
	jnc	.L048xts_enc_loop6
	movl	240(%ebp),%ecx
	movl	%ebp,%edx
	movl	%ecx,%ebx
.L047xts_enc_short:
	addl	$96,%eax
	jz	.L049xts_enc_done6x
	movdqa	%xmm1,%xmm5
	cmpl	$32,%eax
	jb	.L050xts_enc_one
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	je	.L051xts_enc_two
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,%xmm6
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	cmpl	$64,%eax
	jb	.L052xts_enc_three
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,%xmm7
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	movdqa	%xmm5,(%esp)
	movdqa	%xmm6,16(%esp)
	je	.L053xts_enc_four
	movdqa	%xmm7,32(%esp)
	pshufd	$19,%xmm0,%xmm7
	movdqa	%xmm1,48(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm7
	pxor	%xmm1,%xmm7
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	pxor	(%esp),%xmm2
	movdqu	48(%esi),%xmm5
	pxor	16(%esp),%xmm3
	movdqu	64(%esi),%xmm6
	pxor	32(%esp),%xmm4
	leal	80(%esi),%esi
	pxor	48(%esp),%xmm5
	movdqa	%xmm7,64(%esp)
	pxor	%xmm7,%xmm6
	call	_aesni_encrypt6
	movaps	64(%esp),%xmm1
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	32(%esp),%xmm4
	movups	%xmm2,(%edi)
	xorps	48(%esp),%xmm5
	movups	%xmm3,16(%edi)
	xorps	%xmm1,%xmm6
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	leal	80(%edi),%edi
	jmp	.L054xts_enc_done
.align	16
.L050xts_enc_one:
	movups	(%esi),%xmm2
	leal	16(%esi),%esi
	xorps	%xmm5,%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L055enc1_loop_9:
.byte	102,15,56,220,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L055enc1_loop_9
.byte	102,15,56,221,209
	xorps	%xmm5,%xmm2
	movups	%xmm2,(%edi)
	leal	16(%edi),%edi
	movdqa	%xmm5,%xmm1
	jmp	.L054xts_enc_done
.align	16
.L051xts_enc_two:
	movaps	%xmm1,%xmm6
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	leal	32(%esi),%esi
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	call	_aesni_encrypt2
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	leal	32(%edi),%edi
	movdqa	%xmm6,%xmm1
	jmp	.L054xts_enc_done
.align	16
.L052xts_enc_three:
	movaps	%xmm1,%xmm7
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	movups	32(%esi),%xmm4
	leal	48(%esi),%esi
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	call	_aesni_encrypt3
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	leal	48(%edi),%edi
	movdqa	%xmm7,%xmm1
	jmp	.L054xts_enc_done
.align	16
.L053xts_enc_four:
	movaps	%xmm1,%xmm6
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	movups	32(%esi),%xmm4
	xorps	(%esp),%xmm2
	movups	48(%esi),%xmm5
	leal	64(%esi),%esi
	xorps	16(%esp),%xmm3
	xorps	%xmm7,%xmm4
	xorps	%xmm6,%xmm5
	call	_aesni_encrypt4
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi)
	xorps	%xmm6,%xmm5
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	leal	64(%edi),%edi
	movdqa	%xmm6,%xmm1
	jmp	.L054xts_enc_done
.align	16
.L049xts_enc_done6x:
	movl	112(%esp),%eax
	andl	$15,%eax
	jz	.L056xts_enc_ret
	movdqa	%xmm1,%xmm5
	movl	%eax,112(%esp)
	jmp	.L057xts_enc_steal
.align	16
.L054xts_enc_done:
	movl	112(%esp),%eax
	pxor	%xmm0,%xmm0
	andl	$15,%eax
	jz	.L056xts_enc_ret
	pcmpgtd	%xmm1,%xmm0
	movl	%eax,112(%esp)
	pshufd	$19,%xmm0,%xmm5
	paddq	%xmm1,%xmm1
	pand	96(%esp),%xmm5
	pxor	%xmm1,%xmm5
.L057xts_enc_steal:
	movzbl	(%esi),%ecx
	movzbl	-16(%edi),%edx
	leal	1(%esi),%esi
	movb	%cl,-16(%edi)
	movb	%dl,(%edi)
	leal	1(%edi),%edi
	subl	$1,%eax
	jnz	.L057xts_enc_steal
	subl	112(%esp),%edi
	movl	%ebp,%edx
	movl	%ebx,%ecx
	movups	-16(%edi),%xmm2
	xorps	%xmm5,%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L058enc1_loop_10:
.byte	102,15,56,220,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L058enc1_loop_10
.byte	102,15,56,221,209
	xorps	%xmm5,%xmm2
	movups	%xmm2,-16(%edi)
.L056xts_enc_ret:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	movdqa	%xmm0,(%esp)
	pxor	%xmm3,%xmm3
	movdqa	%xmm0,16(%esp)
	pxor	%xmm4,%xmm4
	movdqa	%xmm0,32(%esp)
	pxor	%xmm5,%xmm5
	movdqa	%xmm0,48(%esp)
	pxor	%xmm6,%xmm6
	movdqa	%xmm0,64(%esp)
	pxor	%xmm7,%xmm7
	movdqa	%xmm0,80(%esp)
	movl	116(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin
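/*
 * void aesni_xts_decrypt(...);	(same assumed signature as the encrypt
 * side, with key1 being a decryption schedule)
 *
 * For ciphertext stealing the decrypt side must process the last full
 * block under the *next* tweak, hence the setnz adjustment of the
 * length up front, the "only one more" path, and the second
 * single-block pass after .L072xts_dec_steal.
 */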
.globl	aesni_xts_decrypt
.type	aesni_xts_decrypt,@function
.align	16
aesni_xts_decrypt:
.L_aesni_xts_decrypt_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	36(%esp),%edx
	movl	40(%esp),%esi
	movl	240(%edx),%ecx
	movups	(%esi),%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L059enc1_loop_11:
.byte	102,15,56,220,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L059enc1_loop_11
.byte	102,15,56,221,209
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movl	%esp,%ebp
	subl	$120,%esp
	andl	$-16,%esp
	xorl	%ebx,%ebx
	testl	$15,%eax
	setnz	%bl
	shll	$4,%ebx
	subl	%ebx,%eax
	movl	$135,96(%esp)
	movl	$0,100(%esp)
	movl	$1,104(%esp)
	movl	$0,108(%esp)
	movl	%eax,112(%esp)
	movl	%ebp,116(%esp)
	movl	240(%edx),%ecx
	movl	%edx,%ebp
	movl	%ecx,%ebx
	movdqa	%xmm2,%xmm1
	pxor	%xmm0,%xmm0
	movdqa	96(%esp),%xmm3
	pcmpgtd	%xmm1,%xmm0
	andl	$-16,%eax
	subl	$96,%eax
	jc	.L060xts_dec_short
	shll	$4,%ecx
	movl	$16,%ebx
	subl	%ecx,%ebx
	leal	32(%edx,%ecx,1),%edx
	jmp	.L061xts_dec_loop6
.align	16
.L061xts_dec_loop6:
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,16(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,32(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,48(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm7
	movdqa	%xmm1,64(%esp)
	paddq	%xmm1,%xmm1
	movups	(%ebp),%xmm0
	pand	%xmm3,%xmm7
	movups	(%esi),%xmm2
	pxor	%xmm1,%xmm7
	movl	%ebx,%ecx
	movdqu	16(%esi),%xmm3
	xorps	%xmm0,%xmm2
	movdqu	32(%esi),%xmm4
	pxor	%xmm0,%xmm3
	movdqu	48(%esi),%xmm5
	pxor	%xmm0,%xmm4
	movdqu	64(%esi),%xmm6
	pxor	%xmm0,%xmm5
	movdqu	80(%esi),%xmm1
	pxor	%xmm0,%xmm6
	leal	96(%esi),%esi
	pxor	(%esp),%xmm2
	movdqa	%xmm7,80(%esp)
	pxor	%xmm1,%xmm7
	movups	16(%ebp),%xmm1
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
.byte	102,15,56,222,209
	pxor	48(%esp),%xmm5
	pxor	64(%esp),%xmm6
.byte	102,15,56,222,217
	pxor	%xmm0,%xmm7
	movups	32(%ebp),%xmm0
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	call	.L_aesni_decrypt6_enter
	movdqa	80(%esp),%xmm1
	pxor	%xmm0,%xmm0
	xorps	(%esp),%xmm2
	pcmpgtd	%xmm1,%xmm0
	xorps	16(%esp),%xmm3
	movups	%xmm2,(%edi)
	xorps	32(%esp),%xmm4
	movups	%xmm3,16(%edi)
	xorps	48(%esp),%xmm5
	movups	%xmm4,32(%edi)
	xorps	64(%esp),%xmm6
	movups	%xmm5,48(%edi)
	xorps	%xmm1,%xmm7
	movups	%xmm6,64(%edi)
	pshufd	$19,%xmm0,%xmm2
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqa	96(%esp),%xmm3
	pxor	%xmm0,%xmm0
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	subl	$96,%eax
	jnc	.L061xts_dec_loop6
	movl	240(%ebp),%ecx
	movl	%ebp,%edx
	movl	%ecx,%ebx
.L060xts_dec_short:
	addl	$96,%eax
	jz	.L062xts_dec_done6x
	movdqa	%xmm1,%xmm5
	cmpl	$32,%eax
	jb	.L063xts_dec_one
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	je	.L064xts_dec_two
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,%xmm6
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	cmpl	$64,%eax
	jb	.L065xts_dec_three
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,%xmm7
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	movdqa	%xmm5,(%esp)
	movdqa	%xmm6,16(%esp)
	je	.L066xts_dec_four
	movdqa	%xmm7,32(%esp)
	pshufd	$19,%xmm0,%xmm7
	movdqa	%xmm1,48(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm7
	pxor	%xmm1,%xmm7
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	pxor	(%esp),%xmm2
	movdqu	48(%esi),%xmm5
	pxor	16(%esp),%xmm3
	movdqu	64(%esi),%xmm6
	pxor	32(%esp),%xmm4
	leal	80(%esi),%esi
	pxor	48(%esp),%xmm5
	movdqa	%xmm7,64(%esp)
	pxor	%xmm7,%xmm6
	call	_aesni_decrypt6
	movaps	64(%esp),%xmm1
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	32(%esp),%xmm4
	movups	%xmm2,(%edi)
	xorps	48(%esp),%xmm5
	movups	%xmm3,16(%edi)
	xorps	%xmm1,%xmm6
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	leal	80(%edi),%edi
	jmp	.L067xts_dec_done
.align	16
.L063xts_dec_one:
	movups	(%esi),%xmm2
	leal	16(%esi),%esi
	xorps	%xmm5,%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L068dec1_loop_12:
.byte	102,15,56,222,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L068dec1_loop_12
.byte	102,15,56,223,209
	xorps	%xmm5,%xmm2
	movups	%xmm2,(%edi)
	leal	16(%edi),%edi
	movdqa	%xmm5,%xmm1
	jmp	.L067xts_dec_done
.align	16
.L064xts_dec_two:
	movaps	%xmm1,%xmm6
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	leal	32(%esi),%esi
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	call	_aesni_decrypt2
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	leal	32(%edi),%edi
	movdqa	%xmm6,%xmm1
	jmp	.L067xts_dec_done
.align	16
.L065xts_dec_three:
	movaps	%xmm1,%xmm7
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	movups	32(%esi),%xmm4
	leal	48(%esi),%esi
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	call	_aesni_decrypt3
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	leal	48(%edi),%edi
	movdqa	%xmm7,%xmm1
	jmp	.L067xts_dec_done
.align	16
.L066xts_dec_four:
	movaps	%xmm1,%xmm6
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	movups	32(%esi),%xmm4
	xorps	(%esp),%xmm2
	movups	48(%esi),%xmm5
	leal	64(%esi),%esi
	xorps	16(%esp),%xmm3
	xorps	%xmm7,%xmm4
	xorps	%xmm6,%xmm5
	call	_aesni_decrypt4
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi)
	xorps	%xmm6,%xmm5
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	leal	64(%edi),%edi
	movdqa	%xmm6,%xmm1
	jmp	.L067xts_dec_done
.align	16
.L062xts_dec_done6x:
	movl	112(%esp),%eax
	andl	$15,%eax
	jz	.L069xts_dec_ret
	movl	%eax,112(%esp)
	jmp	.L070xts_dec_only_one_more
.align	16
.L067xts_dec_done:
	movl	112(%esp),%eax
	pxor	%xmm0,%xmm0
	andl	$15,%eax
	jz	.L069xts_dec_ret
	pcmpgtd	%xmm1,%xmm0
	movl	%eax,112(%esp)
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	96(%esp),%xmm3
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
.L070xts_dec_only_one_more:
	pshufd	$19,%xmm0,%xmm5
	movdqa	%xmm1,%xmm6
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm5
	pxor	%xmm1,%xmm5
	movl	%ebp,%edx
	movl	%ebx,%ecx
	movups	(%esi),%xmm2
	xorps	%xmm5,%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L071dec1_loop_13:
.byte	102,15,56,222,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L071dec1_loop_13
.byte	102,15,56,223,209
	xorps	%xmm5,%xmm2
	movups	%xmm2,(%edi)
.L072xts_dec_steal:
	movzbl	16(%esi),%ecx
	movzbl	(%edi),%edx
	leal	1(%esi),%esi
	movb	%cl,(%edi)
	movb	%dl,16(%edi)
	leal	1(%edi),%edi
	subl	$1,%eax
	jnz	.L072xts_dec_steal
	subl	112(%esp),%edi
	movl	%ebp,%edx
	movl	%ebx,%ecx
	movups	(%edi),%xmm2
	xorps	%xmm6,%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L073dec1_loop_14:
.byte	102,15,56,222,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L073dec1_loop_14
.byte	102,15,56,223,209
	xorps	%xmm6,%xmm2
	movups	%xmm2,(%edi)
.L069xts_dec_ret:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	movdqa	%xmm0,(%esp)
	pxor	%xmm3,%xmm3
	movdqa	%xmm0,16(%esp)
	pxor	%xmm4,%xmm4
	movdqa	%xmm0,32(%esp)
	pxor	%xmm5,%xmm5
	movdqa	%xmm0,48(%esp)
	pxor	%xmm6,%xmm6
	movdqa	%xmm0,64(%esp)
	pxor	%xmm7,%xmm7
	movdqa	%xmm0,80(%esp)
	movl	116(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin
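/*
 * void aesni_ocb_encrypt(const unsigned char *in, unsigned char *out,
 *		size_t blocks, const void *key, size_t start_block_num,
 *		unsigned char offset[16], const unsigned char L_[][16],
 *		unsigned char checksum[16]);
 * (assumed C prototype, following OpenSSL's ocb128.c usage)
 *
 * OCB: bsfl of the block index computes ntz(i), scaled by 16 to index
 * the precomputed L_ table, and each offset is the previous one xored
 * with L_[ntz(i)] (%xmm0 carries the running offset, %xmm1 the
 * plaintext checksum).  The grand loop stages six offsets at
 * (%esp)..80(%esp) and does six blocks per iteration.
 */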
.globl	aesni_ocb_encrypt
.type	aesni_ocb_encrypt,@function
.align	16
aesni_ocb_encrypt:
.L_aesni_ocb_encrypt_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	40(%esp),%ecx
	movl	48(%esp),%ebx
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movdqu	(%ecx),%xmm0
	movl	36(%esp),%ebp
	movdqu	(%ebx),%xmm1
	movl	44(%esp),%ebx
	movl	%esp,%ecx
	subl	$132,%esp
	andl	$-16,%esp
	subl	%esi,%edi
	shll	$4,%eax
	leal	-96(%esi,%eax,1),%eax
	movl	%edi,120(%esp)
	movl	%eax,124(%esp)
	movl	%ecx,128(%esp)
	movl	240(%edx),%ecx
	testl	$1,%ebp
	jnz	.L074odd
	bsfl	%ebp,%eax
	addl	$1,%ebp
	shll	$4,%eax
	movdqu	(%ebx,%eax,1),%xmm7
	movl	%edx,%eax
	movdqu	(%esi),%xmm2
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm7
	pxor	%xmm2,%xmm1
	pxor	%xmm7,%xmm2
	movdqa	%xmm1,%xmm6
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L075enc1_loop_15:
.byte	102,15,56,220,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L075enc1_loop_15
.byte	102,15,56,221,209
	xorps	%xmm7,%xmm2
	movdqa	%xmm7,%xmm0
	movdqa	%xmm6,%xmm1
	movups	%xmm2,-16(%edi,%esi,1)
	movl	240(%eax),%ecx
	movl	%eax,%edx
	movl	124(%esp),%eax
.L074odd:
	shll	$4,%ecx
	movl	$16,%edi
	subl	%ecx,%edi
	movl	%edx,112(%esp)
	leal	32(%edx,%ecx,1),%edx
	movl	%edi,116(%esp)
	cmpl	%eax,%esi
	ja	.L076short
	jmp	.L077grandloop
.align	32
.L077grandloop:
	leal	1(%ebp),%ecx
	leal	3(%ebp),%eax
	leal	5(%ebp),%edi
	addl	$6,%ebp
	bsfl	%ecx,%ecx
	bsfl	%eax,%eax
	bsfl	%edi,%edi
	shll	$4,%ecx
	shll	$4,%eax
	shll	$4,%edi
	movdqu	(%ebx),%xmm2
	movdqu	(%ebx,%ecx,1),%xmm3
	movl	116(%esp),%ecx
	movdqa	%xmm2,%xmm4
	movdqu	(%ebx,%eax,1),%xmm5
	movdqa	%xmm2,%xmm6
	movdqu	(%ebx,%edi,1),%xmm7
	pxor	%xmm0,%xmm2
	pxor	%xmm2,%xmm3
	movdqa	%xmm2,(%esp)
	pxor	%xmm3,%xmm4
	movdqa	%xmm3,16(%esp)
	pxor	%xmm4,%xmm5
	movdqa	%xmm4,32(%esp)
	pxor	%xmm5,%xmm6
	movdqa	%xmm5,48(%esp)
	pxor	%xmm6,%xmm7
	movdqa	%xmm6,64(%esp)
	movdqa	%xmm7,80(%esp)
	movups	-48(%edx,%ecx,1),%xmm0
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	pxor	%xmm2,%xmm1
	pxor	%xmm0,%xmm2
	pxor	%xmm3,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	pxor	%xmm5,%xmm1
	pxor	%xmm0,%xmm5
	pxor	%xmm6,%xmm1
	pxor	%xmm0,%xmm6
	pxor	%xmm7,%xmm1
	pxor	%xmm0,%xmm7
	movdqa	%xmm1,96(%esp)
	movups	-32(%edx,%ecx,1),%xmm1
	pxor	(%esp),%xmm2
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
	pxor	48(%esp),%xmm5
	pxor	64(%esp),%xmm6
	pxor	80(%esp),%xmm7
	movups	-16(%edx,%ecx,1),%xmm0
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movl	120(%esp),%edi
	movl	124(%esp),%eax
	call	.L_aesni_encrypt6_enter
	movdqa	80(%esp),%xmm0
	pxor	(%esp),%xmm2
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
	pxor	48(%esp),%xmm5
	pxor	64(%esp),%xmm6
	pxor	%xmm0,%xmm7
	movdqa	96(%esp),%xmm1
	movdqu	%xmm2,-96(%edi,%esi,1)
	movdqu	%xmm3,-80(%edi,%esi,1)
	movdqu	%xmm4,-64(%edi,%esi,1)
	movdqu	%xmm5,-48(%edi,%esi,1)
	movdqu	%xmm6,-32(%edi,%esi,1)
	movdqu	%xmm7,-16(%edi,%esi,1)
	cmpl	%eax,%esi
	jb	.L077grandloop
.L076short:
	addl	$96,%eax
	subl	%esi,%eax
	jz	.L078done
	cmpl	$32,%eax
	jb	.L079one
	je	.L080two
	cmpl	$64,%eax
	jb	.L081three
	je	.L082four
	leal	1(%ebp),%ecx
	leal	3(%ebp),%eax
	bsfl	%ecx,%ecx
	bsfl	%eax,%eax
	shll	$4,%ecx
	shll	$4,%eax
	movdqu	(%ebx),%xmm2
	movdqu	(%ebx,%ecx,1),%xmm3
	movl	116(%esp),%ecx
	movdqa	%xmm2,%xmm4
	movdqu	(%ebx,%eax,1),%xmm5
	movdqa	%xmm2,%xmm6
	pxor	%xmm0,%xmm2
	pxor	%xmm2,%xmm3
	movdqa	%xmm2,(%esp)
	pxor	%xmm3,%xmm4
	movdqa	%xmm3,16(%esp)
	pxor	%xmm4,%xmm5
	movdqa	%xmm4,32(%esp)
	pxor	%xmm5,%xmm6
	movdqa	%xmm5,48(%esp)
	pxor	%xmm6,%xmm7
	movdqa	%xmm6,64(%esp)
	movups	-48(%edx,%ecx,1),%xmm0
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm2,%xmm1
	pxor	%xmm0,%xmm2
	pxor	%xmm3,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	pxor	%xmm5,%xmm1
	pxor	%xmm0,%xmm5
	pxor	%xmm6,%xmm1
	pxor	%xmm0,%xmm6
	movdqa	%xmm1,96(%esp)
	movups	-32(%edx,%ecx,1),%xmm1
	pxor	(%esp),%xmm2
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
	pxor	48(%esp),%xmm5
	pxor	64(%esp),%xmm6
	movups	-16(%edx,%ecx,1),%xmm0
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movl	120(%esp),%edi
	call	.L_aesni_encrypt6_enter
	movdqa	64(%esp),%xmm0
	pxor	(%esp),%xmm2
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
	pxor	48(%esp),%xmm5
	pxor	%xmm0,%xmm6
	movdqa	96(%esp),%xmm1
	movdqu	%xmm2,(%edi,%esi,1)
	movdqu	%xmm3,16(%edi,%esi,1)
	movdqu	%xmm4,32(%edi,%esi,1)
	movdqu	%xmm5,48(%edi,%esi,1)
	movdqu	%xmm6,64(%edi,%esi,1)
	jmp	.L078done
.align	16
.L079one:
	movdqu	(%ebx),%xmm7
	movl	112(%esp),%edx
	movdqu	(%esi),%xmm2
	movl	240(%edx),%ecx
	pxor	%xmm0,%xmm7
	pxor	%xmm2,%xmm1
	pxor	%xmm7,%xmm2
	movdqa	%xmm1,%xmm6
	movl	120(%esp),%edi
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L083enc1_loop_16:
.byte	102,15,56,220,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L083enc1_loop_16
.byte	102,15,56,221,209
	xorps	%xmm7,%xmm2
	movdqa	%xmm7,%xmm0
	movdqa	%xmm6,%xmm1
	movups	%xmm2,(%edi,%esi,1)
	jmp	.L078done
.align	16
.L080two:
	leal	1(%ebp),%ecx
	movl	112(%esp),%edx
	bsfl	%ecx,%ecx
	shll	$4,%ecx
	movdqu	(%ebx),%xmm6
	movdqu	(%ebx,%ecx,1),%xmm7
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movl	240(%edx),%ecx
	pxor	%xmm0,%xmm6
	pxor	%xmm6,%xmm7
	pxor	%xmm2,%xmm1
	pxor	%xmm6,%xmm2
	pxor	%xmm3,%xmm1
	pxor	%xmm7,%xmm3
	movdqa	%xmm1,%xmm5
	movl	120(%esp),%edi
	call	_aesni_encrypt2
	xorps	%xmm6,%xmm2
	xorps	%xmm7,%xmm3
	movdqa	%xmm7,%xmm0
	movdqa	%xmm5,%xmm1
	movups	%xmm2,(%edi,%esi,1)
	movups	%xmm3,16(%edi,%esi,1)
	jmp	.L078done
.align	16
.L081three:
	leal	1(%ebp),%ecx
	movl	112(%esp),%edx
	bsfl	%ecx,%ecx
	shll	$4,%ecx
	movdqu	(%ebx),%xmm5
	movdqu	(%ebx,%ecx,1),%xmm6
	movdqa	%xmm5,%xmm7
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movl	240(%edx),%ecx
	pxor	%xmm0,%xmm5
	pxor	%xmm5,%xmm6
	pxor	%xmm6,%xmm7
	pxor	%xmm2,%xmm1
	pxor	%xmm5,%xmm2
	pxor	%xmm3,%xmm1
	pxor	%xmm6,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm7,%xmm4
	movdqa	%xmm1,96(%esp)
	movl	120(%esp),%edi
	call	_aesni_encrypt3
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	movdqa	%xmm7,%xmm0
	movdqa	96(%esp),%xmm1
	movups	%xmm2,(%edi,%esi,1)
	movups	%xmm3,16(%edi,%esi,1)
	movups	%xmm4,32(%edi,%esi,1)
	jmp	.L078done
.align	16
.L082four:
	leal	1(%ebp),%ecx
	leal	3(%ebp),%eax
	bsfl	%ecx,%ecx
	bsfl	%eax,%eax
	movl	112(%esp),%edx
	shll	$4,%ecx
	shll	$4,%eax
	movdqu	(%ebx),%xmm4
	movdqu	(%ebx,%ecx,1),%xmm5
	movdqa	%xmm4,%xmm6
	movdqu	(%ebx,%eax,1),%xmm7
	pxor	%xmm0,%xmm4
	movdqu	(%esi),%xmm2
	pxor	%xmm4,%xmm5
	movdqu	16(%esi),%xmm3
	pxor	%xmm5,%xmm6
	movdqa	%xmm4,(%esp)
	pxor	%xmm6,%xmm7
	movdqa	%xmm5,16(%esp)
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movl	240(%edx),%ecx
	pxor	%xmm2,%xmm1
	pxor	(%esp),%xmm2
	pxor	%xmm3,%xmm1
	pxor	16(%esp),%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm6,%xmm4
	pxor	%xmm5,%xmm1
	pxor	%xmm7,%xmm5
	movdqa	%xmm1,96(%esp)
	movl	120(%esp),%edi
	call	_aesni_encrypt4
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	%xmm6,%xmm4
	movups	%xmm2,(%edi,%esi,1)
	xorps	%xmm7,%xmm5
	movups	%xmm3,16(%edi,%esi,1)
	movdqa	%xmm7,%xmm0
	movups	%xmm4,32(%edi,%esi,1)
	movdqa	96(%esp),%xmm1
	movups	%xmm5,48(%edi,%esi,1)
.L078done:
	movl	128(%esp),%edx
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	movdqa	%xmm2,(%esp)
	pxor	%xmm4,%xmm4
	movdqa	%xmm2,16(%esp)
	pxor	%xmm5,%xmm5
	movdqa	%xmm2,32(%esp)
	pxor	%xmm6,%xmm6
	movdqa	%xmm2,48(%esp)
	pxor	%xmm7,%xmm7
	movdqa	%xmm2,64(%esp)
	movdqa	%xmm2,80(%esp)
	movdqa	%xmm2,96(%esp)
	leal	(%edx),%esp
	movl	40(%esp),%ecx
	movl	48(%esp),%ebx
	movdqu	%xmm0,(%ecx)
	pxor	%xmm0,%xmm0
	movdqu	%xmm1,(%ebx)
	pxor	%xmm1,%xmm1
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_ocb_encrypt,.-.L_aesni_ocb_encrypt_begin
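/*
 * void aesni_ocb_decrypt(...);	(same assumed signature as
 * aesni_ocb_encrypt)
 *
 * Decrypt counterpart with identical offset bookkeeping; here the
 * checksum (%xmm1) is accumulated over the recovered plaintext after
 * aesdeclast rather than over the input.
 */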
2188.globl	aesni_ocb_decrypt
2189.type	aesni_ocb_decrypt,@function
2190.align	16
2191aesni_ocb_decrypt:
2192.L_aesni_ocb_decrypt_begin:
2193	pushl	%ebp
2194	pushl	%ebx
2195	pushl	%esi
2196	pushl	%edi
2197	movl	40(%esp),%ecx
2198	movl	48(%esp),%ebx
2199	movl	20(%esp),%esi
2200	movl	24(%esp),%edi
2201	movl	28(%esp),%eax
2202	movl	32(%esp),%edx
2203	movdqu	(%ecx),%xmm0
2204	movl	36(%esp),%ebp
2205	movdqu	(%ebx),%xmm1
2206	movl	44(%esp),%ebx
2207	movl	%esp,%ecx
2208	subl	$132,%esp
2209	andl	$-16,%esp
2210	subl	%esi,%edi
2211	shll	$4,%eax
2212	leal	-96(%esi,%eax,1),%eax
2213	movl	%edi,120(%esp)
2214	movl	%eax,124(%esp)
2215	movl	%ecx,128(%esp)
2216	movl	240(%edx),%ecx
2217	testl	$1,%ebp
2218	jnz	.L084odd
2219	bsfl	%ebp,%eax
2220	addl	$1,%ebp
2221	shll	$4,%eax
2222	movdqu	(%ebx,%eax,1),%xmm7
2223	movl	%edx,%eax
2224	movdqu	(%esi),%xmm2
2225	leal	16(%esi),%esi
2226	pxor	%xmm0,%xmm7
2227	pxor	%xmm7,%xmm2
2228	movdqa	%xmm1,%xmm6
2229	movups	(%edx),%xmm0
2230	movups	16(%edx),%xmm1
2231	leal	32(%edx),%edx
2232	xorps	%xmm0,%xmm2
2233.L085dec1_loop_17:
2234.byte	102,15,56,222,209
2235	decl	%ecx
2236	movups	(%edx),%xmm1
2237	leal	16(%edx),%edx
2238	jnz	.L085dec1_loop_17
2239.byte	102,15,56,223,209
2240	xorps	%xmm7,%xmm2
2241	movaps	%xmm6,%xmm1
2242	movdqa	%xmm7,%xmm0
2243	xorps	%xmm2,%xmm1
2244	movups	%xmm2,-16(%edi,%esi,1)
2245	movl	240(%eax),%ecx
2246	movl	%eax,%edx
2247	movl	124(%esp),%eax
2248.L084odd:
2249	shll	$4,%ecx
2250	movl	$16,%edi
2251	subl	%ecx,%edi
2252	movl	%edx,112(%esp)
2253	leal	32(%edx,%ecx,1),%edx
2254	movl	%edi,116(%esp)
2255	cmpl	%eax,%esi
2256	ja	.L086short
2257	jmp	.L087grandloop
2258.align	32
2259.L087grandloop:
2260	leal	1(%ebp),%ecx
2261	leal	3(%ebp),%eax
2262	leal	5(%ebp),%edi
2263	addl	$6,%ebp
2264	bsfl	%ecx,%ecx
2265	bsfl	%eax,%eax
2266	bsfl	%edi,%edi
2267	shll	$4,%ecx
2268	shll	$4,%eax
2269	shll	$4,%edi
2270	movdqu	(%ebx),%xmm2
2271	movdqu	(%ebx,%ecx,1),%xmm3
2272	movl	116(%esp),%ecx
2273	movdqa	%xmm2,%xmm4
2274	movdqu	(%ebx,%eax,1),%xmm5
2275	movdqa	%xmm2,%xmm6
2276	movdqu	(%ebx,%edi,1),%xmm7
2277	pxor	%xmm0,%xmm2
2278	pxor	%xmm2,%xmm3
2279	movdqa	%xmm2,(%esp)
2280	pxor	%xmm3,%xmm4
2281	movdqa	%xmm3,16(%esp)
2282	pxor	%xmm4,%xmm5
2283	movdqa	%xmm4,32(%esp)
2284	pxor	%xmm5,%xmm6
2285	movdqa	%xmm5,48(%esp)
2286	pxor	%xmm6,%xmm7
2287	movdqa	%xmm6,64(%esp)
2288	movdqa	%xmm7,80(%esp)
2289	movups	-48(%edx,%ecx,1),%xmm0
2290	movdqu	(%esi),%xmm2
2291	movdqu	16(%esi),%xmm3
2292	movdqu	32(%esi),%xmm4
2293	movdqu	48(%esi),%xmm5
2294	movdqu	64(%esi),%xmm6
2295	movdqu	80(%esi),%xmm7
2296	leal	96(%esi),%esi
2297	movdqa	%xmm1,96(%esp)
2298	pxor	%xmm0,%xmm2
2299	pxor	%xmm0,%xmm3
2300	pxor	%xmm0,%xmm4
2301	pxor	%xmm0,%xmm5
2302	pxor	%xmm0,%xmm6
2303	pxor	%xmm0,%xmm7
2304	movups	-32(%edx,%ecx,1),%xmm1
2305	pxor	(%esp),%xmm2
2306	pxor	16(%esp),%xmm3
2307	pxor	32(%esp),%xmm4
2308	pxor	48(%esp),%xmm5
2309	pxor	64(%esp),%xmm6
2310	pxor	80(%esp),%xmm7
2311	movups	-16(%edx,%ecx,1),%xmm0
2312.byte	102,15,56,222,209
2313.byte	102,15,56,222,217
2314.byte	102,15,56,222,225
2315.byte	102,15,56,222,233
2316.byte	102,15,56,222,241
2317.byte	102,15,56,222,249
2318	movl	120(%esp),%edi
2319	movl	124(%esp),%eax
2320	call	.L_aesni_decrypt6_enter
2321	movdqa	80(%esp),%xmm0
2322	pxor	(%esp),%xmm2
2323	movdqa	96(%esp),%xmm1
2324	pxor	16(%esp),%xmm3
2325	pxor	32(%esp),%xmm4
2326	pxor	48(%esp),%xmm5
2327	pxor	64(%esp),%xmm6
2328	pxor	%xmm0,%xmm7
2329	pxor	%xmm2,%xmm1
2330	movdqu	%xmm2,-96(%edi,%esi,1)
2331	pxor	%xmm3,%xmm1
2332	movdqu	%xmm3,-80(%edi,%esi,1)
2333	pxor	%xmm4,%xmm1
2334	movdqu	%xmm4,-64(%edi,%esi,1)
2335	pxor	%xmm5,%xmm1
2336	movdqu	%xmm5,-48(%edi,%esi,1)
2337	pxor	%xmm6,%xmm1
2338	movdqu	%xmm6,-32(%edi,%esi,1)
2339	pxor	%xmm7,%xmm1
2340	movdqu	%xmm7,-16(%edi,%esi,1)
2341	cmpl	%eax,%esi
2342	jb	.L087grandloop
.L086short:
	addl	$96,%eax
	subl	%esi,%eax
	jz	.L088done
	cmpl	$32,%eax
	jb	.L089one
	je	.L090two
	cmpl	$64,%eax
	jb	.L091three
	je	.L092four
	leal	1(%ebp),%ecx
	leal	3(%ebp),%eax
	bsfl	%ecx,%ecx
	bsfl	%eax,%eax
	shll	$4,%ecx
	shll	$4,%eax
	movdqu	(%ebx),%xmm2
	movdqu	(%ebx,%ecx,1),%xmm3
	movl	116(%esp),%ecx
	movdqa	%xmm2,%xmm4
	movdqu	(%ebx,%eax,1),%xmm5
	movdqa	%xmm2,%xmm6
	pxor	%xmm0,%xmm2
	pxor	%xmm2,%xmm3
	movdqa	%xmm2,(%esp)
	pxor	%xmm3,%xmm4
	movdqa	%xmm3,16(%esp)
	pxor	%xmm4,%xmm5
	movdqa	%xmm4,32(%esp)
	pxor	%xmm5,%xmm6
	movdqa	%xmm5,48(%esp)
	pxor	%xmm6,%xmm7
	movdqa	%xmm6,64(%esp)
	movups	-48(%edx,%ecx,1),%xmm0
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	pxor	%xmm7,%xmm7
	movdqa	%xmm1,96(%esp)
	pxor	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	movups	-32(%edx,%ecx,1),%xmm1
	pxor	(%esp),%xmm2
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
	pxor	48(%esp),%xmm5
	pxor	64(%esp),%xmm6
	movups	-16(%edx,%ecx,1),%xmm0
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	movl	120(%esp),%edi
	call	.L_aesni_decrypt6_enter
	movdqa	64(%esp),%xmm0
	pxor	(%esp),%xmm2
	movdqa	96(%esp),%xmm1
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
	pxor	48(%esp),%xmm5
	pxor	%xmm0,%xmm6
	pxor	%xmm2,%xmm1
	movdqu	%xmm2,(%edi,%esi,1)
	pxor	%xmm3,%xmm1
	movdqu	%xmm3,16(%edi,%esi,1)
	pxor	%xmm4,%xmm1
	movdqu	%xmm4,32(%edi,%esi,1)
	pxor	%xmm5,%xmm1
	movdqu	%xmm5,48(%edi,%esi,1)
	pxor	%xmm6,%xmm1
	movdqu	%xmm6,64(%edi,%esi,1)
	jmp	.L088done
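/*
 * One- to four-block tails.  Each variant rebuilds the next OCB
 * offsets in spare xmm registers, XORs them into the ciphertext,
 * decrypts with the matching _aesni_decryptN helper (inline
 * single-block loop for the one-block case), XORs the offsets back out
 * and folds the plaintext into the checksum, leaving the final offset
 * in %xmm0 and the checksum in %xmm1 for the epilogue.
 */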
.align	16
.L089one:
	movdqu	(%ebx),%xmm7
	movl	112(%esp),%edx
	movdqu	(%esi),%xmm2
	movl	240(%edx),%ecx
	pxor	%xmm0,%xmm7
	pxor	%xmm7,%xmm2
	movdqa	%xmm1,%xmm6
	movl	120(%esp),%edi
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L093dec1_loop_18:
.byte	102,15,56,222,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L093dec1_loop_18
.byte	102,15,56,223,209
	xorps	%xmm7,%xmm2
	movaps	%xmm6,%xmm1
	movdqa	%xmm7,%xmm0
	xorps	%xmm2,%xmm1
	movups	%xmm2,(%edi,%esi,1)
	jmp	.L088done
.align	16
.L090two:
	leal	1(%ebp),%ecx
	movl	112(%esp),%edx
	bsfl	%ecx,%ecx
	shll	$4,%ecx
	movdqu	(%ebx),%xmm6
	movdqu	(%ebx,%ecx,1),%xmm7
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movl	240(%edx),%ecx
	movdqa	%xmm1,%xmm5
	pxor	%xmm0,%xmm6
	pxor	%xmm6,%xmm7
	pxor	%xmm6,%xmm2
	pxor	%xmm7,%xmm3
	movl	120(%esp),%edi
	call	_aesni_decrypt2
	xorps	%xmm6,%xmm2
	xorps	%xmm7,%xmm3
	movdqa	%xmm7,%xmm0
	xorps	%xmm2,%xmm5
	movups	%xmm2,(%edi,%esi,1)
	xorps	%xmm3,%xmm5
	movups	%xmm3,16(%edi,%esi,1)
	movaps	%xmm5,%xmm1
	jmp	.L088done
.align	16
.L091three:
	leal	1(%ebp),%ecx
	movl	112(%esp),%edx
	bsfl	%ecx,%ecx
	shll	$4,%ecx
	movdqu	(%ebx),%xmm5
	movdqu	(%ebx,%ecx,1),%xmm6
	movdqa	%xmm5,%xmm7
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movl	240(%edx),%ecx
	movdqa	%xmm1,96(%esp)
	pxor	%xmm0,%xmm5
	pxor	%xmm5,%xmm6
	pxor	%xmm6,%xmm7
	pxor	%xmm5,%xmm2
	pxor	%xmm6,%xmm3
	pxor	%xmm7,%xmm4
	movl	120(%esp),%edi
	call	_aesni_decrypt3
	movdqa	96(%esp),%xmm1
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi,%esi,1)
	pxor	%xmm2,%xmm1
	movdqa	%xmm7,%xmm0
	movups	%xmm3,16(%edi,%esi,1)
	pxor	%xmm3,%xmm1
	movups	%xmm4,32(%edi,%esi,1)
	pxor	%xmm4,%xmm1
	jmp	.L088done
.align	16
.L092four:
	leal	1(%ebp),%ecx
	leal	3(%ebp),%eax
	bsfl	%ecx,%ecx
	bsfl	%eax,%eax
	movl	112(%esp),%edx
	shll	$4,%ecx
	shll	$4,%eax
	movdqu	(%ebx),%xmm4
	movdqu	(%ebx,%ecx,1),%xmm5
	movdqa	%xmm4,%xmm6
	movdqu	(%ebx,%eax,1),%xmm7
	pxor	%xmm0,%xmm4
	movdqu	(%esi),%xmm2
	pxor	%xmm4,%xmm5
	movdqu	16(%esi),%xmm3
	pxor	%xmm5,%xmm6
	movdqa	%xmm4,(%esp)
	pxor	%xmm6,%xmm7
	movdqa	%xmm5,16(%esp)
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movl	240(%edx),%ecx
	movdqa	%xmm1,96(%esp)
	pxor	(%esp),%xmm2
	pxor	16(%esp),%xmm3
	pxor	%xmm6,%xmm4
	pxor	%xmm7,%xmm5
	movl	120(%esp),%edi
	call	_aesni_decrypt4
	movdqa	96(%esp),%xmm1
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	%xmm6,%xmm4
	movups	%xmm2,(%edi,%esi,1)
	pxor	%xmm2,%xmm1
	xorps	%xmm7,%xmm5
	movups	%xmm3,16(%edi,%esi,1)
	pxor	%xmm3,%xmm1
	movdqa	%xmm7,%xmm0
	movups	%xmm4,32(%edi,%esi,1)
	pxor	%xmm4,%xmm1
	movups	%xmm5,48(%edi,%esi,1)
	pxor	%xmm5,%xmm1
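/*
 * Common exit: the stacked offsets are wiped and all xmm registers
 * cleared so no key- or offset-derived material is left on the stack,
 * then the final offset (%xmm0) and checksum (%xmm1) are stored
 * through the caller's pointers at 40(%esp) and 48(%esp) once the
 * frame is unwound.
 */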
.L088done:
	movl	128(%esp),%edx
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	movdqa	%xmm2,(%esp)
	pxor	%xmm4,%xmm4
	movdqa	%xmm2,16(%esp)
	pxor	%xmm5,%xmm5
	movdqa	%xmm2,32(%esp)
	pxor	%xmm6,%xmm6
	movdqa	%xmm2,48(%esp)
	pxor	%xmm7,%xmm7
	movdqa	%xmm2,64(%esp)
	movdqa	%xmm2,80(%esp)
	movdqa	%xmm2,96(%esp)
	leal	(%edx),%esp
	movl	40(%esp),%ecx
	movl	48(%esp),%ebx
	movdqu	%xmm0,(%ecx)
	pxor	%xmm0,%xmm0
	movdqu	%xmm1,(%ebx)
	pxor	%xmm1,%xmm1
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_ocb_decrypt,.-.L_aesni_ocb_decrypt_begin
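/*
 * The C prototype, as declared in OpenSSL, is
 *
 *	void aesni_cbc_encrypt(const unsigned char *in,
 *				unsigned char *out, size_t length,
 *				const AES_KEY *key, unsigned char *ivec,
 *				int enc);
 *
 * After the four pushes the cdecl arguments sit at 20(%esp) in,
 * 24(%esp) out, 28(%esp) length, 32(%esp) key, 36(%esp) ivec and
 * 40(%esp) enc; enc == 0 selects the decrypt path.  A 16-byte-aligned
 * scratch slot is carved out below the frame and the old %esp is
 * saved at 16(%esp).
 */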
.globl	aesni_cbc_encrypt
.type	aesni_cbc_encrypt,@function
.align	16
aesni_cbc_encrypt:
.L_aesni_cbc_encrypt_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi
	movl	%esp,%ebx
	movl	24(%esp),%edi
	subl	$24,%ebx
	movl	28(%esp),%eax
	andl	$-16,%ebx
	movl	32(%esp),%edx
	movl	36(%esp),%ebp
	testl	%eax,%eax
	jz	.L094cbc_abort
	cmpl	$0,40(%esp)
	xchgl	%esp,%ebx
	movups	(%ebp),%xmm7
	movl	240(%edx),%ecx
	movl	%edx,%ebp
	movl	%ebx,16(%esp)
	movl	%ecx,%ebx
	je	.L095cbc_decrypt
	movaps	%xmm7,%xmm2
	cmpl	$16,%eax
	jb	.L096cbc_enc_tail
	subl	$16,%eax
	jmp	.L097cbc_enc_loop
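/*
 * CBC encryption is inherently serial: each plaintext block is XORed
 * into the previous ciphertext (carried in %xmm2) before being
 * encrypted, so only one block is in flight per iteration.
 */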
.align	16
.L097cbc_enc_loop:
	movups	(%esi),%xmm7
	leal	16(%esi),%esi
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm7
	leal	32(%edx),%edx
	xorps	%xmm7,%xmm2
.L098enc1_loop_19:
.byte	102,15,56,220,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L098enc1_loop_19
.byte	102,15,56,221,209
	movl	%ebx,%ecx
	movl	%ebp,%edx
	movups	%xmm2,(%edi)
	leal	16(%edi),%edi
	subl	$16,%eax
	jnc	.L097cbc_enc_loop
	addl	$16,%eax
	jnz	.L096cbc_enc_tail
	movaps	%xmm2,%xmm7
	pxor	%xmm2,%xmm2
	jmp	.L099cbc_ret
.L096cbc_enc_tail:
	movl	%eax,%ecx
.long	2767451785	# 89,f6,f3,a4: mov %esi,%esi; rep movsb - copy the partial block to the output
	movl	$16,%ecx
	subl	%eax,%ecx
	xorl	%eax,%eax
.long	2868115081	# 89,f6,f3,aa: mov %esi,%esi; rep stosb - zero-pad the block to 16 bytes
	leal	-16(%edi),%edi
	movl	%ebx,%ecx
	movl	%edi,%esi
	movl	%ebp,%edx
	jmp	.L097cbc_enc_loop
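/*
 * CBC decryption has no such chain on the cipher side, so the loop
 * below keeps six blocks in flight through _aesni_decrypt6 and
 * re-reads the ciphertext afterwards to supply the XOR chain; the last
 * ciphertext block becomes the next IV.
 */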
.align	16
.L095cbc_decrypt:
	cmpl	$80,%eax
	jbe	.L100cbc_dec_tail
	movaps	%xmm7,(%esp)
	subl	$80,%eax
	jmp	.L101cbc_dec_loop6_enter
.align	16
.L102cbc_dec_loop6:
	movaps	%xmm0,(%esp)
	movups	%xmm7,(%edi)
	leal	16(%edi),%edi
.L101cbc_dec_loop6_enter:
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	call	_aesni_decrypt6
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	(%esp),%xmm2
	xorps	%xmm1,%xmm3
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm4
	movups	48(%esi),%xmm0
	xorps	%xmm1,%xmm5
	movups	64(%esi),%xmm1
	xorps	%xmm0,%xmm6
	movups	80(%esi),%xmm0
	xorps	%xmm1,%xmm7
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	leal	96(%esi),%esi
	movups	%xmm4,32(%edi)
	movl	%ebx,%ecx
	movups	%xmm5,48(%edi)
	movl	%ebp,%edx
	movups	%xmm6,64(%edi)
	leal	80(%edi),%edi
	subl	$96,%eax
	ja	.L102cbc_dec_loop6
	movaps	%xmm7,%xmm2
	movaps	%xmm0,%xmm7
	addl	$80,%eax
	jle	.L103cbc_dec_clear_tail_collected
	movups	%xmm2,(%edi)
	leal	16(%edi),%edi
.L100cbc_dec_tail:
	movups	(%esi),%xmm2
	movaps	%xmm2,%xmm6
	cmpl	$16,%eax
	jbe	.L104cbc_dec_one
	movups	16(%esi),%xmm3
	movaps	%xmm3,%xmm5
	cmpl	$32,%eax
	jbe	.L105cbc_dec_two
	movups	32(%esi),%xmm4
	cmpl	$48,%eax
	jbe	.L106cbc_dec_three
	movups	48(%esi),%xmm5
	cmpl	$64,%eax
	jbe	.L107cbc_dec_four
	movups	64(%esi),%xmm6
	movaps	%xmm7,(%esp)
	movups	(%esi),%xmm2
	xorps	%xmm7,%xmm7
	call	_aesni_decrypt6
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	(%esp),%xmm2
	xorps	%xmm1,%xmm3
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm4
	movups	48(%esi),%xmm0
	xorps	%xmm1,%xmm5
	movups	64(%esi),%xmm7
	xorps	%xmm0,%xmm6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%edi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%edi)
	pxor	%xmm5,%xmm5
	leal	64(%edi),%edi
	movaps	%xmm6,%xmm2
	pxor	%xmm6,%xmm6
	subl	$80,%eax
	jmp	.L108cbc_dec_tail_collected
.align	16
.L104cbc_dec_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L109dec1_loop_20:
.byte	102,15,56,222,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L109dec1_loop_20
.byte	102,15,56,223,209
	xorps	%xmm7,%xmm2
	movaps	%xmm6,%xmm7
	subl	$16,%eax
	jmp	.L108cbc_dec_tail_collected
.align	16
.L105cbc_dec_two:
	call	_aesni_decrypt2
	xorps	%xmm7,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movaps	%xmm3,%xmm2
	pxor	%xmm3,%xmm3
	leal	16(%edi),%edi
	movaps	%xmm5,%xmm7
	subl	$32,%eax
	jmp	.L108cbc_dec_tail_collected
.align	16
.L106cbc_dec_three:
	call	_aesni_decrypt3
	xorps	%xmm7,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm5,%xmm4
	movups	%xmm2,(%edi)
	movaps	%xmm4,%xmm2
	pxor	%xmm4,%xmm4
	movups	%xmm3,16(%edi)
	pxor	%xmm3,%xmm3
	leal	32(%edi),%edi
	movups	32(%esi),%xmm7
	subl	$48,%eax
	jmp	.L108cbc_dec_tail_collected
.align	16
.L107cbc_dec_four:
	call	_aesni_decrypt4
	movups	16(%esi),%xmm1
	movups	32(%esi),%xmm0
	xorps	%xmm7,%xmm2
	movups	48(%esi),%xmm7
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	xorps	%xmm1,%xmm4
	movups	%xmm3,16(%edi)
	pxor	%xmm3,%xmm3
	xorps	%xmm0,%xmm5
	movups	%xmm4,32(%edi)
	pxor	%xmm4,%xmm4
	leal	48(%edi),%edi
	movaps	%xmm5,%xmm2
	pxor	%xmm5,%xmm5
	subl	$64,%eax
	jmp	.L108cbc_dec_tail_collected
.align	16
.L103cbc_dec_clear_tail_collected:
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
.L108cbc_dec_tail_collected:
	andl	$15,%eax
	jnz	.L110cbc_dec_tail_partial
	movups	%xmm2,(%edi)
	pxor	%xmm0,%xmm0
	jmp	.L099cbc_ret
.align	16
.L110cbc_dec_tail_partial:
	movaps	%xmm2,(%esp)
	pxor	%xmm0,%xmm0
	movl	$16,%ecx
	movl	%esp,%esi
	subl	%eax,%ecx
.long	2767451785	# 89,f6,f3,a4: mov %esi,%esi; rep movsb (raw opcode bytes)
	movdqa	%xmm2,(%esp)
.L099cbc_ret:
	movl	16(%esp),%esp
	movl	36(%esp),%ebp
	pxor	%xmm2,%xmm2
	pxor	%xmm1,%xmm1
	movups	%xmm7,(%ebp)
	pxor	%xmm7,%xmm7
.L094cbc_abort:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin
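/*
 * _aesni_set_encrypt_key expands a user key into an encryption
 * schedule.  Arguments arrive in registers: %eax user key, %ecx key
 * size in bits, %edx schedule.  The mask $268437504 is 0x10000800,
 * apparently the AVX and XOP bits of OPENSSL_ia32cap_P; when the
 * result equals 0x10000000 (AVX without XOP) the "_alt" paths below
 * are taken, which replace the aeskeygenassist sequences with
 * pshufb/aesenclast on the constants at .Lkey_const.  The count stored
 * at offset 240 of the schedule is 9/11/13, one less than the 10/12/14
 * AES rounds, matching the aesenc loop counts used elsewhere in this
 * file.
 */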
.type	_aesni_set_encrypt_key,@function
.align	16
_aesni_set_encrypt_key:
	pushl	%ebp
	pushl	%ebx
	testl	%eax,%eax
	jz	.L111bad_pointer
	testl	%edx,%edx
	jz	.L111bad_pointer
	call	.L112pic
.L112pic:
	popl	%ebx
	leal	.Lkey_const-.L112pic(%ebx),%ebx
	leal	OPENSSL_ia32cap_P-.Lkey_const(%ebx),%ebp
	movups	(%eax),%xmm0
	xorps	%xmm4,%xmm4
	movl	4(%ebp),%ebp
	leal	16(%edx),%edx
	andl	$268437504,%ebp
	cmpl	$256,%ecx
	je	.L11314rounds
	cmpl	$192,%ecx
	je	.L11412rounds
	cmpl	$128,%ecx
	jne	.L115bad_keybits
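/*
 * 128-bit key: 10 rounds, 11 round keys.  The classic path feeds the
 * round constants 1,2,4,...,0x80,0x1b,0x36 through aeskeygenassist
 * (.byte 102,15,58,223,200,rcon is aeskeygenassist $rcon,%xmm0,%xmm1),
 * and the key_128 helpers finish each round key: the shufps/xorps
 * pairs accumulate the prefix-XOR of the four key words and
 * shufps $255 broadcasts the substituted-and-rotated word.
 */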
.align	16
.L11610rounds:
	cmpl	$268435456,%ebp
	je	.L11710rounds_alt
	movl	$9,%ecx
	movups	%xmm0,-16(%edx)
.byte	102,15,58,223,200,1
	call	.L118key_128_cold
.byte	102,15,58,223,200,2
	call	.L119key_128
.byte	102,15,58,223,200,4
	call	.L119key_128
.byte	102,15,58,223,200,8
	call	.L119key_128
.byte	102,15,58,223,200,16
	call	.L119key_128
.byte	102,15,58,223,200,32
	call	.L119key_128
.byte	102,15,58,223,200,64
	call	.L119key_128
.byte	102,15,58,223,200,128
	call	.L119key_128
.byte	102,15,58,223,200,27
	call	.L119key_128
.byte	102,15,58,223,200,54
	call	.L119key_128
	movups	%xmm0,(%edx)
	movl	%ecx,80(%edx)
	jmp	.L120good_key
.align	16
.L119key_128:
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
.L118key_128_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1
	xorps	%xmm1,%xmm0
	ret
.align	16
.L11710rounds_alt:
	movdqa	(%ebx),%xmm5
	movl	$8,%ecx
	movdqa	32(%ebx),%xmm4
	movdqa	%xmm0,%xmm2
	movdqu	%xmm0,-16(%edx)
.L121loop_key128:
.byte	102,15,56,0,197
.byte	102,15,56,221,196
	pslld	$1,%xmm4
	leal	16(%edx),%edx
	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2
	pxor	%xmm2,%xmm0
	movdqu	%xmm0,-16(%edx)
	movdqa	%xmm0,%xmm2
	decl	%ecx
	jnz	.L121loop_key128
	movdqa	48(%ebx),%xmm4
.byte	102,15,56,0,197
.byte	102,15,56,221,196
	pslld	$1,%xmm4
	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2
	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%edx)
	movdqa	%xmm0,%xmm2
.byte	102,15,56,0,197
.byte	102,15,56,221,196
	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2
	pxor	%xmm2,%xmm0
	movdqu	%xmm0,16(%edx)
	movl	$9,%ecx
	movl	%ecx,96(%edx)
	jmp	.L120good_key
.align	16
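/*
 * 192-bit key: 12 rounds.  Each aeskeygenassist step (on %xmm2, which
 * holds key words 4-5 loaded by movq 16(%eax)) advances the schedule
 * by six 32-bit words, so the key_192a/key_192b helpers alternate to
 * repack the 24-byte strides into 16-byte round-key slots.  The alt
 * path does the same six-word step with pshufb (mask at 16(%ebx)) and
 * aesenclast, doubling the rcon vector in %xmm4 with pslld $1 and
 * advancing %edx by 24 each iteration.
 */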
.L11412rounds:
	movq	16(%eax),%xmm2
	cmpl	$268435456,%ebp
	je	.L12212rounds_alt
	movl	$11,%ecx
	movups	%xmm0,-16(%edx)
.byte	102,15,58,223,202,1
	call	.L123key_192a_cold
.byte	102,15,58,223,202,2
	call	.L124key_192b
.byte	102,15,58,223,202,4
	call	.L125key_192a
.byte	102,15,58,223,202,8
	call	.L124key_192b
.byte	102,15,58,223,202,16
	call	.L125key_192a
.byte	102,15,58,223,202,32
	call	.L124key_192b
.byte	102,15,58,223,202,64
	call	.L125key_192a
.byte	102,15,58,223,202,128
	call	.L124key_192b
	movups	%xmm0,(%edx)
	movl	%ecx,48(%edx)
	jmp	.L120good_key
.align	16
.L125key_192a:
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
.align	16
.L123key_192a_cold:
	movaps	%xmm2,%xmm5
.L126key_192b_warm:
	shufps	$16,%xmm0,%xmm4
	movdqa	%xmm2,%xmm3
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	pslldq	$4,%xmm3
	xorps	%xmm4,%xmm0
	pshufd	$85,%xmm1,%xmm1
	pxor	%xmm3,%xmm2
	pxor	%xmm1,%xmm0
	pshufd	$255,%xmm0,%xmm3
	pxor	%xmm3,%xmm2
	ret
.align	16
.L124key_192b:
	movaps	%xmm0,%xmm3
	shufps	$68,%xmm0,%xmm5
	movups	%xmm5,(%edx)
	shufps	$78,%xmm2,%xmm3
	movups	%xmm3,16(%edx)
	leal	32(%edx),%edx
	jmp	.L126key_192b_warm
.align	16
.L12212rounds_alt:
	movdqa	16(%ebx),%xmm5
	movdqa	32(%ebx),%xmm4
	movl	$8,%ecx
	movdqu	%xmm0,-16(%edx)
.L127loop_key192:
	movq	%xmm2,(%edx)
	movdqa	%xmm2,%xmm1
.byte	102,15,56,0,213
.byte	102,15,56,221,212
	pslld	$1,%xmm4
	leal	24(%edx),%edx
	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pshufd	$255,%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pxor	%xmm2,%xmm0
	pxor	%xmm3,%xmm2
	movdqu	%xmm0,-16(%edx)
	decl	%ecx
	jnz	.L127loop_key192
	movl	$11,%ecx
	movl	%ecx,32(%edx)
	jmp	.L120good_key
.align	16
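/*
 * 256-bit key: 14 rounds, with the two 16-byte key halves in
 * %xmm0/%xmm2.  Each step derives two round keys: key_256a extends
 * %xmm0 using shufps $255 (broadcast of the rotated-and-substituted
 * word plus rcon), while key_256b extends %xmm2 using shufps $170,
 * which broadcasts the plain SubWord result without rotation or rcon,
 * as the AES-256 schedule prescribes for the intermediate step.
 */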
.L11314rounds:
	movups	16(%eax),%xmm2
	leal	16(%edx),%edx
	cmpl	$268435456,%ebp
	je	.L12814rounds_alt
	movl	$13,%ecx
	movups	%xmm0,-32(%edx)
	movups	%xmm2,-16(%edx)
.byte	102,15,58,223,202,1
	call	.L129key_256a_cold
.byte	102,15,58,223,200,1
	call	.L130key_256b
.byte	102,15,58,223,202,2
	call	.L131key_256a
.byte	102,15,58,223,200,2
	call	.L130key_256b
.byte	102,15,58,223,202,4
	call	.L131key_256a
.byte	102,15,58,223,200,4
	call	.L130key_256b
.byte	102,15,58,223,202,8
	call	.L131key_256a
.byte	102,15,58,223,200,8
	call	.L130key_256b
.byte	102,15,58,223,202,16
	call	.L131key_256a
.byte	102,15,58,223,200,16
	call	.L130key_256b
.byte	102,15,58,223,202,32
	call	.L131key_256a
.byte	102,15,58,223,200,32
	call	.L130key_256b
.byte	102,15,58,223,202,64
	call	.L131key_256a
	movups	%xmm0,(%edx)
	movl	%ecx,16(%edx)
	xorl	%eax,%eax
	jmp	.L120good_key
.align	16
.L131key_256a:
	movups	%xmm2,(%edx)
	leal	16(%edx),%edx
.L129key_256a_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1
	xorps	%xmm1,%xmm0
	ret
.align	16
.L130key_256b:
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
	shufps	$16,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$140,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$170,%xmm1,%xmm1
	xorps	%xmm1,%xmm2
	ret
.align	16
.L12814rounds_alt:
	movdqa	(%ebx),%xmm5
	movdqa	32(%ebx),%xmm4
	movl	$7,%ecx
	movdqu	%xmm0,-32(%edx)
	movdqa	%xmm2,%xmm1
	movdqu	%xmm2,-16(%edx)
.L132loop_key256:
.byte	102,15,56,0,213
.byte	102,15,56,221,212
	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pslld	$1,%xmm4
	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%edx)
	decl	%ecx
	jz	.L133done_key256
	pshufd	$255,%xmm0,%xmm2
	pxor	%xmm3,%xmm3
.byte	102,15,56,221,211
	movdqa	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm3,%xmm1
	pxor	%xmm1,%xmm2
	movdqu	%xmm2,16(%edx)
	leal	32(%edx),%edx
	movdqa	%xmm2,%xmm1
	jmp	.L132loop_key256
.L133done_key256:
	movl	$13,%ecx
	movl	%ecx,16(%edx)
.L120good_key:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorl	%eax,%eax
	popl	%ebx
	popl	%ebp
	ret
.align	4
.L111bad_pointer:
	movl	$-1,%eax
	popl	%ebx
	popl	%ebp
	ret
.align	4
.L115bad_keybits:
	pxor	%xmm0,%xmm0
	movl	$-2,%eax
	popl	%ebx
	popl	%ebp
	ret
.size	_aesni_set_encrypt_key,.-_aesni_set_encrypt_key
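/*
 * Public cdecl wrapper.  The C prototype, as declared in OpenSSL, is
 *
 *	int aesni_set_encrypt_key(const unsigned char *userKey,
 *				int bits, AES_KEY *key);
 *
 * It loads the arguments into the registers the internal routine
 * expects and inherits its return value: 0 on success, -1 for a NULL
 * pointer, -2 for unsupported key bits.
 */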
.globl	aesni_set_encrypt_key
.type	aesni_set_encrypt_key,@function
.align	16
aesni_set_encrypt_key:
.L_aesni_set_encrypt_key_begin:
	movl	4(%esp),%eax
	movl	8(%esp),%ecx
	movl	12(%esp),%edx
	call	_aesni_set_encrypt_key
	ret
.size	aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin
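/*
 * aesni_set_decrypt_key builds the encryption schedule first, then
 * converts it in place to the equivalent-inverse-cipher form: the
 * first and last round keys are swapped, the rest of the schedule is
 * reversed pairwise, and every inner round key is passed through
 * aesimc (InvMixColumns; the .byte 102,15,56,219,xx runs below) so
 * that aesdec can consume it directly.
 */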
.globl	aesni_set_decrypt_key
.type	aesni_set_decrypt_key,@function
.align	16
aesni_set_decrypt_key:
.L_aesni_set_decrypt_key_begin:
	movl	4(%esp),%eax
	movl	8(%esp),%ecx
	movl	12(%esp),%edx
	call	_aesni_set_encrypt_key
	movl	12(%esp),%edx
	shll	$4,%ecx
	testl	%eax,%eax
	jnz	.L134dec_key_ret
	leal	16(%edx,%ecx,1),%eax
	movups	(%edx),%xmm0
	movups	(%eax),%xmm1
	movups	%xmm0,(%eax)
	movups	%xmm1,(%edx)
	leal	16(%edx),%edx
	leal	-16(%eax),%eax
.L135dec_key_inverse:
	movups	(%edx),%xmm0
	movups	(%eax),%xmm1
.byte	102,15,56,219,192
.byte	102,15,56,219,201
	leal	16(%edx),%edx
	leal	-16(%eax),%eax
	movups	%xmm0,16(%eax)
	movups	%xmm1,-16(%edx)
	cmpl	%edx,%eax
	ja	.L135dec_key_inverse
	movups	(%edx),%xmm0
.byte	102,15,56,219,192
	movups	%xmm0,(%edx)
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	xorl	%eax,%eax
.L134dec_key_ret:
	ret
.size	aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
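/*
 * Constants for the _alt key schedules: two pshufb masks that
 * broadcast a rotated key word into every lane (0x0c0f0e0d for the
 * 128/256-bit paths, 0x04070605 for the 192-bit path), followed by the
 * rcon seed vectors 1 and 0x1b, which the schedules double in-register
 * with pslld $1.  The trailing byte string is the CRYPTOGAMS
 * identification string.
 */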
.align	64
.Lkey_const:
.long	202313229,202313229,202313229,202313229	# 0x0c0f0e0d: pshufb mask (rotate-word broadcast)
.long	67569157,67569157,67569157,67569157	# 0x04070605: pshufb mask for the 192-bit path
.long	1,1,1,1				# rcon seed
.long	27,27,27,27			# rcon 0x1b (after eight doublings)
.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69	# "AES for Intel AE"
.byte	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83		# "S-NI, CRYPTOGAMS"
.byte	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115	# " by <appro@opens"
.byte	115,108,46,111,114,103,62,0				# "sl.org>", NUL
.comm	OPENSSL_ia32cap_P,16,4