1299481Sjkim# $FreeBSD: releng/11.0/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S 299481 2016-05-11 20:11:21Z jkim $
2299481Sjkim# Do not modify. This file is auto-generated from aesni-mb-x86_64.pl.
3290207Sjkim.text
4290207Sjkim
5290207Sjkim
6290207Sjkim
# aesni_multi_cbc_encrypt: AES-CBC encryption of up to four independent
# streams per pass using AES-NI on %xmm registers (GAS, AT&T syntax).
#
# In:   %rdi  array of 40-byte stream descriptors, read as:
#               +0   qword     input pointer
#               +8   qword     output pointer
#               +16  dword     count of 16-byte blocks (<= 0 leaves the lane idle)
#               +24  16 bytes  IV
#       %rsi  key schedule: round key 0 at +0, one round key per 16 bytes after
#             it, and a round-count dword at +240
#       %edx  number of four-descriptor groups to process
# Out:  ciphertext is stored through each output pointer; no result is placed
#       in %rax for the caller.
# Regs: %rbx, %rbp, %r12-%r15 are pushed and restored; %rax, %rcx, %rdx, %rsi,
#       %rdi, %r8-%r11 and %xmm0-%xmm12 are overwritten.
#
# Dispatch: with %edx >= 2 and bit 28 of the dword at OPENSSL_ia32cap_P+4 set,
# control goes to _avx_cbc_enc_shortcut (the eight-lane AVX code) instead.
#
# Frame, relative to %rsp after it is aligned down to 64 bytes:
#   at  0: 16-byte scratch; stores of finished lanes land here
#   at 16: entry %rsp (lanes without further input also read from here)
#   at 24: saved group counter
#   at 32: four dword remaining-block counters, one per lane
#
# Opcode legend: .byte 102,15,56,220,M is aesenc and 102,15,56,221,M is
# aesenclast. M = 209/217/225/233 applies %xmm1 to %xmm2/%xmm3/%xmm4/%xmm5;
# M = 208/216/224/232 applies %xmm0 to the same four registers.
#
# NOTE(review): this file is marked as generated from aesni-mb-x86_64.pl.
# Confirm that the generator emits %r11 for the fourth input prefetch in
# .Loop_enc4x so that a regeneration keeps it.
7290207Sjkim.globl	aesni_multi_cbc_encrypt
8290207Sjkim.type	aesni_multi_cbc_encrypt,@function
9290207Sjkim.align	32
10290207Sjkimaesni_multi_cbc_encrypt:
11299481Sjkim	cmpl	$2,%edx	# fewer than two groups: stay on the SSE path
12299481Sjkim	jb	.Lenc_non_avx
13299481Sjkim	movl	OPENSSL_ia32cap_P+4(%rip),%ecx
14299481Sjkim	testl	$268435456,%ecx	# 0x10000000, i.e. bit 28
15299481Sjkim	jnz	_avx_cbc_enc_shortcut
16299481Sjkim	jmp	.Lenc_non_avx
17299481Sjkim.align	16
18299481Sjkim.Lenc_non_avx:
19290207Sjkim	movq	%rsp,%rax	# keep entry %rsp for the epilogue
20290207Sjkim	pushq	%rbx
21290207Sjkim	pushq	%rbp
22290207Sjkim	pushq	%r12
23290207Sjkim	pushq	%r13
24290207Sjkim	pushq	%r14
25290207Sjkim	pushq	%r15
26290207Sjkim
27290207Sjkim
28290207Sjkim
29290207Sjkim
30290207Sjkim
31290207Sjkim
32290207Sjkim	subq	$48,%rsp
33290207Sjkim	andq	$-64,%rsp	# movdqa on 32(%rsp) below needs an aligned frame
34290207Sjkim	movq	%rax,16(%rsp)
35290207Sjkim
36290207Sjkim.Lenc4x_body:
37290207Sjkim	movdqu	(%rsi),%xmm12	# round key 0
38290207Sjkim	leaq	120(%rsi),%rsi	# bias key pointer; later offsets are written N-120
39290207Sjkim	leaq	80(%rdi),%rdi	# bias descriptor pointer; lanes sit at -80,-40,0,40
40290207Sjkim
# Per group: load four descriptors. %edx becomes the largest block count of
# the four lanes and drives the inner loop.
41290207Sjkim.Lenc4x_loop_grande:
42290207Sjkim	movl	%edx,24(%rsp)	# park group counter; %edx is reused below
43290207Sjkim	xorl	%edx,%edx
44290207Sjkim	movl	-64(%rdi),%ecx	# lane 0 block count
45290207Sjkim	movq	-80(%rdi),%r8	# lane 0 input pointer
46290207Sjkim	cmpl	%edx,%ecx
47290207Sjkim	movq	-72(%rdi),%r12	# lane 0 output pointer
48290207Sjkim	cmovgl	%ecx,%edx	# running maximum
49290207Sjkim	testl	%ecx,%ecx
50290207Sjkim	movdqu	-56(%rdi),%xmm2	# lane 0 IV
51290207Sjkim	movl	%ecx,32(%rsp)
52290207Sjkim	cmovleq	%rsp,%r8	# count <= 0: read from the frame instead
53290207Sjkim	movl	-24(%rdi),%ecx	# lane 1, same pattern
54290207Sjkim	movq	-40(%rdi),%r9
55290207Sjkim	cmpl	%edx,%ecx
56290207Sjkim	movq	-32(%rdi),%r13
57290207Sjkim	cmovgl	%ecx,%edx
58290207Sjkim	testl	%ecx,%ecx
59290207Sjkim	movdqu	-16(%rdi),%xmm3
60290207Sjkim	movl	%ecx,36(%rsp)
61290207Sjkim	cmovleq	%rsp,%r9
62290207Sjkim	movl	16(%rdi),%ecx	# lane 2
63290207Sjkim	movq	0(%rdi),%r10
64290207Sjkim	cmpl	%edx,%ecx
65290207Sjkim	movq	8(%rdi),%r14
66290207Sjkim	cmovgl	%ecx,%edx
67290207Sjkim	testl	%ecx,%ecx
68290207Sjkim	movdqu	24(%rdi),%xmm4
69290207Sjkim	movl	%ecx,40(%rsp)
70290207Sjkim	cmovleq	%rsp,%r10
71290207Sjkim	movl	56(%rdi),%ecx	# lane 3
72290207Sjkim	movq	40(%rdi),%r11
73290207Sjkim	cmpl	%edx,%ecx
74290207Sjkim	movq	48(%rdi),%r15
75290207Sjkim	cmovgl	%ecx,%edx
76290207Sjkim	testl	%ecx,%ecx
77290207Sjkim	movdqu	64(%rdi),%xmm5
78290207Sjkim	movl	%ecx,44(%rsp)
79290207Sjkim	cmovleq	%rsp,%r11
80290207Sjkim	testl	%edx,%edx
81290207Sjkim	jz	.Lenc4x_done	# no lane has work
82290207Sjkim
# Initial state per lane = IV xor round key 0 xor first input block.
83290207Sjkim	movups	16-120(%rsi),%xmm1	# round key 1
84290207Sjkim	pxor	%xmm12,%xmm2
85290207Sjkim	movups	32-120(%rsi),%xmm0	# round key 2
86290207Sjkim	pxor	%xmm12,%xmm3
87290207Sjkim	movl	240-120(%rsi),%eax	# round-count dword
88290207Sjkim	pxor	%xmm12,%xmm4
89290207Sjkim	movdqu	(%r8),%xmm6
90290207Sjkim	pxor	%xmm12,%xmm5
91290207Sjkim	movdqu	(%r9),%xmm7
92290207Sjkim	pxor	%xmm6,%xmm2
93290207Sjkim	movdqu	(%r10),%xmm8
94290207Sjkim	pxor	%xmm7,%xmm3
95290207Sjkim	movdqu	(%r11),%xmm9
96290207Sjkim	pxor	%xmm8,%xmm4
97290207Sjkim	pxor	%xmm9,%xmm5
98290207Sjkim	movdqa	32(%rsp),%xmm10	# four lane counters
99290207Sjkim	xorq	%rbx,%rbx	# byte offset into every stream
100290207Sjkim	jmp	.Loop_enc4x
101290207Sjkim
# One iteration encrypts one block in each lane. The AES rounds are
# interleaved with pointer and counter bookkeeping.
102290207Sjkim.align	32
103290207Sjkim.Loop_enc4x:
104290207Sjkim	addq	$16,%rbx	# offset of the next block; current block is at -16
105290207Sjkim	leaq	16(%rsp),%rbp
106290207Sjkim	movl	$1,%ecx
107290207Sjkim	subq	%rbx,%rbp	# sink base: (%rbp,%rbx) = 16(%rsp), -16(%rbp,%rbx) = 0(%rsp)
108290207Sjkim
109290207Sjkim.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
110290207Sjkim	prefetcht0	31(%r8,%rbx,1)	# lane 0 input
111290207Sjkim	prefetcht0	31(%r9,%rbx,1)	# lane 1 input
112290207Sjkim.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
113290207Sjkim	prefetcht0	31(%r10,%rbx,1)	# lane 2 input
114290207Sjkim	prefetcht0	31(%r11,%rbx,1)	# lane 3 input, completing the per-lane pattern
115290207Sjkim.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
116290207Sjkim.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
117290207Sjkim	movups	48-120(%rsi),%xmm1
118290207Sjkim	cmpl	32(%rsp),%ecx	# 1 versus lane 0 remaining count
119290207Sjkim.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
120290207Sjkim.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
121290207Sjkim.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
122290207Sjkim	cmovgeq	%rbp,%r8	# count <= 1: no next input block, read the sink
123290207Sjkim	cmovgq	%rbp,%r12	# count <= 0: lane finished, store to the sink
124290207Sjkim.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
125290207Sjkim	movups	-56(%rsi),%xmm0
126290207Sjkim	cmpl	36(%rsp),%ecx	# lane 1
127290207Sjkim.byte	102,15,56,220,209
128290207Sjkim.byte	102,15,56,220,217
129290207Sjkim.byte	102,15,56,220,225
130290207Sjkim	cmovgeq	%rbp,%r9
131290207Sjkim	cmovgq	%rbp,%r13
132290207Sjkim.byte	102,15,56,220,233
133290207Sjkim	movups	-40(%rsi),%xmm1
134290207Sjkim	cmpl	40(%rsp),%ecx	# lane 2
135290207Sjkim.byte	102,15,56,220,208
136290207Sjkim.byte	102,15,56,220,216
137290207Sjkim.byte	102,15,56,220,224
138290207Sjkim	cmovgeq	%rbp,%r10
139290207Sjkim	cmovgq	%rbp,%r14
140290207Sjkim.byte	102,15,56,220,232
141290207Sjkim	movups	-24(%rsi),%xmm0
142290207Sjkim	cmpl	44(%rsp),%ecx	# lane 3
143290207Sjkim.byte	102,15,56,220,209
144290207Sjkim.byte	102,15,56,220,217
145290207Sjkim.byte	102,15,56,220,225
146290207Sjkim	cmovgeq	%rbp,%r11
147290207Sjkim	cmovgq	%rbp,%r15
148290207Sjkim.byte	102,15,56,220,233
149290207Sjkim	movups	-8(%rsi),%xmm1
150290207Sjkim	movdqa	%xmm10,%xmm11
151290207Sjkim.byte	102,15,56,220,208
152290207Sjkim	prefetcht0	15(%r12,%rbx,1)	# output side, lanes 0-3
153290207Sjkim	prefetcht0	15(%r13,%rbx,1)
154290207Sjkim.byte	102,15,56,220,216
155290207Sjkim	prefetcht0	15(%r14,%rbx,1)
156290207Sjkim	prefetcht0	15(%r15,%rbx,1)
157290207Sjkim.byte	102,15,56,220,224
158290207Sjkim.byte	102,15,56,220,232
159290207Sjkim	movups	128-120(%rsi),%xmm0
160290207Sjkim	pxor	%xmm12,%xmm12	# borrow %xmm12 as zero
161290207Sjkim
162290207Sjkim.byte	102,15,56,220,209
163290207Sjkim	pcmpgtd	%xmm12,%xmm11	# -1 in lanes whose count is > 0
164290207Sjkim	movdqu	-120(%rsi),%xmm12	# restore round key 0
165290207Sjkim.byte	102,15,56,220,217
166290207Sjkim	paddd	%xmm11,%xmm10	# decrement the still-positive counts
167290207Sjkim	movdqa	%xmm10,32(%rsp)
168290207Sjkim.byte	102,15,56,220,225
169290207Sjkim.byte	102,15,56,220,233
170290207Sjkim	movups	144-120(%rsi),%xmm1
171290207Sjkim
172290207Sjkim	cmpl	$11,%eax	# flags are consumed by the jb and je below
173290207Sjkim
174290207Sjkim.byte	102,15,56,220,208
175290207Sjkim.byte	102,15,56,220,216
176290207Sjkim.byte	102,15,56,220,224
177290207Sjkim.byte	102,15,56,220,232
178290207Sjkim	movups	160-120(%rsi),%xmm0
179290207Sjkim
180290207Sjkim	jb	.Lenc4x_tail	# round count < 11: only the two tail rounds remain
181290207Sjkim
182290207Sjkim.byte	102,15,56,220,209
183290207Sjkim.byte	102,15,56,220,217
184290207Sjkim.byte	102,15,56,220,225
185290207Sjkim.byte	102,15,56,220,233
186290207Sjkim	movups	176-120(%rsi),%xmm1
187290207Sjkim
188290207Sjkim.byte	102,15,56,220,208
189290207Sjkim.byte	102,15,56,220,216
190290207Sjkim.byte	102,15,56,220,224
191290207Sjkim.byte	102,15,56,220,232
192290207Sjkim	movups	192-120(%rsi),%xmm0
193290207Sjkim
194290207Sjkim	je	.Lenc4x_tail	# round count == 11: two extra rounds were enough
195290207Sjkim
196290207Sjkim.byte	102,15,56,220,209
197290207Sjkim.byte	102,15,56,220,217
198290207Sjkim.byte	102,15,56,220,225
199290207Sjkim.byte	102,15,56,220,233
200290207Sjkim	movups	208-120(%rsi),%xmm1
201290207Sjkim
202290207Sjkim.byte	102,15,56,220,208
203290207Sjkim.byte	102,15,56,220,216
204290207Sjkim.byte	102,15,56,220,224
205290207Sjkim.byte	102,15,56,220,232
206290207Sjkim	movups	224-120(%rsi),%xmm0
207290207Sjkim	jmp	.Lenc4x_tail
208290207Sjkim
# Tail: last aesenc with %xmm1, aesenclast with %xmm0, store the ciphertext
# and chain it into the next block (next input xor round key 0 xor ciphertext).
209290207Sjkim.align	32
210290207Sjkim.Lenc4x_tail:
211290207Sjkim.byte	102,15,56,220,209
212290207Sjkim.byte	102,15,56,220,217
213290207Sjkim.byte	102,15,56,220,225
214290207Sjkim.byte	102,15,56,220,233
215290207Sjkim	movdqu	(%r8,%rbx,1),%xmm6	# next input block of lane 0 (or sink bytes)
216290207Sjkim	movdqu	16-120(%rsi),%xmm1	# round key 1 for the next iteration
217290207Sjkim
218290207Sjkim.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
219290207Sjkim	movdqu	(%r9,%rbx,1),%xmm7
220290207Sjkim	pxor	%xmm12,%xmm6
221290207Sjkim.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
222290207Sjkim	movdqu	(%r10,%rbx,1),%xmm8
223290207Sjkim	pxor	%xmm12,%xmm7
224290207Sjkim.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
225290207Sjkim	movdqu	(%r11,%rbx,1),%xmm9
226290207Sjkim	pxor	%xmm12,%xmm8
227290207Sjkim.byte	102,15,56,221,232	# aesenclast %xmm0,%xmm5
228290207Sjkim	movdqu	32-120(%rsi),%xmm0	# round key 2 for the next iteration
229290207Sjkim	pxor	%xmm12,%xmm9
230290207Sjkim
231290207Sjkim	movups	%xmm2,-16(%r12,%rbx,1)	# ciphertext block out
232290207Sjkim	pxor	%xmm6,%xmm2	# CBC chaining into the next state
233290207Sjkim	movups	%xmm3,-16(%r13,%rbx,1)
234290207Sjkim	pxor	%xmm7,%xmm3
235290207Sjkim	movups	%xmm4,-16(%r14,%rbx,1)
236290207Sjkim	pxor	%xmm8,%xmm4
237290207Sjkim	movups	%xmm5,-16(%r15,%rbx,1)
238290207Sjkim	pxor	%xmm9,%xmm5
239290207Sjkim
240290207Sjkim	decl	%edx
241290207Sjkim	jnz	.Loop_enc4x
242290207Sjkim
243290207Sjkim	movq	16(%rsp),%rax	# entry %rsp, needed by the epilogue
244290207Sjkim	movl	24(%rsp),%edx	# group counter
245290207Sjkim
246290207Sjkim
247290207Sjkim
248290207Sjkim
249290207Sjkim
250290207Sjkim
251290207Sjkim
252290207Sjkim
253290207Sjkim
254290207Sjkim
255290207Sjkim	leaq	160(%rdi),%rdi	# next four descriptors (4 * 40 bytes)
256290207Sjkim	decl	%edx
257290207Sjkim	jnz	.Lenc4x_loop_grande
258290207Sjkim
# Epilogue: %rax holds the entry %rsp; the six pushed registers sit below it.
259290207Sjkim.Lenc4x_done:
260290207Sjkim	movq	-48(%rax),%r15
261290207Sjkim	movq	-40(%rax),%r14
262290207Sjkim	movq	-32(%rax),%r13
263290207Sjkim	movq	-24(%rax),%r12
264290207Sjkim	movq	-16(%rax),%rbp
265290207Sjkim	movq	-8(%rax),%rbx
266290207Sjkim	leaq	(%rax),%rsp
267290207Sjkim.Lenc4x_epilogue:
268290207Sjkim	.byte	0xf3,0xc3	# rep ret
269290207Sjkim.size	aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt
270290207Sjkim
# aesni_multi_cbc_decrypt: AES-CBC decryption of up to four independent
# streams per pass using AES-NI on %xmm registers (GAS, AT&T syntax).
#
# In:   %rdi  array of 40-byte stream descriptors, read as:
#               +0   qword     input pointer
#               +8   qword     output pointer
#               +16  dword     count of 16-byte blocks (<= 0 leaves the lane idle)
#               +24  16 bytes  IV
#       %rsi  key schedule: round key 0 at +0, one round key per 16 bytes after
#             it, and a round-count dword at +240
#       %edx  number of four-descriptor groups to process
# Out:  plaintext is stored through each output pointer; no result is placed
#       in %rax for the caller.
# Regs: %rbx, %rbp, %r12-%r15 are pushed and restored; %rax, %rcx, %rdx, %rsi,
#       %rdi, %r8-%r11 and %xmm0-%xmm12 are overwritten.
#
# Dispatch: with %edx >= 2 and bit 28 of the dword at OPENSSL_ia32cap_P+4 set,
# control goes to _avx_cbc_dec_shortcut (the AVX code) instead.
#
# Frame, relative to %rsp after it is aligned down to 64 bytes:
#   at  0: 16-byte scratch; stores of finished lanes land here
#   at 16: entry %rsp (lanes without further input also read from here)
#   at 24: saved group counter
#   at 32: four dword remaining-block counters, one per lane
#
# %xmm2-%xmm5 hold the four block states; %xmm6-%xmm9 hold the per-lane
# chaining value (the IV first, then the previous input block).
#
# Opcode legend: .byte 102,15,56,222,M is aesdec and 102,15,56,223,M is
# aesdeclast. M = 209/217/225/233 applies %xmm1 to %xmm2/%xmm3/%xmm4/%xmm5;
# M = 208/216/224/232 applies %xmm0 to the same four registers. The
# aesdeclast forms in the tail take %xmm6-%xmm9 as the key operand; the extra
# 65 byte is the prefix that selects %xmm8 and %xmm9.
271290207Sjkim.globl	aesni_multi_cbc_decrypt
272290207Sjkim.type	aesni_multi_cbc_decrypt,@function
273290207Sjkim.align	32
274290207Sjkimaesni_multi_cbc_decrypt:
275299481Sjkim	cmpl	$2,%edx	# fewer than two groups: stay on the SSE path
276299481Sjkim	jb	.Ldec_non_avx
277299481Sjkim	movl	OPENSSL_ia32cap_P+4(%rip),%ecx
278299481Sjkim	testl	$268435456,%ecx	# 0x10000000, i.e. bit 28
279299481Sjkim	jnz	_avx_cbc_dec_shortcut
280299481Sjkim	jmp	.Ldec_non_avx
281299481Sjkim.align	16
282299481Sjkim.Ldec_non_avx:
283290207Sjkim	movq	%rsp,%rax	# keep entry %rsp for the epilogue
284290207Sjkim	pushq	%rbx
285290207Sjkim	pushq	%rbp
286290207Sjkim	pushq	%r12
287290207Sjkim	pushq	%r13
288290207Sjkim	pushq	%r14
289290207Sjkim	pushq	%r15
290290207Sjkim
291290207Sjkim
292290207Sjkim
293290207Sjkim
294290207Sjkim
295290207Sjkim
296290207Sjkim	subq	$48,%rsp
297290207Sjkim	andq	$-64,%rsp	# movdqa on 32(%rsp) below needs an aligned frame
298290207Sjkim	movq	%rax,16(%rsp)
299290207Sjkim
300290207Sjkim.Ldec4x_body:
301290207Sjkim	movdqu	(%rsi),%xmm12	# round key 0
302290207Sjkim	leaq	120(%rsi),%rsi	# bias key pointer; later offsets are written N-120
303290207Sjkim	leaq	80(%rdi),%rdi	# bias descriptor pointer; lanes sit at -80,-40,0,40
304290207Sjkim
# Per group: load four descriptors. %edx becomes the largest block count of
# the four lanes and drives the inner loop.
305290207Sjkim.Ldec4x_loop_grande:
306290207Sjkim	movl	%edx,24(%rsp)	# park group counter; %edx is reused below
307290207Sjkim	xorl	%edx,%edx
308290207Sjkim	movl	-64(%rdi),%ecx	# lane 0 block count
309290207Sjkim	movq	-80(%rdi),%r8	# lane 0 input pointer
310290207Sjkim	cmpl	%edx,%ecx
311290207Sjkim	movq	-72(%rdi),%r12	# lane 0 output pointer
312290207Sjkim	cmovgl	%ecx,%edx	# running maximum
313290207Sjkim	testl	%ecx,%ecx
314290207Sjkim	movdqu	-56(%rdi),%xmm6	# lane 0 IV
315290207Sjkim	movl	%ecx,32(%rsp)
316290207Sjkim	cmovleq	%rsp,%r8	# count <= 0: read from the frame instead
317290207Sjkim	movl	-24(%rdi),%ecx	# lane 1, same pattern
318290207Sjkim	movq	-40(%rdi),%r9
319290207Sjkim	cmpl	%edx,%ecx
320290207Sjkim	movq	-32(%rdi),%r13
321290207Sjkim	cmovgl	%ecx,%edx
322290207Sjkim	testl	%ecx,%ecx
323290207Sjkim	movdqu	-16(%rdi),%xmm7
324290207Sjkim	movl	%ecx,36(%rsp)
325290207Sjkim	cmovleq	%rsp,%r9
326290207Sjkim	movl	16(%rdi),%ecx	# lane 2
327290207Sjkim	movq	0(%rdi),%r10
328290207Sjkim	cmpl	%edx,%ecx
329290207Sjkim	movq	8(%rdi),%r14
330290207Sjkim	cmovgl	%ecx,%edx
331290207Sjkim	testl	%ecx,%ecx
332290207Sjkim	movdqu	24(%rdi),%xmm8
333290207Sjkim	movl	%ecx,40(%rsp)
334290207Sjkim	cmovleq	%rsp,%r10
335290207Sjkim	movl	56(%rdi),%ecx	# lane 3
336290207Sjkim	movq	40(%rdi),%r11
337290207Sjkim	cmpl	%edx,%ecx
338290207Sjkim	movq	48(%rdi),%r15
339290207Sjkim	cmovgl	%ecx,%edx
340290207Sjkim	testl	%ecx,%ecx
341290207Sjkim	movdqu	64(%rdi),%xmm9
342290207Sjkim	movl	%ecx,44(%rsp)
343290207Sjkim	cmovleq	%rsp,%r11
344290207Sjkim	testl	%edx,%edx
345290207Sjkim	jz	.Ldec4x_done	# no lane has work
346290207Sjkim
# Initial state per lane = first input block xor round key 0.
347290207Sjkim	movups	16-120(%rsi),%xmm1	# round key 1
348290207Sjkim	movups	32-120(%rsi),%xmm0	# round key 2
349290207Sjkim	movl	240-120(%rsi),%eax	# round-count dword
350290207Sjkim	movdqu	(%r8),%xmm2
351290207Sjkim	movdqu	(%r9),%xmm3
352290207Sjkim	pxor	%xmm12,%xmm2
353290207Sjkim	movdqu	(%r10),%xmm4
354290207Sjkim	pxor	%xmm12,%xmm3
355290207Sjkim	movdqu	(%r11),%xmm5
356290207Sjkim	pxor	%xmm12,%xmm4
357290207Sjkim	pxor	%xmm12,%xmm5
358290207Sjkim	movdqa	32(%rsp),%xmm10	# four lane counters
359290207Sjkim	xorq	%rbx,%rbx	# byte offset into every stream
360290207Sjkim	jmp	.Loop_dec4x
361290207Sjkim
# One iteration decrypts one block in each lane. The AES rounds are
# interleaved with pointer and counter bookkeeping.
362290207Sjkim.align	32
363290207Sjkim.Loop_dec4x:
364290207Sjkim	addq	$16,%rbx	# offset of the next block; current block is at -16
365290207Sjkim	leaq	16(%rsp),%rbp
366290207Sjkim	movl	$1,%ecx
367290207Sjkim	subq	%rbx,%rbp	# sink base: (%rbp,%rbx) = 16(%rsp), -16(%rbp,%rbx) = 0(%rsp)
368290207Sjkim
369290207Sjkim.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
370290207Sjkim	prefetcht0	31(%r8,%rbx,1)	# input side, lanes 0-3
371290207Sjkim	prefetcht0	31(%r9,%rbx,1)
372290207Sjkim.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
373290207Sjkim	prefetcht0	31(%r10,%rbx,1)
374290207Sjkim	prefetcht0	31(%r11,%rbx,1)
375290207Sjkim.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
376290207Sjkim.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
377290207Sjkim	movups	48-120(%rsi),%xmm1
378290207Sjkim	cmpl	32(%rsp),%ecx	# 1 versus lane 0 remaining count
379290207Sjkim.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
380290207Sjkim.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
381290207Sjkim.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
382290207Sjkim	cmovgeq	%rbp,%r8	# count <= 1: no next input block, read the sink
383290207Sjkim	cmovgq	%rbp,%r12	# count <= 0: lane finished, store to the sink
384290207Sjkim.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
385290207Sjkim	movups	-56(%rsi),%xmm0
386290207Sjkim	cmpl	36(%rsp),%ecx	# lane 1
387290207Sjkim.byte	102,15,56,222,209
388290207Sjkim.byte	102,15,56,222,217
389290207Sjkim.byte	102,15,56,222,225
390290207Sjkim	cmovgeq	%rbp,%r9
391290207Sjkim	cmovgq	%rbp,%r13
392290207Sjkim.byte	102,15,56,222,233
393290207Sjkim	movups	-40(%rsi),%xmm1
394290207Sjkim	cmpl	40(%rsp),%ecx	# lane 2
395290207Sjkim.byte	102,15,56,222,208
396290207Sjkim.byte	102,15,56,222,216
397290207Sjkim.byte	102,15,56,222,224
398290207Sjkim	cmovgeq	%rbp,%r10
399290207Sjkim	cmovgq	%rbp,%r14
400290207Sjkim.byte	102,15,56,222,232
401290207Sjkim	movups	-24(%rsi),%xmm0
402290207Sjkim	cmpl	44(%rsp),%ecx	# lane 3
403290207Sjkim.byte	102,15,56,222,209
404290207Sjkim.byte	102,15,56,222,217
405290207Sjkim.byte	102,15,56,222,225
406290207Sjkim	cmovgeq	%rbp,%r11
407290207Sjkim	cmovgq	%rbp,%r15
408290207Sjkim.byte	102,15,56,222,233
409290207Sjkim	movups	-8(%rsi),%xmm1
410290207Sjkim	movdqa	%xmm10,%xmm11
411290207Sjkim.byte	102,15,56,222,208
412290207Sjkim	prefetcht0	15(%r12,%rbx,1)	# output side, lanes 0-3
413290207Sjkim	prefetcht0	15(%r13,%rbx,1)
414290207Sjkim.byte	102,15,56,222,216
415290207Sjkim	prefetcht0	15(%r14,%rbx,1)
416290207Sjkim	prefetcht0	15(%r15,%rbx,1)
417290207Sjkim.byte	102,15,56,222,224
418290207Sjkim.byte	102,15,56,222,232
419290207Sjkim	movups	128-120(%rsi),%xmm0
420290207Sjkim	pxor	%xmm12,%xmm12	# borrow %xmm12 as zero
421290207Sjkim
422290207Sjkim.byte	102,15,56,222,209
423290207Sjkim	pcmpgtd	%xmm12,%xmm11	# -1 in lanes whose count is > 0
424290207Sjkim	movdqu	-120(%rsi),%xmm12	# restore round key 0
425290207Sjkim.byte	102,15,56,222,217
426290207Sjkim	paddd	%xmm11,%xmm10	# decrement the still-positive counts
427290207Sjkim	movdqa	%xmm10,32(%rsp)
428290207Sjkim.byte	102,15,56,222,225
429290207Sjkim.byte	102,15,56,222,233
430290207Sjkim	movups	144-120(%rsi),%xmm1
431290207Sjkim
432290207Sjkim	cmpl	$11,%eax	# flags are consumed by the jb and je below
433290207Sjkim
434290207Sjkim.byte	102,15,56,222,208
435290207Sjkim.byte	102,15,56,222,216
436290207Sjkim.byte	102,15,56,222,224
437290207Sjkim.byte	102,15,56,222,232
438290207Sjkim	movups	160-120(%rsi),%xmm0
439290207Sjkim
440290207Sjkim	jb	.Ldec4x_tail	# round count < 11: only the two tail rounds remain
441290207Sjkim
442290207Sjkim.byte	102,15,56,222,209
443290207Sjkim.byte	102,15,56,222,217
444290207Sjkim.byte	102,15,56,222,225
445290207Sjkim.byte	102,15,56,222,233
446290207Sjkim	movups	176-120(%rsi),%xmm1
447290207Sjkim
448290207Sjkim.byte	102,15,56,222,208
449290207Sjkim.byte	102,15,56,222,216
450290207Sjkim.byte	102,15,56,222,224
451290207Sjkim.byte	102,15,56,222,232
452290207Sjkim	movups	192-120(%rsi),%xmm0
453290207Sjkim
454290207Sjkim	je	.Ldec4x_tail	# round count == 11: two extra rounds were enough
455290207Sjkim
456290207Sjkim.byte	102,15,56,222,209
457290207Sjkim.byte	102,15,56,222,217
458290207Sjkim.byte	102,15,56,222,225
459290207Sjkim.byte	102,15,56,222,233
460290207Sjkim	movups	208-120(%rsi),%xmm1
461290207Sjkim
462290207Sjkim.byte	102,15,56,222,208
463290207Sjkim.byte	102,15,56,222,216
464290207Sjkim.byte	102,15,56,222,224
465290207Sjkim.byte	102,15,56,222,232
466290207Sjkim	movups	224-120(%rsi),%xmm0
467290207Sjkim	jmp	.Ldec4x_tail
468290207Sjkim
# Tail: last aesdec with %xmm1. The final round key in %xmm0 is xored into
# the chaining values first, so aesdeclast with %xmm6-%xmm9 applies the last
# round key and the CBC xor in one step.
469290207Sjkim.align	32
470290207Sjkim.Ldec4x_tail:
471290207Sjkim.byte	102,15,56,222,209
472290207Sjkim.byte	102,15,56,222,217
473290207Sjkim.byte	102,15,56,222,225
474290207Sjkim	pxor	%xmm0,%xmm6	# chaining value xor last round key
475290207Sjkim	pxor	%xmm0,%xmm7
476290207Sjkim.byte	102,15,56,222,233
477290207Sjkim	movdqu	16-120(%rsi),%xmm1	# round key 1 for the next iteration
478290207Sjkim	pxor	%xmm0,%xmm8
479290207Sjkim	pxor	%xmm0,%xmm9
480290207Sjkim	movdqu	32-120(%rsi),%xmm0	# round key 2 for the next iteration
481290207Sjkim
482290207Sjkim.byte	102,15,56,223,214	# aesdeclast %xmm6,%xmm2
483290207Sjkim.byte	102,15,56,223,223	# aesdeclast %xmm7,%xmm3
484290207Sjkim	movdqu	-16(%r8,%rbx,1),%xmm6	# input block just processed = next chaining value
485290207Sjkim	movdqu	-16(%r9,%rbx,1),%xmm7
486290207Sjkim.byte	102,65,15,56,223,224	# aesdeclast %xmm8,%xmm4
487290207Sjkim.byte	102,65,15,56,223,233	# aesdeclast %xmm9,%xmm5
488290207Sjkim	movdqu	-16(%r10,%rbx,1),%xmm8
489290207Sjkim	movdqu	-16(%r11,%rbx,1),%xmm9
490290207Sjkim
491290207Sjkim	movups	%xmm2,-16(%r12,%rbx,1)	# plaintext block out
492290207Sjkim	movdqu	(%r8,%rbx,1),%xmm2	# next input block (or sink bytes)
493290207Sjkim	movups	%xmm3,-16(%r13,%rbx,1)
494290207Sjkim	movdqu	(%r9,%rbx,1),%xmm3
495290207Sjkim	pxor	%xmm12,%xmm2	# next state = next input xor round key 0
496290207Sjkim	movups	%xmm4,-16(%r14,%rbx,1)
497290207Sjkim	movdqu	(%r10,%rbx,1),%xmm4
498290207Sjkim	pxor	%xmm12,%xmm3
499290207Sjkim	movups	%xmm5,-16(%r15,%rbx,1)
500290207Sjkim	movdqu	(%r11,%rbx,1),%xmm5
501290207Sjkim	pxor	%xmm12,%xmm4
502290207Sjkim	pxor	%xmm12,%xmm5
503290207Sjkim
504290207Sjkim	decl	%edx
505290207Sjkim	jnz	.Loop_dec4x
506290207Sjkim
507290207Sjkim	movq	16(%rsp),%rax	# entry %rsp, needed by the epilogue
508290207Sjkim	movl	24(%rsp),%edx	# group counter
509290207Sjkim
510290207Sjkim	leaq	160(%rdi),%rdi	# next four descriptors (4 * 40 bytes)
511290207Sjkim	decl	%edx
512290207Sjkim	jnz	.Ldec4x_loop_grande
513290207Sjkim
# Epilogue: %rax holds the entry %rsp; the six pushed registers sit below it.
514290207Sjkim.Ldec4x_done:
515290207Sjkim	movq	-48(%rax),%r15
516290207Sjkim	movq	-40(%rax),%r14
517290207Sjkim	movq	-32(%rax),%r13
518290207Sjkim	movq	-24(%rax),%r12
519290207Sjkim	movq	-16(%rax),%rbp
520290207Sjkim	movq	-8(%rax),%rbx
521290207Sjkim	leaq	(%rax),%rsp
522290207Sjkim.Ldec4x_epilogue:
523290207Sjkim	.byte	0xf3,0xc3	# rep ret
524290207Sjkim.size	aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt
# aesni_multi_cbc_encrypt_avx: eight-lane AES-CBC encryption using VEX-encoded
# AES-NI instructions (GAS, AT&T syntax). The symbol has a .type but no .globl
# here; it is entered through the _avx_cbc_enc_shortcut label, which
# aesni_multi_cbc_encrypt jumps to.
#
# In:   %rdi  array of eight 40-byte stream descriptors, read as:
#               +0   qword     input pointer
#               +8   qword     output pointer
#               +16  dword     count of 16-byte blocks (<= 0 leaves the lane idle)
#               +24  16 bytes  IV
#       %rsi  key schedule: round key 0 at +0, one round key per 16 bytes after
#             it, and a round-count dword at +240
#       %edx  halved by shrl and then overwritten at .Lenc8x_loop_grande;
#             nothing in this routine branches back to that label, so one group
#             of eight descriptors is processed per entry
# Out:  ciphertext is stored through each output pointer.
# Regs: %rbx, %rbp, %r12-%r15 are pushed and restored; %xmm0-%xmm15 are used.
#
# Frame, relative to %rsp after it is aligned down to 128 bytes:
#   at   0: 16-byte scratch; stores of finished lanes land here
#   at  16: entry %rsp (lanes without further input also read from here)
#   at  32: eight dword remaining-block counters
#   at  64: eight qword (output pointer - input pointer) differences
#   at 128: (%rbp) four 16-byte slots holding next input xor round key 0 for
#           lanes 0-3; lanes 4-7 keep that value in %xmm10-%xmm13
#
# %r8-%r15 are the eight lane pointers, %xmm2-%xmm9 the eight block states,
# %xmm15 is round key 0 and %xmm0/%xmm1 alternate as the current round key.
525299481Sjkim.type	aesni_multi_cbc_encrypt_avx,@function
526299481Sjkim.align	32
527299481Sjkimaesni_multi_cbc_encrypt_avx:
528299481Sjkim_avx_cbc_enc_shortcut:
529299481Sjkim	movq	%rsp,%rax	# keep entry %rsp for the epilogue
530299481Sjkim	pushq	%rbx
531299481Sjkim	pushq	%rbp
532299481Sjkim	pushq	%r12
533299481Sjkim	pushq	%r13
534299481Sjkim	pushq	%r14
535299481Sjkim	pushq	%r15
536299481Sjkim
537299481Sjkim
538299481Sjkim
539299481Sjkim
540299481Sjkim
541299481Sjkim
542299481Sjkim
543299481Sjkim
544299481Sjkim	subq	$192,%rsp
545299481Sjkim	andq	$-128,%rsp
546299481Sjkim	movq	%rax,16(%rsp)
547299481Sjkim
548299481Sjkim.Lenc8x_body:
549299481Sjkim	vzeroupper
550299481Sjkim	vmovdqu	(%rsi),%xmm15	# round key 0
551299481Sjkim	leaq	120(%rsi),%rsi	# bias key pointer; later offsets are relative to +120
552299481Sjkim	leaq	160(%rdi),%rdi	# bias descriptor pointer; lanes sit at -160 ... +120
553299481Sjkim	shrl	$1,%edx	# result is discarded by the xorl below
554299481Sjkim
# Load eight descriptors. Per lane: count into the counter array, input
# pointer into %r8-%r15 (redirected to %rsp when count <= 0), and
# output - input into the difference array. %edx becomes the largest count.
555299481Sjkim.Lenc8x_loop_grande:
556299481Sjkim
557299481Sjkim	xorl	%edx,%edx
558299481Sjkim	movl	-144(%rdi),%ecx	# lane 0 block count
559299481Sjkim	movq	-160(%rdi),%r8	# lane 0 input pointer
560299481Sjkim	cmpl	%edx,%ecx
561299481Sjkim	movq	-152(%rdi),%rbx	# lane 0 output pointer
562299481Sjkim	cmovgl	%ecx,%edx	# running maximum
563299481Sjkim	testl	%ecx,%ecx
564299481Sjkim	vmovdqu	-136(%rdi),%xmm2	# lane 0 IV
565299481Sjkim	movl	%ecx,32(%rsp)
566299481Sjkim	cmovleq	%rsp,%r8	# count <= 0: read from the frame instead
567299481Sjkim	subq	%r8,%rbx	# output - input
568299481Sjkim	movq	%rbx,64(%rsp)
569299481Sjkim	movl	-104(%rdi),%ecx	# lane 1, same pattern
570299481Sjkim	movq	-120(%rdi),%r9
571299481Sjkim	cmpl	%edx,%ecx
572299481Sjkim	movq	-112(%rdi),%rbp
573299481Sjkim	cmovgl	%ecx,%edx
574299481Sjkim	testl	%ecx,%ecx
575299481Sjkim	vmovdqu	-96(%rdi),%xmm3
576299481Sjkim	movl	%ecx,36(%rsp)
577299481Sjkim	cmovleq	%rsp,%r9
578299481Sjkim	subq	%r9,%rbp
579299481Sjkim	movq	%rbp,72(%rsp)
580299481Sjkim	movl	-64(%rdi),%ecx	# lane 2
581299481Sjkim	movq	-80(%rdi),%r10
582299481Sjkim	cmpl	%edx,%ecx
583299481Sjkim	movq	-72(%rdi),%rbp
584299481Sjkim	cmovgl	%ecx,%edx
585299481Sjkim	testl	%ecx,%ecx
586299481Sjkim	vmovdqu	-56(%rdi),%xmm4
587299481Sjkim	movl	%ecx,40(%rsp)
588299481Sjkim	cmovleq	%rsp,%r10
589299481Sjkim	subq	%r10,%rbp
590299481Sjkim	movq	%rbp,80(%rsp)
591299481Sjkim	movl	-24(%rdi),%ecx	# lane 3
592299481Sjkim	movq	-40(%rdi),%r11
593299481Sjkim	cmpl	%edx,%ecx
594299481Sjkim	movq	-32(%rdi),%rbp
595299481Sjkim	cmovgl	%ecx,%edx
596299481Sjkim	testl	%ecx,%ecx
597299481Sjkim	vmovdqu	-16(%rdi),%xmm5
598299481Sjkim	movl	%ecx,44(%rsp)
599299481Sjkim	cmovleq	%rsp,%r11
600299481Sjkim	subq	%r11,%rbp
601299481Sjkim	movq	%rbp,88(%rsp)
602299481Sjkim	movl	16(%rdi),%ecx	# lane 4
603299481Sjkim	movq	0(%rdi),%r12
604299481Sjkim	cmpl	%edx,%ecx
605299481Sjkim	movq	8(%rdi),%rbp
606299481Sjkim	cmovgl	%ecx,%edx
607299481Sjkim	testl	%ecx,%ecx
608299481Sjkim	vmovdqu	24(%rdi),%xmm6
609299481Sjkim	movl	%ecx,48(%rsp)
610299481Sjkim	cmovleq	%rsp,%r12
611299481Sjkim	subq	%r12,%rbp
612299481Sjkim	movq	%rbp,96(%rsp)
613299481Sjkim	movl	56(%rdi),%ecx	# lane 5
614299481Sjkim	movq	40(%rdi),%r13
615299481Sjkim	cmpl	%edx,%ecx
616299481Sjkim	movq	48(%rdi),%rbp
617299481Sjkim	cmovgl	%ecx,%edx
618299481Sjkim	testl	%ecx,%ecx
619299481Sjkim	vmovdqu	64(%rdi),%xmm7
620299481Sjkim	movl	%ecx,52(%rsp)
621299481Sjkim	cmovleq	%rsp,%r13
622299481Sjkim	subq	%r13,%rbp
623299481Sjkim	movq	%rbp,104(%rsp)
624299481Sjkim	movl	96(%rdi),%ecx	# lane 6
625299481Sjkim	movq	80(%rdi),%r14
626299481Sjkim	cmpl	%edx,%ecx
627299481Sjkim	movq	88(%rdi),%rbp
628299481Sjkim	cmovgl	%ecx,%edx
629299481Sjkim	testl	%ecx,%ecx
630299481Sjkim	vmovdqu	104(%rdi),%xmm8
631299481Sjkim	movl	%ecx,56(%rsp)
632299481Sjkim	cmovleq	%rsp,%r14
633299481Sjkim	subq	%r14,%rbp
634299481Sjkim	movq	%rbp,112(%rsp)
635299481Sjkim	movl	136(%rdi),%ecx	# lane 7
636299481Sjkim	movq	120(%rdi),%r15
637299481Sjkim	cmpl	%edx,%ecx
638299481Sjkim	movq	128(%rdi),%rbp
639299481Sjkim	cmovgl	%ecx,%edx
640299481Sjkim	testl	%ecx,%ecx
641299481Sjkim	vmovdqu	144(%rdi),%xmm9
642299481Sjkim	movl	%ecx,60(%rsp)
643299481Sjkim	cmovleq	%rsp,%r15
644299481Sjkim	subq	%r15,%rbp
645299481Sjkim	movq	%rbp,120(%rsp)
646299481Sjkim	testl	%edx,%edx
647299481Sjkim	jz	.Lenc8x_done	# no lane has work
648299481Sjkim
# Initial state per lane = IV xor round key 0 xor first input block.
649299481Sjkim	vmovups	16-120(%rsi),%xmm1	# round key 1
650299481Sjkim	vmovups	32-120(%rsi),%xmm0	# round key 2
651299481Sjkim	movl	240-120(%rsi),%eax	# round-count dword
652299481Sjkim
653299481Sjkim	vpxor	(%r8),%xmm15,%xmm10
654299481Sjkim	leaq	128(%rsp),%rbp	# staging slots for lanes 0-3
655299481Sjkim	vpxor	(%r9),%xmm15,%xmm11
656299481Sjkim	vpxor	(%r10),%xmm15,%xmm12
657299481Sjkim	vpxor	(%r11),%xmm15,%xmm13
658299481Sjkim	vpxor	%xmm10,%xmm2,%xmm2
659299481Sjkim	vpxor	(%r12),%xmm15,%xmm10
660299481Sjkim	vpxor	%xmm11,%xmm3,%xmm3
661299481Sjkim	vpxor	(%r13),%xmm15,%xmm11
662299481Sjkim	vpxor	%xmm12,%xmm4,%xmm4
663299481Sjkim	vpxor	(%r14),%xmm15,%xmm12
664299481Sjkim	vpxor	%xmm13,%xmm5,%xmm5
665299481Sjkim	vpxor	(%r15),%xmm15,%xmm13
666299481Sjkim	vpxor	%xmm10,%xmm6,%xmm6
667299481Sjkim	movl	$1,%ecx	# constant compared against every lane counter
668299481Sjkim	vpxor	%xmm11,%xmm7,%xmm7
669299481Sjkim	vpxor	%xmm12,%xmm8,%xmm8
670299481Sjkim	vpxor	%xmm13,%xmm9,%xmm9
671299481Sjkim	jmp	.Loop_enc8x
672299481Sjkim
# One iteration encrypts one block in each lane. Each of the first eight AES
# rounds also does the pointer bookkeeping of one lane: rebuild the output
# pointer in %rbx, redirect finished lanes to %rsp, fetch the next input block
# xor round key 0, and leave the lane register pointing 16 bytes past the
# output position so the tail can store at -16.
673299481Sjkim.align	32
674299481Sjkim.Loop_enc8x:
675299481Sjkim	vaesenc	%xmm1,%xmm2,%xmm2
676299481Sjkim	cmpl	32+0(%rsp),%ecx	# 1 versus lane 0 remaining count
677299481Sjkim	vaesenc	%xmm1,%xmm3,%xmm3
678299481Sjkim	prefetcht0	31(%r8)
679299481Sjkim	vaesenc	%xmm1,%xmm4,%xmm4
680299481Sjkim	vaesenc	%xmm1,%xmm5,%xmm5
681299481Sjkim	leaq	(%r8,%rbx,1),%rbx	# %rbx = output pointer (input + difference)
682299481Sjkim	cmovgeq	%rsp,%r8	# count <= 1: no next input block, read the frame
683299481Sjkim	vaesenc	%xmm1,%xmm6,%xmm6
684299481Sjkim	cmovgq	%rsp,%rbx	# count <= 0: lane finished, store to the frame
685299481Sjkim	vaesenc	%xmm1,%xmm7,%xmm7
686299481Sjkim	subq	%r8,%rbx	# back to a difference
687299481Sjkim	vaesenc	%xmm1,%xmm8,%xmm8
688299481Sjkim	vpxor	16(%r8),%xmm15,%xmm10	# next input block xor round key 0
689299481Sjkim	movq	%rbx,64+0(%rsp)
690299481Sjkim	vaesenc	%xmm1,%xmm9,%xmm9
691299481Sjkim	vmovups	-72(%rsi),%xmm1
692299481Sjkim	leaq	16(%r8,%rbx,1),%r8	# %r8 = output position + 16
693299481Sjkim	vmovdqu	%xmm10,0(%rbp)	# stage for the chaining xor in the tail
# Lanes 1-7 repeat the same bookkeeping, one lane per round.
694299481Sjkim	vaesenc	%xmm0,%xmm2,%xmm2
695299481Sjkim	cmpl	32+4(%rsp),%ecx
696299481Sjkim	movq	64+8(%rsp),%rbx
697299481Sjkim	vaesenc	%xmm0,%xmm3,%xmm3
698299481Sjkim	prefetcht0	31(%r9)
699299481Sjkim	vaesenc	%xmm0,%xmm4,%xmm4
700299481Sjkim	vaesenc	%xmm0,%xmm5,%xmm5
701299481Sjkim	leaq	(%r9,%rbx,1),%rbx
702299481Sjkim	cmovgeq	%rsp,%r9
703299481Sjkim	vaesenc	%xmm0,%xmm6,%xmm6
704299481Sjkim	cmovgq	%rsp,%rbx
705299481Sjkim	vaesenc	%xmm0,%xmm7,%xmm7
706299481Sjkim	subq	%r9,%rbx
707299481Sjkim	vaesenc	%xmm0,%xmm8,%xmm8
708299481Sjkim	vpxor	16(%r9),%xmm15,%xmm11
709299481Sjkim	movq	%rbx,64+8(%rsp)
710299481Sjkim	vaesenc	%xmm0,%xmm9,%xmm9
711299481Sjkim	vmovups	-56(%rsi),%xmm0
712299481Sjkim	leaq	16(%r9,%rbx,1),%r9
713299481Sjkim	vmovdqu	%xmm11,16(%rbp)
714299481Sjkim	vaesenc	%xmm1,%xmm2,%xmm2
715299481Sjkim	cmpl	32+8(%rsp),%ecx
716299481Sjkim	movq	64+16(%rsp),%rbx
717299481Sjkim	vaesenc	%xmm1,%xmm3,%xmm3
718299481Sjkim	prefetcht0	31(%r10)
719299481Sjkim	vaesenc	%xmm1,%xmm4,%xmm4
720299481Sjkim	prefetcht0	15(%r8)
721299481Sjkim	vaesenc	%xmm1,%xmm5,%xmm5
722299481Sjkim	leaq	(%r10,%rbx,1),%rbx
723299481Sjkim	cmovgeq	%rsp,%r10
724299481Sjkim	vaesenc	%xmm1,%xmm6,%xmm6
725299481Sjkim	cmovgq	%rsp,%rbx
726299481Sjkim	vaesenc	%xmm1,%xmm7,%xmm7
727299481Sjkim	subq	%r10,%rbx
728299481Sjkim	vaesenc	%xmm1,%xmm8,%xmm8
729299481Sjkim	vpxor	16(%r10),%xmm15,%xmm12
730299481Sjkim	movq	%rbx,64+16(%rsp)
731299481Sjkim	vaesenc	%xmm1,%xmm9,%xmm9
732299481Sjkim	vmovups	-40(%rsi),%xmm1
733299481Sjkim	leaq	16(%r10,%rbx,1),%r10
734299481Sjkim	vmovdqu	%xmm12,32(%rbp)
735299481Sjkim	vaesenc	%xmm0,%xmm2,%xmm2
736299481Sjkim	cmpl	32+12(%rsp),%ecx
737299481Sjkim	movq	64+24(%rsp),%rbx
738299481Sjkim	vaesenc	%xmm0,%xmm3,%xmm3
739299481Sjkim	prefetcht0	31(%r11)
740299481Sjkim	vaesenc	%xmm0,%xmm4,%xmm4
741299481Sjkim	prefetcht0	15(%r9)
742299481Sjkim	vaesenc	%xmm0,%xmm5,%xmm5
743299481Sjkim	leaq	(%r11,%rbx,1),%rbx
744299481Sjkim	cmovgeq	%rsp,%r11
745299481Sjkim	vaesenc	%xmm0,%xmm6,%xmm6
746299481Sjkim	cmovgq	%rsp,%rbx
747299481Sjkim	vaesenc	%xmm0,%xmm7,%xmm7
748299481Sjkim	subq	%r11,%rbx
749299481Sjkim	vaesenc	%xmm0,%xmm8,%xmm8
750299481Sjkim	vpxor	16(%r11),%xmm15,%xmm13
751299481Sjkim	movq	%rbx,64+24(%rsp)
752299481Sjkim	vaesenc	%xmm0,%xmm9,%xmm9
753299481Sjkim	vmovups	-24(%rsi),%xmm0
754299481Sjkim	leaq	16(%r11,%rbx,1),%r11
755299481Sjkim	vmovdqu	%xmm13,48(%rbp)
# Lanes 4-7 keep the next-input value in %xmm10-%xmm13 instead of staging it.
756299481Sjkim	vaesenc	%xmm1,%xmm2,%xmm2
757299481Sjkim	cmpl	32+16(%rsp),%ecx
758299481Sjkim	movq	64+32(%rsp),%rbx
759299481Sjkim	vaesenc	%xmm1,%xmm3,%xmm3
760299481Sjkim	prefetcht0	31(%r12)
761299481Sjkim	vaesenc	%xmm1,%xmm4,%xmm4
762299481Sjkim	prefetcht0	15(%r10)
763299481Sjkim	vaesenc	%xmm1,%xmm5,%xmm5
764299481Sjkim	leaq	(%r12,%rbx,1),%rbx
765299481Sjkim	cmovgeq	%rsp,%r12
766299481Sjkim	vaesenc	%xmm1,%xmm6,%xmm6
767299481Sjkim	cmovgq	%rsp,%rbx
768299481Sjkim	vaesenc	%xmm1,%xmm7,%xmm7
769299481Sjkim	subq	%r12,%rbx
770299481Sjkim	vaesenc	%xmm1,%xmm8,%xmm8
771299481Sjkim	vpxor	16(%r12),%xmm15,%xmm10
772299481Sjkim	movq	%rbx,64+32(%rsp)
773299481Sjkim	vaesenc	%xmm1,%xmm9,%xmm9
774299481Sjkim	vmovups	-8(%rsi),%xmm1
775299481Sjkim	leaq	16(%r12,%rbx,1),%r12
776299481Sjkim	vaesenc	%xmm0,%xmm2,%xmm2
777299481Sjkim	cmpl	32+20(%rsp),%ecx
778299481Sjkim	movq	64+40(%rsp),%rbx
779299481Sjkim	vaesenc	%xmm0,%xmm3,%xmm3
780299481Sjkim	prefetcht0	31(%r13)
781299481Sjkim	vaesenc	%xmm0,%xmm4,%xmm4
782299481Sjkim	prefetcht0	15(%r11)
783299481Sjkim	vaesenc	%xmm0,%xmm5,%xmm5
784299481Sjkim	leaq	(%rbx,%r13,1),%rbx	# same sum as the other lanes, base and index swapped
785299481Sjkim	cmovgeq	%rsp,%r13
786299481Sjkim	vaesenc	%xmm0,%xmm6,%xmm6
787299481Sjkim	cmovgq	%rsp,%rbx
788299481Sjkim	vaesenc	%xmm0,%xmm7,%xmm7
789299481Sjkim	subq	%r13,%rbx
790299481Sjkim	vaesenc	%xmm0,%xmm8,%xmm8
791299481Sjkim	vpxor	16(%r13),%xmm15,%xmm11
792299481Sjkim	movq	%rbx,64+40(%rsp)
793299481Sjkim	vaesenc	%xmm0,%xmm9,%xmm9
794299481Sjkim	vmovups	8(%rsi),%xmm0
795299481Sjkim	leaq	16(%r13,%rbx,1),%r13
796299481Sjkim	vaesenc	%xmm1,%xmm2,%xmm2
797299481Sjkim	cmpl	32+24(%rsp),%ecx
798299481Sjkim	movq	64+48(%rsp),%rbx
799299481Sjkim	vaesenc	%xmm1,%xmm3,%xmm3
800299481Sjkim	prefetcht0	31(%r14)
801299481Sjkim	vaesenc	%xmm1,%xmm4,%xmm4
802299481Sjkim	prefetcht0	15(%r12)
803299481Sjkim	vaesenc	%xmm1,%xmm5,%xmm5
804299481Sjkim	leaq	(%r14,%rbx,1),%rbx
805299481Sjkim	cmovgeq	%rsp,%r14
806299481Sjkim	vaesenc	%xmm1,%xmm6,%xmm6
807299481Sjkim	cmovgq	%rsp,%rbx
808299481Sjkim	vaesenc	%xmm1,%xmm7,%xmm7
809299481Sjkim	subq	%r14,%rbx
810299481Sjkim	vaesenc	%xmm1,%xmm8,%xmm8
811299481Sjkim	vpxor	16(%r14),%xmm15,%xmm12
812299481Sjkim	movq	%rbx,64+48(%rsp)
813299481Sjkim	vaesenc	%xmm1,%xmm9,%xmm9
814299481Sjkim	vmovups	24(%rsi),%xmm1
815299481Sjkim	leaq	16(%r14,%rbx,1),%r14
816299481Sjkim	vaesenc	%xmm0,%xmm2,%xmm2
817299481Sjkim	cmpl	32+28(%rsp),%ecx
818299481Sjkim	movq	64+56(%rsp),%rbx
819299481Sjkim	vaesenc	%xmm0,%xmm3,%xmm3
820299481Sjkim	prefetcht0	31(%r15)
821299481Sjkim	vaesenc	%xmm0,%xmm4,%xmm4
822299481Sjkim	prefetcht0	15(%r13)
823299481Sjkim	vaesenc	%xmm0,%xmm5,%xmm5
824299481Sjkim	leaq	(%r15,%rbx,1),%rbx
825299481Sjkim	cmovgeq	%rsp,%r15
826299481Sjkim	vaesenc	%xmm0,%xmm6,%xmm6
827299481Sjkim	cmovgq	%rsp,%rbx
828299481Sjkim	vaesenc	%xmm0,%xmm7,%xmm7
829299481Sjkim	subq	%r15,%rbx
830299481Sjkim	vaesenc	%xmm0,%xmm8,%xmm8
831299481Sjkim	vpxor	16(%r15),%xmm15,%xmm13
832299481Sjkim	movq	%rbx,64+56(%rsp)
833299481Sjkim	vaesenc	%xmm0,%xmm9,%xmm9
834299481Sjkim	vmovups	40(%rsi),%xmm0
835299481Sjkim	leaq	16(%r15,%rbx,1),%r15
836299481Sjkim	vmovdqu	32(%rsp),%xmm14	# counters of lanes 0-3
837299481Sjkim	prefetcht0	15(%r14)
838299481Sjkim	prefetcht0	15(%r15)
839299481Sjkim	cmpl	$11,%eax	# flags are consumed by the jb here and the je further down
840299481Sjkim	jb	.Lenc8x_tail	# round count < 11: only the two tail rounds remain
841299481Sjkim
842299481Sjkim	vaesenc	%xmm1,%xmm2,%xmm2
843299481Sjkim	vaesenc	%xmm1,%xmm3,%xmm3
844299481Sjkim	vaesenc	%xmm1,%xmm4,%xmm4
845299481Sjkim	vaesenc	%xmm1,%xmm5,%xmm5
846299481Sjkim	vaesenc	%xmm1,%xmm6,%xmm6
847299481Sjkim	vaesenc	%xmm1,%xmm7,%xmm7
848299481Sjkim	vaesenc	%xmm1,%xmm8,%xmm8
849299481Sjkim	vaesenc	%xmm1,%xmm9,%xmm9
850299481Sjkim	vmovups	176-120(%rsi),%xmm1
851299481Sjkim
852299481Sjkim	vaesenc	%xmm0,%xmm2,%xmm2
853299481Sjkim	vaesenc	%xmm0,%xmm3,%xmm3
854299481Sjkim	vaesenc	%xmm0,%xmm4,%xmm4
855299481Sjkim	vaesenc	%xmm0,%xmm5,%xmm5
856299481Sjkim	vaesenc	%xmm0,%xmm6,%xmm6
857299481Sjkim	vaesenc	%xmm0,%xmm7,%xmm7
858299481Sjkim	vaesenc	%xmm0,%xmm8,%xmm8
859299481Sjkim	vaesenc	%xmm0,%xmm9,%xmm9
860299481Sjkim	vmovups	192-120(%rsi),%xmm0
861299481Sjkim	je	.Lenc8x_tail	# round count == 11: two extra rounds were enough
862299481Sjkim
863299481Sjkim	vaesenc	%xmm1,%xmm2,%xmm2
864299481Sjkim	vaesenc	%xmm1,%xmm3,%xmm3
865299481Sjkim	vaesenc	%xmm1,%xmm4,%xmm4
866299481Sjkim	vaesenc	%xmm1,%xmm5,%xmm5
867299481Sjkim	vaesenc	%xmm1,%xmm6,%xmm6
868299481Sjkim	vaesenc	%xmm1,%xmm7,%xmm7
869299481Sjkim	vaesenc	%xmm1,%xmm8,%xmm8
870299481Sjkim	vaesenc	%xmm1,%xmm9,%xmm9
871299481Sjkim	vmovups	208-120(%rsi),%xmm1
872299481Sjkim
873299481Sjkim	vaesenc	%xmm0,%xmm2,%xmm2
874299481Sjkim	vaesenc	%xmm0,%xmm3,%xmm3
875299481Sjkim	vaesenc	%xmm0,%xmm4,%xmm4
876299481Sjkim	vaesenc	%xmm0,%xmm5,%xmm5
877299481Sjkim	vaesenc	%xmm0,%xmm6,%xmm6
878299481Sjkim	vaesenc	%xmm0,%xmm7,%xmm7
879299481Sjkim	vaesenc	%xmm0,%xmm8,%xmm8
880299481Sjkim	vaesenc	%xmm0,%xmm9,%xmm9
881299481Sjkim	vmovups	224-120(%rsi),%xmm0
882299481Sjkim
# Tail: last vaesenc and vaesenclast, interleaved with decrementing the
# still-positive lane counters (%xmm15 is borrowed as zero and compare mask,
# then reloaded with round key 0).
883299481Sjkim.Lenc8x_tail:
884299481Sjkim	vaesenc	%xmm1,%xmm2,%xmm2
885299481Sjkim	vpxor	%xmm15,%xmm15,%xmm15
886299481Sjkim	vaesenc	%xmm1,%xmm3,%xmm3
887299481Sjkim	vaesenc	%xmm1,%xmm4,%xmm4
888299481Sjkim	vpcmpgtd	%xmm15,%xmm14,%xmm15	# -1 in lanes 0-3 whose count is > 0
889299481Sjkim	vaesenc	%xmm1,%xmm5,%xmm5
890299481Sjkim	vaesenc	%xmm1,%xmm6,%xmm6
891299481Sjkim	vpaddd	%xmm14,%xmm15,%xmm15	# decremented counters of lanes 0-3
892299481Sjkim	vmovdqu	48(%rsp),%xmm14	# counters of lanes 4-7
893299481Sjkim	vaesenc	%xmm1,%xmm7,%xmm7
894299481Sjkim	movq	64(%rsp),%rbx	# lane 0 difference, used by the first subq below
895299481Sjkim	vaesenc	%xmm1,%xmm8,%xmm8
896299481Sjkim	vaesenc	%xmm1,%xmm9,%xmm9
897299481Sjkim	vmovups	16-120(%rsi),%xmm1	# round key 1 for the next iteration
898299481Sjkim
899299481Sjkim	vaesenclast	%xmm0,%xmm2,%xmm2
900299481Sjkim	vmovdqa	%xmm15,32(%rsp)
901299481Sjkim	vpxor	%xmm15,%xmm15,%xmm15
902299481Sjkim	vaesenclast	%xmm0,%xmm3,%xmm3
903299481Sjkim	vaesenclast	%xmm0,%xmm4,%xmm4
904299481Sjkim	vpcmpgtd	%xmm15,%xmm14,%xmm15
905299481Sjkim	vaesenclast	%xmm0,%xmm5,%xmm5
906299481Sjkim	vaesenclast	%xmm0,%xmm6,%xmm6
907299481Sjkim	vpaddd	%xmm15,%xmm14,%xmm14	# decremented counters of lanes 4-7
908299481Sjkim	vmovdqu	-120(%rsi),%xmm15	# restore round key 0
909299481Sjkim	vaesenclast	%xmm0,%xmm7,%xmm7
910299481Sjkim	vaesenclast	%xmm0,%xmm8,%xmm8
911299481Sjkim	vmovdqa	%xmm14,48(%rsp)
912299481Sjkim	vaesenclast	%xmm0,%xmm9,%xmm9
913299481Sjkim	vmovups	32-120(%rsi),%xmm0	# round key 2 for the next iteration
914299481Sjkim
# Store each ciphertext block, turn the lane register back into an input
# pointer (now advanced by 16), and chain the ciphertext into the next state.
915299481Sjkim	vmovups	%xmm2,-16(%r8)
916299481Sjkim	subq	%rbx,%r8
917299481Sjkim	vpxor	0(%rbp),%xmm2,%xmm2
918299481Sjkim	vmovups	%xmm3,-16(%r9)
919299481Sjkim	subq	72(%rsp),%r9
920299481Sjkim	vpxor	16(%rbp),%xmm3,%xmm3
921299481Sjkim	vmovups	%xmm4,-16(%r10)
922299481Sjkim	subq	80(%rsp),%r10
923299481Sjkim	vpxor	32(%rbp),%xmm4,%xmm4
924299481Sjkim	vmovups	%xmm5,-16(%r11)
925299481Sjkim	subq	88(%rsp),%r11
926299481Sjkim	vpxor	48(%rbp),%xmm5,%xmm5
927299481Sjkim	vmovups	%xmm6,-16(%r12)
928299481Sjkim	subq	96(%rsp),%r12
929299481Sjkim	vpxor	%xmm10,%xmm6,%xmm6
930299481Sjkim	vmovups	%xmm7,-16(%r13)
931299481Sjkim	subq	104(%rsp),%r13
932299481Sjkim	vpxor	%xmm11,%xmm7,%xmm7
933299481Sjkim	vmovups	%xmm8,-16(%r14)
934299481Sjkim	subq	112(%rsp),%r14
935299481Sjkim	vpxor	%xmm12,%xmm8,%xmm8
936299481Sjkim	vmovups	%xmm9,-16(%r15)
937299481Sjkim	subq	120(%rsp),%r15
938299481Sjkim	vpxor	%xmm13,%xmm9,%xmm9
939299481Sjkim
940299481Sjkim	decl	%edx
941299481Sjkim	jnz	.Loop_enc8x
942299481Sjkim
943299481Sjkim	movq	16(%rsp),%rax	# entry %rsp, needed by the epilogue
944299481Sjkim
945299481Sjkim
946299481Sjkim
947299481Sjkim
948299481Sjkim
# Epilogue: %rax holds the entry %rsp; the six pushed registers sit below it.
949299481Sjkim.Lenc8x_done:
950299481Sjkim	vzeroupper
951299481Sjkim	movq	-48(%rax),%r15
952299481Sjkim	movq	-40(%rax),%r14
953299481Sjkim	movq	-32(%rax),%r13
954299481Sjkim	movq	-24(%rax),%r12
955299481Sjkim	movq	-16(%rax),%rbp
956299481Sjkim	movq	-8(%rax),%rbx
957299481Sjkim	leaq	(%rax),%rsp
958299481Sjkim.Lenc8x_epilogue:
959299481Sjkim	.byte	0xf3,0xc3	# rep ret
960299481Sjkim.size	aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx
961299481Sjkim
# ---------------------------------------------------------------------------
# aesni_multi_cbc_decrypt_avx / _avx_cbc_dec_shortcut
#
# Decrypts eight independent streams ("lanes") in lock-step: every pass of
# .Loop_dec8x runs one 16-byte block of each lane through vaesdec /
# vaesdeclast and XORs the result with that lane's previous input block
# (for the first block: with the 16 bytes held in the lane descriptor).
#
# Registers as they are read by the code below:
#   %rdi  base of eight 40-byte lane descriptors, each read as
#           +0  input pointer        +8  output pointer
#           +16 32-bit block count   +24 16-byte initial chaining value
#   %rsi  key material: 16-byte round keys from +0 upwards and a 32-bit
#         value at +240 that selects how many extra rounds are executed.
#         NOTE(review): presumably an AES key schedule with its round
#         count at offset 240 - confirm against the caller.
#   %edx  shifted right by one and then zeroed before it is ever read.
#         NOTE(review): looks like a leftover of an outer loop that is
#         not present in this listing - confirm against the generator.
#
# %rbx, %rbp and %r12-%r15 are saved on entry and restored on exit.
#
# Stack frame (after the prologue %rsp is 64 mod 256, so 192(%rsp) is
# 256-byte aligned):
#     0..15(%rsp)    dummy store target for lanes that have finished
#    16(%rsp)        entry value of %rsp (finished lanes also read
#                    16..31(%rsp) as dummy input)
#    32..63(%rsp)    eight 32-bit remaining-block counters
#    64..127(%rsp)   eight 64-bit (output - input) pointer differences
#   128..191(%rsp)   pre-loaded next input blocks of lanes 0-3
#   192..319(%rsp)   chaining buffer A
#   320..447(%rsp)   chaining buffer B
#   %rbp alternates between A and B via "xorq $0x80,%rbp".
# ---------------------------------------------------------------------------
962299481Sjkim.type	aesni_multi_cbc_decrypt_avx,@function
963299481Sjkim.align	32
964299481Sjkimaesni_multi_cbc_decrypt_avx:
965299481Sjkim_avx_cbc_dec_shortcut:
# Keep the entry %rsp in %rax, then save the callee-saved registers.
966299481Sjkim	movq	%rsp,%rax
967299481Sjkim	pushq	%rbx
968299481Sjkim	pushq	%rbp
969299481Sjkim	pushq	%r12
970299481Sjkim	pushq	%r13
971299481Sjkim	pushq	%r14
972299481Sjkim	pushq	%r15
973299481Sjkim
974299481Sjkim
975299481Sjkim
976299481Sjkim
977299481Sjkim
978299481Sjkim
979299481Sjkim
980299481Sjkim
981299481Sjkim
# Carve out the frame described above and remember the entry %rsp in it.
982299481Sjkim	subq	$256,%rsp
983299481Sjkim	andq	$-256,%rsp
984299481Sjkim	subq	$192,%rsp
985299481Sjkim	movq	%rax,16(%rsp)
986299481Sjkim
987299481Sjkim.Ldec8x_body:
# %xmm15 = round key 0.  %rsi and %rdi are then biased by +120 / +160; all
# later displacements are relative to the biased pointers.
988299481Sjkim	vzeroupper
989299481Sjkim	vmovdqu	(%rsi),%xmm15
990299481Sjkim	leaq	120(%rsi),%rsi
991299481Sjkim	leaq	160(%rdi),%rdi
992299481Sjkim	shrl	$1,%edx
993299481Sjkim
# NOTE(review): no branch to .Ldec8x_loop_grande appears in this function
# as listed, so the setup below runs exactly once per call.
994299481Sjkim.Ldec8x_loop_grande:
995299481Sjkim
# Per-lane setup, repeated for lanes 0..7 (pointer registers %r8..%r15):
#   %edx           running signed maximum of the block counts (starts at 0)
#   32+4*i(%rsp)   block count of lane i
#   pointer reg    input pointer, or %rsp if the count is <= 0
#   64+8*i(%rsp)   output pointer minus the (possibly redirected) input
#   192+16*i(%rsp) 16-byte chaining value copied from the descriptor
# Lane 0 computes its difference in %rbx, which stays live into the loop;
# the other lanes use %rbp as a temporary.
#
# lane 0: descriptor at -160(%rdi), pointer in %r8
996299481Sjkim	xorl	%edx,%edx
997299481Sjkim	movl	-144(%rdi),%ecx
998299481Sjkim	movq	-160(%rdi),%r8
999299481Sjkim	cmpl	%edx,%ecx
1000299481Sjkim	movq	-152(%rdi),%rbx
1001299481Sjkim	cmovgl	%ecx,%edx
1002299481Sjkim	testl	%ecx,%ecx
1003299481Sjkim	vmovdqu	-136(%rdi),%xmm2
1004299481Sjkim	movl	%ecx,32(%rsp)
1005299481Sjkim	cmovleq	%rsp,%r8
1006299481Sjkim	subq	%r8,%rbx
1007299481Sjkim	movq	%rbx,64(%rsp)
1008299481Sjkim	vmovdqu	%xmm2,192(%rsp)
# lane 1: descriptor at -120(%rdi), pointer in %r9
1009299481Sjkim	movl	-104(%rdi),%ecx
1010299481Sjkim	movq	-120(%rdi),%r9
1011299481Sjkim	cmpl	%edx,%ecx
1012299481Sjkim	movq	-112(%rdi),%rbp
1013299481Sjkim	cmovgl	%ecx,%edx
1014299481Sjkim	testl	%ecx,%ecx
1015299481Sjkim	vmovdqu	-96(%rdi),%xmm3
1016299481Sjkim	movl	%ecx,36(%rsp)
1017299481Sjkim	cmovleq	%rsp,%r9
1018299481Sjkim	subq	%r9,%rbp
1019299481Sjkim	movq	%rbp,72(%rsp)
1020299481Sjkim	vmovdqu	%xmm3,208(%rsp)
# lane 2: descriptor at -80(%rdi), pointer in %r10
1021299481Sjkim	movl	-64(%rdi),%ecx
1022299481Sjkim	movq	-80(%rdi),%r10
1023299481Sjkim	cmpl	%edx,%ecx
1024299481Sjkim	movq	-72(%rdi),%rbp
1025299481Sjkim	cmovgl	%ecx,%edx
1026299481Sjkim	testl	%ecx,%ecx
1027299481Sjkim	vmovdqu	-56(%rdi),%xmm4
1028299481Sjkim	movl	%ecx,40(%rsp)
1029299481Sjkim	cmovleq	%rsp,%r10
1030299481Sjkim	subq	%r10,%rbp
1031299481Sjkim	movq	%rbp,80(%rsp)
1032299481Sjkim	vmovdqu	%xmm4,224(%rsp)
# lane 3: descriptor at -40(%rdi), pointer in %r11
1033299481Sjkim	movl	-24(%rdi),%ecx
1034299481Sjkim	movq	-40(%rdi),%r11
1035299481Sjkim	cmpl	%edx,%ecx
1036299481Sjkim	movq	-32(%rdi),%rbp
1037299481Sjkim	cmovgl	%ecx,%edx
1038299481Sjkim	testl	%ecx,%ecx
1039299481Sjkim	vmovdqu	-16(%rdi),%xmm5
1040299481Sjkim	movl	%ecx,44(%rsp)
1041299481Sjkim	cmovleq	%rsp,%r11
1042299481Sjkim	subq	%r11,%rbp
1043299481Sjkim	movq	%rbp,88(%rsp)
1044299481Sjkim	vmovdqu	%xmm5,240(%rsp)
# lane 4: descriptor at 0(%rdi), pointer in %r12
1045299481Sjkim	movl	16(%rdi),%ecx
1046299481Sjkim	movq	0(%rdi),%r12
1047299481Sjkim	cmpl	%edx,%ecx
1048299481Sjkim	movq	8(%rdi),%rbp
1049299481Sjkim	cmovgl	%ecx,%edx
1050299481Sjkim	testl	%ecx,%ecx
1051299481Sjkim	vmovdqu	24(%rdi),%xmm6
1052299481Sjkim	movl	%ecx,48(%rsp)
1053299481Sjkim	cmovleq	%rsp,%r12
1054299481Sjkim	subq	%r12,%rbp
1055299481Sjkim	movq	%rbp,96(%rsp)
1056299481Sjkim	vmovdqu	%xmm6,256(%rsp)
# lane 5: descriptor at 40(%rdi), pointer in %r13
1057299481Sjkim	movl	56(%rdi),%ecx
1058299481Sjkim	movq	40(%rdi),%r13
1059299481Sjkim	cmpl	%edx,%ecx
1060299481Sjkim	movq	48(%rdi),%rbp
1061299481Sjkim	cmovgl	%ecx,%edx
1062299481Sjkim	testl	%ecx,%ecx
1063299481Sjkim	vmovdqu	64(%rdi),%xmm7
1064299481Sjkim	movl	%ecx,52(%rsp)
1065299481Sjkim	cmovleq	%rsp,%r13
1066299481Sjkim	subq	%r13,%rbp
1067299481Sjkim	movq	%rbp,104(%rsp)
1068299481Sjkim	vmovdqu	%xmm7,272(%rsp)
# lane 6: descriptor at 80(%rdi), pointer in %r14
1069299481Sjkim	movl	96(%rdi),%ecx
1070299481Sjkim	movq	80(%rdi),%r14
1071299481Sjkim	cmpl	%edx,%ecx
1072299481Sjkim	movq	88(%rdi),%rbp
1073299481Sjkim	cmovgl	%ecx,%edx
1074299481Sjkim	testl	%ecx,%ecx
1075299481Sjkim	vmovdqu	104(%rdi),%xmm8
1076299481Sjkim	movl	%ecx,56(%rsp)
1077299481Sjkim	cmovleq	%rsp,%r14
1078299481Sjkim	subq	%r14,%rbp
1079299481Sjkim	movq	%rbp,112(%rsp)
1080299481Sjkim	vmovdqu	%xmm8,288(%rsp)
# lane 7: descriptor at 120(%rdi), pointer in %r15
1081299481Sjkim	movl	136(%rdi),%ecx
1082299481Sjkim	movq	120(%rdi),%r15
1083299481Sjkim	cmpl	%edx,%ecx
1084299481Sjkim	movq	128(%rdi),%rbp
1085299481Sjkim	cmovgl	%ecx,%edx
1086299481Sjkim	testl	%ecx,%ecx
1087299481Sjkim	vmovdqu	144(%rdi),%xmm9
1088299481Sjkim	movl	%ecx,60(%rsp)
1089299481Sjkim	cmovleq	%rsp,%r15
1090299481Sjkim	subq	%r15,%rbp
1091299481Sjkim	movq	%rbp,120(%rsp)
1092299481Sjkim	vmovdqu	%xmm9,304(%rsp)
# No lane has a positive block count: leave.  %rax still holds the entry
# %rsp on this path, which is what .Ldec8x_done expects.
1093299481Sjkim	testl	%edx,%edx
1094299481Sjkim	jz	.Ldec8x_done
1095299481Sjkim
# %xmm1 / %xmm0 = round keys 1 and 2.  %eax = the 32-bit value at key+240;
# this overwrites the entry-%rsp copy in %rax, which is reloaded from
# 16(%rsp) after the loop.  %rbp -> chaining buffer B (320(%rsp)).
1096299481Sjkim	vmovups	16-120(%rsi),%xmm1
1097299481Sjkim	vmovups	32-120(%rsi),%xmm0
1098299481Sjkim	movl	240-120(%rsi),%eax
1099299481Sjkim	leaq	192+128(%rsp),%rbp
1100299481Sjkim
# Load the first input block of every lane, keep the raw block in buffer B
# (it is the chaining value for the lane's second block) and XOR in round
# key 0 to start the decryption.
1101299481Sjkim	vmovdqu	(%r8),%xmm2
1102299481Sjkim	vmovdqu	(%r9),%xmm3
1103299481Sjkim	vmovdqu	(%r10),%xmm4
1104299481Sjkim	vmovdqu	(%r11),%xmm5
1105299481Sjkim	vmovdqu	(%r12),%xmm6
1106299481Sjkim	vmovdqu	(%r13),%xmm7
1107299481Sjkim	vmovdqu	(%r14),%xmm8
1108299481Sjkim	vmovdqu	(%r15),%xmm9
1109299481Sjkim	vmovdqu	%xmm2,0(%rbp)
1110299481Sjkim	vpxor	%xmm15,%xmm2,%xmm2
1111299481Sjkim	vmovdqu	%xmm3,16(%rbp)
1112299481Sjkim	vpxor	%xmm15,%xmm3,%xmm3
1113299481Sjkim	vmovdqu	%xmm4,32(%rbp)
1114299481Sjkim	vpxor	%xmm15,%xmm4,%xmm4
1115299481Sjkim	vmovdqu	%xmm5,48(%rbp)
1116299481Sjkim	vpxor	%xmm15,%xmm5,%xmm5
1117299481Sjkim	vmovdqu	%xmm6,64(%rbp)
1118299481Sjkim	vpxor	%xmm15,%xmm6,%xmm6
1119299481Sjkim	vmovdqu	%xmm7,80(%rbp)
1120299481Sjkim	vpxor	%xmm15,%xmm7,%xmm7
1121299481Sjkim	vmovdqu	%xmm8,96(%rbp)
1122299481Sjkim	vpxor	%xmm15,%xmm8,%xmm8
1123299481Sjkim	vmovdqu	%xmm9,112(%rbp)
1124299481Sjkim	vpxor	%xmm15,%xmm9,%xmm9
# Flip %rbp to buffer A, which holds the chaining values copied from the
# descriptors.  %ecx = 1 is the constant compared against the remaining
# block counters inside the loop.
1125299481Sjkim	xorq	$0x80,%rbp
1126299481Sjkim	movl	$1,%ecx
1127299481Sjkim	jmp	.Loop_dec8x
1128299481Sjkim
# ---------------------------------------------------------------------------
# Main loop: one block per lane per iteration, %edx iterations in total.
#
# Each group of eight vaesdec instructions applies one round key to all
# lanes and, interleaved with that, advances the pointers of ONE lane:
#   cmpl  counter,%ecx     compare 1 with the lane's remaining block count
#                          (the flags stay valid across the vaesdec, lea
#                          and prefetch instructions, none of which write
#                          flags)
#   leaq  (in,%rbx),%rbx   %rbx = input pointer + difference = output pointer
#   cmovge %rsp,in         1 >= remaining: no further real input, read the
#                          next block from the stack instead
#   cmovg  %rsp,%rbx       1 >  remaining: lane already finished, redirect
#                          its output to the stack as well
#   subq  in,%rbx          new difference, written back to 64+8*i(%rsp)
#   vmovdqu 16(in),...     pre-load the lane's next input block (lanes 0-3
#                          are spilled to 128+16*i(%rsp), lanes 4-7 stay in
#                          %xmm10-%xmm13)
#   leaq  16(in,%rbx),in   register = output pointer + 16; the tail stores
#                          this iteration's result at -16(register)
# ---------------------------------------------------------------------------
1129299481Sjkim.align	32
1130299481Sjkim.Loop_dec8x:
# round key 1; lane 0 bookkeeping (%r8, difference already in %rbx)
1131299481Sjkim	vaesdec	%xmm1,%xmm2,%xmm2
1132299481Sjkim	cmpl	32+0(%rsp),%ecx
1133299481Sjkim	vaesdec	%xmm1,%xmm3,%xmm3
1134299481Sjkim	prefetcht0	31(%r8)
1135299481Sjkim	vaesdec	%xmm1,%xmm4,%xmm4
1136299481Sjkim	vaesdec	%xmm1,%xmm5,%xmm5
1137299481Sjkim	leaq	(%r8,%rbx,1),%rbx
1138299481Sjkim	cmovgeq	%rsp,%r8
1139299481Sjkim	vaesdec	%xmm1,%xmm6,%xmm6
1140299481Sjkim	cmovgq	%rsp,%rbx
1141299481Sjkim	vaesdec	%xmm1,%xmm7,%xmm7
1142299481Sjkim	subq	%r8,%rbx
1143299481Sjkim	vaesdec	%xmm1,%xmm8,%xmm8
1144299481Sjkim	vmovdqu	16(%r8),%xmm10
1145299481Sjkim	movq	%rbx,64+0(%rsp)
1146299481Sjkim	vaesdec	%xmm1,%xmm9,%xmm9
1147299481Sjkim	vmovups	-72(%rsi),%xmm1
1148299481Sjkim	leaq	16(%r8,%rbx,1),%r8
1149299481Sjkim	vmovdqu	%xmm10,128(%rsp)
# round key 2; lane 1 bookkeeping (%r9)
1150299481Sjkim	vaesdec	%xmm0,%xmm2,%xmm2
1151299481Sjkim	cmpl	32+4(%rsp),%ecx
1152299481Sjkim	movq	64+8(%rsp),%rbx
1153299481Sjkim	vaesdec	%xmm0,%xmm3,%xmm3
1154299481Sjkim	prefetcht0	31(%r9)
1155299481Sjkim	vaesdec	%xmm0,%xmm4,%xmm4
1156299481Sjkim	vaesdec	%xmm0,%xmm5,%xmm5
1157299481Sjkim	leaq	(%r9,%rbx,1),%rbx
1158299481Sjkim	cmovgeq	%rsp,%r9
1159299481Sjkim	vaesdec	%xmm0,%xmm6,%xmm6
1160299481Sjkim	cmovgq	%rsp,%rbx
1161299481Sjkim	vaesdec	%xmm0,%xmm7,%xmm7
1162299481Sjkim	subq	%r9,%rbx
1163299481Sjkim	vaesdec	%xmm0,%xmm8,%xmm8
1164299481Sjkim	vmovdqu	16(%r9),%xmm11
1165299481Sjkim	movq	%rbx,64+8(%rsp)
1166299481Sjkim	vaesdec	%xmm0,%xmm9,%xmm9
1167299481Sjkim	vmovups	-56(%rsi),%xmm0
1168299481Sjkim	leaq	16(%r9,%rbx,1),%r9
1169299481Sjkim	vmovdqu	%xmm11,144(%rsp)
# round key 3; lane 2 bookkeeping (%r10)
1170299481Sjkim	vaesdec	%xmm1,%xmm2,%xmm2
1171299481Sjkim	cmpl	32+8(%rsp),%ecx
1172299481Sjkim	movq	64+16(%rsp),%rbx
1173299481Sjkim	vaesdec	%xmm1,%xmm3,%xmm3
1174299481Sjkim	prefetcht0	31(%r10)
1175299481Sjkim	vaesdec	%xmm1,%xmm4,%xmm4
1176299481Sjkim	prefetcht0	15(%r8)
1177299481Sjkim	vaesdec	%xmm1,%xmm5,%xmm5
1178299481Sjkim	leaq	(%r10,%rbx,1),%rbx
1179299481Sjkim	cmovgeq	%rsp,%r10
1180299481Sjkim	vaesdec	%xmm1,%xmm6,%xmm6
1181299481Sjkim	cmovgq	%rsp,%rbx
1182299481Sjkim	vaesdec	%xmm1,%xmm7,%xmm7
1183299481Sjkim	subq	%r10,%rbx
1184299481Sjkim	vaesdec	%xmm1,%xmm8,%xmm8
1185299481Sjkim	vmovdqu	16(%r10),%xmm12
1186299481Sjkim	movq	%rbx,64+16(%rsp)
1187299481Sjkim	vaesdec	%xmm1,%xmm9,%xmm9
1188299481Sjkim	vmovups	-40(%rsi),%xmm1
1189299481Sjkim	leaq	16(%r10,%rbx,1),%r10
1190299481Sjkim	vmovdqu	%xmm12,160(%rsp)
# round key 4; lane 3 bookkeeping (%r11)
1191299481Sjkim	vaesdec	%xmm0,%xmm2,%xmm2
1192299481Sjkim	cmpl	32+12(%rsp),%ecx
1193299481Sjkim	movq	64+24(%rsp),%rbx
1194299481Sjkim	vaesdec	%xmm0,%xmm3,%xmm3
1195299481Sjkim	prefetcht0	31(%r11)
1196299481Sjkim	vaesdec	%xmm0,%xmm4,%xmm4
1197299481Sjkim	prefetcht0	15(%r9)
1198299481Sjkim	vaesdec	%xmm0,%xmm5,%xmm5
1199299481Sjkim	leaq	(%r11,%rbx,1),%rbx
1200299481Sjkim	cmovgeq	%rsp,%r11
1201299481Sjkim	vaesdec	%xmm0,%xmm6,%xmm6
1202299481Sjkim	cmovgq	%rsp,%rbx
1203299481Sjkim	vaesdec	%xmm0,%xmm7,%xmm7
1204299481Sjkim	subq	%r11,%rbx
1205299481Sjkim	vaesdec	%xmm0,%xmm8,%xmm8
1206299481Sjkim	vmovdqu	16(%r11),%xmm13
1207299481Sjkim	movq	%rbx,64+24(%rsp)
1208299481Sjkim	vaesdec	%xmm0,%xmm9,%xmm9
1209299481Sjkim	vmovups	-24(%rsi),%xmm0
1210299481Sjkim	leaq	16(%r11,%rbx,1),%r11
1211299481Sjkim	vmovdqu	%xmm13,176(%rsp)
# round key 5; lane 4 bookkeeping (%r12).  From here on the pre-loaded
# next block stays in %xmm10-%xmm13 instead of being spilled.
1212299481Sjkim	vaesdec	%xmm1,%xmm2,%xmm2
1213299481Sjkim	cmpl	32+16(%rsp),%ecx
1214299481Sjkim	movq	64+32(%rsp),%rbx
1215299481Sjkim	vaesdec	%xmm1,%xmm3,%xmm3
1216299481Sjkim	prefetcht0	31(%r12)
1217299481Sjkim	vaesdec	%xmm1,%xmm4,%xmm4
1218299481Sjkim	prefetcht0	15(%r10)
1219299481Sjkim	vaesdec	%xmm1,%xmm5,%xmm5
1220299481Sjkim	leaq	(%r12,%rbx,1),%rbx
1221299481Sjkim	cmovgeq	%rsp,%r12
1222299481Sjkim	vaesdec	%xmm1,%xmm6,%xmm6
1223299481Sjkim	cmovgq	%rsp,%rbx
1224299481Sjkim	vaesdec	%xmm1,%xmm7,%xmm7
1225299481Sjkim	subq	%r12,%rbx
1226299481Sjkim	vaesdec	%xmm1,%xmm8,%xmm8
1227299481Sjkim	vmovdqu	16(%r12),%xmm10
1228299481Sjkim	movq	%rbx,64+32(%rsp)
1229299481Sjkim	vaesdec	%xmm1,%xmm9,%xmm9
1230299481Sjkim	vmovups	-8(%rsi),%xmm1
1231299481Sjkim	leaq	16(%r12,%rbx,1),%r12
# round key 6; lane 5 bookkeeping (%r13).  The leaq below names base and
# index in the opposite order to the other lanes; the sum is the same.
1232299481Sjkim	vaesdec	%xmm0,%xmm2,%xmm2
1233299481Sjkim	cmpl	32+20(%rsp),%ecx
1234299481Sjkim	movq	64+40(%rsp),%rbx
1235299481Sjkim	vaesdec	%xmm0,%xmm3,%xmm3
1236299481Sjkim	prefetcht0	31(%r13)
1237299481Sjkim	vaesdec	%xmm0,%xmm4,%xmm4
1238299481Sjkim	prefetcht0	15(%r11)
1239299481Sjkim	vaesdec	%xmm0,%xmm5,%xmm5
1240299481Sjkim	leaq	(%rbx,%r13,1),%rbx
1241299481Sjkim	cmovgeq	%rsp,%r13
1242299481Sjkim	vaesdec	%xmm0,%xmm6,%xmm6
1243299481Sjkim	cmovgq	%rsp,%rbx
1244299481Sjkim	vaesdec	%xmm0,%xmm7,%xmm7
1245299481Sjkim	subq	%r13,%rbx
1246299481Sjkim	vaesdec	%xmm0,%xmm8,%xmm8
1247299481Sjkim	vmovdqu	16(%r13),%xmm11
1248299481Sjkim	movq	%rbx,64+40(%rsp)
1249299481Sjkim	vaesdec	%xmm0,%xmm9,%xmm9
1250299481Sjkim	vmovups	8(%rsi),%xmm0
1251299481Sjkim	leaq	16(%r13,%rbx,1),%r13
# round key 7; lane 6 bookkeeping (%r14)
1252299481Sjkim	vaesdec	%xmm1,%xmm2,%xmm2
1253299481Sjkim	cmpl	32+24(%rsp),%ecx
1254299481Sjkim	movq	64+48(%rsp),%rbx
1255299481Sjkim	vaesdec	%xmm1,%xmm3,%xmm3
1256299481Sjkim	prefetcht0	31(%r14)
1257299481Sjkim	vaesdec	%xmm1,%xmm4,%xmm4
1258299481Sjkim	prefetcht0	15(%r12)
1259299481Sjkim	vaesdec	%xmm1,%xmm5,%xmm5
1260299481Sjkim	leaq	(%r14,%rbx,1),%rbx
1261299481Sjkim	cmovgeq	%rsp,%r14
1262299481Sjkim	vaesdec	%xmm1,%xmm6,%xmm6
1263299481Sjkim	cmovgq	%rsp,%rbx
1264299481Sjkim	vaesdec	%xmm1,%xmm7,%xmm7
1265299481Sjkim	subq	%r14,%rbx
1266299481Sjkim	vaesdec	%xmm1,%xmm8,%xmm8
1267299481Sjkim	vmovdqu	16(%r14),%xmm12
1268299481Sjkim	movq	%rbx,64+48(%rsp)
1269299481Sjkim	vaesdec	%xmm1,%xmm9,%xmm9
1270299481Sjkim	vmovups	24(%rsi),%xmm1
1271299481Sjkim	leaq	16(%r14,%rbx,1),%r14
# round key 8; lane 7 bookkeeping (%r15)
1272299481Sjkim	vaesdec	%xmm0,%xmm2,%xmm2
1273299481Sjkim	cmpl	32+28(%rsp),%ecx
1274299481Sjkim	movq	64+56(%rsp),%rbx
1275299481Sjkim	vaesdec	%xmm0,%xmm3,%xmm3
1276299481Sjkim	prefetcht0	31(%r15)
1277299481Sjkim	vaesdec	%xmm0,%xmm4,%xmm4
1278299481Sjkim	prefetcht0	15(%r13)
1279299481Sjkim	vaesdec	%xmm0,%xmm5,%xmm5
1280299481Sjkim	leaq	(%r15,%rbx,1),%rbx
1281299481Sjkim	cmovgeq	%rsp,%r15
1282299481Sjkim	vaesdec	%xmm0,%xmm6,%xmm6
1283299481Sjkim	cmovgq	%rsp,%rbx
1284299481Sjkim	vaesdec	%xmm0,%xmm7,%xmm7
1285299481Sjkim	subq	%r15,%rbx
1286299481Sjkim	vaesdec	%xmm0,%xmm8,%xmm8
1287299481Sjkim	vmovdqu	16(%r15),%xmm13
1288299481Sjkim	movq	%rbx,64+56(%rsp)
1289299481Sjkim	vaesdec	%xmm0,%xmm9,%xmm9
1290299481Sjkim	vmovups	40(%rsi),%xmm0
1291299481Sjkim	leaq	16(%r15,%rbx,1),%r15
# %xmm14 = remaining-block counters of lanes 0-3 (decremented in the tail).
# The value read from key+240 decides how many more rounds run before the
# tail: below 11 none, exactly 11 two, anything larger four.  The flags of
# this cmpl are still intact at the "je" below because only vaesdec and
# vmovups execute in between.
1292299481Sjkim	vmovdqu	32(%rsp),%xmm14
1293299481Sjkim	prefetcht0	15(%r14)
1294299481Sjkim	prefetcht0	15(%r15)
1295299481Sjkim	cmpl	$11,%eax
1296299481Sjkim	jb	.Ldec8x_tail
1297299481Sjkim
1298299481Sjkim	vaesdec	%xmm1,%xmm2,%xmm2
1299299481Sjkim	vaesdec	%xmm1,%xmm3,%xmm3
1300299481Sjkim	vaesdec	%xmm1,%xmm4,%xmm4
1301299481Sjkim	vaesdec	%xmm1,%xmm5,%xmm5
1302299481Sjkim	vaesdec	%xmm1,%xmm6,%xmm6
1303299481Sjkim	vaesdec	%xmm1,%xmm7,%xmm7
1304299481Sjkim	vaesdec	%xmm1,%xmm8,%xmm8
1305299481Sjkim	vaesdec	%xmm1,%xmm9,%xmm9
1306299481Sjkim	vmovups	176-120(%rsi),%xmm1
1307299481Sjkim
1308299481Sjkim	vaesdec	%xmm0,%xmm2,%xmm2
1309299481Sjkim	vaesdec	%xmm0,%xmm3,%xmm3
1310299481Sjkim	vaesdec	%xmm0,%xmm4,%xmm4
1311299481Sjkim	vaesdec	%xmm0,%xmm5,%xmm5
1312299481Sjkim	vaesdec	%xmm0,%xmm6,%xmm6
1313299481Sjkim	vaesdec	%xmm0,%xmm7,%xmm7
1314299481Sjkim	vaesdec	%xmm0,%xmm8,%xmm8
1315299481Sjkim	vaesdec	%xmm0,%xmm9,%xmm9
1316299481Sjkim	vmovups	192-120(%rsi),%xmm0
1317299481Sjkim	je	.Ldec8x_tail
1318299481Sjkim
1319299481Sjkim	vaesdec	%xmm1,%xmm2,%xmm2
1320299481Sjkim	vaesdec	%xmm1,%xmm3,%xmm3
1321299481Sjkim	vaesdec	%xmm1,%xmm4,%xmm4
1322299481Sjkim	vaesdec	%xmm1,%xmm5,%xmm5
1323299481Sjkim	vaesdec	%xmm1,%xmm6,%xmm6
1324299481Sjkim	vaesdec	%xmm1,%xmm7,%xmm7
1325299481Sjkim	vaesdec	%xmm1,%xmm8,%xmm8
1326299481Sjkim	vaesdec	%xmm1,%xmm9,%xmm9
1327299481Sjkim	vmovups	208-120(%rsi),%xmm1
1328299481Sjkim
1329299481Sjkim	vaesdec	%xmm0,%xmm2,%xmm2
1330299481Sjkim	vaesdec	%xmm0,%xmm3,%xmm3
1331299481Sjkim	vaesdec	%xmm0,%xmm4,%xmm4
1332299481Sjkim	vaesdec	%xmm0,%xmm5,%xmm5
1333299481Sjkim	vaesdec	%xmm0,%xmm6,%xmm6
1334299481Sjkim	vaesdec	%xmm0,%xmm7,%xmm7
1335299481Sjkim	vaesdec	%xmm0,%xmm8,%xmm8
1336299481Sjkim	vaesdec	%xmm0,%xmm9,%xmm9
1337299481Sjkim	vmovups	224-120(%rsi),%xmm0
1338299481Sjkim
# ---------------------------------------------------------------------------
# Tail: the last vaesdec and the vaesdeclast, interleaved with
#   * decrementing every positive remaining-block counter: vpcmpgtd against
#     zero yields -1 in each dword whose counter is > 0, and that mask is
#     added to the counters (lanes 0-3 at 32(%rsp), lanes 4-7 at 48(%rsp));
#   * reloading round keys 1, 0 and 2 into %xmm1, %xmm15 and %xmm0 for the
#     next iteration;
#   * XORing each decrypted block with the lane's chaining block in the
#     buffer %rbp currently points at.
# ---------------------------------------------------------------------------
1339299481Sjkim.Ldec8x_tail:
1340299481Sjkim	vaesdec	%xmm1,%xmm2,%xmm2
1341299481Sjkim	vpxor	%xmm15,%xmm15,%xmm15
1342299481Sjkim	vaesdec	%xmm1,%xmm3,%xmm3
1343299481Sjkim	vaesdec	%xmm1,%xmm4,%xmm4
1344299481Sjkim	vpcmpgtd	%xmm15,%xmm14,%xmm15
1345299481Sjkim	vaesdec	%xmm1,%xmm5,%xmm5
1346299481Sjkim	vaesdec	%xmm1,%xmm6,%xmm6
1347299481Sjkim	vpaddd	%xmm14,%xmm15,%xmm15
1348299481Sjkim	vmovdqu	48(%rsp),%xmm14
1349299481Sjkim	vaesdec	%xmm1,%xmm7,%xmm7
# %rbx = lane 0's pointer difference, needed by the store sequence below
# and by the first group of the next loop iteration.
1350299481Sjkim	movq	64(%rsp),%rbx
1351299481Sjkim	vaesdec	%xmm1,%xmm8,%xmm8
1352299481Sjkim	vaesdec	%xmm1,%xmm9,%xmm9
1353299481Sjkim	vmovups	16-120(%rsi),%xmm1
1354299481Sjkim
1355299481Sjkim	vaesdeclast	%xmm0,%xmm2,%xmm2
1356299481Sjkim	vmovdqa	%xmm15,32(%rsp)
1357299481Sjkim	vpxor	%xmm15,%xmm15,%xmm15
1358299481Sjkim	vaesdeclast	%xmm0,%xmm3,%xmm3
1359299481Sjkim	vpxor	0(%rbp),%xmm2,%xmm2
1360299481Sjkim	vaesdeclast	%xmm0,%xmm4,%xmm4
1361299481Sjkim	vpxor	16(%rbp),%xmm3,%xmm3
1362299481Sjkim	vpcmpgtd	%xmm15,%xmm14,%xmm15
1363299481Sjkim	vaesdeclast	%xmm0,%xmm5,%xmm5
1364299481Sjkim	vpxor	32(%rbp),%xmm4,%xmm4
1365299481Sjkim	vaesdeclast	%xmm0,%xmm6,%xmm6
1366299481Sjkim	vpxor	48(%rbp),%xmm5,%xmm5
1367299481Sjkim	vpaddd	%xmm15,%xmm14,%xmm14
1368299481Sjkim	vmovdqu	-120(%rsi),%xmm15
1369299481Sjkim	vaesdeclast	%xmm0,%xmm7,%xmm7
1370299481Sjkim	vpxor	64(%rbp),%xmm6,%xmm6
1371299481Sjkim	vaesdeclast	%xmm0,%xmm8,%xmm8
1372299481Sjkim	vpxor	80(%rbp),%xmm7,%xmm7
1373299481Sjkim	vmovdqa	%xmm14,48(%rsp)
1374299481Sjkim	vaesdeclast	%xmm0,%xmm9,%xmm9
1375299481Sjkim	vpxor	96(%rbp),%xmm8,%xmm8
1376299481Sjkim	vmovups	32-120(%rsi),%xmm0
1377299481Sjkim
# Per lane: store the finished block at -16(register) (the register holds
# output pointer + 16), subtract the pointer difference to turn the
# register back into the advanced input pointer, write the pre-loaded next
# input block over the chaining slot that was just consumed, and start the
# next decryption by XORing that block with round key 0.
1378299481Sjkim	vmovups	%xmm2,-16(%r8)
1379299481Sjkim	subq	%rbx,%r8
1380299481Sjkim	vmovdqu	128+0(%rsp),%xmm2
1381299481Sjkim	vpxor	112(%rbp),%xmm9,%xmm9
1382299481Sjkim	vmovups	%xmm3,-16(%r9)
1383299481Sjkim	subq	72(%rsp),%r9
1384299481Sjkim	vmovdqu	%xmm2,0(%rbp)
1385299481Sjkim	vpxor	%xmm15,%xmm2,%xmm2
1386299481Sjkim	vmovdqu	128+16(%rsp),%xmm3
1387299481Sjkim	vmovups	%xmm4,-16(%r10)
1388299481Sjkim	subq	80(%rsp),%r10
1389299481Sjkim	vmovdqu	%xmm3,16(%rbp)
1390299481Sjkim	vpxor	%xmm15,%xmm3,%xmm3
1391299481Sjkim	vmovdqu	128+32(%rsp),%xmm4
1392299481Sjkim	vmovups	%xmm5,-16(%r11)
1393299481Sjkim	subq	88(%rsp),%r11
1394299481Sjkim	vmovdqu	%xmm4,32(%rbp)
1395299481Sjkim	vpxor	%xmm15,%xmm4,%xmm4
1396299481Sjkim	vmovdqu	128+48(%rsp),%xmm5
1397299481Sjkim	vmovups	%xmm6,-16(%r12)
1398299481Sjkim	subq	96(%rsp),%r12
1399299481Sjkim	vmovdqu	%xmm5,48(%rbp)
1400299481Sjkim	vpxor	%xmm15,%xmm5,%xmm5
1401299481Sjkim	vmovdqu	%xmm10,64(%rbp)
1402299481Sjkim	vpxor	%xmm10,%xmm15,%xmm6
1403299481Sjkim	vmovups	%xmm7,-16(%r13)
1404299481Sjkim	subq	104(%rsp),%r13
1405299481Sjkim	vmovdqu	%xmm11,80(%rbp)
1406299481Sjkim	vpxor	%xmm11,%xmm15,%xmm7
1407299481Sjkim	vmovups	%xmm8,-16(%r14)
1408299481Sjkim	subq	112(%rsp),%r14
1409299481Sjkim	vmovdqu	%xmm12,96(%rbp)
1410299481Sjkim	vpxor	%xmm12,%xmm15,%xmm8
1411299481Sjkim	vmovups	%xmm9,-16(%r15)
1412299481Sjkim	subq	120(%rsp),%r15
1413299481Sjkim	vmovdqu	%xmm13,112(%rbp)
1414299481Sjkim	vpxor	%xmm13,%xmm15,%xmm9
1415299481Sjkim
# Flip %rbp to the other chaining buffer, which holds the input blocks that
# were just decrypted, then loop until the longest lane is exhausted.
1416299481Sjkim	xorq	$128,%rbp
1417299481Sjkim	decl	%edx
1418299481Sjkim	jnz	.Loop_dec8x
1419299481Sjkim
# %rax was overwritten by the load into %eax before the loop; fetch the
# entry %rsp back from the frame.
1420299481Sjkim	movq	16(%rsp),%rax
1421299481Sjkim
1422299481Sjkim
1423299481Sjkim
1424299481Sjkim
1425299481Sjkim
# Epilogue: %rax = entry %rsp.  Restore the six registers pushed in the
# prologue from just below it, then restore %rsp itself.
1426299481Sjkim.Ldec8x_done:
1427299481Sjkim	vzeroupper
1428299481Sjkim	movq	-48(%rax),%r15
1429299481Sjkim	movq	-40(%rax),%r14
1430299481Sjkim	movq	-32(%rax),%r13
1431299481Sjkim	movq	-24(%rax),%r12
1432299481Sjkim	movq	-16(%rax),%rbp
1433299481Sjkim	movq	-8(%rax),%rbx
1434299481Sjkim	leaq	(%rax),%rsp
1435299481Sjkim.Ldec8x_epilogue:
# 0xf3,0xc3 is the two-byte encoding of "rep ret".
1436299481Sjkim	.byte	0xf3,0xc3
1437299481Sjkim.size	aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx
1438