1#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
2.text
3.extern	OPENSSL_ia32cap_P
4.hidden OPENSSL_ia32cap_P
/*
 * aes_hw_encrypt: encrypt one 16-byte block with the AES-NI instructions.
 *
 * Register use, as read from the code below (first three System V integer
 * argument registers):
 *   %rdi  address of the 16-byte input block (unaligned load)
 *   %rsi  address the 16-byte result is stored to (unaligned store)
 *   %rdx  address of the key schedule; the 32-bit loop count is read from
 *         offset 240 of it
 * The loop executes "count" aesenc rounds and is followed by one aesenclast.
 * Modifies %eax, %rdx, %xmm0-%xmm2 and flags.  %xmm0/%xmm1 (round keys) and
 * %xmm2 (the block) are zeroed before returning.
 */
5.globl	aes_hw_encrypt
6.hidden aes_hw_encrypt
7.type	aes_hw_encrypt,@function
8.align	16
9aes_hw_encrypt:
10	movups	(%rdi),%xmm2
11	movl	240(%rdx),%eax
	/* xmm0 = round key 0, xmm1 = round key 1, %rdx now points at round key 2. */
12	movups	(%rdx),%xmm0
13	movups	16(%rdx),%xmm1
14	leaq	32(%rdx),%rdx
	/* Initial whitening: block ^= round key 0. */
15	xorps	%xmm0,%xmm2
16.Loop_enc1_1:
	/* 66 0F 38 DC D1 = aesenc %xmm1,%xmm2 */
17.byte	102,15,56,220,209
	/* decl sets ZF for the jnz below; movups and leaq leave flags untouched. */
18	decl	%eax
19	movups	(%rdx),%xmm1
20	leaq	16(%rdx),%rdx
21	jnz	.Loop_enc1_1
	/* 66 0F 38 DD D1 = aesenclast %xmm1,%xmm2 */
22.byte	102,15,56,221,209
23	pxor	%xmm0,%xmm0
24	pxor	%xmm1,%xmm1
25	movups	%xmm2,(%rsi)
26	pxor	%xmm2,%xmm2
	/* F3 C3 = rep ret */
27	.byte	0xf3,0xc3
28.size	aes_hw_encrypt,.-aes_hw_encrypt
29
/*
 * aes_hw_decrypt: decrypt one 16-byte block with the AES-NI instructions.
 *
 * Register use, as read from the code below (first three System V integer
 * argument registers):
 *   %rdi  address of the 16-byte input block (unaligned load)
 *   %rsi  address the 16-byte result is stored to (unaligned store)
 *   %rdx  address of the key schedule; the 32-bit loop count is read from
 *         offset 240 of it
 * The loop executes "count" aesdec rounds and is followed by one aesdeclast.
 * Modifies %eax, %rdx, %xmm0-%xmm2 and flags.  %xmm0/%xmm1 (round keys) and
 * %xmm2 (the block) are zeroed before returning.
 */
30.globl	aes_hw_decrypt
31.hidden aes_hw_decrypt
32.type	aes_hw_decrypt,@function
33.align	16
34aes_hw_decrypt:
35	movups	(%rdi),%xmm2
36	movl	240(%rdx),%eax
	/* xmm0 = round key 0, xmm1 = round key 1, %rdx now points at round key 2. */
37	movups	(%rdx),%xmm0
38	movups	16(%rdx),%xmm1
39	leaq	32(%rdx),%rdx
	/* Initial whitening: block ^= round key 0. */
40	xorps	%xmm0,%xmm2
41.Loop_dec1_2:
	/* 66 0F 38 DE D1 = aesdec %xmm1,%xmm2 */
42.byte	102,15,56,222,209
	/* decl sets ZF for the jnz below; movups and leaq leave flags untouched. */
43	decl	%eax
44	movups	(%rdx),%xmm1
45	leaq	16(%rdx),%rdx
46	jnz	.Loop_dec1_2
	/* 66 0F 38 DF D1 = aesdeclast %xmm1,%xmm2 */
47.byte	102,15,56,223,209
48	pxor	%xmm0,%xmm0
49	pxor	%xmm1,%xmm1
50	movups	%xmm2,(%rsi)
51	pxor	%xmm2,%xmm2
	/* F3 C3 = rep ret */
52	.byte	0xf3,0xc3
53.size	aes_hw_decrypt, .-aes_hw_decrypt
/*
 * _aesni_encrypt2: file-local helper (no .globl) that runs two blocks
 * through the AES encryption rounds in parallel.
 *
 * In:   %xmm2, %xmm3  the two blocks
 *       %rcx          key schedule address
 *       %eax          loop count (callers in this file load it from offset
 *                     240 of the key schedule)
 * Out:  %xmm2, %xmm3  the processed blocks
 * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 *
 * Opcode legend: .byte 102,15,56,220,M is aesenc and 102,15,56,221,M is
 * aesenclast; M is the ModRM byte selecting the round-key register
 * (%xmm1 or %xmm0) and the block register.
 */
54.type	_aesni_encrypt2,@function
55.align	16
56_aesni_encrypt2:
57	movups	(%rcx),%xmm0
	/* %eax = 16 * count, i.e. a byte offset into the key schedule. */
58	shll	$4,%eax
59	movups	16(%rcx),%xmm1
	/* Whitening with round key 0. */
60	xorps	%xmm0,%xmm2
61	xorps	%xmm0,%xmm3
62	movups	32(%rcx),%xmm0
	/* %rcx = key + 32 + 16*count; %rax = 16 - 16*count.  The loop adds 32 */
	/* to %rax per pass and stops when it reaches zero. */
63	leaq	32(%rcx,%rax,1),%rcx
64	negq	%rax
65	addq	$16,%rax
66
67.Lenc_loop2:
	/* aesenc %xmm1 into %xmm2, %xmm3 */
68.byte	102,15,56,220,209
69.byte	102,15,56,220,217
70	movups	(%rcx,%rax,1),%xmm1
	/* Sets ZF for the jnz below; the instructions in between do not modify flags. */
71	addq	$32,%rax
	/* aesenc %xmm0 into %xmm2, %xmm3 */
72.byte	102,15,56,220,208
73.byte	102,15,56,220,216
74	movups	-16(%rcx,%rax,1),%xmm0
75	jnz	.Lenc_loop2
76
	/* Last two rounds: aesenc with %xmm1, then aesenclast with %xmm0. */
77.byte	102,15,56,220,209
78.byte	102,15,56,220,217
79.byte	102,15,56,221,208
80.byte	102,15,56,221,216
	/* F3 C3 = rep ret */
81	.byte	0xf3,0xc3
82.size	_aesni_encrypt2,.-_aesni_encrypt2
/*
 * _aesni_decrypt2: file-local helper (no .globl) that runs two blocks
 * through the AES decryption rounds in parallel.
 *
 * In:   %xmm2, %xmm3  the two blocks
 *       %rcx          key schedule address
 *       %eax          loop count (callers in this file load it from offset
 *                     240 of the key schedule)
 * Out:  %xmm2, %xmm3  the processed blocks
 * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 *
 * Opcode legend: .byte 102,15,56,222,M is aesdec and 102,15,56,223,M is
 * aesdeclast; M is the ModRM byte selecting the round-key register
 * (%xmm1 or %xmm0) and the block register.
 */
83.type	_aesni_decrypt2,@function
84.align	16
85_aesni_decrypt2:
86	movups	(%rcx),%xmm0
	/* %eax = 16 * count, i.e. a byte offset into the key schedule. */
87	shll	$4,%eax
88	movups	16(%rcx),%xmm1
	/* Whitening with round key 0. */
89	xorps	%xmm0,%xmm2
90	xorps	%xmm0,%xmm3
91	movups	32(%rcx),%xmm0
	/* %rcx = key + 32 + 16*count; %rax = 16 - 16*count.  The loop adds 32 */
	/* to %rax per pass and stops when it reaches zero. */
92	leaq	32(%rcx,%rax,1),%rcx
93	negq	%rax
94	addq	$16,%rax
95
96.Ldec_loop2:
	/* aesdec %xmm1 into %xmm2, %xmm3 */
97.byte	102,15,56,222,209
98.byte	102,15,56,222,217
99	movups	(%rcx,%rax,1),%xmm1
	/* Sets ZF for the jnz below; the instructions in between do not modify flags. */
100	addq	$32,%rax
	/* aesdec %xmm0 into %xmm2, %xmm3 */
101.byte	102,15,56,222,208
102.byte	102,15,56,222,216
103	movups	-16(%rcx,%rax,1),%xmm0
104	jnz	.Ldec_loop2
105
	/* Last two rounds: aesdec with %xmm1, then aesdeclast with %xmm0. */
106.byte	102,15,56,222,209
107.byte	102,15,56,222,217
108.byte	102,15,56,223,208
109.byte	102,15,56,223,216
	/* F3 C3 = rep ret */
110	.byte	0xf3,0xc3
111.size	_aesni_decrypt2,.-_aesni_decrypt2
/*
 * _aesni_encrypt3: file-local helper (no .globl) that runs three blocks
 * through the AES encryption rounds in parallel.
 *
 * In:   %xmm2-%xmm4  the three blocks
 *       %rcx         key schedule address
 *       %eax         loop count (callers in this file load it from offset
 *                    240 of the key schedule)
 * Out:  %xmm2-%xmm4  the processed blocks
 * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 *
 * Opcode legend: .byte 102,15,56,220,M is aesenc and 102,15,56,221,M is
 * aesenclast; M is the ModRM byte selecting the round-key register
 * (%xmm1 or %xmm0) and the block register.
 */
112.type	_aesni_encrypt3,@function
113.align	16
114_aesni_encrypt3:
115	movups	(%rcx),%xmm0
	/* %eax = 16 * count, i.e. a byte offset into the key schedule. */
116	shll	$4,%eax
117	movups	16(%rcx),%xmm1
	/* Whitening with round key 0. */
118	xorps	%xmm0,%xmm2
119	xorps	%xmm0,%xmm3
120	xorps	%xmm0,%xmm4
121	movups	32(%rcx),%xmm0
	/* %rcx = key + 32 + 16*count; %rax = 16 - 16*count.  The loop adds 32 */
	/* to %rax per pass and stops when it reaches zero. */
122	leaq	32(%rcx,%rax,1),%rcx
123	negq	%rax
124	addq	$16,%rax
125
126.Lenc_loop3:
	/* aesenc %xmm1 into %xmm2..%xmm4 */
127.byte	102,15,56,220,209
128.byte	102,15,56,220,217
129.byte	102,15,56,220,225
130	movups	(%rcx,%rax,1),%xmm1
	/* Sets ZF for the jnz below; the instructions in between do not modify flags. */
131	addq	$32,%rax
	/* aesenc %xmm0 into %xmm2..%xmm4 */
132.byte	102,15,56,220,208
133.byte	102,15,56,220,216
134.byte	102,15,56,220,224
135	movups	-16(%rcx,%rax,1),%xmm0
136	jnz	.Lenc_loop3
137
	/* Last two rounds: aesenc with %xmm1, then aesenclast with %xmm0. */
138.byte	102,15,56,220,209
139.byte	102,15,56,220,217
140.byte	102,15,56,220,225
141.byte	102,15,56,221,208
142.byte	102,15,56,221,216
143.byte	102,15,56,221,224
	/* F3 C3 = rep ret */
144	.byte	0xf3,0xc3
145.size	_aesni_encrypt3,.-_aesni_encrypt3
/*
 * _aesni_decrypt3: file-local helper (no .globl) that runs three blocks
 * through the AES decryption rounds in parallel.
 *
 * In:   %xmm2-%xmm4  the three blocks
 *       %rcx         key schedule address
 *       %eax         loop count (callers in this file load it from offset
 *                    240 of the key schedule)
 * Out:  %xmm2-%xmm4  the processed blocks
 * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 *
 * Opcode legend: .byte 102,15,56,222,M is aesdec and 102,15,56,223,M is
 * aesdeclast; M is the ModRM byte selecting the round-key register
 * (%xmm1 or %xmm0) and the block register.
 */
146.type	_aesni_decrypt3,@function
147.align	16
148_aesni_decrypt3:
149	movups	(%rcx),%xmm0
	/* %eax = 16 * count, i.e. a byte offset into the key schedule. */
150	shll	$4,%eax
151	movups	16(%rcx),%xmm1
	/* Whitening with round key 0. */
152	xorps	%xmm0,%xmm2
153	xorps	%xmm0,%xmm3
154	xorps	%xmm0,%xmm4
155	movups	32(%rcx),%xmm0
	/* %rcx = key + 32 + 16*count; %rax = 16 - 16*count.  The loop adds 32 */
	/* to %rax per pass and stops when it reaches zero. */
156	leaq	32(%rcx,%rax,1),%rcx
157	negq	%rax
158	addq	$16,%rax
159
160.Ldec_loop3:
	/* aesdec %xmm1 into %xmm2..%xmm4 */
161.byte	102,15,56,222,209
162.byte	102,15,56,222,217
163.byte	102,15,56,222,225
164	movups	(%rcx,%rax,1),%xmm1
	/* Sets ZF for the jnz below; the instructions in between do not modify flags. */
165	addq	$32,%rax
	/* aesdec %xmm0 into %xmm2..%xmm4 */
166.byte	102,15,56,222,208
167.byte	102,15,56,222,216
168.byte	102,15,56,222,224
169	movups	-16(%rcx,%rax,1),%xmm0
170	jnz	.Ldec_loop3
171
	/* Last two rounds: aesdec with %xmm1, then aesdeclast with %xmm0. */
172.byte	102,15,56,222,209
173.byte	102,15,56,222,217
174.byte	102,15,56,222,225
175.byte	102,15,56,223,208
176.byte	102,15,56,223,216
177.byte	102,15,56,223,224
	/* F3 C3 = rep ret */
178	.byte	0xf3,0xc3
179.size	_aesni_decrypt3,.-_aesni_decrypt3
/*
 * _aesni_encrypt4: file-local helper (no .globl) that runs four blocks
 * through the AES encryption rounds in parallel.
 *
 * In:   %xmm2-%xmm5  the four blocks
 *       %rcx         key schedule address
 *       %eax         loop count (callers in this file load it from offset
 *                    240 of the key schedule)
 * Out:  %xmm2-%xmm5  the processed blocks
 * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 *
 * Opcode legend: .byte 102,15,56,220,M is aesenc and 102,15,56,221,M is
 * aesenclast; M is the ModRM byte selecting the round-key register
 * (%xmm1 or %xmm0) and the block register.
 */
180.type	_aesni_encrypt4,@function
181.align	16
182_aesni_encrypt4:
183	movups	(%rcx),%xmm0
	/* %eax = 16 * count, i.e. a byte offset into the key schedule. */
184	shll	$4,%eax
185	movups	16(%rcx),%xmm1
	/* Whitening with round key 0. */
186	xorps	%xmm0,%xmm2
187	xorps	%xmm0,%xmm3
188	xorps	%xmm0,%xmm4
189	xorps	%xmm0,%xmm5
190	movups	32(%rcx),%xmm0
	/* %rcx = key + 32 + 16*count; %rax = 16 - 16*count.  The loop adds 32 */
	/* to %rax per pass and stops when it reaches zero. */
191	leaq	32(%rcx,%rax,1),%rcx
192	negq	%rax
	/* 0F 1F 00 = three-byte NOP (nopl (%rax)); no architectural effect. */
193.byte	0x0f,0x1f,0x00
194	addq	$16,%rax
195
196.Lenc_loop4:
	/* aesenc %xmm1 into %xmm2..%xmm5 */
197.byte	102,15,56,220,209
198.byte	102,15,56,220,217
199.byte	102,15,56,220,225
200.byte	102,15,56,220,233
201	movups	(%rcx,%rax,1),%xmm1
	/* Sets ZF for the jnz below; the instructions in between do not modify flags. */
202	addq	$32,%rax
	/* aesenc %xmm0 into %xmm2..%xmm5 */
203.byte	102,15,56,220,208
204.byte	102,15,56,220,216
205.byte	102,15,56,220,224
206.byte	102,15,56,220,232
207	movups	-16(%rcx,%rax,1),%xmm0
208	jnz	.Lenc_loop4
209
	/* Last two rounds: aesenc with %xmm1, then aesenclast with %xmm0. */
210.byte	102,15,56,220,209
211.byte	102,15,56,220,217
212.byte	102,15,56,220,225
213.byte	102,15,56,220,233
214.byte	102,15,56,221,208
215.byte	102,15,56,221,216
216.byte	102,15,56,221,224
217.byte	102,15,56,221,232
	/* F3 C3 = rep ret */
218	.byte	0xf3,0xc3
219.size	_aesni_encrypt4,.-_aesni_encrypt4
/*
 * _aesni_decrypt4: file-local helper (no .globl) that runs four blocks
 * through the AES decryption rounds in parallel.
 *
 * In:   %xmm2-%xmm5  the four blocks
 *       %rcx         key schedule address
 *       %eax         loop count (callers in this file load it from offset
 *                    240 of the key schedule)
 * Out:  %xmm2-%xmm5  the processed blocks
 * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 *
 * Opcode legend: .byte 102,15,56,222,M is aesdec and 102,15,56,223,M is
 * aesdeclast; M is the ModRM byte selecting the round-key register
 * (%xmm1 or %xmm0) and the block register.
 */
220.type	_aesni_decrypt4,@function
221.align	16
222_aesni_decrypt4:
223	movups	(%rcx),%xmm0
	/* %eax = 16 * count, i.e. a byte offset into the key schedule. */
224	shll	$4,%eax
225	movups	16(%rcx),%xmm1
	/* Whitening with round key 0. */
226	xorps	%xmm0,%xmm2
227	xorps	%xmm0,%xmm3
228	xorps	%xmm0,%xmm4
229	xorps	%xmm0,%xmm5
230	movups	32(%rcx),%xmm0
	/* %rcx = key + 32 + 16*count; %rax = 16 - 16*count.  The loop adds 32 */
	/* to %rax per pass and stops when it reaches zero. */
231	leaq	32(%rcx,%rax,1),%rcx
232	negq	%rax
	/* 0F 1F 00 = three-byte NOP (nopl (%rax)); no architectural effect. */
233.byte	0x0f,0x1f,0x00
234	addq	$16,%rax
235
236.Ldec_loop4:
	/* aesdec %xmm1 into %xmm2..%xmm5 */
237.byte	102,15,56,222,209
238.byte	102,15,56,222,217
239.byte	102,15,56,222,225
240.byte	102,15,56,222,233
241	movups	(%rcx,%rax,1),%xmm1
	/* Sets ZF for the jnz below; the instructions in between do not modify flags. */
242	addq	$32,%rax
	/* aesdec %xmm0 into %xmm2..%xmm5 */
243.byte	102,15,56,222,208
244.byte	102,15,56,222,216
245.byte	102,15,56,222,224
246.byte	102,15,56,222,232
247	movups	-16(%rcx,%rax,1),%xmm0
248	jnz	.Ldec_loop4
249
	/* Last two rounds: aesdec with %xmm1, then aesdeclast with %xmm0. */
250.byte	102,15,56,222,209
251.byte	102,15,56,222,217
252.byte	102,15,56,222,225
253.byte	102,15,56,222,233
254.byte	102,15,56,223,208
255.byte	102,15,56,223,216
256.byte	102,15,56,223,224
257.byte	102,15,56,223,232
	/* F3 C3 = rep ret */
258	.byte	0xf3,0xc3
259.size	_aesni_decrypt4,.-_aesni_decrypt4
/*
 * _aesni_encrypt6: file-local helper (no .globl) that runs six blocks
 * through the AES encryption rounds in parallel.
 *
 * In:   %xmm2-%xmm7  the six blocks
 *       %rcx         key schedule address
 *       %eax         loop count (callers in this file load it from offset
 *                    240 of the key schedule)
 * Out:  %xmm2-%xmm7  the processed blocks
 * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 *
 * The prologue interleaves the round-key-0 whitening of %xmm5-%xmm7 with the
 * first aesenc (round key 1) on %xmm2-%xmm4, then jumps into the middle of
 * the loop (.Lenc_loop6_enter) to apply that first round to %xmm5-%xmm7.
 * .Lenc_loop6 is also a call target for the CTR code later in this file,
 * which sets up the registers itself.
 *
 * Opcode legend: .byte 102,15,56,220,M is aesenc and 102,15,56,221,M is
 * aesenclast; M is the ModRM byte selecting the round-key register
 * (%xmm1 or %xmm0) and the block register.
 */
260.type	_aesni_encrypt6,@function
261.align	16
262_aesni_encrypt6:
263	movups	(%rcx),%xmm0
	/* %eax = 16 * count, i.e. a byte offset into the key schedule. */
264	shll	$4,%eax
265	movups	16(%rcx),%xmm1
266	xorps	%xmm0,%xmm2
267	pxor	%xmm0,%xmm3
268	pxor	%xmm0,%xmm4
	/* aesenc %xmm1,%xmm2 */
269.byte	102,15,56,220,209
	/* %rcx = key + 32 + 16*count; %rax = -16*count. */
270	leaq	32(%rcx,%rax,1),%rcx
271	negq	%rax
	/* aesenc %xmm1,%xmm3 */
272.byte	102,15,56,220,217
273	pxor	%xmm0,%xmm5
274	pxor	%xmm0,%xmm6
	/* aesenc %xmm1,%xmm4 */
275.byte	102,15,56,220,225
276	pxor	%xmm0,%xmm7
	/* (%rcx,%rax) is key + 32 here: load round key 2 into %xmm0. */
277	movups	(%rcx,%rax,1),%xmm0
278	addq	$16,%rax
279	jmp	.Lenc_loop6_enter
280.align	16
281.Lenc_loop6:
	/* aesenc %xmm1 into %xmm2..%xmm4 */
282.byte	102,15,56,220,209
283.byte	102,15,56,220,217
284.byte	102,15,56,220,225
285.Lenc_loop6_enter:
	/* aesenc %xmm1 into %xmm5..%xmm7 */
286.byte	102,15,56,220,233
287.byte	102,15,56,220,241
288.byte	102,15,56,220,249
289	movups	(%rcx,%rax,1),%xmm1
	/* Sets ZF for the jnz below; the instructions in between do not modify flags. */
290	addq	$32,%rax
	/* aesenc %xmm0 into %xmm2..%xmm7 */
291.byte	102,15,56,220,208
292.byte	102,15,56,220,216
293.byte	102,15,56,220,224
294.byte	102,15,56,220,232
295.byte	102,15,56,220,240
296.byte	102,15,56,220,248
297	movups	-16(%rcx,%rax,1),%xmm0
298	jnz	.Lenc_loop6
299
	/* Last two rounds: aesenc with %xmm1, then aesenclast with %xmm0. */
300.byte	102,15,56,220,209
301.byte	102,15,56,220,217
302.byte	102,15,56,220,225
303.byte	102,15,56,220,233
304.byte	102,15,56,220,241
305.byte	102,15,56,220,249
306.byte	102,15,56,221,208
307.byte	102,15,56,221,216
308.byte	102,15,56,221,224
309.byte	102,15,56,221,232
310.byte	102,15,56,221,240
311.byte	102,15,56,221,248
	/* F3 C3 = rep ret */
312	.byte	0xf3,0xc3
313.size	_aesni_encrypt6,.-_aesni_encrypt6
/*
 * _aesni_decrypt6: file-local helper (no .globl) that runs six blocks
 * through the AES decryption rounds in parallel.
 *
 * In:   %xmm2-%xmm7  the six blocks
 *       %rcx         key schedule address
 *       %eax         loop count (callers in this file load it from offset
 *                    240 of the key schedule)
 * Out:  %xmm2-%xmm7  the processed blocks
 * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 *
 * The prologue interleaves the round-key-0 whitening of %xmm5-%xmm7 with the
 * first aesdec (round key 1) on %xmm2-%xmm4, then jumps into the middle of
 * the loop (.Ldec_loop6_enter) to apply that first round to %xmm5-%xmm7.
 *
 * Opcode legend: .byte 102,15,56,222,M is aesdec and 102,15,56,223,M is
 * aesdeclast; M is the ModRM byte selecting the round-key register
 * (%xmm1 or %xmm0) and the block register.
 */
314.type	_aesni_decrypt6,@function
315.align	16
316_aesni_decrypt6:
317	movups	(%rcx),%xmm0
	/* %eax = 16 * count, i.e. a byte offset into the key schedule. */
318	shll	$4,%eax
319	movups	16(%rcx),%xmm1
320	xorps	%xmm0,%xmm2
321	pxor	%xmm0,%xmm3
322	pxor	%xmm0,%xmm4
	/* aesdec %xmm1,%xmm2 */
323.byte	102,15,56,222,209
	/* %rcx = key + 32 + 16*count; %rax = -16*count. */
324	leaq	32(%rcx,%rax,1),%rcx
325	negq	%rax
	/* aesdec %xmm1,%xmm3 */
326.byte	102,15,56,222,217
327	pxor	%xmm0,%xmm5
328	pxor	%xmm0,%xmm6
	/* aesdec %xmm1,%xmm4 */
329.byte	102,15,56,222,225
330	pxor	%xmm0,%xmm7
	/* (%rcx,%rax) is key + 32 here: load round key 2 into %xmm0. */
331	movups	(%rcx,%rax,1),%xmm0
332	addq	$16,%rax
333	jmp	.Ldec_loop6_enter
334.align	16
335.Ldec_loop6:
	/* aesdec %xmm1 into %xmm2..%xmm4 */
336.byte	102,15,56,222,209
337.byte	102,15,56,222,217
338.byte	102,15,56,222,225
339.Ldec_loop6_enter:
	/* aesdec %xmm1 into %xmm5..%xmm7 */
340.byte	102,15,56,222,233
341.byte	102,15,56,222,241
342.byte	102,15,56,222,249
343	movups	(%rcx,%rax,1),%xmm1
	/* Sets ZF for the jnz below; the instructions in between do not modify flags. */
344	addq	$32,%rax
	/* aesdec %xmm0 into %xmm2..%xmm7 */
345.byte	102,15,56,222,208
346.byte	102,15,56,222,216
347.byte	102,15,56,222,224
348.byte	102,15,56,222,232
349.byte	102,15,56,222,240
350.byte	102,15,56,222,248
351	movups	-16(%rcx,%rax,1),%xmm0
352	jnz	.Ldec_loop6
353
	/* Last two rounds: aesdec with %xmm1, then aesdeclast with %xmm0. */
354.byte	102,15,56,222,209
355.byte	102,15,56,222,217
356.byte	102,15,56,222,225
357.byte	102,15,56,222,233
358.byte	102,15,56,222,241
359.byte	102,15,56,222,249
360.byte	102,15,56,223,208
361.byte	102,15,56,223,216
362.byte	102,15,56,223,224
363.byte	102,15,56,223,232
364.byte	102,15,56,223,240
365.byte	102,15,56,223,248
	/* F3 C3 = rep ret */
366	.byte	0xf3,0xc3
367.size	_aesni_decrypt6,.-_aesni_decrypt6
/*
 * _aesni_encrypt8: file-local helper (no .globl) that runs eight blocks
 * through the AES encryption rounds in parallel.
 *
 * In:   %xmm2-%xmm9  the eight blocks
 *       %rcx         key schedule address
 *       %eax         loop count (callers in this file load it from offset
 *                    240 of the key schedule)
 * Out:  %xmm2-%xmm9  the processed blocks
 * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 *
 * The prologue applies the first aesenc (round key 1) to %xmm2 and %xmm3
 * while still whitening the other registers, then jumps to
 * .Lenc_loop8_inner to apply that round to %xmm4-%xmm9.
 *
 * Opcode legend: .byte 102,15,56,220,M is aesenc and 102,15,56,221,M is
 * aesenclast; M is the ModRM byte selecting the round-key register
 * (%xmm1 or %xmm0) and the block register.  The six-byte forms with the
 * extra 68 (0x44, REX.R) byte target %xmm8 and %xmm9.
 */
368.type	_aesni_encrypt8,@function
369.align	16
370_aesni_encrypt8:
371	movups	(%rcx),%xmm0
	/* %eax = 16 * count, i.e. a byte offset into the key schedule. */
372	shll	$4,%eax
373	movups	16(%rcx),%xmm1
374	xorps	%xmm0,%xmm2
375	xorps	%xmm0,%xmm3
376	pxor	%xmm0,%xmm4
377	pxor	%xmm0,%xmm5
378	pxor	%xmm0,%xmm6
	/* %rcx = key + 32 + 16*count; %rax = -16*count. */
379	leaq	32(%rcx,%rax,1),%rcx
380	negq	%rax
	/* aesenc %xmm1,%xmm2 */
381.byte	102,15,56,220,209
382	pxor	%xmm0,%xmm7
383	pxor	%xmm0,%xmm8
	/* aesenc %xmm1,%xmm3 */
384.byte	102,15,56,220,217
385	pxor	%xmm0,%xmm9
	/* (%rcx,%rax) is key + 32 here: load round key 2 into %xmm0. */
386	movups	(%rcx,%rax,1),%xmm0
387	addq	$16,%rax
388	jmp	.Lenc_loop8_inner
389.align	16
390.Lenc_loop8:
	/* aesenc %xmm1 into %xmm2, %xmm3 */
391.byte	102,15,56,220,209
392.byte	102,15,56,220,217
393.Lenc_loop8_inner:
	/* aesenc %xmm1 into %xmm4..%xmm9 */
394.byte	102,15,56,220,225
395.byte	102,15,56,220,233
396.byte	102,15,56,220,241
397.byte	102,15,56,220,249
398.byte	102,68,15,56,220,193
399.byte	102,68,15,56,220,201
	/* This label is not referenced anywhere in the visible part of the file. */
400.Lenc_loop8_enter:
401	movups	(%rcx,%rax,1),%xmm1
	/* Sets ZF for the jnz below; the instructions in between do not modify flags. */
402	addq	$32,%rax
	/* aesenc %xmm0 into %xmm2..%xmm9 */
403.byte	102,15,56,220,208
404.byte	102,15,56,220,216
405.byte	102,15,56,220,224
406.byte	102,15,56,220,232
407.byte	102,15,56,220,240
408.byte	102,15,56,220,248
409.byte	102,68,15,56,220,192
410.byte	102,68,15,56,220,200
411	movups	-16(%rcx,%rax,1),%xmm0
412	jnz	.Lenc_loop8
413
	/* Last two rounds: aesenc with %xmm1, then aesenclast with %xmm0. */
414.byte	102,15,56,220,209
415.byte	102,15,56,220,217
416.byte	102,15,56,220,225
417.byte	102,15,56,220,233
418.byte	102,15,56,220,241
419.byte	102,15,56,220,249
420.byte	102,68,15,56,220,193
421.byte	102,68,15,56,220,201
422.byte	102,15,56,221,208
423.byte	102,15,56,221,216
424.byte	102,15,56,221,224
425.byte	102,15,56,221,232
426.byte	102,15,56,221,240
427.byte	102,15,56,221,248
428.byte	102,68,15,56,221,192
429.byte	102,68,15,56,221,200
	/* F3 C3 = rep ret */
430	.byte	0xf3,0xc3
431.size	_aesni_encrypt8,.-_aesni_encrypt8
/*
 * _aesni_decrypt8: file-local helper (no .globl) that runs eight blocks
 * through the AES decryption rounds in parallel.
 *
 * In:   %xmm2-%xmm9  the eight blocks
 *       %rcx         key schedule address
 *       %eax         loop count (callers in this file load it from offset
 *                    240 of the key schedule)
 * Out:  %xmm2-%xmm9  the processed blocks
 * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 *
 * The prologue applies the first aesdec (round key 1) to %xmm2 and %xmm3
 * while still whitening the other registers, then jumps to
 * .Ldec_loop8_inner to apply that round to %xmm4-%xmm9.
 *
 * Opcode legend: .byte 102,15,56,222,M is aesdec and 102,15,56,223,M is
 * aesdeclast; M is the ModRM byte selecting the round-key register
 * (%xmm1 or %xmm0) and the block register.  The six-byte forms with the
 * extra 68 (0x44, REX.R) byte target %xmm8 and %xmm9.
 */
432.type	_aesni_decrypt8,@function
433.align	16
434_aesni_decrypt8:
435	movups	(%rcx),%xmm0
	/* %eax = 16 * count, i.e. a byte offset into the key schedule. */
436	shll	$4,%eax
437	movups	16(%rcx),%xmm1
438	xorps	%xmm0,%xmm2
439	xorps	%xmm0,%xmm3
440	pxor	%xmm0,%xmm4
441	pxor	%xmm0,%xmm5
442	pxor	%xmm0,%xmm6
	/* %rcx = key + 32 + 16*count; %rax = -16*count. */
443	leaq	32(%rcx,%rax,1),%rcx
444	negq	%rax
	/* aesdec %xmm1,%xmm2 */
445.byte	102,15,56,222,209
446	pxor	%xmm0,%xmm7
447	pxor	%xmm0,%xmm8
	/* aesdec %xmm1,%xmm3 */
448.byte	102,15,56,222,217
449	pxor	%xmm0,%xmm9
	/* (%rcx,%rax) is key + 32 here: load round key 2 into %xmm0. */
450	movups	(%rcx,%rax,1),%xmm0
451	addq	$16,%rax
452	jmp	.Ldec_loop8_inner
453.align	16
454.Ldec_loop8:
	/* aesdec %xmm1 into %xmm2, %xmm3 */
455.byte	102,15,56,222,209
456.byte	102,15,56,222,217
457.Ldec_loop8_inner:
	/* aesdec %xmm1 into %xmm4..%xmm9 */
458.byte	102,15,56,222,225
459.byte	102,15,56,222,233
460.byte	102,15,56,222,241
461.byte	102,15,56,222,249
462.byte	102,68,15,56,222,193
463.byte	102,68,15,56,222,201
	/* This label is not referenced anywhere in the visible part of the file. */
464.Ldec_loop8_enter:
465	movups	(%rcx,%rax,1),%xmm1
	/* Sets ZF for the jnz below; the instructions in between do not modify flags. */
466	addq	$32,%rax
	/* aesdec %xmm0 into %xmm2..%xmm9 */
467.byte	102,15,56,222,208
468.byte	102,15,56,222,216
469.byte	102,15,56,222,224
470.byte	102,15,56,222,232
471.byte	102,15,56,222,240
472.byte	102,15,56,222,248
473.byte	102,68,15,56,222,192
474.byte	102,68,15,56,222,200
475	movups	-16(%rcx,%rax,1),%xmm0
476	jnz	.Ldec_loop8
477
	/* Last two rounds: aesdec with %xmm1, then aesdeclast with %xmm0. */
478.byte	102,15,56,222,209
479.byte	102,15,56,222,217
480.byte	102,15,56,222,225
481.byte	102,15,56,222,233
482.byte	102,15,56,222,241
483.byte	102,15,56,222,249
484.byte	102,68,15,56,222,193
485.byte	102,68,15,56,222,201
486.byte	102,15,56,223,208
487.byte	102,15,56,223,216
488.byte	102,15,56,223,224
489.byte	102,15,56,223,232
490.byte	102,15,56,223,240
491.byte	102,15,56,223,248
492.byte	102,68,15,56,223,192
493.byte	102,68,15,56,223,200
	/* F3 C3 = rep ret */
494	.byte	0xf3,0xc3
495.size	_aesni_decrypt8,.-_aesni_decrypt8
/*
 * aes_hw_ecb_encrypt: ECB-mode encryption or decryption of whole 16-byte
 * blocks, each block processed independently with the same key schedule.
 *
 * Register use, as read from the code below (System V integer argument
 * registers in order):
 *   %rdi  input address
 *   %rsi  output address
 *   %rdx  length in bytes; the low four bits are masked off, and a masked
 *         length of zero returns immediately
 *   %rcx  key schedule address (loop count read from offset 240)
 *   %r8d  direction: non-zero takes the encrypt path, zero the decrypt path
 *
 * %r11 and %r10d keep copies of the key address and loop count because the
 * _aesni_* helpers modify %rcx and %rax.
 *
 * While at least 0x80 bytes remain, eight blocks are processed per call of
 * the 8-block helper; stores of one batch are interleaved with loads of the
 * next.  A remainder of one to seven blocks is dispatched to the matching
 * helper (five blocks use the 6-block helper and seven use the 8-block
 * helper, with the spare register zeroed and its result not stored).
 *
 * The decrypt paths zero each data register after storing it; the encrypt
 * paths do not.  %xmm0 and %xmm1 are zeroed on every exit.
 * Modifies %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm9 and flags.
 */
496.globl	aes_hw_ecb_encrypt
497.hidden aes_hw_ecb_encrypt
498.type	aes_hw_ecb_encrypt,@function
499.align	16
500aes_hw_ecb_encrypt:
	/* Round the length down to a multiple of 16; nothing to do if that is zero. */
501	andq	$-16,%rdx
502	jz	.Lecb_ret
503
504	movl	240(%rcx),%eax
505	movups	(%rcx),%xmm0
	/* Save key address and loop count for reloading after each helper call. */
506	movq	%rcx,%r11
507	movl	%eax,%r10d
508	testl	%r8d,%r8d
509	jz	.Lecb_decrypt
510
	/* ---- encrypt path ---- */
511	cmpq	$0x80,%rdx
512	jb	.Lecb_enc_tail
513
	/* Load the first eight blocks and pre-subtract their size from the length. */
514	movdqu	(%rdi),%xmm2
515	movdqu	16(%rdi),%xmm3
516	movdqu	32(%rdi),%xmm4
517	movdqu	48(%rdi),%xmm5
518	movdqu	64(%rdi),%xmm6
519	movdqu	80(%rdi),%xmm7
520	movdqu	96(%rdi),%xmm8
521	movdqu	112(%rdi),%xmm9
522	leaq	128(%rdi),%rdi
523	subq	$0x80,%rdx
524	jmp	.Lecb_enc_loop8_enter
525.align	16
526.Lecb_enc_loop8:
	/* Store the previous eight results while loading the next eight blocks; */
	/* %rcx and %eax are restored for the helper along the way. */
527	movups	%xmm2,(%rsi)
528	movq	%r11,%rcx
529	movdqu	(%rdi),%xmm2
530	movl	%r10d,%eax
531	movups	%xmm3,16(%rsi)
532	movdqu	16(%rdi),%xmm3
533	movups	%xmm4,32(%rsi)
534	movdqu	32(%rdi),%xmm4
535	movups	%xmm5,48(%rsi)
536	movdqu	48(%rdi),%xmm5
537	movups	%xmm6,64(%rsi)
538	movdqu	64(%rdi),%xmm6
539	movups	%xmm7,80(%rsi)
540	movdqu	80(%rdi),%xmm7
541	movups	%xmm8,96(%rsi)
542	movdqu	96(%rdi),%xmm8
543	movups	%xmm9,112(%rsi)
544	leaq	128(%rsi),%rsi
545	movdqu	112(%rdi),%xmm9
546	leaq	128(%rdi),%rdi
547.Lecb_enc_loop8_enter:
548
549	call	_aesni_encrypt8
550
	/* Keep looping while another full 0x80 bytes remain (no borrow). */
551	subq	$0x80,%rdx
552	jnc	.Lecb_enc_loop8
553
	/* Store the final full batch and restore %rcx/%eax for the tail code. */
554	movups	%xmm2,(%rsi)
555	movq	%r11,%rcx
556	movups	%xmm3,16(%rsi)
557	movl	%r10d,%eax
558	movups	%xmm4,32(%rsi)
559	movups	%xmm5,48(%rsi)
560	movups	%xmm6,64(%rsi)
561	movups	%xmm7,80(%rsi)
562	movups	%xmm8,96(%rsi)
563	movups	%xmm9,112(%rsi)
564	leaq	128(%rsi),%rsi
	/* Undo the last subtraction; zero means no tail blocks are left. */
565	addq	$0x80,%rdx
566	jz	.Lecb_ret
567
568.Lecb_enc_tail:
	/* 0x10..0x70 bytes remain.  Each cmpq feeds both the jb and the je that */
	/* follow it; the movups in between does not modify flags. */
569	movups	(%rdi),%xmm2
570	cmpq	$0x20,%rdx
571	jb	.Lecb_enc_one
572	movups	16(%rdi),%xmm3
573	je	.Lecb_enc_two
574	movups	32(%rdi),%xmm4
575	cmpq	$0x40,%rdx
576	jb	.Lecb_enc_three
577	movups	48(%rdi),%xmm5
578	je	.Lecb_enc_four
579	movups	64(%rdi),%xmm6
580	cmpq	$0x60,%rdx
581	jb	.Lecb_enc_five
582	movups	80(%rdi),%xmm7
583	je	.Lecb_enc_six
	/* Seven blocks: zero the eighth lane, run the 8-block helper, store seven. */
584	movdqu	96(%rdi),%xmm8
585	xorps	%xmm9,%xmm9
586	call	_aesni_encrypt8
587	movups	%xmm2,(%rsi)
588	movups	%xmm3,16(%rsi)
589	movups	%xmm4,32(%rsi)
590	movups	%xmm5,48(%rsi)
591	movups	%xmm6,64(%rsi)
592	movups	%xmm7,80(%rsi)
593	movups	%xmm8,96(%rsi)
594	jmp	.Lecb_ret
595.align	16
596.Lecb_enc_one:
	/* Single block, inlined: count aesenc rounds then one aesenclast. */
597	movups	(%rcx),%xmm0
598	movups	16(%rcx),%xmm1
599	leaq	32(%rcx),%rcx
600	xorps	%xmm0,%xmm2
601.Loop_enc1_3:
	/* aesenc %xmm1,%xmm2 */
602.byte	102,15,56,220,209
603	decl	%eax
604	movups	(%rcx),%xmm1
605	leaq	16(%rcx),%rcx
606	jnz	.Loop_enc1_3
	/* aesenclast %xmm1,%xmm2 */
607.byte	102,15,56,221,209
608	movups	%xmm2,(%rsi)
609	jmp	.Lecb_ret
610.align	16
611.Lecb_enc_two:
612	call	_aesni_encrypt2
613	movups	%xmm2,(%rsi)
614	movups	%xmm3,16(%rsi)
615	jmp	.Lecb_ret
616.align	16
617.Lecb_enc_three:
618	call	_aesni_encrypt3
619	movups	%xmm2,(%rsi)
620	movups	%xmm3,16(%rsi)
621	movups	%xmm4,32(%rsi)
622	jmp	.Lecb_ret
623.align	16
624.Lecb_enc_four:
625	call	_aesni_encrypt4
626	movups	%xmm2,(%rsi)
627	movups	%xmm3,16(%rsi)
628	movups	%xmm4,32(%rsi)
629	movups	%xmm5,48(%rsi)
630	jmp	.Lecb_ret
631.align	16
632.Lecb_enc_five:
	/* Five blocks: zero the sixth lane, run the 6-block helper, store five. */
633	xorps	%xmm7,%xmm7
634	call	_aesni_encrypt6
635	movups	%xmm2,(%rsi)
636	movups	%xmm3,16(%rsi)
637	movups	%xmm4,32(%rsi)
638	movups	%xmm5,48(%rsi)
639	movups	%xmm6,64(%rsi)
640	jmp	.Lecb_ret
641.align	16
642.Lecb_enc_six:
643	call	_aesni_encrypt6
644	movups	%xmm2,(%rsi)
645	movups	%xmm3,16(%rsi)
646	movups	%xmm4,32(%rsi)
647	movups	%xmm5,48(%rsi)
648	movups	%xmm6,64(%rsi)
649	movups	%xmm7,80(%rsi)
650	jmp	.Lecb_ret
651
	/* ---- decrypt path: same structure as above, using the decrypt helpers ---- */
652.align	16
653.Lecb_decrypt:
654	cmpq	$0x80,%rdx
655	jb	.Lecb_dec_tail
656
	/* Load the first eight blocks and pre-subtract their size from the length. */
657	movdqu	(%rdi),%xmm2
658	movdqu	16(%rdi),%xmm3
659	movdqu	32(%rdi),%xmm4
660	movdqu	48(%rdi),%xmm5
661	movdqu	64(%rdi),%xmm6
662	movdqu	80(%rdi),%xmm7
663	movdqu	96(%rdi),%xmm8
664	movdqu	112(%rdi),%xmm9
665	leaq	128(%rdi),%rdi
666	subq	$0x80,%rdx
667	jmp	.Lecb_dec_loop8_enter
668.align	16
669.Lecb_dec_loop8:
	/* Store the previous eight results while loading the next eight blocks; */
	/* %rcx and %eax are restored for the helper along the way. */
670	movups	%xmm2,(%rsi)
671	movq	%r11,%rcx
672	movdqu	(%rdi),%xmm2
673	movl	%r10d,%eax
674	movups	%xmm3,16(%rsi)
675	movdqu	16(%rdi),%xmm3
676	movups	%xmm4,32(%rsi)
677	movdqu	32(%rdi),%xmm4
678	movups	%xmm5,48(%rsi)
679	movdqu	48(%rdi),%xmm5
680	movups	%xmm6,64(%rsi)
681	movdqu	64(%rdi),%xmm6
682	movups	%xmm7,80(%rsi)
683	movdqu	80(%rdi),%xmm7
684	movups	%xmm8,96(%rsi)
685	movdqu	96(%rdi),%xmm8
686	movups	%xmm9,112(%rsi)
687	leaq	128(%rsi),%rsi
688	movdqu	112(%rdi),%xmm9
689	leaq	128(%rdi),%rdi
690.Lecb_dec_loop8_enter:
691
692	call	_aesni_decrypt8
693
	/* Reloads the first 16 bytes of the key schedule into %xmm0. */
694	movups	(%r11),%xmm0
	/* Keep looping while another full 0x80 bytes remain (no borrow). */
695	subq	$0x80,%rdx
696	jnc	.Lecb_dec_loop8
697
	/* Store the final full batch, zeroing each register once it is written, */
	/* and restore %rcx/%eax for the tail code. */
698	movups	%xmm2,(%rsi)
699	pxor	%xmm2,%xmm2
700	movq	%r11,%rcx
701	movups	%xmm3,16(%rsi)
702	pxor	%xmm3,%xmm3
703	movl	%r10d,%eax
704	movups	%xmm4,32(%rsi)
705	pxor	%xmm4,%xmm4
706	movups	%xmm5,48(%rsi)
707	pxor	%xmm5,%xmm5
708	movups	%xmm6,64(%rsi)
709	pxor	%xmm6,%xmm6
710	movups	%xmm7,80(%rsi)
711	pxor	%xmm7,%xmm7
712	movups	%xmm8,96(%rsi)
713	pxor	%xmm8,%xmm8
714	movups	%xmm9,112(%rsi)
715	pxor	%xmm9,%xmm9
716	leaq	128(%rsi),%rsi
	/* Undo the last subtraction; zero means no tail blocks are left. */
717	addq	$0x80,%rdx
718	jz	.Lecb_ret
719
720.Lecb_dec_tail:
	/* 0x10..0x70 bytes remain.  Each cmpq feeds both the jb and the je that */
	/* follow it; the movups in between does not modify flags. */
721	movups	(%rdi),%xmm2
722	cmpq	$0x20,%rdx
723	jb	.Lecb_dec_one
724	movups	16(%rdi),%xmm3
725	je	.Lecb_dec_two
726	movups	32(%rdi),%xmm4
727	cmpq	$0x40,%rdx
728	jb	.Lecb_dec_three
729	movups	48(%rdi),%xmm5
730	je	.Lecb_dec_four
731	movups	64(%rdi),%xmm6
732	cmpq	$0x60,%rdx
733	jb	.Lecb_dec_five
734	movups	80(%rdi),%xmm7
735	je	.Lecb_dec_six
	/* Seven blocks: zero the eighth lane, run the 8-block helper, store seven. */
736	movups	96(%rdi),%xmm8
737	movups	(%rcx),%xmm0
738	xorps	%xmm9,%xmm9
739	call	_aesni_decrypt8
740	movups	%xmm2,(%rsi)
741	pxor	%xmm2,%xmm2
742	movups	%xmm3,16(%rsi)
743	pxor	%xmm3,%xmm3
744	movups	%xmm4,32(%rsi)
745	pxor	%xmm4,%xmm4
746	movups	%xmm5,48(%rsi)
747	pxor	%xmm5,%xmm5
748	movups	%xmm6,64(%rsi)
749	pxor	%xmm6,%xmm6
750	movups	%xmm7,80(%rsi)
751	pxor	%xmm7,%xmm7
752	movups	%xmm8,96(%rsi)
753	pxor	%xmm8,%xmm8
754	pxor	%xmm9,%xmm9
755	jmp	.Lecb_ret
756.align	16
757.Lecb_dec_one:
	/* Single block, inlined: count aesdec rounds then one aesdeclast. */
758	movups	(%rcx),%xmm0
759	movups	16(%rcx),%xmm1
760	leaq	32(%rcx),%rcx
761	xorps	%xmm0,%xmm2
762.Loop_dec1_4:
	/* aesdec %xmm1,%xmm2 */
763.byte	102,15,56,222,209
764	decl	%eax
765	movups	(%rcx),%xmm1
766	leaq	16(%rcx),%rcx
767	jnz	.Loop_dec1_4
	/* aesdeclast %xmm1,%xmm2 */
768.byte	102,15,56,223,209
769	movups	%xmm2,(%rsi)
770	pxor	%xmm2,%xmm2
771	jmp	.Lecb_ret
772.align	16
773.Lecb_dec_two:
774	call	_aesni_decrypt2
775	movups	%xmm2,(%rsi)
776	pxor	%xmm2,%xmm2
777	movups	%xmm3,16(%rsi)
778	pxor	%xmm3,%xmm3
779	jmp	.Lecb_ret
780.align	16
781.Lecb_dec_three:
782	call	_aesni_decrypt3
783	movups	%xmm2,(%rsi)
784	pxor	%xmm2,%xmm2
785	movups	%xmm3,16(%rsi)
786	pxor	%xmm3,%xmm3
787	movups	%xmm4,32(%rsi)
788	pxor	%xmm4,%xmm4
789	jmp	.Lecb_ret
790.align	16
791.Lecb_dec_four:
792	call	_aesni_decrypt4
793	movups	%xmm2,(%rsi)
794	pxor	%xmm2,%xmm2
795	movups	%xmm3,16(%rsi)
796	pxor	%xmm3,%xmm3
797	movups	%xmm4,32(%rsi)
798	pxor	%xmm4,%xmm4
799	movups	%xmm5,48(%rsi)
800	pxor	%xmm5,%xmm5
801	jmp	.Lecb_ret
802.align	16
803.Lecb_dec_five:
	/* Five blocks: zero the sixth lane, run the 6-block helper, store five. */
804	xorps	%xmm7,%xmm7
805	call	_aesni_decrypt6
806	movups	%xmm2,(%rsi)
807	pxor	%xmm2,%xmm2
808	movups	%xmm3,16(%rsi)
809	pxor	%xmm3,%xmm3
810	movups	%xmm4,32(%rsi)
811	pxor	%xmm4,%xmm4
812	movups	%xmm5,48(%rsi)
813	pxor	%xmm5,%xmm5
814	movups	%xmm6,64(%rsi)
815	pxor	%xmm6,%xmm6
816	pxor	%xmm7,%xmm7
817	jmp	.Lecb_ret
818.align	16
819.Lecb_dec_six:
	/* Six blocks; this case falls through into .Lecb_ret. */
820	call	_aesni_decrypt6
821	movups	%xmm2,(%rsi)
822	pxor	%xmm2,%xmm2
823	movups	%xmm3,16(%rsi)
824	pxor	%xmm3,%xmm3
825	movups	%xmm4,32(%rsi)
826	pxor	%xmm4,%xmm4
827	movups	%xmm5,48(%rsi)
828	pxor	%xmm5,%xmm5
829	movups	%xmm6,64(%rsi)
830	pxor	%xmm6,%xmm6
831	movups	%xmm7,80(%rsi)
832	pxor	%xmm7,%xmm7
833
834.Lecb_ret:
	/* Common exit: zero the round-key registers, then rep ret (F3 C3). */
835	xorps	%xmm0,%xmm0
836	pxor	%xmm1,%xmm1
837	.byte	0xf3,0xc3
838.size	aes_hw_ecb_encrypt,.-aes_hw_ecb_encrypt
/*
 * aes_hw_ccm64_encrypt_blocks: for each 16-byte input block, encrypt a
 * counter block and XOR it with the input to form the output, and in the
 * same pass fold the input block into a running MAC value
 * (mac = AES(mac ^ input)).  The two AES computations run side by side in
 * %xmm2 (counter) and %xmm3 (MAC).
 *
 * Register use, as read from the code below (System V integer argument
 * registers in order):
 *   %rdi  input address, advanced 16 bytes per block
 *   %rsi  output address, advanced 16 bytes per block
 *   %rdx  block count, decremented once per iteration; the loop ends when it
 *         reaches zero.  The first block is processed before the count is
 *         tested -- NOTE(review): callers presumably never pass 0; confirm.
 *   %rcx  key schedule address (loop count read from offset 240)
 *   %r8   address of the 16-byte counter block (read only here)
 *   %r9   address of the 16-byte MAC value (read on entry, written on exit)
 *
 * .Lincrement64 and .Lbswap_mask are constants defined outside this block;
 * judging by the names, the mask byte-swaps the counter so paddq can
 * increment it -- confirm against their definitions.
 * Modifies %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm3,
 * %xmm6-%xmm9 and flags.  %xmm0-%xmm3, %xmm6 and %xmm8 are zeroed on exit.
 */
839.globl	aes_hw_ccm64_encrypt_blocks
840.hidden aes_hw_ccm64_encrypt_blocks
841.type	aes_hw_ccm64_encrypt_blocks,@function
842.align	16
843aes_hw_ccm64_encrypt_blocks:
844	movl	240(%rcx),%eax
845	movdqu	(%r8),%xmm6
846	movdqa	.Lincrement64(%rip),%xmm9
847	movdqa	.Lbswap_mask(%rip),%xmm7
848
	/* %eax = 16 * count; %r11 = key; %rcx = key + 32 + 16*count; */
	/* %r10 = 16 - 16*count is the starting loop offset, reloaded per block. */
849	shll	$4,%eax
850	movl	$16,%r10d
851	leaq	0(%rcx),%r11
852	movdqu	(%r9),%xmm3
	/* %xmm2 = counter block as loaded; %xmm6 = its pshufb-permuted copy. */
853	movdqa	%xmm6,%xmm2
854	leaq	32(%rcx,%rax,1),%rcx
	/* 66 0F 38 00 F7 = pshufb %xmm7,%xmm6 */
855.byte	102,15,56,0,247
856	subq	%rax,%r10
857	jmp	.Lccm64_enc_outer
858.align	16
859.Lccm64_enc_outer:
860	movups	(%r11),%xmm0
861	movq	%r10,%rax
862	movups	(%rdi),%xmm8
863
	/* counter ^= rk0; mac ^= (input ^ rk0) */
864	xorps	%xmm0,%xmm2
865	movups	16(%r11),%xmm1
866	xorps	%xmm8,%xmm0
867	xorps	%xmm0,%xmm3
868	movups	32(%r11),%xmm0
869
870.Lccm64_enc2_loop:
	/* aesenc %xmm1 into %xmm2, %xmm3 */
871.byte	102,15,56,220,209
872.byte	102,15,56,220,217
873	movups	(%rcx,%rax,1),%xmm1
	/* Sets ZF for the jnz below; the instructions in between do not modify flags. */
874	addq	$32,%rax
	/* aesenc %xmm0 into %xmm2, %xmm3 */
875.byte	102,15,56,220,208
876.byte	102,15,56,220,216
877	movups	-16(%rcx,%rax,1),%xmm0
878	jnz	.Lccm64_enc2_loop
	/* Final aesenc with %xmm1. */
879.byte	102,15,56,220,209
880.byte	102,15,56,220,217
	/* Advance the counter by the .Lincrement64 constant. */
881	paddq	%xmm9,%xmm6
	/* decq sets ZF for the jnz at the bottom of the loop; none of the */
	/* instructions in between modify flags. */
882	decq	%rdx
	/* aesenclast %xmm0 into %xmm2, %xmm3 */
883.byte	102,15,56,221,208
884.byte	102,15,56,221,216
885
886	leaq	16(%rdi),%rdi
	/* output = input ^ encrypted counter */
887	xorps	%xmm2,%xmm8
888	movdqa	%xmm6,%xmm2
889	movups	%xmm8,(%rsi)
	/* 66 0F 38 00 D7 = pshufb %xmm7,%xmm2: next counter block to encrypt. */
890.byte	102,15,56,0,215
891	leaq	16(%rsi),%rsi
892	jnz	.Lccm64_enc_outer
893
	/* Write the MAC back and zero the working registers. */
894	pxor	%xmm0,%xmm0
895	pxor	%xmm1,%xmm1
896	pxor	%xmm2,%xmm2
897	movups	%xmm3,(%r9)
898	pxor	%xmm3,%xmm3
899	pxor	%xmm8,%xmm8
900	pxor	%xmm6,%xmm6
	/* F3 C3 = rep ret */
901	.byte	0xf3,0xc3
902.size	aes_hw_ccm64_encrypt_blocks,.-aes_hw_ccm64_encrypt_blocks
/*
 * aes_hw_ccm64_decrypt_blocks: for each 16-byte input block, XOR it with an
 * encrypted counter block to form the output, and fold that output block
 * into a running MAC value (mac = AES(mac ^ output)).
 *
 * The first counter block is encrypted on its own.  After that each loop
 * pass encrypts the next counter (%xmm2) and the MAC (%xmm3) side by side.
 * The MAC update for the last block is done alone at .Lccm64_dec_break.
 * Only the AES encryption instructions are used.
 *
 * Register use, as read from the code below (System V integer argument
 * registers in order):
 *   %rdi  input address, advanced 16 bytes per block
 *   %rsi  output address, advanced 16 bytes per block
 *   %rdx  block count; one block is output before the count is decremented
 *         and tested -- NOTE(review): callers presumably never pass 0; confirm.
 *   %rcx  key schedule address (loop count read from offset 240)
 *   %r8   address of the 16-byte counter block (read only here)
 *   %r9   address of the 16-byte MAC value (read on entry, written on exit)
 *
 * .Lincrement64 and .Lbswap_mask are constants defined outside this block.
 * Modifies %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm3,
 * %xmm6-%xmm9 and flags.  %xmm0-%xmm3, %xmm6 and %xmm8 are zeroed on exit.
 */
903.globl	aes_hw_ccm64_decrypt_blocks
904.hidden aes_hw_ccm64_decrypt_blocks
905.type	aes_hw_ccm64_decrypt_blocks,@function
906.align	16
907aes_hw_ccm64_decrypt_blocks:
908	movl	240(%rcx),%eax
909	movups	(%r8),%xmm6
910	movdqu	(%r9),%xmm3
911	movdqa	.Lincrement64(%rip),%xmm9
912	movdqa	.Lbswap_mask(%rip),%xmm7
913
	/* %xmm2 = counter block as loaded; keep loop count in %r10d, key in %r11. */
914	movaps	%xmm6,%xmm2
915	movl	%eax,%r10d
916	movq	%rcx,%r11
	/* 66 0F 38 00 F7 = pshufb %xmm7,%xmm6 */
917.byte	102,15,56,0,247
	/* Encrypt the first counter block on its own. */
918	movups	(%rcx),%xmm0
919	movups	16(%rcx),%xmm1
920	leaq	32(%rcx),%rcx
921	xorps	%xmm0,%xmm2
922.Loop_enc1_5:
	/* aesenc %xmm1,%xmm2 */
923.byte	102,15,56,220,209
924	decl	%eax
925	movups	(%rcx),%xmm1
926	leaq	16(%rcx),%rcx
927	jnz	.Loop_enc1_5
	/* aesenclast %xmm1,%xmm2 */
928.byte	102,15,56,221,209
	/* %r10 = 16*count, then %rax = 16 - 16*count (copied back into %r10 as */
	/* the per-block starting offset) and %rcx = key + 32 + 16*count. */
929	shll	$4,%r10d
930	movl	$16,%eax
	/* Load the first input block and advance the counter. */
931	movups	(%rdi),%xmm8
932	paddq	%xmm9,%xmm6
933	leaq	16(%rdi),%rdi
934	subq	%r10,%rax
935	leaq	32(%r11,%r10,1),%rcx
936	movq	%rax,%r10
937	jmp	.Lccm64_dec_outer
938.align	16
939.Lccm64_dec_outer:
	/* output = input ^ encrypted counter; store it; prepare the next counter. */
940	xorps	%xmm2,%xmm8
941	movdqa	%xmm6,%xmm2
942	movups	%xmm8,(%rsi)
943	leaq	16(%rsi),%rsi
	/* 66 0F 38 00 D7 = pshufb %xmm7,%xmm2 */
944.byte	102,15,56,0,215
945
946	subq	$1,%rdx
947	jz	.Lccm64_dec_break
948
949	movups	(%r11),%xmm0
950	movq	%r10,%rax
951	movups	16(%r11),%xmm1
	/* counter ^= rk0; mac ^= (output ^ rk0) */
952	xorps	%xmm0,%xmm8
953	xorps	%xmm0,%xmm2
954	xorps	%xmm8,%xmm3
955	movups	32(%r11),%xmm0
956	jmp	.Lccm64_dec2_loop
957.align	16
958.Lccm64_dec2_loop:
	/* aesenc %xmm1 into %xmm2, %xmm3 */
959.byte	102,15,56,220,209
960.byte	102,15,56,220,217
961	movups	(%rcx,%rax,1),%xmm1
	/* Sets ZF for the jnz below; the instructions in between do not modify flags. */
962	addq	$32,%rax
	/* aesenc %xmm0 into %xmm2, %xmm3 */
963.byte	102,15,56,220,208
964.byte	102,15,56,220,216
965	movups	-16(%rcx,%rax,1),%xmm0
966	jnz	.Lccm64_dec2_loop
	/* Fetch the next input block and advance the counter while finishing */
	/* the last two rounds (aesenc with %xmm1, aesenclast with %xmm0). */
967	movups	(%rdi),%xmm8
968	paddq	%xmm9,%xmm6
969.byte	102,15,56,220,209
970.byte	102,15,56,220,217
971.byte	102,15,56,221,208
972.byte	102,15,56,221,216
973	leaq	16(%rdi),%rdi
974	jmp	.Lccm64_dec_outer
975
976.align	16
977.Lccm64_dec_break:
978
	/* Last block: only the MAC still needs an AES pass.  %xmm8 holds the */
	/* final output block; mac ^= (output ^ rk0), then encrypt %xmm3 alone. */
979	movl	240(%r11),%eax
980	movups	(%r11),%xmm0
981	movups	16(%r11),%xmm1
982	xorps	%xmm0,%xmm8
983	leaq	32(%r11),%r11
984	xorps	%xmm8,%xmm3
985.Loop_enc1_6:
	/* aesenc %xmm1,%xmm3 */
986.byte	102,15,56,220,217
987	decl	%eax
988	movups	(%r11),%xmm1
989	leaq	16(%r11),%r11
990	jnz	.Loop_enc1_6
	/* aesenclast %xmm1,%xmm3 */
991.byte	102,15,56,221,217
	/* Write the MAC back and zero the working registers. */
992	pxor	%xmm0,%xmm0
993	pxor	%xmm1,%xmm1
994	pxor	%xmm2,%xmm2
995	movups	%xmm3,(%r9)
996	pxor	%xmm3,%xmm3
997	pxor	%xmm8,%xmm8
998	pxor	%xmm6,%xmm6
	/* F3 C3 = rep ret */
999	.byte	0xf3,0xc3
1000.size	aes_hw_ccm64_decrypt_blocks,.-aes_hw_ccm64_decrypt_blocks
1001.globl	aes_hw_ctr32_encrypt_blocks
1002.hidden aes_hw_ctr32_encrypt_blocks
1003.type	aes_hw_ctr32_encrypt_blocks,@function
1004.align	16
1005aes_hw_ctr32_encrypt_blocks:
1006.cfi_startproc
1007	cmpq	$1,%rdx
1008	jne	.Lctr32_bulk
1009
1010
1011
1012	movups	(%r8),%xmm2
1013	movups	(%rdi),%xmm3
1014	movl	240(%rcx),%edx
1015	movups	(%rcx),%xmm0
1016	movups	16(%rcx),%xmm1
1017	leaq	32(%rcx),%rcx
1018	xorps	%xmm0,%xmm2
1019.Loop_enc1_7:
1020.byte	102,15,56,220,209
1021	decl	%edx
1022	movups	(%rcx),%xmm1
1023	leaq	16(%rcx),%rcx
1024	jnz	.Loop_enc1_7
1025.byte	102,15,56,221,209
1026	pxor	%xmm0,%xmm0
1027	pxor	%xmm1,%xmm1
1028	xorps	%xmm3,%xmm2
1029	pxor	%xmm3,%xmm3
1030	movups	%xmm2,(%rsi)
1031	xorps	%xmm2,%xmm2
1032	jmp	.Lctr32_epilogue
1033
1034.align	16
1035.Lctr32_bulk:
1036	leaq	(%rsp),%r11
1037.cfi_def_cfa_register	%r11
1038	pushq	%rbp
1039.cfi_offset	%rbp,-16
1040	subq	$128,%rsp
1041	andq	$-16,%rsp
1042
1043
1044
1045
1046	movdqu	(%r8),%xmm2
1047	movdqu	(%rcx),%xmm0
1048	movl	12(%r8),%r8d
1049	pxor	%xmm0,%xmm2
1050	movl	12(%rcx),%ebp
1051	movdqa	%xmm2,0(%rsp)
1052	bswapl	%r8d
1053	movdqa	%xmm2,%xmm3
1054	movdqa	%xmm2,%xmm4
1055	movdqa	%xmm2,%xmm5
1056	movdqa	%xmm2,64(%rsp)
1057	movdqa	%xmm2,80(%rsp)
1058	movdqa	%xmm2,96(%rsp)
1059	movq	%rdx,%r10
1060	movdqa	%xmm2,112(%rsp)
1061
1062	leaq	1(%r8),%rax
1063	leaq	2(%r8),%rdx
1064	bswapl	%eax
1065	bswapl	%edx
1066	xorl	%ebp,%eax
1067	xorl	%ebp,%edx
1068.byte	102,15,58,34,216,3
1069	leaq	3(%r8),%rax
1070	movdqa	%xmm3,16(%rsp)
1071.byte	102,15,58,34,226,3
1072	bswapl	%eax
1073	movq	%r10,%rdx
1074	leaq	4(%r8),%r10
1075	movdqa	%xmm4,32(%rsp)
1076	xorl	%ebp,%eax
1077	bswapl	%r10d
1078.byte	102,15,58,34,232,3
1079	xorl	%ebp,%r10d
1080	movdqa	%xmm5,48(%rsp)
1081	leaq	5(%r8),%r9
1082	movl	%r10d,64+12(%rsp)
1083	bswapl	%r9d
1084	leaq	6(%r8),%r10
1085	movl	240(%rcx),%eax
1086	xorl	%ebp,%r9d
1087	bswapl	%r10d
1088	movl	%r9d,80+12(%rsp)
1089	xorl	%ebp,%r10d
1090	leaq	7(%r8),%r9
1091	movl	%r10d,96+12(%rsp)
1092	bswapl	%r9d
1093	leaq	OPENSSL_ia32cap_P(%rip),%r10
1094	movl	4(%r10),%r10d
1095	xorl	%ebp,%r9d
1096	andl	$71303168,%r10d
1097	movl	%r9d,112+12(%rsp)
1098
1099	movups	16(%rcx),%xmm1
1100
1101	movdqa	64(%rsp),%xmm6
1102	movdqa	80(%rsp),%xmm7
1103
1104	cmpq	$8,%rdx
1105	jb	.Lctr32_tail
1106
1107	subq	$6,%rdx
1108	cmpl	$4194304,%r10d
1109	je	.Lctr32_6x
1110
1111	leaq	128(%rcx),%rcx
1112	subq	$2,%rdx
1113	jmp	.Lctr32_loop8
1114
1115.align	16
1116.Lctr32_6x:
1117	shll	$4,%eax
1118	movl	$48,%r10d
1119	bswapl	%ebp
1120	leaq	32(%rcx,%rax,1),%rcx
1121	subq	%rax,%r10
1122	jmp	.Lctr32_loop6
1123
1124.align	16
1125.Lctr32_loop6:
1126	addl	$6,%r8d
1127	movups	-48(%rcx,%r10,1),%xmm0
1128.byte	102,15,56,220,209
1129	movl	%r8d,%eax
1130	xorl	%ebp,%eax
1131.byte	102,15,56,220,217
1132.byte	0x0f,0x38,0xf1,0x44,0x24,12
1133	leal	1(%r8),%eax
1134.byte	102,15,56,220,225
1135	xorl	%ebp,%eax
1136.byte	0x0f,0x38,0xf1,0x44,0x24,28
1137.byte	102,15,56,220,233
1138	leal	2(%r8),%eax
1139	xorl	%ebp,%eax
1140.byte	102,15,56,220,241
1141.byte	0x0f,0x38,0xf1,0x44,0x24,44
1142	leal	3(%r8),%eax
1143.byte	102,15,56,220,249
1144	movups	-32(%rcx,%r10,1),%xmm1
1145	xorl	%ebp,%eax
1146
1147.byte	102,15,56,220,208
1148.byte	0x0f,0x38,0xf1,0x44,0x24,60
1149	leal	4(%r8),%eax
1150.byte	102,15,56,220,216
1151	xorl	%ebp,%eax
1152.byte	0x0f,0x38,0xf1,0x44,0x24,76
1153.byte	102,15,56,220,224
1154	leal	5(%r8),%eax
1155	xorl	%ebp,%eax
1156.byte	102,15,56,220,232
1157.byte	0x0f,0x38,0xf1,0x44,0x24,92
1158	movq	%r10,%rax
1159.byte	102,15,56,220,240
1160.byte	102,15,56,220,248
1161	movups	-16(%rcx,%r10,1),%xmm0
1162
1163	call	.Lenc_loop6
1164
1165	movdqu	(%rdi),%xmm8
1166	movdqu	16(%rdi),%xmm9
1167	movdqu	32(%rdi),%xmm10
1168	movdqu	48(%rdi),%xmm11
1169	movdqu	64(%rdi),%xmm12
1170	movdqu	80(%rdi),%xmm13
1171	leaq	96(%rdi),%rdi
1172	movups	-64(%rcx,%r10,1),%xmm1
1173	pxor	%xmm2,%xmm8
1174	movaps	0(%rsp),%xmm2
1175	pxor	%xmm3,%xmm9
1176	movaps	16(%rsp),%xmm3
1177	pxor	%xmm4,%xmm10
1178	movaps	32(%rsp),%xmm4
1179	pxor	%xmm5,%xmm11
1180	movaps	48(%rsp),%xmm5
1181	pxor	%xmm6,%xmm12
1182	movaps	64(%rsp),%xmm6
1183	pxor	%xmm7,%xmm13
1184	movaps	80(%rsp),%xmm7
1185	movdqu	%xmm8,(%rsi)
1186	movdqu	%xmm9,16(%rsi)
1187	movdqu	%xmm10,32(%rsi)
1188	movdqu	%xmm11,48(%rsi)
1189	movdqu	%xmm12,64(%rsi)
1190	movdqu	%xmm13,80(%rsi)
1191	leaq	96(%rsi),%rsi
1192
1193	subq	$6,%rdx
1194	jnc	.Lctr32_loop6
1195
1196	addq	$6,%rdx
1197	jz	.Lctr32_done
1198
1199	leal	-48(%r10),%eax
1200	leaq	-80(%rcx,%r10,1),%rcx
1201	negl	%eax
1202	shrl	$4,%eax
1203	jmp	.Lctr32_tail
1204
1205.align	32
1206.Lctr32_loop8:
1207	addl	$8,%r8d
1208	movdqa	96(%rsp),%xmm8
1209.byte	102,15,56,220,209
1210	movl	%r8d,%r9d
1211	movdqa	112(%rsp),%xmm9
1212.byte	102,15,56,220,217
1213	bswapl	%r9d
1214	movups	32-128(%rcx),%xmm0
1215.byte	102,15,56,220,225
1216	xorl	%ebp,%r9d
1217	nop
1218.byte	102,15,56,220,233
1219	movl	%r9d,0+12(%rsp)
1220	leaq	1(%r8),%r9
1221.byte	102,15,56,220,241
1222.byte	102,15,56,220,249
1223.byte	102,68,15,56,220,193
1224.byte	102,68,15,56,220,201
1225	movups	48-128(%rcx),%xmm1
1226	bswapl	%r9d
1227.byte	102,15,56,220,208
1228.byte	102,15,56,220,216
1229	xorl	%ebp,%r9d
1230.byte	0x66,0x90
1231.byte	102,15,56,220,224
1232.byte	102,15,56,220,232
1233	movl	%r9d,16+12(%rsp)
1234	leaq	2(%r8),%r9
1235.byte	102,15,56,220,240
1236.byte	102,15,56,220,248
1237.byte	102,68,15,56,220,192
1238.byte	102,68,15,56,220,200
1239	movups	64-128(%rcx),%xmm0
1240	bswapl	%r9d
1241.byte	102,15,56,220,209
1242.byte	102,15,56,220,217
1243	xorl	%ebp,%r9d
1244.byte	0x66,0x90
1245.byte	102,15,56,220,225
1246.byte	102,15,56,220,233
1247	movl	%r9d,32+12(%rsp)
1248	leaq	3(%r8),%r9
1249.byte	102,15,56,220,241
1250.byte	102,15,56,220,249
1251.byte	102,68,15,56,220,193
1252.byte	102,68,15,56,220,201
1253	movups	80-128(%rcx),%xmm1
1254	bswapl	%r9d
1255.byte	102,15,56,220,208
1256.byte	102,15,56,220,216
1257	xorl	%ebp,%r9d
1258.byte	0x66,0x90
1259.byte	102,15,56,220,224
1260.byte	102,15,56,220,232
1261	movl	%r9d,48+12(%rsp)
1262	leaq	4(%r8),%r9
1263.byte	102,15,56,220,240
1264.byte	102,15,56,220,248
1265.byte	102,68,15,56,220,192
1266.byte	102,68,15,56,220,200
1267	movups	96-128(%rcx),%xmm0
1268	bswapl	%r9d
1269.byte	102,15,56,220,209
1270.byte	102,15,56,220,217
1271	xorl	%ebp,%r9d
1272.byte	0x66,0x90
1273.byte	102,15,56,220,225
1274.byte	102,15,56,220,233
1275	movl	%r9d,64+12(%rsp)
1276	leaq	5(%r8),%r9
1277.byte	102,15,56,220,241
1278.byte	102,15,56,220,249
1279.byte	102,68,15,56,220,193
1280.byte	102,68,15,56,220,201
1281	movups	112-128(%rcx),%xmm1
1282	bswapl	%r9d
1283.byte	102,15,56,220,208
1284.byte	102,15,56,220,216
1285	xorl	%ebp,%r9d
1286.byte	0x66,0x90
1287.byte	102,15,56,220,224
1288.byte	102,15,56,220,232
1289	movl	%r9d,80+12(%rsp)
1290	leaq	6(%r8),%r9
1291.byte	102,15,56,220,240
1292.byte	102,15,56,220,248
1293.byte	102,68,15,56,220,192
1294.byte	102,68,15,56,220,200
1295	movups	128-128(%rcx),%xmm0
1296	bswapl	%r9d
1297.byte	102,15,56,220,209
1298.byte	102,15,56,220,217
1299	xorl	%ebp,%r9d
1300.byte	0x66,0x90
1301.byte	102,15,56,220,225
1302.byte	102,15,56,220,233
1303	movl	%r9d,96+12(%rsp)
1304	leaq	7(%r8),%r9
1305.byte	102,15,56,220,241
1306.byte	102,15,56,220,249
1307.byte	102,68,15,56,220,193
1308.byte	102,68,15,56,220,201
1309	movups	144-128(%rcx),%xmm1
1310	bswapl	%r9d
1311.byte	102,15,56,220,208
1312.byte	102,15,56,220,216
1313.byte	102,15,56,220,224
1314	xorl	%ebp,%r9d
1315	movdqu	0(%rdi),%xmm10
1316.byte	102,15,56,220,232
1317	movl	%r9d,112+12(%rsp)
1318	cmpl	$11,%eax
1319.byte	102,15,56,220,240
1320.byte	102,15,56,220,248
1321.byte	102,68,15,56,220,192
1322.byte	102,68,15,56,220,200
1323	movups	160-128(%rcx),%xmm0
1324
1325	jb	.Lctr32_enc_done
1326
1327.byte	102,15,56,220,209
1328.byte	102,15,56,220,217
1329.byte	102,15,56,220,225
1330.byte	102,15,56,220,233
1331.byte	102,15,56,220,241
1332.byte	102,15,56,220,249
1333.byte	102,68,15,56,220,193
1334.byte	102,68,15,56,220,201
1335	movups	176-128(%rcx),%xmm1
1336
1337.byte	102,15,56,220,208
1338.byte	102,15,56,220,216
1339.byte	102,15,56,220,224
1340.byte	102,15,56,220,232
1341.byte	102,15,56,220,240
1342.byte	102,15,56,220,248
1343.byte	102,68,15,56,220,192
1344.byte	102,68,15,56,220,200
1345	movups	192-128(%rcx),%xmm0
1346	je	.Lctr32_enc_done
1347
1348.byte	102,15,56,220,209
1349.byte	102,15,56,220,217
1350.byte	102,15,56,220,225
1351.byte	102,15,56,220,233
1352.byte	102,15,56,220,241
1353.byte	102,15,56,220,249
1354.byte	102,68,15,56,220,193
1355.byte	102,68,15,56,220,201
1356	movups	208-128(%rcx),%xmm1
1357
1358.byte	102,15,56,220,208
1359.byte	102,15,56,220,216
1360.byte	102,15,56,220,224
1361.byte	102,15,56,220,232
1362.byte	102,15,56,220,240
1363.byte	102,15,56,220,248
1364.byte	102,68,15,56,220,192
1365.byte	102,68,15,56,220,200
1366	movups	224-128(%rcx),%xmm0
1367	jmp	.Lctr32_enc_done
1368
1369.align	16
1370.Lctr32_enc_done:
1371	movdqu	16(%rdi),%xmm11
1372	pxor	%xmm0,%xmm10
1373	movdqu	32(%rdi),%xmm12
1374	pxor	%xmm0,%xmm11
1375	movdqu	48(%rdi),%xmm13
1376	pxor	%xmm0,%xmm12
1377	movdqu	64(%rdi),%xmm14
1378	pxor	%xmm0,%xmm13
1379	movdqu	80(%rdi),%xmm15
1380	pxor	%xmm0,%xmm14
1381	pxor	%xmm0,%xmm15
1382.byte	102,15,56,220,209
1383.byte	102,15,56,220,217
1384.byte	102,15,56,220,225
1385.byte	102,15,56,220,233
1386.byte	102,15,56,220,241
1387.byte	102,15,56,220,249
1388.byte	102,68,15,56,220,193
1389.byte	102,68,15,56,220,201
1390	movdqu	96(%rdi),%xmm1
1391	leaq	128(%rdi),%rdi
1392
1393.byte	102,65,15,56,221,210
1394	pxor	%xmm0,%xmm1
1395	movdqu	112-128(%rdi),%xmm10
1396.byte	102,65,15,56,221,219
1397	pxor	%xmm0,%xmm10
1398	movdqa	0(%rsp),%xmm11
1399.byte	102,65,15,56,221,228
1400.byte	102,65,15,56,221,237
1401	movdqa	16(%rsp),%xmm12
1402	movdqa	32(%rsp),%xmm13
1403.byte	102,65,15,56,221,246
1404.byte	102,65,15,56,221,255
1405	movdqa	48(%rsp),%xmm14
1406	movdqa	64(%rsp),%xmm15
1407.byte	102,68,15,56,221,193
1408	movdqa	80(%rsp),%xmm0
1409	movups	16-128(%rcx),%xmm1
1410.byte	102,69,15,56,221,202
1411
1412	movups	%xmm2,(%rsi)
1413	movdqa	%xmm11,%xmm2
1414	movups	%xmm3,16(%rsi)
1415	movdqa	%xmm12,%xmm3
1416	movups	%xmm4,32(%rsi)
1417	movdqa	%xmm13,%xmm4
1418	movups	%xmm5,48(%rsi)
1419	movdqa	%xmm14,%xmm5
1420	movups	%xmm6,64(%rsi)
1421	movdqa	%xmm15,%xmm6
1422	movups	%xmm7,80(%rsi)
1423	movdqa	%xmm0,%xmm7
1424	movups	%xmm8,96(%rsi)
1425	movups	%xmm9,112(%rsi)
1426	leaq	128(%rsi),%rsi
1427
1428	subq	$8,%rdx
1429	jnc	.Lctr32_loop8
1430
1431	addq	$8,%rdx
1432	jz	.Lctr32_done
1433	leaq	-128(%rcx),%rcx
1434
1435.Lctr32_tail:
1436
1437
1438	leaq	16(%rcx),%rcx
1439	cmpq	$4,%rdx
1440	jb	.Lctr32_loop3
1441	je	.Lctr32_loop4
1442
1443
1444	shll	$4,%eax
1445	movdqa	96(%rsp),%xmm8
1446	pxor	%xmm9,%xmm9
1447
1448	movups	16(%rcx),%xmm0
1449.byte	102,15,56,220,209
1450.byte	102,15,56,220,217
1451	leaq	32-16(%rcx,%rax,1),%rcx
1452	negq	%rax
1453.byte	102,15,56,220,225
1454	addq	$16,%rax
1455	movups	(%rdi),%xmm10
1456.byte	102,15,56,220,233
1457.byte	102,15,56,220,241
1458	movups	16(%rdi),%xmm11
1459	movups	32(%rdi),%xmm12
1460.byte	102,15,56,220,249
1461.byte	102,68,15,56,220,193
1462
1463	call	.Lenc_loop8_enter
1464
1465	movdqu	48(%rdi),%xmm13
1466	pxor	%xmm10,%xmm2
1467	movdqu	64(%rdi),%xmm10
1468	pxor	%xmm11,%xmm3
1469	movdqu	%xmm2,(%rsi)
1470	pxor	%xmm12,%xmm4
1471	movdqu	%xmm3,16(%rsi)
1472	pxor	%xmm13,%xmm5
1473	movdqu	%xmm4,32(%rsi)
1474	pxor	%xmm10,%xmm6
1475	movdqu	%xmm5,48(%rsi)
1476	movdqu	%xmm6,64(%rsi)
1477	cmpq	$6,%rdx
1478	jb	.Lctr32_done
1479
1480	movups	80(%rdi),%xmm11
1481	xorps	%xmm11,%xmm7
1482	movups	%xmm7,80(%rsi)
1483	je	.Lctr32_done
1484
1485	movups	96(%rdi),%xmm12
1486	xorps	%xmm12,%xmm8
1487	movups	%xmm8,96(%rsi)
1488	jmp	.Lctr32_done
1489
1490.align	32
1491.Lctr32_loop4:
1492.byte	102,15,56,220,209
1493	leaq	16(%rcx),%rcx
1494	decl	%eax
1495.byte	102,15,56,220,217
1496.byte	102,15,56,220,225
1497.byte	102,15,56,220,233
1498	movups	(%rcx),%xmm1
1499	jnz	.Lctr32_loop4
1500.byte	102,15,56,221,209
1501.byte	102,15,56,221,217
1502	movups	(%rdi),%xmm10
1503	movups	16(%rdi),%xmm11
1504.byte	102,15,56,221,225
1505.byte	102,15,56,221,233
1506	movups	32(%rdi),%xmm12
1507	movups	48(%rdi),%xmm13
1508
1509	xorps	%xmm10,%xmm2
1510	movups	%xmm2,(%rsi)
1511	xorps	%xmm11,%xmm3
1512	movups	%xmm3,16(%rsi)
1513	pxor	%xmm12,%xmm4
1514	movdqu	%xmm4,32(%rsi)
1515	pxor	%xmm13,%xmm5
1516	movdqu	%xmm5,48(%rsi)
1517	jmp	.Lctr32_done
1518
1519.align	32
1520.Lctr32_loop3:
1521.byte	102,15,56,220,209
1522	leaq	16(%rcx),%rcx
1523	decl	%eax
1524.byte	102,15,56,220,217
1525.byte	102,15,56,220,225
1526	movups	(%rcx),%xmm1
1527	jnz	.Lctr32_loop3
1528.byte	102,15,56,221,209
1529.byte	102,15,56,221,217
1530.byte	102,15,56,221,225
1531
1532	movups	(%rdi),%xmm10
1533	xorps	%xmm10,%xmm2
1534	movups	%xmm2,(%rsi)
1535	cmpq	$2,%rdx
1536	jb	.Lctr32_done
1537
1538	movups	16(%rdi),%xmm11
1539	xorps	%xmm11,%xmm3
1540	movups	%xmm3,16(%rsi)
1541	je	.Lctr32_done
1542
1543	movups	32(%rdi),%xmm12
1544	xorps	%xmm12,%xmm4
1545	movups	%xmm4,32(%rsi)
1546
1547.Lctr32_done:
1548	xorps	%xmm0,%xmm0
1549	xorl	%ebp,%ebp
1550	pxor	%xmm1,%xmm1
1551	pxor	%xmm2,%xmm2
1552	pxor	%xmm3,%xmm3
1553	pxor	%xmm4,%xmm4
1554	pxor	%xmm5,%xmm5
1555	pxor	%xmm6,%xmm6
1556	pxor	%xmm7,%xmm7
1557	movaps	%xmm0,0(%rsp)
1558	pxor	%xmm8,%xmm8
1559	movaps	%xmm0,16(%rsp)
1560	pxor	%xmm9,%xmm9
1561	movaps	%xmm0,32(%rsp)
1562	pxor	%xmm10,%xmm10
1563	movaps	%xmm0,48(%rsp)
1564	pxor	%xmm11,%xmm11
1565	movaps	%xmm0,64(%rsp)
1566	pxor	%xmm12,%xmm12
1567	movaps	%xmm0,80(%rsp)
1568	pxor	%xmm13,%xmm13
1569	movaps	%xmm0,96(%rsp)
1570	pxor	%xmm14,%xmm14
1571	movaps	%xmm0,112(%rsp)
1572	pxor	%xmm15,%xmm15
1573	movq	-8(%r11),%rbp
1574.cfi_restore	%rbp
1575	leaq	(%r11),%rsp
1576.cfi_def_cfa_register	%rsp
1577.Lctr32_epilogue:
1578	.byte	0xf3,0xc3
1579.cfi_endproc
1580.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
// aes_hw_xts_encrypt: XTS-mode encryption built on AES-NI.
//
// Register roles, as used by the code below (System V AMD64 argument order):
//   %rdi  input pointer (advanced as blocks are consumed)
//   %rsi  output pointer (advanced as blocks are produced)
//   %rdx  length in bytes
//   %rcx  key schedule for the data; round count is read from offset 240
//   %r8   key schedule for the tweak; round count is read from offset 240
//   %r9   pointer to 16 bytes that are encrypted with the %r8 schedule to
//         form the first tweak
// Presumed C prototype (TODO confirm against the project header):
//   void aes_hw_xts_encrypt(const uint8_t *in, uint8_t *out, size_t len,
//                           const AES_KEY *key1, const AES_KEY *key2,
//                           const uint8_t iv[16]);
//
// Structure: a main loop handles six blocks per iteration, a tail handles
// one to five leftover whole blocks, and a final step handles a trailing
// partial block by swapping bytes with the last full output block and
// re-encrypting it.
//
// Stack: a 16-byte aligned scratch frame of at least 112 bytes. It and
// %xmm0-%xmm15 are zeroed before returning. %rbp is saved and restored.
//
// The .byte sequences are hand-encoded AES-NI instructions:
//   102,15,56,220,M = aesenc, 102,15,56,221,M = aesenclast (M = ModRM byte,
//   optionally followed by SIB and disp8 for the (%rsp) memory forms);
//   0xf3,0xc3 = rep ret.
1581.globl	aes_hw_xts_encrypt
1582.hidden aes_hw_xts_encrypt
1583.type	aes_hw_xts_encrypt,@function
1584.align	16
1585aes_hw_xts_encrypt:
1586.cfi_startproc
	// Prologue: remember the entry %rsp in %r11, save %rbp, reserve and
	// align the scratch frame.
1587	leaq	(%rsp),%r11
1588.cfi_def_cfa_register	%r11
1589	pushq	%rbp
1590.cfi_offset	%rbp,-16
1591	subq	$112,%rsp
1592	andq	$-16,%rsp
	// Single-block encryption of the 16 bytes at (%r9) with the schedule at
	// %r8. %eax counts the aesenc rounds; the result in %xmm2 is the first
	// tweak. %r10d picks up the round count of the data schedule meanwhile.
1593	movups	(%r9),%xmm2
1594	movl	240(%r8),%eax
1595	movl	240(%rcx),%r10d
1596	movups	(%r8),%xmm0
1597	movups	16(%r8),%xmm1
1598	leaq	32(%r8),%r8
1599	xorps	%xmm0,%xmm2
1600.Loop_enc1_8:
1601.byte	102,15,56,220,209
1602	decl	%eax
1603	movups	(%r8),%xmm1
1604	leaq	16(%r8),%r8
1605	jnz	.Loop_enc1_8
1606.byte	102,15,56,221,209
	// %xmm0 = first round key of the data schedule, %rbp = schedule base,
	// %eax = round count, %r10 = 16 * round count.
	// %r9 keeps the full byte length; %rdx is rounded down to whole blocks.
1607	movups	(%rcx),%xmm0
1608	movq	%rcx,%rbp
1609	movl	%r10d,%eax
1610	shll	$4,%r10d
1611	movq	%rdx,%r9
1612	andq	$-16,%rdx
1613
	// %xmm1 = round key at offset 16 + 16*count, the key consumed by the
	// final aesenclast.
1614	movups	16(%rcx,%r10,1),%xmm1
1615
	// Build six consecutive tweaks. Each step copies the current tweak out,
	// doubles %xmm15 with paddq, and folds the carries back in through a
	// sign mask (psrad $31 of the dwords replicated into %xmm9) ANDed with
	// the .Lxts_magic constant (defined outside this block).
	// %xmm10-%xmm14 receive tweaks 1-5, each pre-XORed with the first round
	// key in %xmm0; %xmm15 ends up as tweak 6 without the key.
	// %xmm1 becomes (first round key XOR last round key) and is parked at
	// 96(%rsp) for the main loop.
1616	movdqa	.Lxts_magic(%rip),%xmm8
1617	movdqa	%xmm2,%xmm15
1618	pshufd	$0x5f,%xmm2,%xmm9
1619	pxor	%xmm0,%xmm1
1620	movdqa	%xmm9,%xmm14
1621	paddd	%xmm9,%xmm9
1622	movdqa	%xmm15,%xmm10
1623	psrad	$31,%xmm14
1624	paddq	%xmm15,%xmm15
1625	pand	%xmm8,%xmm14
1626	pxor	%xmm0,%xmm10
1627	pxor	%xmm14,%xmm15
1628	movdqa	%xmm9,%xmm14
1629	paddd	%xmm9,%xmm9
1630	movdqa	%xmm15,%xmm11
1631	psrad	$31,%xmm14
1632	paddq	%xmm15,%xmm15
1633	pand	%xmm8,%xmm14
1634	pxor	%xmm0,%xmm11
1635	pxor	%xmm14,%xmm15
1636	movdqa	%xmm9,%xmm14
1637	paddd	%xmm9,%xmm9
1638	movdqa	%xmm15,%xmm12
1639	psrad	$31,%xmm14
1640	paddq	%xmm15,%xmm15
1641	pand	%xmm8,%xmm14
1642	pxor	%xmm0,%xmm12
1643	pxor	%xmm14,%xmm15
1644	movdqa	%xmm9,%xmm14
1645	paddd	%xmm9,%xmm9
1646	movdqa	%xmm15,%xmm13
1647	psrad	$31,%xmm14
1648	paddq	%xmm15,%xmm15
1649	pand	%xmm8,%xmm14
1650	pxor	%xmm0,%xmm13
1651	pxor	%xmm14,%xmm15
1652	movdqa	%xmm15,%xmm14
1653	psrad	$31,%xmm9
1654	paddq	%xmm15,%xmm15
1655	pand	%xmm8,%xmm9
1656	pxor	%xmm0,%xmm14
1657	pxor	%xmm9,%xmm15
1658	movaps	%xmm1,96(%rsp)
1659
	// A borrow here means fewer than six whole blocks: go to the tail code.
1660	subq	$96,%rdx
1661	jc	.Lxts_enc_short
1662
	// Main-loop setup: %rax = 112 - 16*count is the starting round offset
	// (a copy is kept in %r10 for reloading), %rcx points past the schedule
	// (base + 32 + 16*count), %xmm1 = second round key, %r8 = address of
	// .Lxts_magic.
1663	movl	$16+96,%eax
1664	leaq	32(%rbp,%r10,1),%rcx
1665	subq	%r10,%rax
1666	movups	16(%rbp),%xmm1
1667	movq	%rax,%r10
1668	leaq	.Lxts_magic(%rip),%r8
1669	jmp	.Lxts_enc_grandloop
1670
1671.align	32
1672.Lxts_enc_grandloop:
	// Load six input blocks into %xmm2-%xmm7 and XOR each with its keyed
	// tweak (this also applies the first round key). The first AES rounds
	// are interleaved with the loads.
	// Meanwhile each tweak is XORed with 96(%rsp) so that it becomes
	// (tweak XOR last round key) and is spilled to 0..80(%rsp); these slots
	// are the memory operands of the closing aesenclast instructions, which
	// thereby apply the last round key and the output tweak in one step.
1673	movdqu	0(%rdi),%xmm2
1674	movdqa	%xmm0,%xmm8
1675	movdqu	16(%rdi),%xmm3
1676	pxor	%xmm10,%xmm2
1677	movdqu	32(%rdi),%xmm4
1678	pxor	%xmm11,%xmm3
1679.byte	102,15,56,220,209
1680	movdqu	48(%rdi),%xmm5
1681	pxor	%xmm12,%xmm4
1682.byte	102,15,56,220,217
1683	movdqu	64(%rdi),%xmm6
1684	pxor	%xmm13,%xmm5
1685.byte	102,15,56,220,225
1686	movdqu	80(%rdi),%xmm7
1687	pxor	%xmm15,%xmm8
1688	movdqa	96(%rsp),%xmm9
1689	pxor	%xmm14,%xmm6
1690.byte	102,15,56,220,233
1691	movups	32(%rbp),%xmm0
1692	leaq	96(%rdi),%rdi
1693	pxor	%xmm8,%xmm7
1694
1695	pxor	%xmm9,%xmm10
1696.byte	102,15,56,220,241
1697	pxor	%xmm9,%xmm11
1698	movdqa	%xmm10,0(%rsp)
1699.byte	102,15,56,220,249
1700	movups	48(%rbp),%xmm1
1701	pxor	%xmm9,%xmm12
1702
1703.byte	102,15,56,220,208
1704	pxor	%xmm9,%xmm13
1705	movdqa	%xmm11,16(%rsp)
1706.byte	102,15,56,220,216
1707	pxor	%xmm9,%xmm14
1708	movdqa	%xmm12,32(%rsp)
1709.byte	102,15,56,220,224
1710.byte	102,15,56,220,232
1711	pxor	%xmm9,%xmm8
1712	movdqa	%xmm14,64(%rsp)
1713.byte	102,15,56,220,240
1714.byte	102,15,56,220,248
1715	movups	64(%rbp),%xmm0
1716	movdqa	%xmm8,80(%rsp)
	// Replicate the carry-source dwords of the newest tweak for the next
	// round of tweak doubling.
1717	pshufd	$0x5f,%xmm15,%xmm9
1718	jmp	.Lxts_enc_loop6
1719.align	32
1720.Lxts_enc_loop6:
	// Two AES rounds on all six blocks per iteration, alternating the round
	// key registers %xmm1 and %xmm0. %rax advances by 32 and the loop ends
	// when it reaches zero.
1721.byte	102,15,56,220,209
1722.byte	102,15,56,220,217
1723.byte	102,15,56,220,225
1724.byte	102,15,56,220,233
1725.byte	102,15,56,220,241
1726.byte	102,15,56,220,249
1727	movups	-64(%rcx,%rax,1),%xmm1
1728	addq	$32,%rax
1729
1730.byte	102,15,56,220,208
1731.byte	102,15,56,220,216
1732.byte	102,15,56,220,224
1733.byte	102,15,56,220,232
1734.byte	102,15,56,220,240
1735.byte	102,15,56,220,248
1736	movups	-80(%rcx,%rax,1),%xmm0
1737	jnz	.Lxts_enc_loop6
1738
	// Remaining rounds, interleaved with the computation of the next six
	// tweaks. %xmm10-%xmm14 are rebuilt as (first round key XOR tweak) and
	// %xmm15 keeps advancing; %xmm8 is reloaded with .Lxts_magic via %r8.
	// The outgoing %xmm13 value is spilled to 48(%rsp) before the register
	// is reused.
1739	movdqa	(%r8),%xmm8
1740	movdqa	%xmm9,%xmm14
1741	paddd	%xmm9,%xmm9
1742.byte	102,15,56,220,209
1743	paddq	%xmm15,%xmm15
1744	psrad	$31,%xmm14
1745.byte	102,15,56,220,217
1746	pand	%xmm8,%xmm14
1747	movups	(%rbp),%xmm10
1748.byte	102,15,56,220,225
1749.byte	102,15,56,220,233
1750.byte	102,15,56,220,241
1751	pxor	%xmm14,%xmm15
1752	movaps	%xmm10,%xmm11
1753.byte	102,15,56,220,249
1754	movups	-64(%rcx),%xmm1
1755
1756	movdqa	%xmm9,%xmm14
1757.byte	102,15,56,220,208
1758	paddd	%xmm9,%xmm9
1759	pxor	%xmm15,%xmm10
1760.byte	102,15,56,220,216
1761	psrad	$31,%xmm14
1762	paddq	%xmm15,%xmm15
1763.byte	102,15,56,220,224
1764.byte	102,15,56,220,232
1765	pand	%xmm8,%xmm14
1766	movaps	%xmm11,%xmm12
1767.byte	102,15,56,220,240
1768	pxor	%xmm14,%xmm15
1769	movdqa	%xmm9,%xmm14
1770.byte	102,15,56,220,248
1771	movups	-48(%rcx),%xmm0
1772
1773	paddd	%xmm9,%xmm9
1774.byte	102,15,56,220,209
1775	pxor	%xmm15,%xmm11
1776	psrad	$31,%xmm14
1777.byte	102,15,56,220,217
1778	paddq	%xmm15,%xmm15
1779	pand	%xmm8,%xmm14
1780.byte	102,15,56,220,225
1781.byte	102,15,56,220,233
1782	movdqa	%xmm13,48(%rsp)
1783	pxor	%xmm14,%xmm15
1784.byte	102,15,56,220,241
1785	movaps	%xmm12,%xmm13
1786	movdqa	%xmm9,%xmm14
1787.byte	102,15,56,220,249
1788	movups	-32(%rcx),%xmm1
1789
1790	paddd	%xmm9,%xmm9
1791.byte	102,15,56,220,208
1792	pxor	%xmm15,%xmm12
1793	psrad	$31,%xmm14
1794.byte	102,15,56,220,216
1795	paddq	%xmm15,%xmm15
1796	pand	%xmm8,%xmm14
1797.byte	102,15,56,220,224
1798.byte	102,15,56,220,232
1799.byte	102,15,56,220,240
1800	pxor	%xmm14,%xmm15
1801	movaps	%xmm13,%xmm14
1802.byte	102,15,56,220,248
1803
1804	movdqa	%xmm9,%xmm0
1805	paddd	%xmm9,%xmm9
1806.byte	102,15,56,220,209
1807	pxor	%xmm15,%xmm13
1808	psrad	$31,%xmm0
1809.byte	102,15,56,220,217
1810	paddq	%xmm15,%xmm15
1811	pand	%xmm8,%xmm0
1812.byte	102,15,56,220,225
1813.byte	102,15,56,220,233
1814	pxor	%xmm0,%xmm15
	// Reload the first two round keys for the next iteration (and for the
	// tail code, which uses %xmm0 to strip the key from the tweaks).
1815	movups	(%rbp),%xmm0
1816.byte	102,15,56,220,241
1817.byte	102,15,56,220,249
1818	movups	16(%rbp),%xmm1
1819
	// Final round: aesenclast with the spilled (tweak XOR last round key)
	// values at 0,16,32,48,64,80(%rsp) for %xmm2-%xmm7 respectively.
	// %rax is reset from %r10 for the next pass through the round loop.
1820	pxor	%xmm15,%xmm14
1821.byte	102,15,56,221,84,36,0
1822	psrad	$31,%xmm9
1823	paddq	%xmm15,%xmm15
1824.byte	102,15,56,221,92,36,16
1825.byte	102,15,56,221,100,36,32
1826	pand	%xmm8,%xmm9
1827	movq	%r10,%rax
1828.byte	102,15,56,221,108,36,48
1829.byte	102,15,56,221,116,36,64
1830.byte	102,15,56,221,124,36,80
1831	pxor	%xmm9,%xmm15
1832
	// Store six output blocks and loop while at least 96 more bytes remain.
1833	leaq	96(%rsi),%rsi
1834	movups	%xmm2,-96(%rsi)
1835	movups	%xmm3,-80(%rsi)
1836	movups	%xmm4,-64(%rsi)
1837	movups	%xmm5,-48(%rsi)
1838	movups	%xmm6,-32(%rsi)
1839	movups	%xmm7,-16(%rsi)
1840	subq	$96,%rdx
1841	jnc	.Lxts_enc_grandloop
1842
	// Leaving the main loop: recover the round count in %eax
	// ((112 - %r10) >> 4) and the schedule base in %rcx.
1843	movl	$16+96,%eax
1844	subl	%r10d,%eax
1845	movq	%rbp,%rcx
1846	shrl	$4,%eax
1847
1848.Lxts_enc_short:
1849
	// Tail: %r10d keeps the round count. %rdx + 96 is the number of bytes
	// in leftover whole blocks (0, 16, ..., 80). The pxor with %xmm0 removes
	// the first round key from each tweak just before the dispatch decides
	// whether that tweak is needed.
1850	movl	%eax,%r10d
1851	pxor	%xmm0,%xmm10
1852	addq	$96,%rdx
1853	jz	.Lxts_enc_done
1854
1855	pxor	%xmm0,%xmm11
1856	cmpq	$0x20,%rdx
1857	jb	.Lxts_enc_one
1858	pxor	%xmm0,%xmm12
1859	je	.Lxts_enc_two
1860
1861	pxor	%xmm0,%xmm13
1862	cmpq	$0x40,%rdx
1863	jb	.Lxts_enc_three
1864	pxor	%xmm0,%xmm14
1865	je	.Lxts_enc_four
1866
	// Five leftover blocks: XOR with tweaks, run the six-block helper with
	// a zeroed sixth lane, XOR with tweaks again, store.
	// %xmm10 is set to the next unused tweak (%xmm15) for the final step.
1867	movdqu	(%rdi),%xmm2
1868	movdqu	16(%rdi),%xmm3
1869	movdqu	32(%rdi),%xmm4
1870	pxor	%xmm10,%xmm2
1871	movdqu	48(%rdi),%xmm5
1872	pxor	%xmm11,%xmm3
1873	movdqu	64(%rdi),%xmm6
1874	leaq	80(%rdi),%rdi
1875	pxor	%xmm12,%xmm4
1876	pxor	%xmm13,%xmm5
1877	pxor	%xmm14,%xmm6
1878	pxor	%xmm7,%xmm7
1879
1880	call	_aesni_encrypt6
1881
1882	xorps	%xmm10,%xmm2
1883	movdqa	%xmm15,%xmm10
1884	xorps	%xmm11,%xmm3
1885	xorps	%xmm12,%xmm4
1886	movdqu	%xmm2,(%rsi)
1887	xorps	%xmm13,%xmm5
1888	movdqu	%xmm3,16(%rsi)
1889	xorps	%xmm14,%xmm6
1890	movdqu	%xmm4,32(%rsi)
1891	movdqu	%xmm5,48(%rsi)
1892	movdqu	%xmm6,64(%rsi)
1893	leaq	80(%rsi),%rsi
1894	jmp	.Lxts_enc_done
1895
1896.align	16
1897.Lxts_enc_one:
	// One leftover block, encrypted inline with the single-block round loop.
	// %xmm10 then takes the next tweak from %xmm11.
1898	movups	(%rdi),%xmm2
1899	leaq	16(%rdi),%rdi
1900	xorps	%xmm10,%xmm2
1901	movups	(%rcx),%xmm0
1902	movups	16(%rcx),%xmm1
1903	leaq	32(%rcx),%rcx
1904	xorps	%xmm0,%xmm2
1905.Loop_enc1_9:
1906.byte	102,15,56,220,209
1907	decl	%eax
1908	movups	(%rcx),%xmm1
1909	leaq	16(%rcx),%rcx
1910	jnz	.Loop_enc1_9
1911.byte	102,15,56,221,209
1912	xorps	%xmm10,%xmm2
1913	movdqa	%xmm11,%xmm10
1914	movups	%xmm2,(%rsi)
1915	leaq	16(%rsi),%rsi
1916	jmp	.Lxts_enc_done
1917
1918.align	16
1919.Lxts_enc_two:
	// Two leftover blocks via _aesni_encrypt2; next tweak comes from %xmm12.
1920	movups	(%rdi),%xmm2
1921	movups	16(%rdi),%xmm3
1922	leaq	32(%rdi),%rdi
1923	xorps	%xmm10,%xmm2
1924	xorps	%xmm11,%xmm3
1925
1926	call	_aesni_encrypt2
1927
1928	xorps	%xmm10,%xmm2
1929	movdqa	%xmm12,%xmm10
1930	xorps	%xmm11,%xmm3
1931	movups	%xmm2,(%rsi)
1932	movups	%xmm3,16(%rsi)
1933	leaq	32(%rsi),%rsi
1934	jmp	.Lxts_enc_done
1935
1936.align	16
1937.Lxts_enc_three:
	// Three leftover blocks via _aesni_encrypt3; next tweak comes from %xmm13.
1938	movups	(%rdi),%xmm2
1939	movups	16(%rdi),%xmm3
1940	movups	32(%rdi),%xmm4
1941	leaq	48(%rdi),%rdi
1942	xorps	%xmm10,%xmm2
1943	xorps	%xmm11,%xmm3
1944	xorps	%xmm12,%xmm4
1945
1946	call	_aesni_encrypt3
1947
1948	xorps	%xmm10,%xmm2
1949	movdqa	%xmm13,%xmm10
1950	xorps	%xmm11,%xmm3
1951	xorps	%xmm12,%xmm4
1952	movups	%xmm2,(%rsi)
1953	movups	%xmm3,16(%rsi)
1954	movups	%xmm4,32(%rsi)
1955	leaq	48(%rsi),%rsi
1956	jmp	.Lxts_enc_done
1957
1958.align	16
1959.Lxts_enc_four:
	// Four leftover blocks via _aesni_encrypt4; next tweak comes from %xmm14.
1960	movups	(%rdi),%xmm2
1961	movups	16(%rdi),%xmm3
1962	movups	32(%rdi),%xmm4
1963	xorps	%xmm10,%xmm2
1964	movups	48(%rdi),%xmm5
1965	leaq	64(%rdi),%rdi
1966	xorps	%xmm11,%xmm3
1967	xorps	%xmm12,%xmm4
1968	xorps	%xmm13,%xmm5
1969
1970	call	_aesni_encrypt4
1971
1972	pxor	%xmm10,%xmm2
1973	movdqa	%xmm14,%xmm10
1974	pxor	%xmm11,%xmm3
1975	pxor	%xmm12,%xmm4
1976	movdqu	%xmm2,(%rsi)
1977	pxor	%xmm13,%xmm5
1978	movdqu	%xmm3,16(%rsi)
1979	movdqu	%xmm4,32(%rsi)
1980	movdqu	%xmm5,48(%rsi)
1981	leaq	64(%rsi),%rsi
1982	jmp	.Lxts_enc_done
1983
1984.align	16
1985.Lxts_enc_done:
	// %r9 = length mod 16. Zero means there is no partial block.
1986	andq	$15,%r9
1987	jz	.Lxts_enc_ret
1988	movq	%r9,%rdx
1989
1990.Lxts_enc_steal:
	// Byte-wise swap for the trailing partial block: each remaining input
	// byte replaces the corresponding byte of the last full output block
	// (at -16(%rsi)), and the displaced output byte becomes the partial
	// output byte.
1991	movzbl	(%rdi),%eax
1992	movzbl	-16(%rsi),%ecx
1993	leaq	1(%rdi),%rdi
1994	movb	%al,-16(%rsi)
1995	movb	%cl,0(%rsi)
1996	leaq	1(%rsi),%rsi
1997	subq	$1,%rdx
1998	jnz	.Lxts_enc_steal
1999
	// Rewind %rsi, restore the schedule pointer and round count (both were
	// used as byte scratch above), then encrypt the patched block in place
	// with the tweak in %xmm10.
2000	subq	%r9,%rsi
2001	movq	%rbp,%rcx
2002	movl	%r10d,%eax
2003
2004	movups	-16(%rsi),%xmm2
2005	xorps	%xmm10,%xmm2
2006	movups	(%rcx),%xmm0
2007	movups	16(%rcx),%xmm1
2008	leaq	32(%rcx),%rcx
2009	xorps	%xmm0,%xmm2
2010.Loop_enc1_10:
2011.byte	102,15,56,220,209
2012	decl	%eax
2013	movups	(%rcx),%xmm1
2014	leaq	16(%rcx),%rcx
2015	jnz	.Loop_enc1_10
2016.byte	102,15,56,221,209
2017	xorps	%xmm10,%xmm2
2018	movups	%xmm2,-16(%rsi)
2019
2020.Lxts_enc_ret:
	// Epilogue: zero every XMM register and the scratch slots at
	// 0..111(%rsp), then restore %rbp and the entry %rsp from %r11.
2021	xorps	%xmm0,%xmm0
2022	pxor	%xmm1,%xmm1
2023	pxor	%xmm2,%xmm2
2024	pxor	%xmm3,%xmm3
2025	pxor	%xmm4,%xmm4
2026	pxor	%xmm5,%xmm5
2027	pxor	%xmm6,%xmm6
2028	pxor	%xmm7,%xmm7
2029	movaps	%xmm0,0(%rsp)
2030	pxor	%xmm8,%xmm8
2031	movaps	%xmm0,16(%rsp)
2032	pxor	%xmm9,%xmm9
2033	movaps	%xmm0,32(%rsp)
2034	pxor	%xmm10,%xmm10
2035	movaps	%xmm0,48(%rsp)
2036	pxor	%xmm11,%xmm11
2037	movaps	%xmm0,64(%rsp)
2038	pxor	%xmm12,%xmm12
2039	movaps	%xmm0,80(%rsp)
2040	pxor	%xmm13,%xmm13
2041	movaps	%xmm0,96(%rsp)
2042	pxor	%xmm14,%xmm14
2043	pxor	%xmm15,%xmm15
2044	movq	-8(%r11),%rbp
2045.cfi_restore	%rbp
2046	leaq	(%r11),%rsp
2047.cfi_def_cfa_register	%rsp
2048.Lxts_enc_epilogue:
2049	.byte	0xf3,0xc3
2050.cfi_endproc
2051.size	aes_hw_xts_encrypt,.-aes_hw_xts_encrypt
// aes_hw_xts_decrypt: XTS-mode decryption built on AES-NI.
//
// Register roles, as used by the code below (System V AMD64 argument order):
//   %rdi  input pointer (advanced as blocks are consumed)
//   %rsi  output pointer (advanced as blocks are produced)
//   %rdx  length in bytes
//   %rcx  key schedule for the data; round count is read from offset 240
//   %r8   key schedule for the tweak; round count is read from offset 240
//   %r9   pointer to 16 bytes that are encrypted with the %r8 schedule to
//         form the first tweak
// Presumed C prototype (TODO confirm against the project header):
//   void aes_hw_xts_decrypt(const uint8_t *in, uint8_t *out, size_t len,
//                           const AES_KEY *key1, const AES_KEY *key2,
//                           const uint8_t iv[16]);
//
// Structure mirrors the encrypt routine: a six-block main loop, a tail for
// one to five whole blocks, and a final step for a trailing partial block.
// When the length is not a multiple of 16, one whole block is held back
// from the bulk code and processed together with the partial block, with
// the two final tweaks applied in swapped order.
//
// Stack: a 16-byte aligned scratch frame of at least 112 bytes. It and
// %xmm0-%xmm15 are zeroed before returning. %rbp is saved and restored.
//
// The .byte sequences are hand-encoded AES-NI instructions:
//   102,15,56,220,M = aesenc, 221 = aesenclast (tweak generation only),
//   102,15,56,222,M = aesdec, 223 = aesdeclast (M = ModRM byte, optionally
//   followed by SIB and disp8 for the (%rsp) memory forms);
//   0xf3,0xc3 = rep ret.
2052.globl	aes_hw_xts_decrypt
2053.hidden aes_hw_xts_decrypt
2054.type	aes_hw_xts_decrypt,@function
2055.align	16
2056aes_hw_xts_decrypt:
2057.cfi_startproc
	// Prologue: remember the entry %rsp in %r11, save %rbp, reserve and
	// align the scratch frame.
2058	leaq	(%rsp),%r11
2059.cfi_def_cfa_register	%r11
2060	pushq	%rbp
2061.cfi_offset	%rbp,-16
2062	subq	$112,%rsp
2063	andq	$-16,%rsp
	// Single-block ENCRYPTION (aesenc/aesenclast) of the 16 bytes at (%r9)
	// with the schedule at %r8; the result in %xmm2 is the first tweak.
	// %r10d picks up the round count of the data schedule meanwhile.
2064	movups	(%r9),%xmm2
2065	movl	240(%r8),%eax
2066	movl	240(%rcx),%r10d
2067	movups	(%r8),%xmm0
2068	movups	16(%r8),%xmm1
2069	leaq	32(%r8),%r8
2070	xorps	%xmm0,%xmm2
2071.Loop_enc1_11:
2072.byte	102,15,56,220,209
2073	decl	%eax
2074	movups	(%r8),%xmm1
2075	leaq	16(%r8),%r8
2076	jnz	.Loop_enc1_11
2077.byte	102,15,56,221,209
	// If the length is not a multiple of 16, subtract 16 from %rdx so that
	// the last whole block is left for the final partial-block step.
2078	xorl	%eax,%eax
2079	testq	$15,%rdx
2080	setnz	%al
2081	shlq	$4,%rax
2082	subq	%rax,%rdx
2083
	// %xmm0 = first round key of the data schedule, %rbp = schedule base,
	// %eax = round count, %r10 = 16 * round count.
	// %r9 keeps the (possibly reduced) byte length; %rdx is rounded down to
	// whole blocks.
2084	movups	(%rcx),%xmm0
2085	movq	%rcx,%rbp
2086	movl	%r10d,%eax
2087	shll	$4,%r10d
2088	movq	%rdx,%r9
2089	andq	$-16,%rdx
2090
	// %xmm1 = round key at offset 16 + 16*count, the key consumed by the
	// final aesdeclast.
2091	movups	16(%rcx,%r10,1),%xmm1
2092
	// Build six consecutive tweaks. Each step copies the current tweak out,
	// doubles %xmm15 with paddq, and folds the carries back in through a
	// sign mask (psrad $31 of the dwords replicated into %xmm9) ANDed with
	// the .Lxts_magic constant (defined outside this block).
	// %xmm10-%xmm14 receive tweaks 1-5, each pre-XORed with the first round
	// key in %xmm0; %xmm15 ends up as tweak 6 without the key.
	// %xmm1 becomes (first round key XOR last round key) and is parked at
	// 96(%rsp) for the main loop.
2093	movdqa	.Lxts_magic(%rip),%xmm8
2094	movdqa	%xmm2,%xmm15
2095	pshufd	$0x5f,%xmm2,%xmm9
2096	pxor	%xmm0,%xmm1
2097	movdqa	%xmm9,%xmm14
2098	paddd	%xmm9,%xmm9
2099	movdqa	%xmm15,%xmm10
2100	psrad	$31,%xmm14
2101	paddq	%xmm15,%xmm15
2102	pand	%xmm8,%xmm14
2103	pxor	%xmm0,%xmm10
2104	pxor	%xmm14,%xmm15
2105	movdqa	%xmm9,%xmm14
2106	paddd	%xmm9,%xmm9
2107	movdqa	%xmm15,%xmm11
2108	psrad	$31,%xmm14
2109	paddq	%xmm15,%xmm15
2110	pand	%xmm8,%xmm14
2111	pxor	%xmm0,%xmm11
2112	pxor	%xmm14,%xmm15
2113	movdqa	%xmm9,%xmm14
2114	paddd	%xmm9,%xmm9
2115	movdqa	%xmm15,%xmm12
2116	psrad	$31,%xmm14
2117	paddq	%xmm15,%xmm15
2118	pand	%xmm8,%xmm14
2119	pxor	%xmm0,%xmm12
2120	pxor	%xmm14,%xmm15
2121	movdqa	%xmm9,%xmm14
2122	paddd	%xmm9,%xmm9
2123	movdqa	%xmm15,%xmm13
2124	psrad	$31,%xmm14
2125	paddq	%xmm15,%xmm15
2126	pand	%xmm8,%xmm14
2127	pxor	%xmm0,%xmm13
2128	pxor	%xmm14,%xmm15
2129	movdqa	%xmm15,%xmm14
2130	psrad	$31,%xmm9
2131	paddq	%xmm15,%xmm15
2132	pand	%xmm8,%xmm9
2133	pxor	%xmm0,%xmm14
2134	pxor	%xmm9,%xmm15
2135	movaps	%xmm1,96(%rsp)
2136
	// A borrow here means fewer than six whole blocks: go to the tail code.
2137	subq	$96,%rdx
2138	jc	.Lxts_dec_short
2139
	// Main-loop setup: %rax = 112 - 16*count is the starting round offset
	// (a copy is kept in %r10 for reloading), %rcx points past the schedule
	// (base + 32 + 16*count), %xmm1 = second round key, %r8 = address of
	// .Lxts_magic.
2140	movl	$16+96,%eax
2141	leaq	32(%rbp,%r10,1),%rcx
2142	subq	%r10,%rax
2143	movups	16(%rbp),%xmm1
2144	movq	%rax,%r10
2145	leaq	.Lxts_magic(%rip),%r8
2146	jmp	.Lxts_dec_grandloop
2147
2148.align	32
2149.Lxts_dec_grandloop:
	// Load six input blocks into %xmm2-%xmm7 and XOR each with its keyed
	// tweak (this also applies the first round key). The first AES rounds
	// are interleaved with the loads.
	// Meanwhile each tweak is XORed with 96(%rsp) so that it becomes
	// (tweak XOR last round key) and is spilled to 0..80(%rsp); these slots
	// are the memory operands of the closing aesdeclast instructions, which
	// thereby apply the last round key and the output tweak in one step.
2150	movdqu	0(%rdi),%xmm2
2151	movdqa	%xmm0,%xmm8
2152	movdqu	16(%rdi),%xmm3
2153	pxor	%xmm10,%xmm2
2154	movdqu	32(%rdi),%xmm4
2155	pxor	%xmm11,%xmm3
2156.byte	102,15,56,222,209
2157	movdqu	48(%rdi),%xmm5
2158	pxor	%xmm12,%xmm4
2159.byte	102,15,56,222,217
2160	movdqu	64(%rdi),%xmm6
2161	pxor	%xmm13,%xmm5
2162.byte	102,15,56,222,225
2163	movdqu	80(%rdi),%xmm7
2164	pxor	%xmm15,%xmm8
2165	movdqa	96(%rsp),%xmm9
2166	pxor	%xmm14,%xmm6
2167.byte	102,15,56,222,233
2168	movups	32(%rbp),%xmm0
2169	leaq	96(%rdi),%rdi
2170	pxor	%xmm8,%xmm7
2171
2172	pxor	%xmm9,%xmm10
2173.byte	102,15,56,222,241
2174	pxor	%xmm9,%xmm11
2175	movdqa	%xmm10,0(%rsp)
2176.byte	102,15,56,222,249
2177	movups	48(%rbp),%xmm1
2178	pxor	%xmm9,%xmm12
2179
2180.byte	102,15,56,222,208
2181	pxor	%xmm9,%xmm13
2182	movdqa	%xmm11,16(%rsp)
2183.byte	102,15,56,222,216
2184	pxor	%xmm9,%xmm14
2185	movdqa	%xmm12,32(%rsp)
2186.byte	102,15,56,222,224
2187.byte	102,15,56,222,232
2188	pxor	%xmm9,%xmm8
2189	movdqa	%xmm14,64(%rsp)
2190.byte	102,15,56,222,240
2191.byte	102,15,56,222,248
2192	movups	64(%rbp),%xmm0
2193	movdqa	%xmm8,80(%rsp)
	// Replicate the carry-source dwords of the newest tweak for the next
	// round of tweak doubling.
2194	pshufd	$0x5f,%xmm15,%xmm9
2195	jmp	.Lxts_dec_loop6
2196.align	32
2197.Lxts_dec_loop6:
	// Two AES rounds on all six blocks per iteration, alternating the round
	// key registers %xmm1 and %xmm0. %rax advances by 32 and the loop ends
	// when it reaches zero.
2198.byte	102,15,56,222,209
2199.byte	102,15,56,222,217
2200.byte	102,15,56,222,225
2201.byte	102,15,56,222,233
2202.byte	102,15,56,222,241
2203.byte	102,15,56,222,249
2204	movups	-64(%rcx,%rax,1),%xmm1
2205	addq	$32,%rax
2206
2207.byte	102,15,56,222,208
2208.byte	102,15,56,222,216
2209.byte	102,15,56,222,224
2210.byte	102,15,56,222,232
2211.byte	102,15,56,222,240
2212.byte	102,15,56,222,248
2213	movups	-80(%rcx,%rax,1),%xmm0
2214	jnz	.Lxts_dec_loop6
2215
	// Remaining rounds, interleaved with the computation of the next six
	// tweaks. %xmm10-%xmm14 are rebuilt as (first round key XOR tweak) and
	// %xmm15 keeps advancing; %xmm8 is reloaded with .Lxts_magic via %r8.
	// The outgoing %xmm13 value is spilled to 48(%rsp) before the register
	// is reused.
2216	movdqa	(%r8),%xmm8
2217	movdqa	%xmm9,%xmm14
2218	paddd	%xmm9,%xmm9
2219.byte	102,15,56,222,209
2220	paddq	%xmm15,%xmm15
2221	psrad	$31,%xmm14
2222.byte	102,15,56,222,217
2223	pand	%xmm8,%xmm14
2224	movups	(%rbp),%xmm10
2225.byte	102,15,56,222,225
2226.byte	102,15,56,222,233
2227.byte	102,15,56,222,241
2228	pxor	%xmm14,%xmm15
2229	movaps	%xmm10,%xmm11
2230.byte	102,15,56,222,249
2231	movups	-64(%rcx),%xmm1
2232
2233	movdqa	%xmm9,%xmm14
2234.byte	102,15,56,222,208
2235	paddd	%xmm9,%xmm9
2236	pxor	%xmm15,%xmm10
2237.byte	102,15,56,222,216
2238	psrad	$31,%xmm14
2239	paddq	%xmm15,%xmm15
2240.byte	102,15,56,222,224
2241.byte	102,15,56,222,232
2242	pand	%xmm8,%xmm14
2243	movaps	%xmm11,%xmm12
2244.byte	102,15,56,222,240
2245	pxor	%xmm14,%xmm15
2246	movdqa	%xmm9,%xmm14
2247.byte	102,15,56,222,248
2248	movups	-48(%rcx),%xmm0
2249
2250	paddd	%xmm9,%xmm9
2251.byte	102,15,56,222,209
2252	pxor	%xmm15,%xmm11
2253	psrad	$31,%xmm14
2254.byte	102,15,56,222,217
2255	paddq	%xmm15,%xmm15
2256	pand	%xmm8,%xmm14
2257.byte	102,15,56,222,225
2258.byte	102,15,56,222,233
2259	movdqa	%xmm13,48(%rsp)
2260	pxor	%xmm14,%xmm15
2261.byte	102,15,56,222,241
2262	movaps	%xmm12,%xmm13
2263	movdqa	%xmm9,%xmm14
2264.byte	102,15,56,222,249
2265	movups	-32(%rcx),%xmm1
2266
2267	paddd	%xmm9,%xmm9
2268.byte	102,15,56,222,208
2269	pxor	%xmm15,%xmm12
2270	psrad	$31,%xmm14
2271.byte	102,15,56,222,216
2272	paddq	%xmm15,%xmm15
2273	pand	%xmm8,%xmm14
2274.byte	102,15,56,222,224
2275.byte	102,15,56,222,232
2276.byte	102,15,56,222,240
2277	pxor	%xmm14,%xmm15
2278	movaps	%xmm13,%xmm14
2279.byte	102,15,56,222,248
2280
2281	movdqa	%xmm9,%xmm0
2282	paddd	%xmm9,%xmm9
2283.byte	102,15,56,222,209
2284	pxor	%xmm15,%xmm13
2285	psrad	$31,%xmm0
2286.byte	102,15,56,222,217
2287	paddq	%xmm15,%xmm15
2288	pand	%xmm8,%xmm0
2289.byte	102,15,56,222,225
2290.byte	102,15,56,222,233
2291	pxor	%xmm0,%xmm15
	// Reload the first two round keys for the next iteration (and for the
	// tail code, which uses %xmm0 to strip the key from the tweaks).
2292	movups	(%rbp),%xmm0
2293.byte	102,15,56,222,241
2294.byte	102,15,56,222,249
2295	movups	16(%rbp),%xmm1
2296
	// Final round: aesdeclast with the spilled (tweak XOR last round key)
	// values at 0,16,32,48,64,80(%rsp) for %xmm2-%xmm7 respectively.
	// %rax is reset from %r10 for the next pass through the round loop.
2297	pxor	%xmm15,%xmm14
2298.byte	102,15,56,223,84,36,0
2299	psrad	$31,%xmm9
2300	paddq	%xmm15,%xmm15
2301.byte	102,15,56,223,92,36,16
2302.byte	102,15,56,223,100,36,32
2303	pand	%xmm8,%xmm9
2304	movq	%r10,%rax
2305.byte	102,15,56,223,108,36,48
2306.byte	102,15,56,223,116,36,64
2307.byte	102,15,56,223,124,36,80
2308	pxor	%xmm9,%xmm15
2309
	// Store six output blocks and loop while at least 96 more bytes remain.
2310	leaq	96(%rsi),%rsi
2311	movups	%xmm2,-96(%rsi)
2312	movups	%xmm3,-80(%rsi)
2313	movups	%xmm4,-64(%rsi)
2314	movups	%xmm5,-48(%rsi)
2315	movups	%xmm6,-32(%rsi)
2316	movups	%xmm7,-16(%rsi)
2317	subq	$96,%rdx
2318	jnc	.Lxts_dec_grandloop
2319
	// Leaving the main loop: recover the round count in %eax
	// ((112 - %r10) >> 4) and the schedule base in %rcx.
2320	movl	$16+96,%eax
2321	subl	%r10d,%eax
2322	movq	%rbp,%rcx
2323	shrl	$4,%eax
2324
2325.Lxts_dec_short:
2326
	// Tail: %r10d keeps the round count. %rdx + 96 is the number of bytes
	// in leftover whole blocks (0, 16, ..., 80). The pxor with %xmm0 removes
	// the first round key from the tweaks. Unlike the encrypt routine, two
	// tweaks (%xmm10 and %xmm11) are un-keyed up front because the final
	// partial-block step consumes two of them.
2327	movl	%eax,%r10d
2328	pxor	%xmm0,%xmm10
2329	pxor	%xmm0,%xmm11
2330	addq	$96,%rdx
2331	jz	.Lxts_dec_done
2332
2333	pxor	%xmm0,%xmm12
2334	cmpq	$0x20,%rdx
2335	jb	.Lxts_dec_one
2336	pxor	%xmm0,%xmm13
2337	je	.Lxts_dec_two
2338
2339	pxor	%xmm0,%xmm14
2340	cmpq	$0x40,%rdx
2341	jb	.Lxts_dec_three
2342	je	.Lxts_dec_four
2343
	// Five leftover blocks: XOR with tweaks, run the six-block helper,
	// XOR with tweaks again, store.
2344	movdqu	(%rdi),%xmm2
2345	movdqu	16(%rdi),%xmm3
2346	movdqu	32(%rdi),%xmm4
2347	pxor	%xmm10,%xmm2
2348	movdqu	48(%rdi),%xmm5
2349	pxor	%xmm11,%xmm3
2350	movdqu	64(%rdi),%xmm6
2351	leaq	80(%rdi),%rdi
2352	pxor	%xmm12,%xmm4
2353	pxor	%xmm13,%xmm5
2354	pxor	%xmm14,%xmm6
2355
2356	call	_aesni_decrypt6
2357
	// While storing, build in %xmm11 a shuffled per-dword sign mask of the
	// tweak in %xmm15 (pcmpgtd against zero, then pshufd $0x13). It is only
	// needed if a partial block follows.
2358	xorps	%xmm10,%xmm2
2359	xorps	%xmm11,%xmm3
2360	xorps	%xmm12,%xmm4
2361	movdqu	%xmm2,(%rsi)
2362	xorps	%xmm13,%xmm5
2363	movdqu	%xmm3,16(%rsi)
2364	xorps	%xmm14,%xmm6
2365	movdqu	%xmm4,32(%rsi)
2366	pxor	%xmm14,%xmm14
2367	movdqu	%xmm5,48(%rsi)
2368	pcmpgtd	%xmm15,%xmm14
2369	movdqu	%xmm6,64(%rsi)
2370	leaq	80(%rsi),%rsi
2371	pshufd	$0x13,%xmm14,%xmm11
2372	andq	$15,%r9
2373	jz	.Lxts_dec_ret
2374
	// Partial block follows: %xmm10 = current tweak, %xmm11 = the tweak
	// after it (doubled %xmm15 XOR masked constant).
	// NOTE(review): this relies on %xmm8 still holding the .Lxts_magic mask
	// after the call to _aesni_decrypt6 - confirm the helper preserves it.
2375	movdqa	%xmm15,%xmm10
2376	paddq	%xmm15,%xmm15
2377	pand	%xmm8,%xmm11
2378	pxor	%xmm15,%xmm11
2379	jmp	.Lxts_dec_done2
2380
2381.align	16
2382.Lxts_dec_one:
	// One leftover block, decrypted inline with the single-block round loop.
	// Afterwards %xmm10/%xmm11 take the next two tweaks (%xmm11/%xmm12).
2383	movups	(%rdi),%xmm2
2384	leaq	16(%rdi),%rdi
2385	xorps	%xmm10,%xmm2
2386	movups	(%rcx),%xmm0
2387	movups	16(%rcx),%xmm1
2388	leaq	32(%rcx),%rcx
2389	xorps	%xmm0,%xmm2
2390.Loop_dec1_12:
2391.byte	102,15,56,222,209
2392	decl	%eax
2393	movups	(%rcx),%xmm1
2394	leaq	16(%rcx),%rcx
2395	jnz	.Loop_dec1_12
2396.byte	102,15,56,223,209
2397	xorps	%xmm10,%xmm2
2398	movdqa	%xmm11,%xmm10
2399	movups	%xmm2,(%rsi)
2400	movdqa	%xmm12,%xmm11
2401	leaq	16(%rsi),%rsi
2402	jmp	.Lxts_dec_done
2403
2404.align	16
2405.Lxts_dec_two:
	// Two leftover blocks via _aesni_decrypt2; next two tweaks come from
	// %xmm12/%xmm13.
2406	movups	(%rdi),%xmm2
2407	movups	16(%rdi),%xmm3
2408	leaq	32(%rdi),%rdi
2409	xorps	%xmm10,%xmm2
2410	xorps	%xmm11,%xmm3
2411
2412	call	_aesni_decrypt2
2413
2414	xorps	%xmm10,%xmm2
2415	movdqa	%xmm12,%xmm10
2416	xorps	%xmm11,%xmm3
2417	movdqa	%xmm13,%xmm11
2418	movups	%xmm2,(%rsi)
2419	movups	%xmm3,16(%rsi)
2420	leaq	32(%rsi),%rsi
2421	jmp	.Lxts_dec_done
2422
2423.align	16
2424.Lxts_dec_three:
	// Three leftover blocks via _aesni_decrypt3; next two tweaks come from
	// %xmm13/%xmm14.
2425	movups	(%rdi),%xmm2
2426	movups	16(%rdi),%xmm3
2427	movups	32(%rdi),%xmm4
2428	leaq	48(%rdi),%rdi
2429	xorps	%xmm10,%xmm2
2430	xorps	%xmm11,%xmm3
2431	xorps	%xmm12,%xmm4
2432
2433	call	_aesni_decrypt3
2434
2435	xorps	%xmm10,%xmm2
2436	movdqa	%xmm13,%xmm10
2437	xorps	%xmm11,%xmm3
2438	movdqa	%xmm14,%xmm11
2439	xorps	%xmm12,%xmm4
2440	movups	%xmm2,(%rsi)
2441	movups	%xmm3,16(%rsi)
2442	movups	%xmm4,32(%rsi)
2443	leaq	48(%rsi),%rsi
2444	jmp	.Lxts_dec_done
2445
2446.align	16
2447.Lxts_dec_four:
	// Four leftover blocks via _aesni_decrypt4; next two tweaks come from
	// %xmm14/%xmm15.
2448	movups	(%rdi),%xmm2
2449	movups	16(%rdi),%xmm3
2450	movups	32(%rdi),%xmm4
2451	xorps	%xmm10,%xmm2
2452	movups	48(%rdi),%xmm5
2453	leaq	64(%rdi),%rdi
2454	xorps	%xmm11,%xmm3
2455	xorps	%xmm12,%xmm4
2456	xorps	%xmm13,%xmm5
2457
2458	call	_aesni_decrypt4
2459
2460	pxor	%xmm10,%xmm2
2461	movdqa	%xmm14,%xmm10
2462	pxor	%xmm11,%xmm3
2463	movdqa	%xmm15,%xmm11
2464	pxor	%xmm12,%xmm4
2465	movdqu	%xmm2,(%rsi)
2466	pxor	%xmm13,%xmm5
2467	movdqu	%xmm3,16(%rsi)
2468	movdqu	%xmm4,32(%rsi)
2469	movdqu	%xmm5,48(%rsi)
2470	leaq	64(%rsi),%rsi
2471	jmp	.Lxts_dec_done
2472
2473.align	16
2474.Lxts_dec_done:
	// %r9 = length mod 16. Zero means there is no partial block.
2475	andq	$15,%r9
2476	jz	.Lxts_dec_ret
2477.Lxts_dec_done2:
	// Partial-block handling, step 1: decrypt the held-back whole block at
	// (%rdi) with the LATER tweak (%xmm11) and store it at (%rsi).
2478	movq	%r9,%rdx
2479	movq	%rbp,%rcx
2480	movl	%r10d,%eax
2481
2482	movups	(%rdi),%xmm2
2483	xorps	%xmm11,%xmm2
2484	movups	(%rcx),%xmm0
2485	movups	16(%rcx),%xmm1
2486	leaq	32(%rcx),%rcx
2487	xorps	%xmm0,%xmm2
2488.Loop_dec1_13:
2489.byte	102,15,56,222,209
2490	decl	%eax
2491	movups	(%rcx),%xmm1
2492	leaq	16(%rcx),%rcx
2493	jnz	.Loop_dec1_13
2494.byte	102,15,56,223,209
2495	xorps	%xmm11,%xmm2
2496	movups	%xmm2,(%rsi)
2497
2498.Lxts_dec_steal:
	// Step 2: byte-wise swap. Each trailing input byte (at 16(%rdi))
	// replaces the corresponding byte of the block just written, and the
	// displaced byte becomes the partial output byte at 16(%rsi).
2499	movzbl	16(%rdi),%eax
2500	movzbl	(%rsi),%ecx
2501	leaq	1(%rdi),%rdi
2502	movb	%al,(%rsi)
2503	movb	%cl,16(%rsi)
2504	leaq	1(%rsi),%rsi
2505	subq	$1,%rdx
2506	jnz	.Lxts_dec_steal
2507
	// Step 3: rewind %rsi, restore the schedule pointer and round count,
	// then decrypt the patched block in place with the EARLIER tweak
	// (%xmm10).
2508	subq	%r9,%rsi
2509	movq	%rbp,%rcx
2510	movl	%r10d,%eax
2511
2512	movups	(%rsi),%xmm2
2513	xorps	%xmm10,%xmm2
2514	movups	(%rcx),%xmm0
2515	movups	16(%rcx),%xmm1
2516	leaq	32(%rcx),%rcx
2517	xorps	%xmm0,%xmm2
2518.Loop_dec1_14:
2519.byte	102,15,56,222,209
2520	decl	%eax
2521	movups	(%rcx),%xmm1
2522	leaq	16(%rcx),%rcx
2523	jnz	.Loop_dec1_14
2524.byte	102,15,56,223,209
2525	xorps	%xmm10,%xmm2
2526	movups	%xmm2,(%rsi)
2527
2528.Lxts_dec_ret:
	// Epilogue: zero every XMM register and the scratch slots at
	// 0..111(%rsp), then restore %rbp and the entry %rsp from %r11.
2529	xorps	%xmm0,%xmm0
2530	pxor	%xmm1,%xmm1
2531	pxor	%xmm2,%xmm2
2532	pxor	%xmm3,%xmm3
2533	pxor	%xmm4,%xmm4
2534	pxor	%xmm5,%xmm5
2535	pxor	%xmm6,%xmm6
2536	pxor	%xmm7,%xmm7
2537	movaps	%xmm0,0(%rsp)
2538	pxor	%xmm8,%xmm8
2539	movaps	%xmm0,16(%rsp)
2540	pxor	%xmm9,%xmm9
2541	movaps	%xmm0,32(%rsp)
2542	pxor	%xmm10,%xmm10
2543	movaps	%xmm0,48(%rsp)
2544	pxor	%xmm11,%xmm11
2545	movaps	%xmm0,64(%rsp)
2546	pxor	%xmm12,%xmm12
2547	movaps	%xmm0,80(%rsp)
2548	pxor	%xmm13,%xmm13
2549	movaps	%xmm0,96(%rsp)
2550	pxor	%xmm14,%xmm14
2551	pxor	%xmm15,%xmm15
2552	movq	-8(%r11),%rbp
2553.cfi_restore	%rbp
2554	leaq	(%r11),%rsp
2555.cfi_def_cfa_register	%rsp
2556.Lxts_dec_epilogue:
2557	.byte	0xf3,0xc3
2558.cfi_endproc
2559.size	aes_hw_xts_decrypt,.-aes_hw_xts_decrypt
// aes_hw_ocb_encrypt: bulk OCB-mode encryption of whole 16-byte blocks
// using AES-NI. This routine is the driver; the per-block math lives in the
// local helpers __ocb_encrypt1, __ocb_encrypt4 and __ocb_encrypt6.
//
// Inputs, as used by the code below (System V AMD64 argument order):
//   %rdi  input pointer
//   %rsi  output pointer
//   %rdx  number of 16-byte blocks
//   %rcx  key schedule; round count is read from offset 240
//   %r8   running block counter; bsf of it (and of counter+1/+3/+5)
//         selects entries of the table at %rbx
//   %r9   pointer to a 16-byte running offset, read at entry and written
//         back at exit
//   first stack argument   -> %rbx: base of a table of 16-byte entries
//   second stack argument  -> %rbp: pointer to a 16-byte checksum, read at
//                             entry and written back at exit
// Presumed C prototype (TODO confirm against the project header):
//   void aes_hw_ocb_encrypt(const uint8_t *in, uint8_t *out, size_t blocks,
//                           const AES_KEY *key, size_t start_block_num,
//                           uint8_t offset_i[16], const uint8_t L_[][16],
//                           uint8_t checksum[16]);
//
// Long-lived registers set up here for the helpers:
//   %r11 = key schedule base, %rcx = base + 32 + 16*count,
//   %rax/%r10 = 48 - 16*count (round-loop offset and its reload copy),
//   %xmm9 = first round key XOR last round key, %xmm1 = second round key,
//   %xmm10 = first table entry, %xmm8 = checksum, %xmm15 = running offset,
//   %r12/%r13/%r14 = byte offsets into the table for upcoming blocks.
//
// Saves and restores %rbx, %rbp, %r12, %r13, %r14; zeroes %xmm0-%xmm15
// before returning. 0xf3,0xc3 encodes rep ret.
2560.globl	aes_hw_ocb_encrypt
2561.hidden aes_hw_ocb_encrypt
2562.type	aes_hw_ocb_encrypt,@function
2563.align	32
2564aes_hw_ocb_encrypt:
2565.cfi_startproc
	// %rax = entry %rsp, used below to reach the two stack arguments.
2566	leaq	(%rsp),%rax
2567	pushq	%rbx
2568.cfi_adjust_cfa_offset	8
2569.cfi_offset	%rbx,-16
2570	pushq	%rbp
2571.cfi_adjust_cfa_offset	8
2572.cfi_offset	%rbp,-24
2573	pushq	%r12
2574.cfi_adjust_cfa_offset	8
2575.cfi_offset	%r12,-32
2576	pushq	%r13
2577.cfi_adjust_cfa_offset	8
2578.cfi_offset	%r13,-40
2579	pushq	%r14
2580.cfi_adjust_cfa_offset	8
2581.cfi_offset	%r14,-48
	// Seventh and eighth arguments sit just above the return address.
2582	movq	8(%rax),%rbx
2583	movq	8+8(%rax),%rbp
2584
	// %r10 = 16 * round count, %r11 = schedule base,
	// %xmm9 = first round key, %xmm1 = round key at offset 16 + 16*count.
2585	movl	240(%rcx),%r10d
2586	movq	%rcx,%r11
2587	shll	$4,%r10d
2588	movups	(%rcx),%xmm9
2589	movups	16(%rcx,%r10,1),%xmm1
2590
	// Fold that last round key into both the first round key (%xmm9) and
	// the running offset loaded from (%r9) (%xmm15).
2591	movdqu	(%r9),%xmm15
2592	pxor	%xmm1,%xmm9
2593	pxor	%xmm1,%xmm15
2594
	// Round-loop bookkeeping shared with the helpers: %rax = 48 - 16*count
	// (copy kept in %r10), %rcx = base + 32 + 16*count, %xmm1 = second
	// round key.
2595	movl	$16+32,%eax
2596	leaq	32(%r11,%r10,1),%rcx
2597	movups	16(%r11),%xmm1
2598	subq	%r10,%rax
2599	movq	%rax,%r10
2600
	// %xmm10 = first table entry, %xmm8 = incoming checksum.
2601	movdqu	(%rbx),%xmm10
2602	movdqu	(%rbp),%xmm8
2603
	// If the block counter is even, process a single block first so that
	// the six-block code below always starts from an odd counter.
2604	testq	$1,%r8
2605	jnz	.Locb_enc_odd
2606
	// Table index for this block = index of the lowest set bit of the
	// counter, scaled by 16 to a byte offset.
	// NOTE(review): bsf leaves its destination undefined for a zero source;
	// presumably callers never pass a zero block counter - confirm.
2607	bsfq	%r8,%r12
2608	addq	$1,%r8
2609	shlq	$4,%r12
2610	movdqu	(%rbx,%r12,1),%xmm7
2611	movdqu	(%rdi),%xmm2
2612	leaq	16(%rdi),%rdi
2613
2614	call	__ocb_encrypt1
2615
	// The helper leaves the updated offset in %xmm7 (copied to %xmm15) and
	// the output block in %xmm2.
2616	movdqa	%xmm7,%xmm15
2617	movups	%xmm2,(%rsi)
2618	leaq	16(%rsi),%rsi
2619	subq	$1,%rdx
2620	jz	.Locb_enc_done
2621
2622.Locb_enc_odd:
	// Precompute table byte offsets for the even-numbered blocks among the
	// next six (counter+1, +3, +5) and advance the counter by six.
2623	leaq	1(%r8),%r12
2624	leaq	3(%r8),%r13
2625	leaq	5(%r8),%r14
2626	leaq	6(%r8),%r8
2627	bsfq	%r12,%r12
2628	bsfq	%r13,%r13
2629	bsfq	%r14,%r14
2630	shlq	$4,%r12
2631	shlq	$4,%r13
2632	shlq	$4,%r14
2633
	// A borrow here means fewer than six blocks remain.
2634	subq	$6,%rdx
2635	jc	.Locb_enc_short
2636	jmp	.Locb_enc_grandloop
2637
2638.align	32
2639.Locb_enc_grandloop:
	// Main loop: six blocks in %xmm2-%xmm7 through __ocb_encrypt6, which
	// also refreshes %r8 and %r12-%r14 for the following iteration.
2640	movdqu	0(%rdi),%xmm2
2641	movdqu	16(%rdi),%xmm3
2642	movdqu	32(%rdi),%xmm4
2643	movdqu	48(%rdi),%xmm5
2644	movdqu	64(%rdi),%xmm6
2645	movdqu	80(%rdi),%xmm7
2646	leaq	96(%rdi),%rdi
2647
2648	call	__ocb_encrypt6
2649
2650	movups	%xmm2,0(%rsi)
2651	movups	%xmm3,16(%rsi)
2652	movups	%xmm4,32(%rsi)
2653	movups	%xmm5,48(%rsi)
2654	movups	%xmm6,64(%rsi)
2655	movups	%xmm7,80(%rsi)
2656	leaq	96(%rsi),%rsi
2657	subq	$6,%rdx
2658	jnc	.Locb_enc_grandloop
2659
2660.Locb_enc_short:
	// Tail: %rdx becomes the number of leftover blocks (0..5). Input blocks
	// are loaded incrementally while the count is being classified.
2661	addq	$6,%rdx
2662	jz	.Locb_enc_done
2663
2664	movdqu	0(%rdi),%xmm2
2665	cmpq	$2,%rdx
2666	jb	.Locb_enc_one
2667	movdqu	16(%rdi),%xmm3
2668	je	.Locb_enc_two
2669
2670	movdqu	32(%rdi),%xmm4
2671	cmpq	$4,%rdx
2672	jb	.Locb_enc_three
2673	movdqu	48(%rdi),%xmm5
2674	je	.Locb_enc_four
2675
	// Five blocks: use the six-block helper with a zeroed sixth lane and
	// take the running offset from the fifth lane (%xmm14).
2676	movdqu	64(%rdi),%xmm6
2677	pxor	%xmm7,%xmm7
2678
2679	call	__ocb_encrypt6
2680
2681	movdqa	%xmm14,%xmm15
2682	movups	%xmm2,0(%rsi)
2683	movups	%xmm3,16(%rsi)
2684	movups	%xmm4,32(%rsi)
2685	movups	%xmm5,48(%rsi)
2686	movups	%xmm6,64(%rsi)
2687
2688	jmp	.Locb_enc_done
2689
2690.align	16
2691.Locb_enc_one:
	// One block: the first table entry (%xmm10) is passed in %xmm7.
2692	movdqa	%xmm10,%xmm7
2693
2694	call	__ocb_encrypt1
2695
2696	movdqa	%xmm7,%xmm15
2697	movups	%xmm2,0(%rsi)
2698	jmp	.Locb_enc_done
2699
2700.align	16
2701.Locb_enc_two:
	// Two blocks: four-block helper with lanes three and four zeroed;
	// running offset comes from the second lane (%xmm11).
2702	pxor	%xmm4,%xmm4
2703	pxor	%xmm5,%xmm5
2704
2705	call	__ocb_encrypt4
2706
2707	movdqa	%xmm11,%xmm15
2708	movups	%xmm2,0(%rsi)
2709	movups	%xmm3,16(%rsi)
2710
2711	jmp	.Locb_enc_done
2712
2713.align	16
2714.Locb_enc_three:
	// Three blocks: four-block helper with lane four zeroed; running offset
	// comes from the third lane (%xmm12).
2715	pxor	%xmm5,%xmm5
2716
2717	call	__ocb_encrypt4
2718
2719	movdqa	%xmm12,%xmm15
2720	movups	%xmm2,0(%rsi)
2721	movups	%xmm3,16(%rsi)
2722	movups	%xmm4,32(%rsi)
2723
2724	jmp	.Locb_enc_done
2725
2726.align	16
2727.Locb_enc_four:
	// Four blocks: running offset comes from the fourth lane (%xmm13).
2728	call	__ocb_encrypt4
2729
2730	movdqa	%xmm13,%xmm15
2731	movups	%xmm2,0(%rsi)
2732	movups	%xmm3,16(%rsi)
2733	movups	%xmm4,32(%rsi)
2734	movups	%xmm5,48(%rsi)
2735
2736.Locb_enc_done:
	// Write back the checksum and the running offset.
	// NOTE(review): the pxor with %xmm0 presumably strips round-key
	// material left in the offset by the helpers - confirm against
	// __ocb_encrypt1/4/6, which are outside this block.
2737	pxor	%xmm0,%xmm15
2738	movdqu	%xmm8,(%rbp)
2739	movdqu	%xmm15,(%r9)
2740
	// Zero every XMM register before returning.
2741	xorps	%xmm0,%xmm0
2742	pxor	%xmm1,%xmm1
2743	pxor	%xmm2,%xmm2
2744	pxor	%xmm3,%xmm3
2745	pxor	%xmm4,%xmm4
2746	pxor	%xmm5,%xmm5
2747	pxor	%xmm6,%xmm6
2748	pxor	%xmm7,%xmm7
2749	pxor	%xmm8,%xmm8
2750	pxor	%xmm9,%xmm9
2751	pxor	%xmm10,%xmm10
2752	pxor	%xmm11,%xmm11
2753	pxor	%xmm12,%xmm12
2754	pxor	%xmm13,%xmm13
2755	pxor	%xmm14,%xmm14
2756	pxor	%xmm15,%xmm15
	// %rax = %rsp + 40 = entry %rsp (five 8-byte pushes); reload the saved
	// registers relative to it instead of popping.
2757	leaq	40(%rsp),%rax
2758.cfi_def_cfa	%rax,8
2759	movq	-40(%rax),%r14
2760.cfi_restore	%r14
2761	movq	-32(%rax),%r13
2762.cfi_restore	%r13
2763	movq	-24(%rax),%r12
2764.cfi_restore	%r12
2765	movq	-16(%rax),%rbp
2766.cfi_restore	%rbp
2767	movq	-8(%rax),%rbx
2768.cfi_restore	%rbx
2769	leaq	(%rax),%rsp
2770.cfi_def_cfa_register	%rsp
2771.Locb_enc_epilogue:
2772	.byte	0xf3,0xc3
2773.cfi_endproc
2774.size	aes_hw_ocb_encrypt,.-aes_hw_ocb_encrypt
2775
2776.type	__ocb_encrypt6,@function
2777.align	32
/*
 * __ocb_encrypt6: internal helper that encrypts the six 16-byte blocks held
 * in %xmm2..%xmm7, masking each one with its own offset before and after the
 * AES rounds.  It uses a private register contract, not the C ABI.
 *
 * In:   %xmm2-%xmm7   the six input blocks
 *       %xmm8         running XOR accumulator; every input block is folded
 *                     into it before being encrypted
 *       %xmm9         mask that is XORed into the running offset on entry and
 *                     into every derived offset again before the last round
 *       %xmm10        16-byte value that is XORed into the 1st, 3rd and 5th
 *                     offsets (this routine reloads it from (%rbx) on exit)
 *       %xmm15        running offset from the previous call
 *       %xmm1         round key from 16(%r11) (reloaded here before returning)
 *       %rbx          base of a table of 16-byte entries
 *       %r12-%r14     byte offsets into that table for the 2nd/4th/6th offsets
 *       %r8           block counter
 *       %r11          key schedule base
 *       %rcx, %rax    end pointer and negative byte index used to walk the
 *                     remaining round keys; %r10 holds the value %rax is
 *                     reset to
 * Out:  %xmm2-%xmm7   the six output blocks
 *       %xmm11-%xmm15 offsets of blocks 2..6 (%xmm15 is the new running offset)
 *       %r8 += 6; %r12-%r14 recomputed for the next call
 *
 * NOTE(review): the caller-side setup for the encrypt path is outside this
 * part of the file.  aes_hw_ocb_decrypt builds %xmm9 as (first round key XOR
 * last round key) and pre-XORs the last round key into %xmm15; presumably the
 * encrypt entry point does the same - confirm.
 */
2778__ocb_encrypt6:
	/*
	 * Chain the six offsets:
	 *   %xmm10 = %xmm10 ^ (%xmm15 ^ %xmm9)   %xmm11 = T[%r12] ^ %xmm10
	 *   %xmm12 = old %xmm10 ^ %xmm11         %xmm13 = T[%r13] ^ %xmm12
	 *   %xmm14 = old %xmm10 ^ %xmm13         %xmm15 = T[%r14] ^ %xmm14
	 * Interleaved with that, each input block is XORed into %xmm8 and then
	 * XORed with its offset.
	 */
2779	pxor	%xmm9,%xmm15
2780	movdqu	(%rbx,%r12,1),%xmm11
2781	movdqa	%xmm10,%xmm12
2782	movdqu	(%rbx,%r13,1),%xmm13
2783	movdqa	%xmm10,%xmm14
2784	pxor	%xmm15,%xmm10
2785	movdqu	(%rbx,%r14,1),%xmm15
2786	pxor	%xmm10,%xmm11
2787	pxor	%xmm2,%xmm8
2788	pxor	%xmm10,%xmm2
2789	pxor	%xmm11,%xmm12
2790	pxor	%xmm3,%xmm8
2791	pxor	%xmm11,%xmm3
2792	pxor	%xmm12,%xmm13
2793	pxor	%xmm4,%xmm8
2794	pxor	%xmm12,%xmm4
2795	pxor	%xmm13,%xmm14
2796	pxor	%xmm5,%xmm8
2797	pxor	%xmm13,%xmm5
2798	pxor	%xmm14,%xmm15
2799	pxor	%xmm6,%xmm8
2800	pxor	%xmm14,%xmm6
2801	pxor	%xmm7,%xmm8
2802	pxor	%xmm15,%xmm7
2803	movups	32(%r11),%xmm0
2804
	/*
	 * Advance the block counter and pick the table entries for the next
	 * call: bsf yields the position of the lowest set bit of counter+1,
	 * counter+3 and counter+5.
	 */
2805	leaq	1(%r8),%r12
2806	leaq	3(%r8),%r13
2807	leaq	5(%r8),%r14
2808	addq	$6,%r8
2809	pxor	%xmm9,%xmm10
2810	bsfq	%r12,%r12
2811	bsfq	%r13,%r13
2812	bsfq	%r14,%r14
2813
	/*
	 * The .byte groups encode aesenc (66 0F 38 DC /r).  This group is
	 * aesenc %xmm1 into %xmm2..%xmm7, interleaved with XORing %xmm9 into
	 * the offsets so they can serve as the final-round keys below.
	 */
2814.byte	102,15,56,220,209
2815.byte	102,15,56,220,217
2816.byte	102,15,56,220,225
2817.byte	102,15,56,220,233
2818	pxor	%xmm9,%xmm11
2819	pxor	%xmm9,%xmm12
2820.byte	102,15,56,220,241
2821	pxor	%xmm9,%xmm13
2822	pxor	%xmm9,%xmm14
2823.byte	102,15,56,220,249
2824	movups	48(%r11),%xmm1
2825	pxor	%xmm9,%xmm15
2826
	/* aesenc %xmm0 into %xmm2..%xmm7 */
2827.byte	102,15,56,220,208
2828.byte	102,15,56,220,216
2829.byte	102,15,56,220,224
2830.byte	102,15,56,220,232
2831.byte	102,15,56,220,240
2832.byte	102,15,56,220,248
2833	movups	64(%r11),%xmm0
	/* turn the new table indices into byte offsets (16-byte entries) */
2834	shlq	$4,%r12
2835	shlq	$4,%r13
	/* jump over the alignment padding into the round loop */
2836	jmp	.Locb_enc_loop6
2837
2838.align	32
	/*
	 * Two rounds per iteration, alternating %xmm1 and %xmm0.  %rax is a
	 * negative byte index that steps by 32; the loop ends when the addq
	 * leaves it at zero (the movups that follows does not touch flags).
	 */
2839.Locb_enc_loop6:
2840.byte	102,15,56,220,209
2841.byte	102,15,56,220,217
2842.byte	102,15,56,220,225
2843.byte	102,15,56,220,233
2844.byte	102,15,56,220,241
2845.byte	102,15,56,220,249
2846	movups	(%rcx,%rax,1),%xmm1
2847	addq	$32,%rax
2848
2849.byte	102,15,56,220,208
2850.byte	102,15,56,220,216
2851.byte	102,15,56,220,224
2852.byte	102,15,56,220,232
2853.byte	102,15,56,220,240
2854.byte	102,15,56,220,248
2855	movups	-16(%rcx,%rax,1),%xmm0
2856	jnz	.Locb_enc_loop6
2857
	/* one more aesenc with %xmm1, then restore %xmm1 for the next call */
2858.byte	102,15,56,220,209
2859.byte	102,15,56,220,217
2860.byte	102,15,56,220,225
2861.byte	102,15,56,220,233
2862.byte	102,15,56,220,241
2863.byte	102,15,56,220,249
2864	movups	16(%r11),%xmm1
2865	shlq	$4,%r14
2866
	/*
	 * aesenclast (66 [REX] 0F 38 DD /r) with %xmm10..%xmm15 as the key
	 * operand for %xmm2..%xmm7 respectively.  %xmm10 is reloaded from
	 * (%rbx) and %rax reset from %r10 as soon as they are free.
	 */
2867.byte	102,65,15,56,221,210
2868	movdqu	(%rbx),%xmm10
2869	movq	%r10,%rax
2870.byte	102,65,15,56,221,219
2871.byte	102,65,15,56,221,228
2872.byte	102,65,15,56,221,237
2873.byte	102,65,15,56,221,246
2874.byte	102,65,15,56,221,255
	/* 0xf3,0xc3 = rep ret */
2875	.byte	0xf3,0xc3
2876.size	__ocb_encrypt6,.-__ocb_encrypt6
2877
2878.type	__ocb_encrypt4,@function
2879.align	32
/*
 * __ocb_encrypt4: four-lane variant of __ocb_encrypt6, used for tails of two
 * to four blocks.  Private register contract, not the C ABI.
 *
 * In:   %xmm2-%xmm5   input blocks (callers zero the lanes they do not need)
 *       %xmm8         running XOR accumulator of the input blocks
 *       %xmm9         mask XORed into the running offset on entry and into
 *                     each derived offset again before the last round
 *       %xmm10        16-byte value XORed into the 1st and 3rd offsets
 *       %xmm15        running offset
 *       %xmm1         round key from 16(%r11)
 *       %rbx          table base; %r12, %r13 byte offsets into it
 *       %r11, %rcx, %rax, %r10   key schedule walk, as in __ocb_encrypt6
 * Out:  %xmm2-%xmm5   output blocks
 *       %xmm10-%xmm13 offsets of blocks 1..4; the caller copies the one that
 *                     belongs to its last real block into %xmm15
 * Unlike the six-lane helper this does not advance %r8, recompute
 * %r12/%r13, or reload %xmm10.
 */
2880__ocb_encrypt4:
	/*
	 * %xmm10 = %xmm10 ^ (%xmm15 ^ %xmm9)   %xmm11 = T[%r12] ^ %xmm10
	 * %xmm12 = old %xmm10 ^ %xmm11         %xmm13 = T[%r13] ^ %xmm12
	 * Each input is folded into %xmm8 and then XORed with its offset.
	 */
2881	pxor	%xmm9,%xmm15
2882	movdqu	(%rbx,%r12,1),%xmm11
2883	movdqa	%xmm10,%xmm12
2884	movdqu	(%rbx,%r13,1),%xmm13
2885	pxor	%xmm15,%xmm10
2886	pxor	%xmm10,%xmm11
2887	pxor	%xmm2,%xmm8
2888	pxor	%xmm10,%xmm2
2889	pxor	%xmm11,%xmm12
2890	pxor	%xmm3,%xmm8
2891	pxor	%xmm11,%xmm3
2892	pxor	%xmm12,%xmm13
2893	pxor	%xmm4,%xmm8
2894	pxor	%xmm12,%xmm4
2895	pxor	%xmm5,%xmm8
2896	pxor	%xmm13,%xmm5
2897	movups	32(%r11),%xmm0
2898
	/* XOR %xmm9 into the offsets so they can act as final-round keys */
2899	pxor	%xmm9,%xmm10
2900	pxor	%xmm9,%xmm11
2901	pxor	%xmm9,%xmm12
2902	pxor	%xmm9,%xmm13
2903
	/* aesenc %xmm1 into %xmm2..%xmm5 (66 0F 38 DC /r) */
2904.byte	102,15,56,220,209
2905.byte	102,15,56,220,217
2906.byte	102,15,56,220,225
2907.byte	102,15,56,220,233
2908	movups	48(%r11),%xmm1
2909
	/* aesenc %xmm0 into %xmm2..%xmm5 */
2910.byte	102,15,56,220,208
2911.byte	102,15,56,220,216
2912.byte	102,15,56,220,224
2913.byte	102,15,56,220,232
2914	movups	64(%r11),%xmm0
	/* jump over the alignment padding into the round loop */
2915	jmp	.Locb_enc_loop4
2916
2917.align	32
	/* two rounds per iteration; ends when the negative index %rax reaches 0 */
2918.Locb_enc_loop4:
2919.byte	102,15,56,220,209
2920.byte	102,15,56,220,217
2921.byte	102,15,56,220,225
2922.byte	102,15,56,220,233
2923	movups	(%rcx,%rax,1),%xmm1
2924	addq	$32,%rax
2925
2926.byte	102,15,56,220,208
2927.byte	102,15,56,220,216
2928.byte	102,15,56,220,224
2929.byte	102,15,56,220,232
2930	movups	-16(%rcx,%rax,1),%xmm0
2931	jnz	.Locb_enc_loop4
2932
	/* one more aesenc with %xmm1, then restore %xmm1 and %rax */
2933.byte	102,15,56,220,209
2934.byte	102,15,56,220,217
2935.byte	102,15,56,220,225
2936.byte	102,15,56,220,233
2937	movups	16(%r11),%xmm1
2938	movq	%r10,%rax
2939
	/* aesenclast with %xmm10..%xmm13 as the key for %xmm2..%xmm5 */
2940.byte	102,65,15,56,221,210
2941.byte	102,65,15,56,221,219
2942.byte	102,65,15,56,221,228
2943.byte	102,65,15,56,221,237
	/* 0xf3,0xc3 = rep ret */
2944	.byte	0xf3,0xc3
2945.size	__ocb_encrypt4,.-__ocb_encrypt4
2946
2947.type	__ocb_encrypt1,@function
2948.align	32
/*
 * __ocb_encrypt1: single-block variant of the OCB encrypt helpers.  Private
 * register contract, not the C ABI.
 *
 * In:   %xmm2   input block
 *       %xmm7   16-byte value to combine with the running offset (callers in
 *               view pass either a table entry or a copy of %xmm10)
 *       %xmm8   running XOR accumulator of the input blocks
 *       %xmm9   mask XORed into the offset before the first round and again
 *               before the last round
 *       %xmm15  running offset
 *       %xmm1   round key from 16(%r11)
 *       %r11, %rcx, %rax, %r10   key schedule walk, as in __ocb_encrypt6
 * Out:  %xmm2   output block
 *       %xmm7   offset used for this block; the caller copies it to %xmm15
 */
2949__ocb_encrypt1:
	/* %xmm7 = %xmm7 ^ %xmm15 ^ %xmm9; fold input into %xmm8; mask input */
2950	pxor	%xmm15,%xmm7
2951	pxor	%xmm9,%xmm7
2952	pxor	%xmm2,%xmm8
2953	pxor	%xmm7,%xmm2
2954	movups	32(%r11),%xmm0
2955
	/* aesenc %xmm1,%xmm2 (66 0F 38 DC D1) */
2956.byte	102,15,56,220,209
2957	movups	48(%r11),%xmm1
	/* XOR %xmm9 into the offset so it can act as the final-round key */
2958	pxor	%xmm9,%xmm7
2959
	/* aesenc %xmm0,%xmm2 */
2960.byte	102,15,56,220,208
2961	movups	64(%r11),%xmm0
	/* jump over the alignment padding into the round loop */
2962	jmp	.Locb_enc_loop1
2963
2964.align	32
	/* two rounds per iteration; ends when the negative index %rax reaches 0 */
2965.Locb_enc_loop1:
2966.byte	102,15,56,220,209
2967	movups	(%rcx,%rax,1),%xmm1
2968	addq	$32,%rax
2969
2970.byte	102,15,56,220,208
2971	movups	-16(%rcx,%rax,1),%xmm0
2972	jnz	.Locb_enc_loop1
2973
	/* one more aesenc with %xmm1, then restore %xmm1 and %rax */
2974.byte	102,15,56,220,209
2975	movups	16(%r11),%xmm1
2976	movq	%r10,%rax
2977
	/* aesenclast %xmm7,%xmm2 (66 0F 38 DD D7) */
2978.byte	102,15,56,221,215
	/* 0xf3,0xc3 = rep ret */
2979	.byte	0xf3,0xc3
2980.size	__ocb_encrypt1,.-__ocb_encrypt1
2981
2982.globl	aes_hw_ocb_decrypt
2983.hidden aes_hw_ocb_decrypt
2984.type	aes_hw_ocb_decrypt,@function
2985.align	32
/*
 * aes_hw_ocb_decrypt: decrypt a run of whole 16-byte blocks while maintaining
 * a running per-block offset and an XOR checksum of the decrypted output.
 *
 * Register use at entry, as read by the code below:
 *   %rdi  input pointer
 *   %rsi  output pointer
 *   %rdx  number of 16-byte blocks
 *   %rcx  key schedule (32-bit round count at offset 240)
 *   %r8   block counter; the position of its lowest set bit selects a table
 *         entry
 *   %r9   pointer to the 16-byte running offset (loaded here, stored back at
 *         exit)
 *   first stack argument  -> %rbx: table of 16-byte entries
 *   second stack argument -> %rbp: pointer to the 16-byte checksum (loaded
 *         here, stored back at exit)
 *
 * Saves and restores %rbx, %rbp, %r12-%r14 and zeroes %xmm0-%xmm15 before
 * returning.
 *
 * NOTE(review): there is no test for a zero block count.  With %rdx == 0 and
 * an even %r8 the single-block step below still runs and the count wraps;
 * with an odd %r8 the exit path XORs a never-initialised %xmm0 into the
 * stored offset.  Confirm every caller passes at least one block.
 */
2986aes_hw_ocb_decrypt:
2987.cfi_startproc
	/* keep the entry %rsp in %rax to reach the stack arguments after the pushes */
2988	leaq	(%rsp),%rax
2989	pushq	%rbx
2990.cfi_adjust_cfa_offset	8
2991.cfi_offset	%rbx,-16
2992	pushq	%rbp
2993.cfi_adjust_cfa_offset	8
2994.cfi_offset	%rbp,-24
2995	pushq	%r12
2996.cfi_adjust_cfa_offset	8
2997.cfi_offset	%r12,-32
2998	pushq	%r13
2999.cfi_adjust_cfa_offset	8
3000.cfi_offset	%r13,-40
3001	pushq	%r14
3002.cfi_adjust_cfa_offset	8
3003.cfi_offset	%r14,-48
	/* 8(%rax) / 16(%rax): the two stack arguments just above the return address */
3004	movq	8(%rax),%rbx
3005	movq	8+8(%rax),%rbp
3006
	/*
	 * %r10 = 16 * rounds, %r11 = key schedule base.
	 * %xmm9 = key at offset 0; %xmm1 = key at offset 16 + 16*rounds.
	 */
3007	movl	240(%rcx),%r10d
3008	movq	%rcx,%r11
3009	shll	$4,%r10d
3010	movups	(%rcx),%xmm9
3011	movups	16(%rcx,%r10,1),%xmm1
3012
	/*
	 * %xmm15 = running offset ^ %xmm1 and %xmm9 = first key ^ %xmm1, so the
	 * helpers can fold the initial key XOR and the final-round key into the
	 * offsets.
	 */
3013	movdqu	(%r9),%xmm15
3014	pxor	%xmm1,%xmm9
3015	pxor	%xmm1,%xmm15
3016
	/*
	 * %rcx = key + 32 + 16*rounds (end pointer); %rax = %r10 = 48 - 16*rounds,
	 * the negative byte index the helper round loops start from.
	 * %xmm1 = key at offset 16, which the helpers expect preloaded.
	 */
3017	movl	$16+32,%eax
3018	leaq	32(%r11,%r10,1),%rcx
3019	movups	16(%r11),%xmm1
3020	subq	%r10,%rax
3021	movq	%rax,%r10
3022
	/* %xmm10 = first table entry, %xmm8 = incoming checksum */
3023	movdqu	(%rbx),%xmm10
3024	movdqu	(%rbp),%xmm8
3025
	/* an odd block counter goes straight to the six-block set-up */
3026	testq	$1,%r8
3027	jnz	.Locb_dec_odd
3028
	/*
	 * Even counter: decrypt one block first, using the table entry selected
	 * by the lowest set bit of the counter, so the counter becomes odd.
	 */
3029	bsfq	%r8,%r12
3030	addq	$1,%r8
3031	shlq	$4,%r12
3032	movdqu	(%rbx,%r12,1),%xmm7
3033	movdqu	(%rdi),%xmm2
3034	leaq	16(%rdi),%rdi
3035
3036	call	__ocb_decrypt1
3037
	/* %xmm7 is the offset of that block; the output goes into the checksum */
3038	movdqa	%xmm7,%xmm15
3039	movups	%xmm2,(%rsi)
3040	xorps	%xmm2,%xmm8
3041	leaq	16(%rsi),%rsi
3042	subq	$1,%rdx
3043	jz	.Locb_dec_done
3044
3045.Locb_dec_odd:
	/*
	 * Table byte offsets for the 2nd, 4th and 6th block of the next group:
	 * 16 * (lowest set bit of counter+1 / +3 / +5).  The counter itself is
	 * advanced by 6.
	 */
3046	leaq	1(%r8),%r12
3047	leaq	3(%r8),%r13
3048	leaq	5(%r8),%r14
3049	leaq	6(%r8),%r8
3050	bsfq	%r12,%r12
3051	bsfq	%r13,%r13
3052	bsfq	%r14,%r14
3053	shlq	$4,%r12
3054	shlq	$4,%r13
3055	shlq	$4,%r14
3056
	/* fewer than six blocks left: take the short path */
3057	subq	$6,%rdx
3058	jc	.Locb_dec_short
3059	jmp	.Locb_dec_grandloop
3060
3061.align	32
	/* main loop: six blocks per iteration */
3062.Locb_dec_grandloop:
3063	movdqu	0(%rdi),%xmm2
3064	movdqu	16(%rdi),%xmm3
3065	movdqu	32(%rdi),%xmm4
3066	movdqu	48(%rdi),%xmm5
3067	movdqu	64(%rdi),%xmm6
3068	movdqu	80(%rdi),%xmm7
3069	leaq	96(%rdi),%rdi
3070
3071	call	__ocb_decrypt6
3072
	/* store each output block and fold it into the checksum */
3073	movups	%xmm2,0(%rsi)
3074	pxor	%xmm2,%xmm8
3075	movups	%xmm3,16(%rsi)
3076	pxor	%xmm3,%xmm8
3077	movups	%xmm4,32(%rsi)
3078	pxor	%xmm4,%xmm8
3079	movups	%xmm5,48(%rsi)
3080	pxor	%xmm5,%xmm8
3081	movups	%xmm6,64(%rsi)
3082	pxor	%xmm6,%xmm8
3083	movups	%xmm7,80(%rsi)
3084	pxor	%xmm7,%xmm8
3085	leaq	96(%rsi),%rsi
3086	subq	$6,%rdx
3087	jnc	.Locb_dec_grandloop
3088
3089.Locb_dec_short:
	/* undo the bias: %rdx = 0..5 remaining blocks */
3090	addq	$6,%rdx
3091	jz	.Locb_dec_done
3092
	/*
	 * Dispatch on the remaining count.  The movdqu loads between each cmpq
	 * and the following je do not change the flags.
	 */
3093	movdqu	0(%rdi),%xmm2
3094	cmpq	$2,%rdx
3095	jb	.Locb_dec_one
3096	movdqu	16(%rdi),%xmm3
3097	je	.Locb_dec_two
3098
3099	movdqu	32(%rdi),%xmm4
3100	cmpq	$4,%rdx
3101	jb	.Locb_dec_three
3102	movdqu	48(%rdi),%xmm5
3103	je	.Locb_dec_four
3104
	/* five blocks: run the six-lane helper with a zero sixth lane */
3105	movdqu	64(%rdi),%xmm6
3106	pxor	%xmm7,%xmm7
3107
3108	call	__ocb_decrypt6
3109
	/* %xmm14 is the offset of the fifth block */
3110	movdqa	%xmm14,%xmm15
3111	movups	%xmm2,0(%rsi)
3112	pxor	%xmm2,%xmm8
3113	movups	%xmm3,16(%rsi)
3114	pxor	%xmm3,%xmm8
3115	movups	%xmm4,32(%rsi)
3116	pxor	%xmm4,%xmm8
3117	movups	%xmm5,48(%rsi)
3118	pxor	%xmm5,%xmm8
3119	movups	%xmm6,64(%rsi)
3120	pxor	%xmm6,%xmm8
3121
3122	jmp	.Locb_dec_done
3123
3124.align	16
3125.Locb_dec_one:
	/* one block: the first table entry (%xmm10) is the value to combine */
3126	movdqa	%xmm10,%xmm7
3127
3128	call	__ocb_decrypt1
3129
3130	movdqa	%xmm7,%xmm15
3131	movups	%xmm2,0(%rsi)
3132	xorps	%xmm2,%xmm8
3133	jmp	.Locb_dec_done
3134
3135.align	16
3136.Locb_dec_two:
	/* two blocks: four-lane helper with lanes 3 and 4 zeroed */
3137	pxor	%xmm4,%xmm4
3138	pxor	%xmm5,%xmm5
3139
3140	call	__ocb_decrypt4
3141
	/* %xmm11 is the offset of the second block */
3142	movdqa	%xmm11,%xmm15
3143	movups	%xmm2,0(%rsi)
3144	xorps	%xmm2,%xmm8
3145	movups	%xmm3,16(%rsi)
3146	xorps	%xmm3,%xmm8
3147
3148	jmp	.Locb_dec_done
3149
3150.align	16
3151.Locb_dec_three:
	/* three blocks: four-lane helper with lane 4 zeroed */
3152	pxor	%xmm5,%xmm5
3153
3154	call	__ocb_decrypt4
3155
	/* %xmm12 is the offset of the third block */
3156	movdqa	%xmm12,%xmm15
3157	movups	%xmm2,0(%rsi)
3158	xorps	%xmm2,%xmm8
3159	movups	%xmm3,16(%rsi)
3160	xorps	%xmm3,%xmm8
3161	movups	%xmm4,32(%rsi)
3162	xorps	%xmm4,%xmm8
3163
3164	jmp	.Locb_dec_done
3165
3166.align	16
3167.Locb_dec_four:
3168	call	__ocb_decrypt4
3169
	/* %xmm13 is the offset of the fourth block */
3170	movdqa	%xmm13,%xmm15
3171	movups	%xmm2,0(%rsi)
3172	pxor	%xmm2,%xmm8
3173	movups	%xmm3,16(%rsi)
3174	pxor	%xmm3,%xmm8
3175	movups	%xmm4,32(%rsi)
3176	pxor	%xmm4,%xmm8
3177	movups	%xmm5,48(%rsi)
3178	pxor	%xmm5,%xmm8
3179
3180.Locb_dec_done:
	/*
	 * After a helper call %xmm0 holds the key at offset 16 + 16*rounds (the
	 * last key its round loop loaded), i.e. the value XORed into %xmm15 at
	 * entry; XORing it again leaves the plain offset to store back.
	 */
3181	pxor	%xmm0,%xmm15
3182	movdqu	%xmm8,(%rbp)
3183	movdqu	%xmm15,(%r9)
3184
	/* clear every vector register so no key or data material is left behind */
3185	xorps	%xmm0,%xmm0
3186	pxor	%xmm1,%xmm1
3187	pxor	%xmm2,%xmm2
3188	pxor	%xmm3,%xmm3
3189	pxor	%xmm4,%xmm4
3190	pxor	%xmm5,%xmm5
3191	pxor	%xmm6,%xmm6
3192	pxor	%xmm7,%xmm7
3193	pxor	%xmm8,%xmm8
3194	pxor	%xmm9,%xmm9
3195	pxor	%xmm10,%xmm10
3196	pxor	%xmm11,%xmm11
3197	pxor	%xmm12,%xmm12
3198	pxor	%xmm13,%xmm13
3199	pxor	%xmm14,%xmm14
3200	pxor	%xmm15,%xmm15
	/* %rax = entry %rsp (five 8-byte pushes above the current %rsp) */
3201	leaq	40(%rsp),%rax
3202.cfi_def_cfa	%rax,8
3203	movq	-40(%rax),%r14
3204.cfi_restore	%r14
3205	movq	-32(%rax),%r13
3206.cfi_restore	%r13
3207	movq	-24(%rax),%r12
3208.cfi_restore	%r12
3209	movq	-16(%rax),%rbp
3210.cfi_restore	%rbp
3211	movq	-8(%rax),%rbx
3212.cfi_restore	%rbx
3213	leaq	(%rax),%rsp
3214.cfi_def_cfa_register	%rsp
3215.Locb_dec_epilogue:
	/* 0xf3,0xc3 = rep ret */
3216	.byte	0xf3,0xc3
3217.cfi_endproc
3218.size	aes_hw_ocb_decrypt,.-aes_hw_ocb_decrypt
3219
3220.type	__ocb_decrypt6,@function
3221.align	32
/*
 * __ocb_decrypt6: internal helper of aes_hw_ocb_decrypt that decrypts the six
 * 16-byte blocks held in %xmm2..%xmm7, masking each with its own offset
 * before and after the AES rounds.  Private register contract, not the C ABI.
 *
 * In:   %xmm2-%xmm7   the six input blocks
 *       %xmm9         first round key XOR the key at offset 16 + 16*rounds
 *       %xmm10        first table entry, i.e. the 16 bytes at (%rbx)
 *       %xmm15        running offset XOR the key at offset 16 + 16*rounds
 *       %xmm1         key at 16(%r11)
 *       %rbx          table base; %r12-%r14 byte offsets into it
 *       %r8           block counter
 *       %r11          key schedule base; %rcx end pointer; %rax negative byte
 *                     index; %r10 value %rax is reset to
 * Out:  %xmm2-%xmm7   the six output blocks
 *       %xmm11-%xmm15 offsets of blocks 2..6 (%xmm15 is the new running
 *                     offset, in the same form as on entry)
 *       %xmm10        reloaded from (%rbx)
 *       %r8 += 6; %r12-%r14 recomputed for the next call
 * Unlike the encrypt helper, the checksum in %xmm8 is not touched here; the
 * caller folds in the outputs.
 */
3222__ocb_decrypt6:
	/*
	 * Chain the six offsets:
	 *   %xmm10 = %xmm10 ^ (%xmm15 ^ %xmm9)   %xmm11 = T[%r12] ^ %xmm10
	 *   %xmm12 = old %xmm10 ^ %xmm11         %xmm13 = T[%r13] ^ %xmm12
	 *   %xmm14 = old %xmm10 ^ %xmm13         %xmm15 = T[%r14] ^ %xmm14
	 * and XOR each input block with its offset.
	 */
3223	pxor	%xmm9,%xmm15
3224	movdqu	(%rbx,%r12,1),%xmm11
3225	movdqa	%xmm10,%xmm12
3226	movdqu	(%rbx,%r13,1),%xmm13
3227	movdqa	%xmm10,%xmm14
3228	pxor	%xmm15,%xmm10
3229	movdqu	(%rbx,%r14,1),%xmm15
3230	pxor	%xmm10,%xmm11
3231	pxor	%xmm10,%xmm2
3232	pxor	%xmm11,%xmm12
3233	pxor	%xmm11,%xmm3
3234	pxor	%xmm12,%xmm13
3235	pxor	%xmm12,%xmm4
3236	pxor	%xmm13,%xmm14
3237	pxor	%xmm13,%xmm5
3238	pxor	%xmm14,%xmm15
3239	pxor	%xmm14,%xmm6
3240	pxor	%xmm15,%xmm7
3241	movups	32(%r11),%xmm0
3242
	/*
	 * Advance the block counter and pick the table entries for the next
	 * call from the lowest set bit of counter+1, counter+3 and counter+5.
	 */
3243	leaq	1(%r8),%r12
3244	leaq	3(%r8),%r13
3245	leaq	5(%r8),%r14
3246	addq	$6,%r8
3247	pxor	%xmm9,%xmm10
3248	bsfq	%r12,%r12
3249	bsfq	%r13,%r13
3250	bsfq	%r14,%r14
3251
	/*
	 * The .byte groups encode aesdec (66 0F 38 DE /r).  This group is
	 * aesdec %xmm1 into %xmm2..%xmm7, interleaved with XORing %xmm9 into
	 * the offsets so they can serve as the final-round keys below.
	 */
3252.byte	102,15,56,222,209
3253.byte	102,15,56,222,217
3254.byte	102,15,56,222,225
3255.byte	102,15,56,222,233
3256	pxor	%xmm9,%xmm11
3257	pxor	%xmm9,%xmm12
3258.byte	102,15,56,222,241
3259	pxor	%xmm9,%xmm13
3260	pxor	%xmm9,%xmm14
3261.byte	102,15,56,222,249
3262	movups	48(%r11),%xmm1
3263	pxor	%xmm9,%xmm15
3264
	/* aesdec %xmm0 into %xmm2..%xmm7 */
3265.byte	102,15,56,222,208
3266.byte	102,15,56,222,216
3267.byte	102,15,56,222,224
3268.byte	102,15,56,222,232
3269.byte	102,15,56,222,240
3270.byte	102,15,56,222,248
3271	movups	64(%r11),%xmm0
	/* turn the new table indices into byte offsets (16-byte entries) */
3272	shlq	$4,%r12
3273	shlq	$4,%r13
	/* jump over the alignment padding into the round loop */
3274	jmp	.Locb_dec_loop6
3275
3276.align	32
	/*
	 * Two rounds per iteration, alternating %xmm1 and %xmm0.  %rax steps by
	 * 32 and the loop ends when the addq leaves it at zero (the movups that
	 * follows does not touch flags).
	 */
3277.Locb_dec_loop6:
3278.byte	102,15,56,222,209
3279.byte	102,15,56,222,217
3280.byte	102,15,56,222,225
3281.byte	102,15,56,222,233
3282.byte	102,15,56,222,241
3283.byte	102,15,56,222,249
3284	movups	(%rcx,%rax,1),%xmm1
3285	addq	$32,%rax
3286
3287.byte	102,15,56,222,208
3288.byte	102,15,56,222,216
3289.byte	102,15,56,222,224
3290.byte	102,15,56,222,232
3291.byte	102,15,56,222,240
3292.byte	102,15,56,222,248
3293	movups	-16(%rcx,%rax,1),%xmm0
3294	jnz	.Locb_dec_loop6
3295
	/* one more aesdec with %xmm1, then restore %xmm1 for the next call */
3296.byte	102,15,56,222,209
3297.byte	102,15,56,222,217
3298.byte	102,15,56,222,225
3299.byte	102,15,56,222,233
3300.byte	102,15,56,222,241
3301.byte	102,15,56,222,249
3302	movups	16(%r11),%xmm1
3303	shlq	$4,%r14
3304
	/*
	 * aesdeclast (66 [REX] 0F 38 DF /r) with %xmm10..%xmm15 as the key
	 * operand for %xmm2..%xmm7 respectively.  %xmm10 is reloaded from
	 * (%rbx) and %rax reset from %r10 as soon as they are free.
	 */
3305.byte	102,65,15,56,223,210
3306	movdqu	(%rbx),%xmm10
3307	movq	%r10,%rax
3308.byte	102,65,15,56,223,219
3309.byte	102,65,15,56,223,228
3310.byte	102,65,15,56,223,237
3311.byte	102,65,15,56,223,246
3312.byte	102,65,15,56,223,255
	/* 0xf3,0xc3 = rep ret */
3313	.byte	0xf3,0xc3
3314.size	__ocb_decrypt6,.-__ocb_decrypt6
3315
3316.type	__ocb_decrypt4,@function
3317.align	32
/*
 * __ocb_decrypt4: four-lane variant of __ocb_decrypt6, used by
 * aes_hw_ocb_decrypt for tails of two to four blocks.  Private register
 * contract, not the C ABI.
 *
 * In:   %xmm2-%xmm5   input blocks (the caller zeroes the lanes it does not
 *                     need)
 *       %xmm9         first round key XOR the key at offset 16 + 16*rounds
 *       %xmm10        first table entry
 *       %xmm15        running offset XOR the key at offset 16 + 16*rounds
 *       %xmm1         key at 16(%r11)
 *       %rbx          table base; %r12, %r13 byte offsets into it
 *       %r11, %rcx, %rax, %r10   key schedule walk, as in __ocb_decrypt6
 * Out:  %xmm2-%xmm5   output blocks
 *       %xmm10-%xmm13 offsets of blocks 1..4; the caller copies the one that
 *                     belongs to its last real block into %xmm15
 * Does not advance %r8, recompute %r12/%r13, or reload %xmm10.
 */
3318__ocb_decrypt4:
	/*
	 * %xmm10 = %xmm10 ^ (%xmm15 ^ %xmm9)   %xmm11 = T[%r12] ^ %xmm10
	 * %xmm12 = old %xmm10 ^ %xmm11         %xmm13 = T[%r13] ^ %xmm12
	 * and XOR each input block with its offset.
	 */
3319	pxor	%xmm9,%xmm15
3320	movdqu	(%rbx,%r12,1),%xmm11
3321	movdqa	%xmm10,%xmm12
3322	movdqu	(%rbx,%r13,1),%xmm13
3323	pxor	%xmm15,%xmm10
3324	pxor	%xmm10,%xmm11
3325	pxor	%xmm10,%xmm2
3326	pxor	%xmm11,%xmm12
3327	pxor	%xmm11,%xmm3
3328	pxor	%xmm12,%xmm13
3329	pxor	%xmm12,%xmm4
3330	pxor	%xmm13,%xmm5
3331	movups	32(%r11),%xmm0
3332
	/* XOR %xmm9 into the offsets so they can act as final-round keys */
3333	pxor	%xmm9,%xmm10
3334	pxor	%xmm9,%xmm11
3335	pxor	%xmm9,%xmm12
3336	pxor	%xmm9,%xmm13
3337
	/* aesdec %xmm1 into %xmm2..%xmm5 (66 0F 38 DE /r) */
3338.byte	102,15,56,222,209
3339.byte	102,15,56,222,217
3340.byte	102,15,56,222,225
3341.byte	102,15,56,222,233
3342	movups	48(%r11),%xmm1
3343
	/* aesdec %xmm0 into %xmm2..%xmm5 */
3344.byte	102,15,56,222,208
3345.byte	102,15,56,222,216
3346.byte	102,15,56,222,224
3347.byte	102,15,56,222,232
3348	movups	64(%r11),%xmm0
	/* jump over the alignment padding into the round loop */
3349	jmp	.Locb_dec_loop4
3350
3351.align	32
	/* two rounds per iteration; ends when the negative index %rax reaches 0 */
3352.Locb_dec_loop4:
3353.byte	102,15,56,222,209
3354.byte	102,15,56,222,217
3355.byte	102,15,56,222,225
3356.byte	102,15,56,222,233
3357	movups	(%rcx,%rax,1),%xmm1
3358	addq	$32,%rax
3359
3360.byte	102,15,56,222,208
3361.byte	102,15,56,222,216
3362.byte	102,15,56,222,224
3363.byte	102,15,56,222,232
3364	movups	-16(%rcx,%rax,1),%xmm0
3365	jnz	.Locb_dec_loop4
3366
	/* one more aesdec with %xmm1, then restore %xmm1 and %rax */
3367.byte	102,15,56,222,209
3368.byte	102,15,56,222,217
3369.byte	102,15,56,222,225
3370.byte	102,15,56,222,233
3371	movups	16(%r11),%xmm1
3372	movq	%r10,%rax
3373
	/* aesdeclast with %xmm10..%xmm13 as the key for %xmm2..%xmm5 */
3374.byte	102,65,15,56,223,210
3375.byte	102,65,15,56,223,219
3376.byte	102,65,15,56,223,228
3377.byte	102,65,15,56,223,237
	/* 0xf3,0xc3 = rep ret */
3378	.byte	0xf3,0xc3
3379.size	__ocb_decrypt4,.-__ocb_decrypt4
3380
3381.type	__ocb_decrypt1,@function
3382.align	32
/*
 * __ocb_decrypt1: single-block helper of aes_hw_ocb_decrypt.  Private
 * register contract, not the C ABI.
 *
 * In:   %xmm2   input block
 *       %xmm7   table entry to combine with the running offset
 *       %xmm9   first round key XOR the key at offset 16 + 16*rounds
 *       %xmm15  running offset XOR the key at offset 16 + 16*rounds
 *       %xmm1   key at 16(%r11)
 *       %r11, %rcx, %rax, %r10   key schedule walk, as in __ocb_decrypt6
 * Out:  %xmm2   output block
 *       %xmm7   offset used for this block; the caller copies it to %xmm15
 * The checksum in %xmm8 is not touched here.
 */
3383__ocb_decrypt1:
	/* %xmm7 = %xmm7 ^ %xmm15 ^ %xmm9, then mask the input with it */
3384	pxor	%xmm15,%xmm7
3385	pxor	%xmm9,%xmm7
3386	pxor	%xmm7,%xmm2
3387	movups	32(%r11),%xmm0
3388
	/* aesdec %xmm1,%xmm2 (66 0F 38 DE D1) */
3389.byte	102,15,56,222,209
3390	movups	48(%r11),%xmm1
	/* XOR %xmm9 into the offset so it can act as the final-round key */
3391	pxor	%xmm9,%xmm7
3392
	/* aesdec %xmm0,%xmm2 */
3393.byte	102,15,56,222,208
3394	movups	64(%r11),%xmm0
	/* jump over the alignment padding into the round loop */
3395	jmp	.Locb_dec_loop1
3396
3397.align	32
	/* two rounds per iteration; ends when the negative index %rax reaches 0 */
3398.Locb_dec_loop1:
3399.byte	102,15,56,222,209
3400	movups	(%rcx,%rax,1),%xmm1
3401	addq	$32,%rax
3402
3403.byte	102,15,56,222,208
3404	movups	-16(%rcx,%rax,1),%xmm0
3405	jnz	.Locb_dec_loop1
3406
	/* one more aesdec with %xmm1, then restore %xmm1 and %rax */
3407.byte	102,15,56,222,209
3408	movups	16(%r11),%xmm1
3409	movq	%r10,%rax
3410
	/* aesdeclast %xmm7,%xmm2 (66 0F 38 DF D7) */
3411.byte	102,15,56,223,215
	/* 0xf3,0xc3 = rep ret */
3412	.byte	0xf3,0xc3
3413.size	__ocb_decrypt1,.-__ocb_decrypt1
3414.globl	aes_hw_cbc_encrypt
3415.hidden aes_hw_cbc_encrypt
3416.type	aes_hw_cbc_encrypt,@function
3417.align	16
/*
 * aes_hw_cbc_encrypt: AES-CBC encryption or decryption, selected by %r9d.
 *
 * Register use at entry, as read by the code below:
 *   %rdi  input pointer
 *   %rsi  output pointer
 *   %rdx  length in bytes (returns immediately when zero)
 *   %rcx  key schedule (32-bit round count at offset 240)
 *   %r8   pointer to the 16-byte IV; the chaining value is stored back
 *         through it before returning
 *   %r9d  non-zero = encrypt, zero = decrypt
 *
 * Encryption is one block at a time.  Decryption of exactly 16 bytes has a
 * dedicated path; anything else goes through the bulk path, which keeps a
 * 16-byte aligned scratch slot on the stack and processes eight or six
 * blocks per iteration depending on OPENSSL_ia32cap_P.
 *
 * Calls _aesni_decrypt2/3/4/6/8, which are defined outside this section.
 *
 * NOTE(review): in the decrypt path a length that is not a multiple of 16
 * ends in .Lcbc_dec_tail_partial, which copies 16 - (length & 15) bytes of
 * the last block rather than length & 15.  Confirm callers only ever pass
 * whole blocks when decrypting.
 */
3418aes_hw_cbc_encrypt:
3419.cfi_startproc
3420	testq	%rdx,%rdx
3421	jz	.Lcbc_ret
3422
	/* %r10d = round count, %r11 = key schedule; both survive the loops */
3423	movl	240(%rcx),%r10d
3424	movq	%rcx,%r11
3425	testl	%r9d,%r9d
3426	jz	.Lcbc_decrypt
3427
	/* ---- encrypt: %xmm2 carries the chaining value, starting with the IV ---- */
3428	movups	(%r8),%xmm2
3429	movl	%r10d,%eax
3430	cmpq	$16,%rdx
3431	jb	.Lcbc_enc_tail
3432	subq	$16,%rdx
3433	jmp	.Lcbc_enc_loop
3434.align	16
3435.Lcbc_enc_loop:
3436	movups	(%rdi),%xmm3
3437	leaq	16(%rdi),%rdi
3438
	/* %xmm2 = chaining value ^ input ^ first round key */
3439	movups	(%rcx),%xmm0
3440	movups	16(%rcx),%xmm1
3441	xorps	%xmm0,%xmm3
3442	leaq	32(%rcx),%rcx
3443	xorps	%xmm3,%xmm2
	/* aesenc %xmm1,%xmm2 once per round, then aesenclast %xmm1,%xmm2 */
3444.Loop_enc1_15:
3445.byte	102,15,56,220,209
3446	decl	%eax
3447	movups	(%rcx),%xmm1
3448	leaq	16(%rcx),%rcx
3449	jnz	.Loop_enc1_15
3450.byte	102,15,56,221,209
	/* rewind the round counter and key pointer for the next block */
3451	movl	%r10d,%eax
3452	movq	%r11,%rcx
3453	movups	%xmm2,0(%rsi)
3454	leaq	16(%rsi),%rsi
3455	subq	$16,%rdx
3456	jnc	.Lcbc_enc_loop
3457	addq	$16,%rdx
3458	jnz	.Lcbc_enc_tail
	/* done: clear key/data registers and store the last ciphertext as the IV */
3459	pxor	%xmm0,%xmm0
3460	pxor	%xmm1,%xmm1
3461	movups	%xmm2,(%r8)
3462	pxor	%xmm2,%xmm2
3463	pxor	%xmm3,%xmm3
3464	jmp	.Lcbc_ret
3465
3466.Lcbc_enc_tail:
	/*
	 * Fewer than 16 bytes left: copy them to the output buffer, zero-fill
	 * the rest of that 16-byte block, then encrypt it in place by
	 * re-entering the loop with in = out and no bytes remaining.
	 */
3467	movq	%rdx,%rcx
3468	xchgq	%rdi,%rsi
	/* bytes F3 A4 66 90: rep movsb followed by a two-byte nop */
3469.long	0x9066A4F3
3470	movl	$16,%ecx
3471	subq	%rdx,%rcx
3472	xorl	%eax,%eax
	/* bytes F3 AA 66 90: rep stosb followed by a two-byte nop */
3473.long	0x9066AAF3
3474	leaq	-16(%rdi),%rdi
3475	movl	%r10d,%eax
3476	movq	%rdi,%rsi
3477	movq	%r11,%rcx
3478	xorq	%rdx,%rdx
3479	jmp	.Lcbc_enc_loop
3480
3481.align	16
3482.Lcbc_decrypt:
	/* ---- decrypt: exactly one block is handled here, the rest in bulk ---- */
3483	cmpq	$16,%rdx
3484	jne	.Lcbc_decrypt_bulk
3485
3486
3487
	/* %xmm2 = ciphertext, %xmm3 = IV, %xmm4 = ciphertext copy (next IV) */
3488	movdqu	(%rdi),%xmm2
3489	movdqu	(%r8),%xmm3
3490	movdqa	%xmm2,%xmm4
3491	movups	(%rcx),%xmm0
3492	movups	16(%rcx),%xmm1
3493	leaq	32(%rcx),%rcx
3494	xorps	%xmm0,%xmm2
	/* aesdec %xmm1,%xmm2 once per round, then aesdeclast %xmm1,%xmm2 */
3495.Loop_dec1_16:
3496.byte	102,15,56,222,209
3497	decl	%r10d
3498	movups	(%rcx),%xmm1
3499	leaq	16(%rcx),%rcx
3500	jnz	.Loop_dec1_16
3501.byte	102,15,56,223,209
3502	pxor	%xmm0,%xmm0
3503	pxor	%xmm1,%xmm1
3504	movdqu	%xmm4,(%r8)
3505	xorps	%xmm3,%xmm2
3506	pxor	%xmm3,%xmm3
3507	movups	%xmm2,(%rsi)
3508	pxor	%xmm2,%xmm2
3509	jmp	.Lcbc_ret
3510.align	16
3511.Lcbc_decrypt_bulk:
	/*
	 * %r11 now tracks the entry %rsp (it no longer holds the key pointer).
	 * Save %rbp, carve out a 16-byte aligned scratch slot at (%rsp), and
	 * keep the key schedule in %rbp.  %xmm10 holds the chaining value.
	 */
3512	leaq	(%rsp),%r11
3513.cfi_def_cfa_register	%r11
3514	pushq	%rbp
3515.cfi_offset	%rbp,-16
3516	subq	$16,%rsp
3517	andq	$-16,%rsp
3518	movq	%rcx,%rbp
3519	movups	(%r8),%xmm10
3520	movl	%r10d,%eax
3521	cmpq	$0x50,%rdx
3522	jbe	.Lcbc_dec_tail
3523
	/*
	 * More than five blocks: load six ciphertext blocks into %xmm2..%xmm7
	 * and keep copies of the first five in %xmm11..%xmm15 for chaining.
	 */
3524	movups	(%rcx),%xmm0
3525	movdqu	0(%rdi),%xmm2
3526	movdqu	16(%rdi),%xmm3
3527	movdqa	%xmm2,%xmm11
3528	movdqu	32(%rdi),%xmm4
3529	movdqa	%xmm3,%xmm12
3530	movdqu	48(%rdi),%xmm5
3531	movdqa	%xmm4,%xmm13
3532	movdqu	64(%rdi),%xmm6
3533	movdqa	%xmm5,%xmm14
3534	movdqu	80(%rdi),%xmm7
3535	movdqa	%xmm6,%xmm15
3536	leaq	OPENSSL_ia32cap_P(%rip),%r9
3537	movl	4(%r9),%r9d
3538	cmpq	$0x70,%rdx
3539	jbe	.Lcbc_dec_six_or_seven
3540
	/*
	 * Mask 71303168 = 0x4400000 (bits 22 and 26) of the second capability
	 * word; when only bit 22 (4194304) is set use the six-block loop,
	 * otherwise the eight-block loop.
	 * NOTE(review): presumably these mirror CPUID.1:ECX, where bit 22 is
	 * MOVBE and bit 26 is XSAVE - confirm against the capability vector
	 * layout.
	 */
3541	andl	$71303168,%r9d
3542	subq	$0x50,%rdx
3543	cmpl	$4194304,%r9d
3544	je	.Lcbc_dec_loop6_enter
	/* eight-block loop: keys are addressed relative to key + 112 */
3545	subq	$0x20,%rdx
3546	leaq	112(%rcx),%rcx
3547	jmp	.Lcbc_dec_loop8_enter
3548.align	16
3549.Lcbc_dec_loop8:
	/* store the eighth block of the previous iteration */
3550	movups	%xmm9,(%rsi)
3551	leaq	16(%rsi),%rsi
3552.Lcbc_dec_loop8_enter:
	/* load blocks 7 and 8 and XOR the first round key into all eight */
3553	movdqu	96(%rdi),%xmm8
3554	pxor	%xmm0,%xmm2
3555	movdqu	112(%rdi),%xmm9
3556	pxor	%xmm0,%xmm3
3557	movups	16-112(%rcx),%xmm1
3558	pxor	%xmm0,%xmm4
	/*
	 * Branch-free choice of where the next group is loaded from: the cmpq
	 * sets CF when fewer than 0x70 bytes remain, and the adcq/andq/addq
	 * below turn that into %rbp = %rdi (CF set) or %rdi + 128 (CF clear).
	 * None of the instructions in between modify the flags.
	 */
3559	movq	$-1,%rbp
3560	cmpq	$0x70,%rdx
3561	pxor	%xmm0,%xmm5
3562	pxor	%xmm0,%xmm6
3563	pxor	%xmm0,%xmm7
3564	pxor	%xmm0,%xmm8
3565
	/*
	 * From here on each group of eight .byte lines is one AES round applied
	 * to %xmm2..%xmm9: aesdec (66 [REX] 0F 38 DE /r), with %xmm1 and %xmm0
	 * alternating as the round key.
	 */
3566.byte	102,15,56,222,209
3567	pxor	%xmm0,%xmm9
3568	movups	32-112(%rcx),%xmm0
3569.byte	102,15,56,222,217
3570.byte	102,15,56,222,225
3571.byte	102,15,56,222,233
3572.byte	102,15,56,222,241
3573.byte	102,15,56,222,249
3574.byte	102,68,15,56,222,193
3575	adcq	$0,%rbp
3576	andq	$128,%rbp
3577.byte	102,68,15,56,222,201
3578	addq	%rdi,%rbp
3579	movups	48-112(%rcx),%xmm1
3580.byte	102,15,56,222,208
3581.byte	102,15,56,222,216
3582.byte	102,15,56,222,224
3583.byte	102,15,56,222,232
3584.byte	102,15,56,222,240
3585.byte	102,15,56,222,248
3586.byte	102,68,15,56,222,192
3587.byte	102,68,15,56,222,200
3588	movups	64-112(%rcx),%xmm0
3589	nop
3590.byte	102,15,56,222,209
3591.byte	102,15,56,222,217
3592.byte	102,15,56,222,225
3593.byte	102,15,56,222,233
3594.byte	102,15,56,222,241
3595.byte	102,15,56,222,249
3596.byte	102,68,15,56,222,193
3597.byte	102,68,15,56,222,201
3598	movups	80-112(%rcx),%xmm1
3599	nop
3600.byte	102,15,56,222,208
3601.byte	102,15,56,222,216
3602.byte	102,15,56,222,224
3603.byte	102,15,56,222,232
3604.byte	102,15,56,222,240
3605.byte	102,15,56,222,248
3606.byte	102,68,15,56,222,192
3607.byte	102,68,15,56,222,200
3608	movups	96-112(%rcx),%xmm0
3609	nop
3610.byte	102,15,56,222,209
3611.byte	102,15,56,222,217
3612.byte	102,15,56,222,225
3613.byte	102,15,56,222,233
3614.byte	102,15,56,222,241
3615.byte	102,15,56,222,249
3616.byte	102,68,15,56,222,193
3617.byte	102,68,15,56,222,201
3618	movups	112-112(%rcx),%xmm1
3619	nop
3620.byte	102,15,56,222,208
3621.byte	102,15,56,222,216
3622.byte	102,15,56,222,224
3623.byte	102,15,56,222,232
3624.byte	102,15,56,222,240
3625.byte	102,15,56,222,248
3626.byte	102,68,15,56,222,192
3627.byte	102,68,15,56,222,200
3628	movups	128-112(%rcx),%xmm0
3629	nop
3630.byte	102,15,56,222,209
3631.byte	102,15,56,222,217
3632.byte	102,15,56,222,225
3633.byte	102,15,56,222,233
3634.byte	102,15,56,222,241
3635.byte	102,15,56,222,249
3636.byte	102,68,15,56,222,193
3637.byte	102,68,15,56,222,201
3638	movups	144-112(%rcx),%xmm1
	/*
	 * Compare the round count with 11 once; the jb below and the je further
	 * down both use these flags, which the aesdec/movups in between leave
	 * untouched.  Below 11 and equal to 11 skip the extra rounds.
	 */
3639	cmpl	$11,%eax
3640.byte	102,15,56,222,208
3641.byte	102,15,56,222,216
3642.byte	102,15,56,222,224
3643.byte	102,15,56,222,232
3644.byte	102,15,56,222,240
3645.byte	102,15,56,222,248
3646.byte	102,68,15,56,222,192
3647.byte	102,68,15,56,222,200
3648	movups	160-112(%rcx),%xmm0
3649	jb	.Lcbc_dec_done
3650.byte	102,15,56,222,209
3651.byte	102,15,56,222,217
3652.byte	102,15,56,222,225
3653.byte	102,15,56,222,233
3654.byte	102,15,56,222,241
3655.byte	102,15,56,222,249
3656.byte	102,68,15,56,222,193
3657.byte	102,68,15,56,222,201
3658	movups	176-112(%rcx),%xmm1
3659	nop
3660.byte	102,15,56,222,208
3661.byte	102,15,56,222,216
3662.byte	102,15,56,222,224
3663.byte	102,15,56,222,232
3664.byte	102,15,56,222,240
3665.byte	102,15,56,222,248
3666.byte	102,68,15,56,222,192
3667.byte	102,68,15,56,222,200
3668	movups	192-112(%rcx),%xmm0
3669	je	.Lcbc_dec_done
3670.byte	102,15,56,222,209
3671.byte	102,15,56,222,217
3672.byte	102,15,56,222,225
3673.byte	102,15,56,222,233
3674.byte	102,15,56,222,241
3675.byte	102,15,56,222,249
3676.byte	102,68,15,56,222,193
3677.byte	102,68,15,56,222,201
3678	movups	208-112(%rcx),%xmm1
3679	nop
3680.byte	102,15,56,222,208
3681.byte	102,15,56,222,216
3682.byte	102,15,56,222,224
3683.byte	102,15,56,222,232
3684.byte	102,15,56,222,240
3685.byte	102,15,56,222,248
3686.byte	102,68,15,56,222,192
3687.byte	102,68,15,56,222,200
3688	movups	224-112(%rcx),%xmm0
3689	jmp	.Lcbc_dec_done
3690.align	16
3691.Lcbc_dec_done:
	/*
	 * Last aesdec with %xmm1.  %xmm0 holds the final round key; it is XORed
	 * into the chaining value and the saved ciphertext copies
	 * (%xmm10..%xmm15, plus blocks 6 and 7 reloaded from the input) so that
	 * aesdeclast with those registers as the key operand performs the final
	 * round and the CBC XOR in one step.
	 */
3692.byte	102,15,56,222,209
3693.byte	102,15,56,222,217
3694	pxor	%xmm0,%xmm10
3695	pxor	%xmm0,%xmm11
3696.byte	102,15,56,222,225
3697.byte	102,15,56,222,233
3698	pxor	%xmm0,%xmm12
3699	pxor	%xmm0,%xmm13
3700.byte	102,15,56,222,241
3701.byte	102,15,56,222,249
3702	pxor	%xmm0,%xmm14
3703	pxor	%xmm0,%xmm15
3704.byte	102,68,15,56,222,193
3705.byte	102,68,15,56,222,201
3706	movdqu	80(%rdi),%xmm1
3707
	/*
	 * aesdeclast (66 [REX] 0F 38 DF /r) for the eight blocks, interleaved
	 * with loading the next group's ciphertext copies from (%rbp).
	 */
3708.byte	102,65,15,56,223,210
3709	movdqu	96(%rdi),%xmm10
3710	pxor	%xmm0,%xmm1
3711.byte	102,65,15,56,223,219
3712	pxor	%xmm0,%xmm10
	/* eighth ciphertext block: becomes the next chaining value */
3713	movdqu	112(%rdi),%xmm0
3714.byte	102,65,15,56,223,228
3715	leaq	128(%rdi),%rdi
3716	movdqu	0(%rbp),%xmm11
3717.byte	102,65,15,56,223,237
3718.byte	102,65,15,56,223,246
3719	movdqu	16(%rbp),%xmm12
3720	movdqu	32(%rbp),%xmm13
3721.byte	102,65,15,56,223,255
3722.byte	102,68,15,56,223,193
3723	movdqu	48(%rbp),%xmm14
3724	movdqu	64(%rbp),%xmm15
3725.byte	102,69,15,56,223,202
3726	movdqa	%xmm0,%xmm10
3727	movdqu	80(%rbp),%xmm1
	/* reload the first round key (%rcx is key + 112 here) */
3728	movups	-112(%rcx),%xmm0
3729
	/*
	 * Store seven outputs and move the next group's ciphertext into the
	 * working registers; the eighth output (%xmm9) is stored at the top of
	 * the loop or just below.
	 */
3730	movups	%xmm2,(%rsi)
3731	movdqa	%xmm11,%xmm2
3732	movups	%xmm3,16(%rsi)
3733	movdqa	%xmm12,%xmm3
3734	movups	%xmm4,32(%rsi)
3735	movdqa	%xmm13,%xmm4
3736	movups	%xmm5,48(%rsi)
3737	movdqa	%xmm14,%xmm5
3738	movups	%xmm6,64(%rsi)
3739	movdqa	%xmm15,%xmm6
3740	movups	%xmm7,80(%rsi)
3741	movdqa	%xmm1,%xmm7
3742	movups	%xmm8,96(%rsi)
3743	leaq	112(%rsi),%rsi
3744
3745	subq	$0x80,%rdx
3746	ja	.Lcbc_dec_loop8
3747
	/* leave the eight-block loop: undo the key pointer and length biases */
3748	movaps	%xmm9,%xmm2
3749	leaq	-112(%rcx),%rcx
3750	addq	$0x70,%rdx
3751	jle	.Lcbc_dec_clear_tail_collected
3752	movups	%xmm9,(%rsi)
3753	leaq	16(%rsi),%rsi
3754	cmpq	$0x50,%rdx
3755	jbe	.Lcbc_dec_tail
3756
3757	movaps	%xmm11,%xmm2
3758.Lcbc_dec_six_or_seven:
	/* six or seven blocks remain, already loaded in %xmm2..%xmm7 */
3759	cmpq	$0x60,%rdx
3760	ja	.Lcbc_dec_seven
3761
	/* six blocks: %xmm8 keeps the sixth ciphertext as the next chaining value */
3762	movaps	%xmm7,%xmm8
3763	call	_aesni_decrypt6
3764	pxor	%xmm10,%xmm2
3765	movaps	%xmm8,%xmm10
3766	pxor	%xmm11,%xmm3
3767	movdqu	%xmm2,(%rsi)
3768	pxor	%xmm12,%xmm4
3769	movdqu	%xmm3,16(%rsi)
3770	pxor	%xmm3,%xmm3
3771	pxor	%xmm13,%xmm5
3772	movdqu	%xmm4,32(%rsi)
3773	pxor	%xmm4,%xmm4
3774	pxor	%xmm14,%xmm6
3775	movdqu	%xmm5,48(%rsi)
3776	pxor	%xmm5,%xmm5
3777	pxor	%xmm15,%xmm7
3778	movdqu	%xmm6,64(%rsi)
3779	pxor	%xmm6,%xmm6
3780	leaq	80(%rsi),%rsi
	/* the last block is left in %xmm2 for the common exit to store */
3781	movdqa	%xmm7,%xmm2
3782	pxor	%xmm7,%xmm7
3783	jmp	.Lcbc_dec_tail_collected
3784
3785.align	16
3786.Lcbc_dec_seven:
	/* seven blocks: eight-lane helper with a zero eighth lane */
3787	movups	96(%rdi),%xmm8
3788	xorps	%xmm9,%xmm9
3789	call	_aesni_decrypt8
	/* reload ciphertext 6 for chaining and ciphertext 7 as the next chaining value */
3790	movups	80(%rdi),%xmm9
3791	pxor	%xmm10,%xmm2
3792	movups	96(%rdi),%xmm10
3793	pxor	%xmm11,%xmm3
3794	movdqu	%xmm2,(%rsi)
3795	pxor	%xmm12,%xmm4
3796	movdqu	%xmm3,16(%rsi)
3797	pxor	%xmm3,%xmm3
3798	pxor	%xmm13,%xmm5
3799	movdqu	%xmm4,32(%rsi)
3800	pxor	%xmm4,%xmm4
3801	pxor	%xmm14,%xmm6
3802	movdqu	%xmm5,48(%rsi)
3803	pxor	%xmm5,%xmm5
3804	pxor	%xmm15,%xmm7
3805	movdqu	%xmm6,64(%rsi)
3806	pxor	%xmm6,%xmm6
3807	pxor	%xmm9,%xmm8
3808	movdqu	%xmm7,80(%rsi)
3809	pxor	%xmm7,%xmm7
3810	leaq	96(%rsi),%rsi
3811	movdqa	%xmm8,%xmm2
3812	pxor	%xmm8,%xmm8
3813	pxor	%xmm9,%xmm9
3814	jmp	.Lcbc_dec_tail_collected
3815
3816.align	16
3817.Lcbc_dec_loop6:
	/* six-block loop: store the sixth output of the previous iteration, reload */
3818	movups	%xmm7,(%rsi)
3819	leaq	16(%rsi),%rsi
3820	movdqu	0(%rdi),%xmm2
3821	movdqu	16(%rdi),%xmm3
3822	movdqa	%xmm2,%xmm11
3823	movdqu	32(%rdi),%xmm4
3824	movdqa	%xmm3,%xmm12
3825	movdqu	48(%rdi),%xmm5
3826	movdqa	%xmm4,%xmm13
3827	movdqu	64(%rdi),%xmm6
3828	movdqa	%xmm5,%xmm14
3829	movdqu	80(%rdi),%xmm7
3830	movdqa	%xmm6,%xmm15
3831.Lcbc_dec_loop6_enter:
3832	leaq	96(%rdi),%rdi
3833	movdqa	%xmm7,%xmm8
3834
3835	call	_aesni_decrypt6
3836
	/*
	 * CBC XOR with the previous ciphertexts; %rcx and %eax are restored
	 * from %rbp and %r10d for the next helper call.
	 */
3837	pxor	%xmm10,%xmm2
3838	movdqa	%xmm8,%xmm10
3839	pxor	%xmm11,%xmm3
3840	movdqu	%xmm2,(%rsi)
3841	pxor	%xmm12,%xmm4
3842	movdqu	%xmm3,16(%rsi)
3843	pxor	%xmm13,%xmm5
3844	movdqu	%xmm4,32(%rsi)
3845	pxor	%xmm14,%xmm6
3846	movq	%rbp,%rcx
3847	movdqu	%xmm5,48(%rsi)
3848	pxor	%xmm15,%xmm7
3849	movl	%r10d,%eax
3850	movdqu	%xmm6,64(%rsi)
3851	leaq	80(%rsi),%rsi
3852	subq	$0x60,%rdx
3853	ja	.Lcbc_dec_loop6
3854
3855	movdqa	%xmm7,%xmm2
3856	addq	$0x50,%rdx
3857	jle	.Lcbc_dec_clear_tail_collected
3858	movups	%xmm7,(%rsi)
3859	leaq	16(%rsi),%rsi
3860
3861.Lcbc_dec_tail:
	/*
	 * At most five blocks left.  Load them one at a time, peeling 16 bytes
	 * off %rdx per block and branching out as soon as it reaches zero or
	 * goes below; %xmm11..%xmm14 keep ciphertext copies for chaining.
	 */
3862	movups	(%rdi),%xmm2
3863	subq	$0x10,%rdx
3864	jbe	.Lcbc_dec_one
3865
3866	movups	16(%rdi),%xmm3
3867	movaps	%xmm2,%xmm11
3868	subq	$0x10,%rdx
3869	jbe	.Lcbc_dec_two
3870
3871	movups	32(%rdi),%xmm4
3872	movaps	%xmm3,%xmm12
3873	subq	$0x10,%rdx
3874	jbe	.Lcbc_dec_three
3875
3876	movups	48(%rdi),%xmm5
3877	movaps	%xmm4,%xmm13
3878	subq	$0x10,%rdx
3879	jbe	.Lcbc_dec_four
3880
	/* five blocks: six-lane helper with a zero sixth lane */
3881	movups	64(%rdi),%xmm6
3882	movaps	%xmm5,%xmm14
3883	movaps	%xmm6,%xmm15
3884	xorps	%xmm7,%xmm7
3885	call	_aesni_decrypt6
3886	pxor	%xmm10,%xmm2
3887	movaps	%xmm15,%xmm10
3888	pxor	%xmm11,%xmm3
3889	movdqu	%xmm2,(%rsi)
3890	pxor	%xmm12,%xmm4
3891	movdqu	%xmm3,16(%rsi)
3892	pxor	%xmm3,%xmm3
3893	pxor	%xmm13,%xmm5
3894	movdqu	%xmm4,32(%rsi)
3895	pxor	%xmm4,%xmm4
3896	pxor	%xmm14,%xmm6
3897	movdqu	%xmm5,48(%rsi)
3898	pxor	%xmm5,%xmm5
3899	leaq	64(%rsi),%rsi
3900	movdqa	%xmm6,%xmm2
3901	pxor	%xmm6,%xmm6
3902	pxor	%xmm7,%xmm7
3903	subq	$0x10,%rdx
3904	jmp	.Lcbc_dec_tail_collected
3905
3906.align	16
3907.Lcbc_dec_one:
	/* one block, done inline: aesdec per round, then aesdeclast */
3908	movaps	%xmm2,%xmm11
3909	movups	(%rcx),%xmm0
3910	movups	16(%rcx),%xmm1
3911	leaq	32(%rcx),%rcx
3912	xorps	%xmm0,%xmm2
3913.Loop_dec1_17:
3914.byte	102,15,56,222,209
3915	decl	%eax
3916	movups	(%rcx),%xmm1
3917	leaq	16(%rcx),%rcx
3918	jnz	.Loop_dec1_17
3919.byte	102,15,56,223,209
3920	xorps	%xmm10,%xmm2
3921	movaps	%xmm11,%xmm10
3922	jmp	.Lcbc_dec_tail_collected
3923.align	16
3924.Lcbc_dec_two:
3925	movaps	%xmm3,%xmm12
3926	call	_aesni_decrypt2
3927	pxor	%xmm10,%xmm2
3928	movaps	%xmm12,%xmm10
3929	pxor	%xmm11,%xmm3
3930	movdqu	%xmm2,(%rsi)
3931	movdqa	%xmm3,%xmm2
3932	pxor	%xmm3,%xmm3
3933	leaq	16(%rsi),%rsi
3934	jmp	.Lcbc_dec_tail_collected
3935.align	16
3936.Lcbc_dec_three:
3937	movaps	%xmm4,%xmm13
3938	call	_aesni_decrypt3
3939	pxor	%xmm10,%xmm2
3940	movaps	%xmm13,%xmm10
3941	pxor	%xmm11,%xmm3
3942	movdqu	%xmm2,(%rsi)
3943	pxor	%xmm12,%xmm4
3944	movdqu	%xmm3,16(%rsi)
3945	pxor	%xmm3,%xmm3
3946	movdqa	%xmm4,%xmm2
3947	pxor	%xmm4,%xmm4
3948	leaq	32(%rsi),%rsi
3949	jmp	.Lcbc_dec_tail_collected
3950.align	16
3951.Lcbc_dec_four:
3952	movaps	%xmm5,%xmm14
3953	call	_aesni_decrypt4
3954	pxor	%xmm10,%xmm2
3955	movaps	%xmm14,%xmm10
3956	pxor	%xmm11,%xmm3
3957	movdqu	%xmm2,(%rsi)
3958	pxor	%xmm12,%xmm4
3959	movdqu	%xmm3,16(%rsi)
3960	pxor	%xmm3,%xmm3
3961	pxor	%xmm13,%xmm5
3962	movdqu	%xmm4,32(%rsi)
3963	pxor	%xmm4,%xmm4
3964	movdqa	%xmm5,%xmm2
3965	pxor	%xmm5,%xmm5
3966	leaq	48(%rsi),%rsi
3967	jmp	.Lcbc_dec_tail_collected
3968
3969.align	16
3970.Lcbc_dec_clear_tail_collected:
	/* exits from the wide loops: scrub the working registers first */
3971	pxor	%xmm3,%xmm3
3972	pxor	%xmm4,%xmm4
3973	pxor	%xmm5,%xmm5
3974	pxor	%xmm6,%xmm6
3975	pxor	%xmm7,%xmm7
3976	pxor	%xmm8,%xmm8
3977	pxor	%xmm9,%xmm9
3978.Lcbc_dec_tail_collected:
	/*
	 * Common exit: %xmm10 = new chaining value, %xmm2 = last output block
	 * not yet stored.  Every adjustment made to %rdx above was a multiple
	 * of 16, so %rdx & 15 equals the original length & 15.
	 */
3979	movups	%xmm10,(%r8)
3980	andq	$15,%rdx
3981	jnz	.Lcbc_dec_tail_partial
3982	movups	%xmm2,(%rsi)
3983	pxor	%xmm2,%xmm2
3984	jmp	.Lcbc_dec_ret
3985.align	16
3986.Lcbc_dec_tail_partial:
	/*
	 * Spill the last block to the aligned stack slot and copy
	 * 16 - (%rdx & 15) bytes of it to the output with rep movsb, then
	 * overwrite the slot with zeros.
	 */
3987	movaps	%xmm2,(%rsp)
3988	pxor	%xmm2,%xmm2
3989	movq	$16,%rcx
3990	movq	%rsi,%rdi
3991	subq	%rdx,%rcx
3992	leaq	(%rsp),%rsi
	/* bytes F3 A4 66 90: rep movsb followed by a two-byte nop */
3993.long	0x9066A4F3
3994	movdqa	%xmm2,(%rsp)
3995
3996.Lcbc_dec_ret:
	/* clear the key registers, restore %rbp and the entry %rsp saved in %r11 */
3997	xorps	%xmm0,%xmm0
3998	pxor	%xmm1,%xmm1
3999	movq	-8(%r11),%rbp
4000.cfi_restore	%rbp
4001	leaq	(%r11),%rsp
4002.cfi_def_cfa_register	%rsp
4003.Lcbc_ret:
	/* 0xf3,0xc3 = rep ret */
4004	.byte	0xf3,0xc3
4005.cfi_endproc
4006.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
4007.globl	aes_hw_set_decrypt_key
4008.hidden aes_hw_set_decrypt_key
4009.type	aes_hw_set_decrypt_key,@function
4010.align	16
/*
 * aes_hw_set_decrypt_key: build a decryption key schedule.
 *
 * Takes the same arguments as aes_hw_set_encrypt_key and forwards them
 * unchanged to __aesni_set_encrypt_key (%rdi user key, %esi key size in
 * bits, %rdx key schedule to fill).  If that call leaves a non-zero %eax the
 * value is returned as is.  Otherwise the schedule is converted in place:
 * the order of the round keys is reversed and aesimc is applied to every
 * round key except the first and the last.
 *
 * Returns the helper's %eax (zero on success).  %xmm0 and %xmm1 are cleared
 * on the success path.
 */
4011aes_hw_set_decrypt_key:
4012.cfi_startproc
	/* bytes 48 83 EC 08: subq $8,%rsp */
4013.byte	0x48,0x83,0xEC,0x08
4014.cfi_adjust_cfa_offset	8
4015	call	__aesni_set_encrypt_key
	/* the helper leaves the round count in %esi; scale it to a byte offset */
4016	shll	$4,%esi
4017	testl	%eax,%eax
4018	jnz	.Ldec_key_ret
	/* %rdx = first round key, %rdi = round key at offset 16 + 16*rounds */
4019	leaq	16(%rdx,%rsi,1),%rdi
4020
	/* swap the two outermost round keys without transforming them */
4021	movups	(%rdx),%xmm0
4022	movups	(%rdi),%xmm1
4023	movups	%xmm0,(%rdi)
4024	movups	%xmm1,(%rdx)
4025	leaq	16(%rdx),%rdx
4026	leaq	-16(%rdi),%rdi
4027
	/*
	 * Walk inwards from both ends: apply aesimc (66 0F 38 DB /r) to the
	 * pair of keys and store each one in the other's slot, until the two
	 * pointers meet.
	 */
4028.Ldec_key_inverse:
4029	movups	(%rdx),%xmm0
4030	movups	(%rdi),%xmm1
4031.byte	102,15,56,219,192
4032.byte	102,15,56,219,201
4033	leaq	16(%rdx),%rdx
4034	leaq	-16(%rdi),%rdi
4035	movups	%xmm0,16(%rdi)
4036	movups	%xmm1,-16(%rdx)
4037	cmpq	%rdx,%rdi
4038	ja	.Ldec_key_inverse
4039
	/* middle round key: aesimc in place, then clear the key registers */
4040	movups	(%rdx),%xmm0
4041.byte	102,15,56,219,192
4042	pxor	%xmm1,%xmm1
4043	movups	%xmm0,(%rdi)
4044	pxor	%xmm0,%xmm0
4045.Ldec_key_ret:
4046	addq	$8,%rsp
4047.cfi_adjust_cfa_offset	-8
	/* 0xf3,0xc3 = rep ret */
4048	.byte	0xf3,0xc3
4049.cfi_endproc
4050.LSEH_end_set_decrypt_key:
4051.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
4052.globl	aes_hw_set_encrypt_key
4053.hidden aes_hw_set_encrypt_key
4054.type	aes_hw_set_encrypt_key,@function
4055.align	16
4056aes_hw_set_encrypt_key:
4057__aesni_set_encrypt_key:
4058.cfi_startproc
4059.byte	0x48,0x83,0xEC,0x08
4060.cfi_adjust_cfa_offset	8
4061	movq	$-1,%rax
4062	testq	%rdi,%rdi
4063	jz	.Lenc_key_ret
4064	testq	%rdx,%rdx
4065	jz	.Lenc_key_ret
4066
4067	movups	(%rdi),%xmm0
4068	xorps	%xmm4,%xmm4
4069	leaq	OPENSSL_ia32cap_P(%rip),%r10
4070	movl	4(%r10),%r10d
4071	andl	$268437504,%r10d
4072	leaq	16(%rdx),%rax
4073	cmpl	$256,%esi
4074	je	.L14rounds
4075	cmpl	$192,%esi
4076	je	.L12rounds
4077	cmpl	$128,%esi
4078	jne	.Lbad_keybits
4079
4080.L10rounds:
4081	movl	$9,%esi
4082	cmpl	$268435456,%r10d
4083	je	.L10rounds_alt
4084
4085	movups	%xmm0,(%rdx)
4086.byte	102,15,58,223,200,1
4087	call	.Lkey_expansion_128_cold
4088.byte	102,15,58,223,200,2
4089	call	.Lkey_expansion_128
4090.byte	102,15,58,223,200,4
4091	call	.Lkey_expansion_128
4092.byte	102,15,58,223,200,8
4093	call	.Lkey_expansion_128
4094.byte	102,15,58,223,200,16
4095	call	.Lkey_expansion_128
4096.byte	102,15,58,223,200,32
4097	call	.Lkey_expansion_128
4098.byte	102,15,58,223,200,64
4099	call	.Lkey_expansion_128
4100.byte	102,15,58,223,200,128
4101	call	.Lkey_expansion_128
4102.byte	102,15,58,223,200,27
4103	call	.Lkey_expansion_128
4104.byte	102,15,58,223,200,54
4105	call	.Lkey_expansion_128
4106	movups	%xmm0,(%rax)
4107	movl	%esi,80(%rax)
4108	xorl	%eax,%eax
4109	jmp	.Lenc_key_ret
4110
/*
 * Alternative 128-bit key path, taken when %r10d == 0x10000000.  It derives
 * each round key with pshufb + aesenclast instead of aeskeygenassist.
 * Register roles inside this path:
 *   %xmm0  current round key being built
 *   %xmm2  copy of the previous round key, folded into a running XOR
 *   %xmm3  scratch for that fold
 *   %xmm4  round constant in every dword (starts at 1, doubled per round)
 *   %xmm5  .Lkey_rotate byte-shuffle mask
 *   %r10d  loop counter (8)
 * Writes 11 round keys: (%rdx), eight in the loop, two after it; then %esi at
 * 96(%rax) and returns 0 in %eax.
 * NOTE(review): %xmm0, %rax and %esi are set up before this block; %rax is
 * presumably %rdx + 16 on entry - confirm.
 */
4111.align	16
4112.L10rounds_alt:
4113	movdqa	.Lkey_rotate(%rip),%xmm5
4114	movl	$8,%r10d
4115	movdqa	.Lkey_rcon1(%rip),%xmm4
4116	movdqa	%xmm0,%xmm2
4117	movdqu	%xmm0,(%rdx)
4118	jmp	.Loop_key128
4119
4120.align	16
4121.Loop_key128:
/* 66 0F 38 00 C5 = pshufb %xmm5,%xmm0: every dword becomes source bytes */
/* 13,14,15,12, i.e. the top dword rotated by one byte, broadcast to all lanes. */
4122.byte	102,15,56,0,197
/* 66 0F 38 DD C4 = aesenclast %xmm4,%xmm0: S-box substitution, then XOR with */
/* the round constant held in %xmm4. */
4123.byte	102,15,56,221,196
4124	pslld	$1,%xmm4	/* double the round constant for the next round */
4125	leaq	16(%rax),%rax
4126
/* %xmm2 = k ^ (k<<32) ^ (k<<64) ^ (k<<96), k = previous round key */
4127	movdqa	%xmm2,%xmm3
4128	pslldq	$4,%xmm2
4129	pxor	%xmm2,%xmm3
4130	pslldq	$4,%xmm2
4131	pxor	%xmm2,%xmm3
4132	pslldq	$4,%xmm2
4133	pxor	%xmm3,%xmm2
4134
4135	pxor	%xmm2,%xmm0	/* new round key */
4136	movdqu	%xmm0,-16(%rax)	/* %rax was already advanced, hence -16 */
4137	movdqa	%xmm0,%xmm2
4138
4139	decl	%r10d
4140	jnz	.Loop_key128
4141
/* After eight doublings the constant would be 0x100; reload it as 0x1b. */
4142	movdqa	.Lkey_rcon1b(%rip),%xmm4
4143
/* Ninth round key (same pshufb / aesenclast pair as in the loop). */
4144.byte	102,15,56,0,197
4145.byte	102,15,56,221,196
4146	pslld	$1,%xmm4	/* 0x1b -> 0x36 for the final round */
4147
4148	movdqa	%xmm2,%xmm3
4149	pslldq	$4,%xmm2
4150	pxor	%xmm2,%xmm3
4151	pslldq	$4,%xmm2
4152	pxor	%xmm2,%xmm3
4153	pslldq	$4,%xmm2
4154	pxor	%xmm3,%xmm2
4155
4156	pxor	%xmm2,%xmm0
4157	movdqu	%xmm0,(%rax)
4158
/* Tenth (last) round key. */
4159	movdqa	%xmm0,%xmm2
4160.byte	102,15,56,0,197
4161.byte	102,15,56,221,196
4162
4163	movdqa	%xmm2,%xmm3
4164	pslldq	$4,%xmm2
4165	pxor	%xmm2,%xmm3
4166	pslldq	$4,%xmm2
4167	pxor	%xmm2,%xmm3
4168	pslldq	$4,%xmm2
4169	pxor	%xmm3,%xmm2
4170
4171	pxor	%xmm2,%xmm0
4172	movdqu	%xmm0,16(%rax)
4173
4174	movl	%esi,96(%rax)	/* NOTE(review): offset 240 from %rdx if %rax started at %rdx+16 - confirm */
4175	xorl	%eax,%eax	/* status 0 */
4176	jmp	.Lenc_key_ret
4177
/*
 * 192-bit key path (taken when %esi == 192).  Loads 8 further key bytes from
 * 16(%rdi) into the low half of %xmm2, sets %esi to 11, and expands the key
 * with aeskeygenassist plus the .Lkey_expansion_192a/192b helpers.
 * Output: 16 bytes at (%rdx), then 16 per 192a call and 32 per 192b call
 * through %rax (the cold entry stores nothing), then 16 at (%rax) below -
 * 208 bytes in total, i.e. 13 round keys.  %esi goes to 48(%rax); %rax = 0.
 * NOTE(review): %xmm0, %xmm4, %rax and %r10d come from before this chunk;
 * presumably %xmm0 = first 16 key bytes, %xmm4 = 0, %rax = %rdx + 16 -
 * confirm against the prologue.
 */
4178.align	16
4179.L12rounds:
4180	movq	16(%rdi),%xmm2	/* 8-byte load; upper half of %xmm2 is zeroed */
4181	movl	$11,%esi
4182	cmpl	$268435456,%r10d	/* 268435456 == 0x10000000, i.e. only bit 28 set */
4183	je	.L12rounds_alt
4184
4185	movups	%xmm0,(%rdx)
/* Each .byte row is a hand-encoded "aeskeygenassist $imm,%xmm2,%xmm1" */
/* (66 0F 3A DF CA imm); imm walks 1,2,4,...,128. */
4186.byte	102,15,58,223,202,1
4187	call	.Lkey_expansion_192a_cold	/* cold entry: no store, %rax unchanged */
4188.byte	102,15,58,223,202,2
4189	call	.Lkey_expansion_192b
4190.byte	102,15,58,223,202,4
4191	call	.Lkey_expansion_192a
4192.byte	102,15,58,223,202,8
4193	call	.Lkey_expansion_192b
4194.byte	102,15,58,223,202,16
4195	call	.Lkey_expansion_192a
4196.byte	102,15,58,223,202,32
4197	call	.Lkey_expansion_192b
4198.byte	102,15,58,223,202,64
4199	call	.Lkey_expansion_192a
4200.byte	102,15,58,223,202,128
4201	call	.Lkey_expansion_192b
4202	movups	%xmm0,(%rax)	/* last 16 bytes of the schedule */
4203	movl	%esi,48(%rax)	/* NOTE(review): offset 240 from %rdx if %rax started at %rdx+16 - confirm */
4204	xorq	%rax,%rax	/* status 0 */
4205	jmp	.Lenc_key_ret
4206
/*
 * Alternative 192-bit key path, taken when %r10d == 0x10000000; uses
 * pshufb + aesenclast instead of aeskeygenassist.
 * Register roles inside this path:
 *   %xmm0  low 128 bits of the running key state
 *   %xmm2  remaining 64 bits of the key state (low qword)
 *   %xmm1, %xmm3  scratch
 *   %xmm4  round constant in every dword (starts at 1, doubled per pass)
 *   %xmm5  .Lkey_rotate192 byte-shuffle mask
 *   %r10d  loop counter (8)
 * Each pass writes 24 bytes (8 at the old %rax, 16 at old %rax + 8), so with
 * the 16 bytes at (%rdx) the path emits 16 + 8*24 = 208 bytes.  %esi is then
 * stored at 32(%rax) and %eax = 0.
 * NOTE(review): %xmm0, %xmm2, %rax and %esi are set up before this block;
 * %rax is presumably %rdx + 16 on entry - confirm.
 */
4207.align	16
4208.L12rounds_alt:
4209	movdqa	.Lkey_rotate192(%rip),%xmm5
4210	movdqa	.Lkey_rcon1(%rip),%xmm4
4211	movl	$8,%r10d
4212	movdqu	%xmm0,(%rdx)
4213	jmp	.Loop_key192
4214
4215.align	16
4216.Loop_key192:
4217	movq	%xmm2,0(%rax)	/* emit the pending low 8 bytes of %xmm2 */
4218	movdqa	%xmm2,%xmm1	/* keep the unshuffled value for later */
/* 66 0F 38 00 D5 = pshufb %xmm5,%xmm2: every dword becomes source bytes */
/* 5,6,7,4, i.e. dword 1 rotated by one byte, broadcast to all lanes. */
4219.byte	102,15,56,0,213
/* 66 0F 38 DD D4 = aesenclast %xmm4,%xmm2: S-box substitution, then XOR with */
/* the round constant held in %xmm4. */
4220.byte	102,15,56,221,212
4221	pslld	$1,%xmm4	/* double the round constant */
4222	leaq	24(%rax),%rax
4223
/* %xmm0 = k ^ (k<<32) ^ (k<<64) ^ (k<<96), k = old %xmm0 */
4224	movdqa	%xmm0,%xmm3
4225	pslldq	$4,%xmm0
4226	pxor	%xmm0,%xmm3
4227	pslldq	$4,%xmm0
4228	pxor	%xmm0,%xmm3
4229	pslldq	$4,%xmm0
4230	pxor	%xmm3,%xmm0
4231
/* %xmm3 = broadcast(top dword of %xmm0) ^ old %xmm2 ^ (old %xmm2 << 32) */
4232	pshufd	$0xff,%xmm0,%xmm3
4233	pxor	%xmm1,%xmm3
4234	pslldq	$4,%xmm1
4235	pxor	%xmm1,%xmm3
4236
4237	pxor	%xmm2,%xmm0	/* fold the substituted word into the low 128 bits */
4238	pxor	%xmm3,%xmm2	/* and into the remaining 64 bits */
4239	movdqu	%xmm0,-16(%rax)	/* 16 bytes right after the 8 written at the top */
4240
4241	decl	%r10d
4242	jnz	.Loop_key192
4243
4244	movl	%esi,32(%rax)	/* NOTE(review): offset 240 from %rdx if %rax started at %rdx+16 - confirm */
4245	xorl	%eax,%eax	/* status 0 */
4246	jmp	.Lenc_key_ret
4247
/*
 * 256-bit key path (taken when %esi == 256).  Loads the second 16 key bytes
 * from 16(%rdi) into %xmm2, sets %esi to 13, advances %rax by 16, and expands
 * the key with aeskeygenassist plus the .Lkey_expansion_256a/256b helpers.
 * Output: 16 bytes each at (%rdx) and 16(%rdx), one 16-byte store per warm
 * helper call (256a stores %xmm2, 256b stores %xmm0; the cold entry stores
 * nothing), and a final store at (%rax) - 15 round keys in total.
 * %esi goes to 16(%rax); %rax = 0 on exit.
 * NOTE(review): %xmm0, %xmm4, %rax and %r10d come from before this chunk;
 * presumably %xmm0 = first 16 key bytes, %xmm4 = 0, %rax = %rdx + 16 before
 * the leaq below - confirm against the prologue.
 */
4248.align	16
4249.L14rounds:
4250	movups	16(%rdi),%xmm2
4251	movl	$13,%esi
4252	leaq	16(%rax),%rax	/* skip one more slot: two keys are stored via %rdx */
4253	cmpl	$268435456,%r10d	/* 268435456 == 0x10000000, i.e. only bit 28 set */
4254	je	.L14rounds_alt
4255
4256	movups	%xmm0,(%rdx)
4257	movups	%xmm2,16(%rdx)
/* Rows ending 202,imm encode "aeskeygenassist $imm,%xmm2,%xmm1" and feed */
/* 256a (updates %xmm0); rows ending 200,imm encode */
/* "aeskeygenassist $imm,%xmm0,%xmm1" and feed 256b (updates %xmm2). */
4258.byte	102,15,58,223,202,1
4259	call	.Lkey_expansion_256a_cold	/* cold entry: no store, %rax unchanged */
4260.byte	102,15,58,223,200,1
4261	call	.Lkey_expansion_256b
4262.byte	102,15,58,223,202,2
4263	call	.Lkey_expansion_256a
4264.byte	102,15,58,223,200,2
4265	call	.Lkey_expansion_256b
4266.byte	102,15,58,223,202,4
4267	call	.Lkey_expansion_256a
4268.byte	102,15,58,223,200,4
4269	call	.Lkey_expansion_256b
4270.byte	102,15,58,223,202,8
4271	call	.Lkey_expansion_256a
4272.byte	102,15,58,223,200,8
4273	call	.Lkey_expansion_256b
4274.byte	102,15,58,223,202,16
4275	call	.Lkey_expansion_256a
4276.byte	102,15,58,223,200,16
4277	call	.Lkey_expansion_256b
4278.byte	102,15,58,223,202,32
4279	call	.Lkey_expansion_256a
4280.byte	102,15,58,223,200,32
4281	call	.Lkey_expansion_256b
4282.byte	102,15,58,223,202,64
4283	call	.Lkey_expansion_256a
4284	movups	%xmm0,(%rax)	/* last round key */
4285	movl	%esi,16(%rax)	/* NOTE(review): offset 240 from %rdx if %rax started at %rdx+16 - confirm */
4286	xorq	%rax,%rax	/* status 0 */
4287	jmp	.Lenc_key_ret
4288
/*
 * Alternative 256-bit key path, taken when %r10d == 0x10000000; uses
 * pshufb + aesenclast instead of aeskeygenassist.
 * Register roles inside this path:
 *   %xmm0  even-numbered round key being built
 *   %xmm2  odd-numbered round key / substituted word
 *   %xmm1  copy of the previous odd-numbered key
 *   %xmm3  scratch (also the all-zero operand for the second aesenclast)
 *   %xmm4  round constant in every dword (starts at 1, doubled per pass)
 *   %xmm5  .Lkey_rotate byte-shuffle mask
 *   %r10d  loop counter (7)
 * The loop exits from its middle: seven keys are written at (%rax) and six at
 * 16(%rax), which with the two stored through %rdx gives 15 round keys.
 * %esi is then stored at 16(%rax) and %eax = 0.
 * NOTE(review): %xmm0, %xmm2, %rax and %esi are set up in .L14rounds; %rax is
 * presumably %rdx + 32 on entry - confirm.
 */
4289.align	16
4290.L14rounds_alt:
4291	movdqa	.Lkey_rotate(%rip),%xmm5
4292	movdqa	.Lkey_rcon1(%rip),%xmm4
4293	movl	$7,%r10d
4294	movdqu	%xmm0,0(%rdx)
4295	movdqa	%xmm2,%xmm1
4296	movdqu	%xmm2,16(%rdx)
4297	jmp	.Loop_key256
4298
4299.align	16
4300.Loop_key256:
/* 66 0F 38 00 D5 = pshufb %xmm5,%xmm2: broadcast the top dword rotated by */
/* one byte.  66 0F 38 DD D4 = aesenclast %xmm4,%xmm2: S-box substitution, */
/* then XOR with the round constant held in %xmm4. */
4301.byte	102,15,56,0,213
4302.byte	102,15,56,221,212
4303
/* %xmm0 = k ^ (k<<32) ^ (k<<64) ^ (k<<96), k = old %xmm0 */
4304	movdqa	%xmm0,%xmm3
4305	pslldq	$4,%xmm0
4306	pxor	%xmm0,%xmm3
4307	pslldq	$4,%xmm0
4308	pxor	%xmm0,%xmm3
4309	pslldq	$4,%xmm0
4310	pxor	%xmm3,%xmm0
4311	pslld	$1,%xmm4	/* double the round constant */
4312
4313	pxor	%xmm2,%xmm0
4314	movdqu	%xmm0,(%rax)
4315
4316	decl	%r10d
4317	jz	.Ldone_key256	/* the last pass only needs the even-numbered key */
4318
4319	pshufd	$0xff,%xmm0,%xmm2	/* broadcast top dword, no rotation */
4320	pxor	%xmm3,%xmm3	/* zero operand: no round constant on this step */
/* 66 0F 38 DD D3 = aesenclast %xmm3,%xmm2: S-box substitution only, since */
/* %xmm3 is zero. */
4321.byte	102,15,56,221,211
4322
/* %xmm1 = k ^ (k<<32) ^ (k<<64) ^ (k<<96), k = previous odd-numbered key */
4323	movdqa	%xmm1,%xmm3
4324	pslldq	$4,%xmm1
4325	pxor	%xmm1,%xmm3
4326	pslldq	$4,%xmm1
4327	pxor	%xmm1,%xmm3
4328	pslldq	$4,%xmm1
4329	pxor	%xmm3,%xmm1
4330
4331	pxor	%xmm1,%xmm2
4332	movdqu	%xmm2,16(%rax)
4333	leaq	32(%rax),%rax
4334	movdqa	%xmm2,%xmm1
4335
4336	jmp	.Loop_key256
4337
4338.Ldone_key256:
4339	movl	%esi,16(%rax)	/* NOTE(review): offset 240 from %rdx if %rax started at %rdx+32 - confirm */
4340	xorl	%eax,%eax	/* status 0 */
4341	jmp	.Lenc_key_ret
4342
/*
 * Common exit of the key-setup routine.
 * .Lbad_keybits: reached when %esi is none of 128/192/256; returns -2 in %rax.
 * .Lenc_key_ret: every key-size path jumps here with its status already in
 * %rax.  %xmm0-%xmm5 are cleared (presumably so no key material stays in
 * registers), 8 bytes of stack are released, and the routine returns.
 * NOTE(review): the matching 8-byte stack reservation and .cfi_startproc are
 * before this chunk - confirm they pair up with the addq / .cfi_endproc here.
 */
4343.align	16
4344.Lbad_keybits:
4345	movq	$-2,%rax
4346.Lenc_key_ret:
4347	pxor	%xmm0,%xmm0
4348	pxor	%xmm1,%xmm1
4349	pxor	%xmm2,%xmm2
4350	pxor	%xmm3,%xmm3
4351	pxor	%xmm4,%xmm4
4352	pxor	%xmm5,%xmm5
4353	addq	$8,%rsp
4354.cfi_adjust_cfa_offset	-8
4355	.byte	0xf3,0xc3	/* F3 C3 = "rep ret" */
4356.cfi_endproc
4357.LSEH_end_set_encrypt_key:
4358
/*
 * Local helper: one AES-128 key-schedule step.
 * In:   %xmm0 = previous round key
 *       %xmm1 = aeskeygenassist result for that key (dword 3 is used)
 *       %xmm4 = scratch whose dword 0 must be zero; both shufps below keep
 *               dword 0 unchanged, so it stays zero across calls
 *               (NOTE(review): assumed to be zeroed by the caller - confirm)
 *       %rax  = output cursor
 * Out:  %xmm0 = next round key; %xmm1, %xmm4 clobbered.
 * Entry .Lkey_expansion_128 first stores %xmm0 at (%rax) and advances %rax by
 * 16; entry .Lkey_expansion_128_cold skips that store.
 */
4359.align	16
4360.Lkey_expansion_128:
4361	movups	%xmm0,(%rax)
4362	leaq	16(%rax),%rax
4363.Lkey_expansion_128_cold:
/* With k0..k3 the dwords of %xmm0, the two shufps/xorps pairs leave */
/* %xmm0 = (k0, k0^k1, k0^k1^k2, k0^k1^k2^k3). */
4364	shufps	$16,%xmm0,%xmm4	/* %xmm4 = (0, 0, k1, k0) */
4365	xorps	%xmm4,%xmm0
4366	shufps	$140,%xmm0,%xmm4	/* %xmm4 = (0, k0, k0, k1^k2) */
4367	xorps	%xmm4,%xmm0
4368	shufps	$255,%xmm1,%xmm1	/* broadcast dword 3 of the keygenassist result */
4369	xorps	%xmm1,%xmm0
4370	.byte	0xf3,0xc3	/* F3 C3 = "rep ret" */
4371
/*
 * Local helper: AES-192 key-schedule step, "a" flavour.
 * In:   %xmm0 = low 128 bits of the key state
 *       %xmm2 = remaining 64 bits of the key state (low qword)
 *       %xmm1 = aeskeygenassist result (dword 1 is used)
 *       %xmm4 = scratch whose dword 0 must be zero (kept unchanged by both
 *               shufps; NOTE(review): assumed zeroed by the caller - confirm)
 *       %rax  = output cursor
 * Out:  %xmm0, %xmm2 updated; %xmm5 = copy of the incoming %xmm2;
 *       %xmm1, %xmm3, %xmm4 clobbered.
 * Entries:
 *   .Lkey_expansion_192a       stores %xmm0 at (%rax), %rax += 16, then falls in
 *   .Lkey_expansion_192a_cold  no store
 *   .Lkey_expansion_192b_warm  shared tail; .Lkey_expansion_192b jumps here and
 *                              skips the %xmm5 save
 */
4372.align	16
4373.Lkey_expansion_192a:
4374	movups	%xmm0,(%rax)
4375	leaq	16(%rax),%rax
4376.Lkey_expansion_192a_cold:
4377	movaps	%xmm2,%xmm5	/* saved for the next 192b call, which stores it */
4378.Lkey_expansion_192b_warm:
/* The shufps/xorps pairs turn %xmm0 into the running XOR of its dwords; the */
/* %xmm3 instructions interleaved with them do the same for %xmm2. */
4379	shufps	$16,%xmm0,%xmm4
4380	movdqa	%xmm2,%xmm3
4381	xorps	%xmm4,%xmm0
4382	shufps	$140,%xmm0,%xmm4
4383	pslldq	$4,%xmm3
4384	xorps	%xmm4,%xmm0
4385	pshufd	$85,%xmm1,%xmm1	/* broadcast dword 1 of the keygenassist result */
4386	pxor	%xmm3,%xmm2	/* %xmm2 ^= %xmm2 << 32 */
4387	pxor	%xmm1,%xmm0
4388	pshufd	$255,%xmm0,%xmm3	/* broadcast top dword of the new %xmm0 */
4389	pxor	%xmm3,%xmm2
4390	.byte	0xf3,0xc3	/* F3 C3 = "rep ret" */
4391
/*
 * Local helper: AES-192 key-schedule step, "b" flavour.
 * Repacks the 192-bit state held in %xmm5 (low qword), %xmm0 and %xmm2 (low
 * qword) into two 16-byte blocks, stores them at (%rax) and 16(%rax),
 * advances %rax by 32, then tail-jumps into .Lkey_expansion_192b_warm, whose
 * return goes straight back to this helper's caller.
 * In:   %xmm0, %xmm2, %xmm5, %rax as described; %xmm1/%xmm4 as the warm tail
 *       expects.
 * Clobbers: %xmm3, %xmm5, plus whatever the warm tail changes.
 */
4392.align	16
4393.Lkey_expansion_192b:
4394	movaps	%xmm0,%xmm3
4395	shufps	$68,%xmm0,%xmm5	/* %xmm5 = (low qword of %xmm5, low qword of %xmm0) */
4396	movups	%xmm5,(%rax)
4397	shufps	$78,%xmm2,%xmm3	/* %xmm3 = (high qword of %xmm0, low qword of %xmm2) */
4398	movups	%xmm3,16(%rax)
4399	leaq	32(%rax),%rax
4400	jmp	.Lkey_expansion_192b_warm
4401
/*
 * Local helper: AES-256 key-schedule step that updates %xmm0.
 * In:   %xmm0 = key half to update
 *       %xmm2 = the other key half (only stored, not modified)
 *       %xmm1 = aeskeygenassist result (dword 3 is used)
 *       %xmm4 = scratch whose dword 0 must be zero (kept unchanged by both
 *               shufps; NOTE(review): assumed zeroed by the caller - confirm)
 *       %rax  = output cursor
 * Out:  %xmm0 updated; %xmm1, %xmm4 clobbered.
 * Entry .Lkey_expansion_256a first stores %xmm2 at (%rax) and advances %rax by
 * 16; entry .Lkey_expansion_256a_cold skips that store.
 */
4402.align	16
4403.Lkey_expansion_256a:
4404	movups	%xmm2,(%rax)
4405	leaq	16(%rax),%rax
4406.Lkey_expansion_256a_cold:
/* With k0..k3 the dwords of %xmm0, the two shufps/xorps pairs leave */
/* %xmm0 = (k0, k0^k1, k0^k1^k2, k0^k1^k2^k3). */
4407	shufps	$16,%xmm0,%xmm4
4408	xorps	%xmm4,%xmm0
4409	shufps	$140,%xmm0,%xmm4
4410	xorps	%xmm4,%xmm0
4411	shufps	$255,%xmm1,%xmm1	/* broadcast dword 3 of the keygenassist result */
4412	xorps	%xmm1,%xmm0
4413	.byte	0xf3,0xc3	/* F3 C3 = "rep ret" */
4414
/*
 * Local helper: AES-256 key-schedule step that updates %xmm2.
 * Stores %xmm0 at (%rax), advances %rax by 16, then applies the same
 * running-XOR fold as .Lkey_expansion_256a to %xmm2, using dword 2 of the
 * aeskeygenassist result instead of dword 3.
 * In:   %xmm0 = key half just produced (stored, not modified)
 *       %xmm2 = key half to update
 *       %xmm1 = aeskeygenassist result
 *       %xmm4 = scratch whose dword 0 must be zero (kept unchanged by both
 *               shufps; NOTE(review): assumed zeroed by the caller - confirm)
 *       %rax  = output cursor
 * Out:  %xmm2 updated; %xmm1, %xmm4 clobbered.
 */
4415.align	16
4416.Lkey_expansion_256b:
4417	movups	%xmm0,(%rax)
4418	leaq	16(%rax),%rax
4419
4420	shufps	$16,%xmm2,%xmm4
4421	xorps	%xmm4,%xmm2
4422	shufps	$140,%xmm2,%xmm4
4423	xorps	%xmm4,%xmm2
4424	shufps	$170,%xmm1,%xmm1	/* broadcast dword 2 of the keygenassist result */
4425	xorps	%xmm1,%xmm2
4426	.byte	0xf3,0xc3	/* F3 C3 = "rep ret" */
4427.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
4428.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key
/*
 * Shared 16-byte constants.  The table starts on a 64-byte boundary and every
 * entry is exactly 16 bytes, so each label is 16-byte aligned; the movdqa
 * loads of .Lkey_rotate, .Lkey_rotate192, .Lkey_rcon1 and .Lkey_rcon1b in the
 * key-setup code depend on that.  Keep the order and sizes unchanged.
 */
4429.align	64
/* Byte indices 15..0: a full 16-byte reversal pattern (presumably a pshufb */
/* control; its users are outside this chunk). */
4430.Lbswap_mask:
4431.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
/* Increment vectors and .Lxts_magic are not referenced in this chunk; */
/* NOTE(review): document them next to their users. */
4432.Lincrement32:
4433.long	6,6,6,0
4434.Lincrement64:
4435.long	1,0,0,0
4436.Lxts_magic:
4437.long	0x87,0,1,0
4438.Lincrement1:
4439.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
/* pshufb control for the 128/256-bit alternative key schedules: each dword */
/* selects source bytes 13,14,15,12 (top dword rotated by one byte). */
4440.Lkey_rotate:
4441.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
/* pshufb control for the 192-bit alternative key schedule: each dword */
/* selects source bytes 5,6,7,4 (dword 1 rotated by one byte). */
4442.Lkey_rotate192:
4443.long	0x04070605,0x04070605,0x04070605,0x04070605
/* Initial round constant, replicated in all four dwords. */
4444.Lkey_rcon1:
4445.long	1,1,1,1
/* Round constant 0x1b, loaded by the 128-bit alternative path after its */
/* eight-iteration loop. */
4446.Lkey_rcon1b:
4447.long	0x1b,0x1b,0x1b,0x1b
4448
/* NUL-terminated ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
4449.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
4450.align	64
4451#endif
4452