• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/linux/linux-2.6/arch/x86/crypto/
1/*
2 * Implement AES algorithm in Intel AES-NI instructions.
3 *
4 * The white paper of AES-NI instructions can be downloaded from:
5 *   http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
6 *
7 * Copyright (C) 2008, Intel Corp.
8 *    Author: Huang Ying <ying.huang@intel.com>
9 *            Vinodh Gopal <vinodh.gopal@intel.com>
10 *            Kahraman Akdemir
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 */
17
18#include <linux/linkage.h>
19#include <asm/inst.h>
20
21.text
22
/* AES state registers, one per block handled in parallel. */
#define STATE1		%xmm0
#define STATE2		%xmm4
#define STATE3		%xmm5
#define STATE4		%xmm6
#define STATE		STATE1		/* single-block paths use STATE1 */

/* Input blocks loaded from INP. */
#define IN1		%xmm1
#define IN2		%xmm7
#define IN3		%xmm8
#define IN4		%xmm9
#define IN		IN1		/* single-block paths use IN1 */

/* Remaining vector registers. */
#define KEY		%xmm2		/* round key currently being applied */
#define IV		%xmm3
#define BSWAP_MASK	%xmm10
#define CTR		%xmm11
#define INC		%xmm12

/* Integer registers: function arguments first, then scratch. */
#define KEYP		%rdi		/* 1st argument */
#define OUTP		%rsi		/* 2nd argument */
#define INP		%rdx		/* 3rd argument */
#define LEN		%rcx		/* 4th argument */
#define IVP		%r8		/* 5th argument */
#define KLEN		%r9d		/* key length read from the context */
#define T1		%r10
#define TKEYP		T1		/* walking round-key pointer */
#define T2		%r11
#define TCTR_LOW	T2		/* low qword of the CTR counter */
/*
 * _key_expansion_128 / _key_expansion_256a:	internal helper
 * input:
 *	%xmm0:	key material to expand (updated in place)
 *	%xmm1:	AESKEYGENASSIST result supplied by the caller
 *	%xmm4:	assumed zero by the caller
 *	%rcx:	where the resulting round key is stored
 * output:
 *	%xmm0:	new round key, also written to (%rcx)
 *	%rcx:	advanced by 16
 * changed:
 *	%xmm1, %xmm4
 */
_key_expansion_128:
_key_expansion_256a:
	shufps $0x10, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0x8c, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pshufd $0xff, %xmm1, %xmm1	# replicate dword 3 into all four lanes
	pxor %xmm1, %xmm0
	movaps %xmm0, (%rcx)
	add $0x10, %rcx
	ret
61
/*
 * _key_expansion_192a:	internal helper
 * input:
 *	%xmm0, %xmm2:	key material to expand (both updated in place)
 *	%xmm1:		AESKEYGENASSIST result supplied by the caller
 *	%xmm4:		assumed zero by the caller
 *	%rcx:		where the output is stored
 * output:
 *	32 bytes written at (%rcx); %rcx advanced by 32
 * changed:
 *	%xmm1, %xmm3, %xmm4, %xmm5, %xmm6
 */
_key_expansion_192a:
	movaps %xmm2, %xmm6		# xmm6 = xmm2 as it was on entry
	movaps %xmm2, %xmm5
	pslldq $4, %xmm5

	shufps $0x10, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0x8c, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pshufd $0x55, %xmm1, %xmm1	# replicate dword 1 into all four lanes
	pxor %xmm1, %xmm0

	pshufd $0xff, %xmm0, %xmm3	# replicate dword 3 of the updated xmm0
	pxor %xmm5, %xmm2
	pxor %xmm3, %xmm2

	shufps $0x44, %xmm0, %xmm6	# low qword: old xmm2, high qword: new xmm0
	movaps %xmm6, (%rcx)
	movaps %xmm0, %xmm1
	shufps $0x4e, %xmm2, %xmm1	# low qword: high xmm0, high qword: low xmm2
	movaps %xmm1, 0x10(%rcx)
	add $0x20, %rcx
	ret
84
/*
 * _key_expansion_192b:	internal helper
 * input:
 *	%xmm0, %xmm2:	key material to expand (both updated in place)
 *	%xmm1:		AESKEYGENASSIST result supplied by the caller
 *	%xmm4:		assumed zero by the caller
 *	%rcx:		where the output is stored
 * output:
 *	updated %xmm0 written at (%rcx); %rcx advanced by 16
 * changed:
 *	%xmm1, %xmm3, %xmm4, %xmm5
 */
_key_expansion_192b:
	movaps %xmm2, %xmm5
	pslldq $4, %xmm5

	shufps $0x10, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0x8c, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pshufd $0x55, %xmm1, %xmm1	# replicate dword 1 into all four lanes
	pxor %xmm1, %xmm0
	movaps %xmm0, (%rcx)

	pshufd $0xff, %xmm0, %xmm3	# replicate dword 3 of the updated xmm0
	pxor %xmm5, %xmm2
	pxor %xmm3, %xmm2

	add $0x10, %rcx
	ret
102
/*
 * _key_expansion_256b:	internal helper
 * input:
 *	%xmm2:	key material to expand (updated in place)
 *	%xmm1:	AESKEYGENASSIST result supplied by the caller
 *	%xmm4:	assumed zero by the caller
 *	%rcx:	where the resulting round key is stored
 * output:
 *	%xmm2:	new round key, also written to (%rcx)
 *	%rcx:	advanced by 16
 * changed:
 *	%xmm1, %xmm4
 */
_key_expansion_256b:
	shufps $0x10, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	shufps $0x8c, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	pshufd $0xaa, %xmm1, %xmm1	# replicate dword 2 into all four lanes
	pxor %xmm1, %xmm2
	movaps %xmm2, (%rcx)
	add $0x10, %rcx
	ret
113
/*
 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 *                   unsigned int key_len)
 *
 * Expands in_key into the encryption round keys starting at ctx+0 and the
 * decryption round keys starting at ctx+240, and records key_len at ctx+480.
 * The key size is chosen by comparing the low byte of key_len with 24:
 * below selects the 128-bit schedule, equal the 192-bit one, above the
 * 256-bit one.  Always returns 0.
 */
ENTRY(aesni_set_key)
	movl %edx, 480(%rdi)		# record key_len in the context
	movups (%rsi), %xmm0		# first 16 bytes of the user key
	movaps %xmm0, (%rdi)		# stored unchanged as round key 0
	lea 0x10(%rdi), %rcx		# rcx = slot for the next round key
	pxor %xmm4, %xmm4		# xmm4 is assumed 0 in _key_expansion_x
	cmp $24, %dl
	jb .Lsetkey_128
	je .Lsetkey_192

	/* 256-bit key */
	movups 0x10(%rsi), %xmm2	# second 16 bytes of the user key
	movaps %xmm2, (%rcx)
	add $0x10, %rcx
	.irp round_const, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20	# rounds 1-6
	AESKEYGENASSIST \round_const %xmm2 %xmm1
	call _key_expansion_256a
	AESKEYGENASSIST \round_const %xmm0 %xmm1
	call _key_expansion_256b
	.endr
	AESKEYGENASSIST 0x40 %xmm2 %xmm1	# round 7
	call _key_expansion_256a
	jmp .Lsetkey_dec

.Lsetkey_192:
	movq 0x10(%rsi), %xmm2		# remaining 8 bytes of the user key
	AESKEYGENASSIST 0x1 %xmm2 %xmm1		# round 1
	call _key_expansion_192a
	AESKEYGENASSIST 0x2 %xmm2 %xmm1		# round 2
	call _key_expansion_192b
	AESKEYGENASSIST 0x4 %xmm2 %xmm1		# round 3
	call _key_expansion_192a
	AESKEYGENASSIST 0x8 %xmm2 %xmm1		# round 4
	call _key_expansion_192b
	AESKEYGENASSIST 0x10 %xmm2 %xmm1	# round 5
	call _key_expansion_192a
	AESKEYGENASSIST 0x20 %xmm2 %xmm1	# round 6
	call _key_expansion_192b
	AESKEYGENASSIST 0x40 %xmm2 %xmm1	# round 7
	call _key_expansion_192a
	AESKEYGENASSIST 0x80 %xmm2 %xmm1	# round 8
	call _key_expansion_192b
	jmp .Lsetkey_dec

.Lsetkey_128:
	.irp round_const, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36
	AESKEYGENASSIST \round_const %xmm0 %xmm1	# rounds 1-10
	call _key_expansion_128
	.endr

	/*
	 * Build the decryption schedule at ctx+240: the first and last
	 * encryption round keys are copied across swapped, every key in
	 * between goes through AESIMC and is stored in reverse order.
	 */
.Lsetkey_dec:
	sub $0x10, %rcx			# rcx = last encryption round key
	movaps (%rcx), %xmm1
	movaps (%rdi), %xmm0
	movaps %xmm1, 240(%rdi)		# last enc key  -> first dec key
	movaps %xmm0, 240(%rcx)		# first enc key -> last dec key
	lea 224(%rcx), %rsi		# rsi = dec slot just below the last one
	add $0x10, %rdi			# rdi = second encryption round key
.align 4
.Lsetkey_dec_loop:
	movaps (%rdi), %xmm0
	AESIMC %xmm0 %xmm1
	add $0x10, %rdi
	movaps %xmm1, (%rsi)
	sub $0x10, %rsi
	cmp %rcx, %rdi
	jb .Lsetkey_dec_loop
	xor %eax, %eax			# return 0
	ret
216
/*
 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
 *
 * Encrypts the single 16-byte block at src into dst using the key
 * schedule in ctx.  Neither buffer has to be aligned.
 */
ENTRY(aesni_enc)
	movups (INP), STATE		# fetch the block to encrypt
	movl 480(KEYP), KLEN		# key length stored in the context
	call _aesni_enc1
	movups STATE, (OUTP)		# write the result
	ret
226
/*
 * _aesni_enc1:		internal ABI
 *	Encrypt one block with the round keys found at KEYP.
 * input:
 *	KEYP:		pointer to the round keys
 *	KLEN:		key length in bytes; compared with 24 to pick the
 *			number of rounds (below: 10, equal: 12, above: 14)
 *	STATE:		block to encrypt
 * output:
 *	STATE:		encrypted block
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
_aesni_enc1:
	movaps (KEYP), KEY
	pxor KEY, STATE			# round 0: whitening
	lea 0x30(KEYP), TKEYP
	cmp $24, KLEN
	jb .Lenc1_k128
	lea 0x20(TKEYP), TKEYP		# lea leaves the cmp flags intact for je
	je .Lenc1_k192
	add $0x20, TKEYP
	.irp koff, -0x60, -0x50		# two extra rounds for 256-bit keys
	movaps \koff(TKEYP), KEY
	AESENC KEY STATE
	.endr
.align 4
.Lenc1_k192:
	.irp koff, -0x40, -0x30		# two extra rounds for 192/256-bit keys
	movaps \koff(TKEYP), KEY
	AESENC KEY STATE
	.endr
.align 4
.Lenc1_k128:
	.irp koff, -0x20, -0x10, 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60
	movaps \koff(TKEYP), KEY
	AESENC KEY STATE
	.endr
	movaps 0x70(TKEYP), KEY
	AESENCLAST KEY STATE		# last round
	ret
282
/*
 * _aesni_enc4:	internal ABI
 *	Encrypt four independent blocks with the round keys found at KEYP.
 * input:
 *	KEYP:		pointer to the round keys
 *	KLEN:		key length in bytes; compared with 24 to pick the
 *			number of rounds (below: 10, equal: 12, above: 14)
 *	STATE1:		blocks to encrypt
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		encrypted blocks
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
_aesni_enc4:
	movaps (KEYP), KEY
	pxor KEY, STATE1		# round 0: whitening
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	lea 0x30(KEYP), TKEYP
	cmp $24, KLEN
	jb .Lenc4_k128
	lea 0x20(TKEYP), TKEYP		# lea leaves the cmp flags intact for je
	je .Lenc4_k192
	add $0x20, TKEYP
	.irp koff, -0x60, -0x50		# two extra rounds for 256-bit keys
	movaps \koff(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	.endr
#.align 4
.Lenc4_k192:
	.irp koff, -0x40, -0x30		# two extra rounds for 192/256-bit keys
	movaps \koff(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	.endr
#.align 4
.Lenc4_k128:
	.irp koff, -0x20, -0x10, 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60
	movaps \koff(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	.endr
	movaps 0x70(TKEYP), KEY
	AESENCLAST KEY STATE1		# last round
	AESENCLAST KEY STATE2
	AESENCLAST KEY STATE3
	AESENCLAST KEY STATE4
	ret
389
/*
 * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
 *
 * Decrypts the single 16-byte block at src into dst using the decryption
 * round keys stored 240 bytes into ctx.  Neither buffer has to be aligned.
 */
ENTRY(aesni_dec)
	movups (INP), STATE		# fetch the block to decrypt
	mov 480(KEYP), KLEN		# key length stored in the context
	add $240, KEYP			# switch to the decryption round keys
	call _aesni_dec1
	movups STATE, (OUTP)		# write the result
	ret
400
/*
 * _aesni_dec1:		internal ABI
 *	Decrypt one block with the round keys found at KEYP.
 * input:
 *	KEYP:		pointer to the (decryption) round keys
 *	KLEN:		key length in bytes; compared with 24 to pick the
 *			number of rounds (below: 10, equal: 12, above: 14)
 *	STATE:		block to decrypt
 * output:
 *	STATE:		decrypted block
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
_aesni_dec1:
	movaps (KEYP), KEY
	pxor KEY, STATE			# round 0: whitening
	lea 0x30(KEYP), TKEYP
	cmp $24, KLEN
	jb .Ldec1_k128
	lea 0x20(TKEYP), TKEYP		# lea leaves the cmp flags intact for je
	je .Ldec1_k192
	add $0x20, TKEYP
	.irp koff, -0x60, -0x50		# two extra rounds for 256-bit keys
	movaps \koff(TKEYP), KEY
	AESDEC KEY STATE
	.endr
.align 4
.Ldec1_k192:
	.irp koff, -0x40, -0x30		# two extra rounds for 192/256-bit keys
	movaps \koff(TKEYP), KEY
	AESDEC KEY STATE
	.endr
.align 4
.Ldec1_k128:
	.irp koff, -0x20, -0x10, 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60
	movaps \koff(TKEYP), KEY
	AESDEC KEY STATE
	.endr
	movaps 0x70(TKEYP), KEY
	AESDECLAST KEY STATE		# last round
	ret
456
/*
 * _aesni_dec4:	internal ABI
 *	Decrypt four independent blocks with the round keys found at KEYP.
 * input:
 *	KEYP:		pointer to the (decryption) round keys
 *	KLEN:		key length in bytes; compared with 24 to pick the
 *			number of rounds (below: 10, equal: 12, above: 14)
 *	STATE1:		blocks to decrypt
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		decrypted blocks
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
_aesni_dec4:
	movaps (KEYP), KEY
	pxor KEY, STATE1		# round 0: whitening
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	lea 0x30(KEYP), TKEYP
	cmp $24, KLEN
	jb .Ldec4_k128
	lea 0x20(TKEYP), TKEYP		# lea leaves the cmp flags intact for je
	je .Ldec4_k192
	add $0x20, TKEYP
	.irp koff, -0x60, -0x50		# two extra rounds for 256-bit keys
	movaps \koff(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	.endr
.align 4
.Ldec4_k192:
	.irp koff, -0x40, -0x30		# two extra rounds for 192/256-bit keys
	movaps \koff(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	.endr
.align 4
.Ldec4_k128:
	.irp koff, -0x20, -0x10, 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60
	movaps \koff(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	.endr
	movaps 0x70(TKEYP), KEY
	AESDECLAST KEY STATE1		# last round
	AESDECLAST KEY STATE2
	AESDECLAST KEY STATE3
	AESDECLAST KEY STATE4
	ret
563
/*
 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len)
 *
 * ECB-encrypts every complete 16-byte block of src into dst, four blocks
 * at a time while at least 64 bytes remain and one at a time afterwards.
 * A trailing partial block (len % 16 bytes) is left untouched.
 *
 * len is unsigned, so every length test uses the unsigned conditions
 * (jb/jae).
 */
ENTRY(aesni_ecb_enc)
	cmp $16, LEN			# need at least one full block (covers len == 0)
	jb .Lecb_enc_ret
	mov 480(KEYP), KLEN		# key length stored in the context
	cmp $64, LEN
	jb .Lecb_enc_loop1
.align 4
.Lecb_enc_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_enc4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_enc_loop4		# unsigned: LEN is a size_t
	cmp $16, LEN
	jb .Lecb_enc_ret
.align 4
.Lecb_enc_loop1:
	movups (INP), STATE1
	call _aesni_enc1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_enc_loop1		# unsigned: LEN is a size_t
.Lecb_enc_ret:
	ret
606
/*
 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len);
 *
 * ECB-decrypts every complete 16-byte block of src into dst, four blocks
 * at a time while at least 64 bytes remain and one at a time afterwards.
 * A trailing partial block (len % 16 bytes) is left untouched.
 *
 * len is unsigned, so every length test uses the unsigned conditions
 * (jb/jae).
 */
ENTRY(aesni_ecb_dec)
	cmp $16, LEN			# need at least one full block (covers len == 0)
	jb .Lecb_dec_ret
	mov 480(KEYP), KLEN		# key length stored in the context
	add $240, KEYP			# switch to the decryption round keys
	cmp $64, LEN
	jb .Lecb_dec_loop1
.align 4
.Lecb_dec_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_dec4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_dec_loop4		# unsigned: LEN is a size_t
	cmp $16, LEN
	jb .Lecb_dec_ret
.align 4
.Lecb_dec_loop1:
	movups (INP), STATE1
	call _aesni_dec1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_dec_loop1		# unsigned: LEN is a size_t
.Lecb_dec_ret:
	ret
650
/*
 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-encrypts every complete 16-byte block of src into dst.  The chaining
 * value is read from iv on entry and the last ciphertext block is written
 * back to iv on exit.  When len < 16 nothing is read or written, iv
 * included.  A trailing partial block is left untouched.
 *
 * len is unsigned, so every length test uses the unsigned conditions
 * (jb/jae).
 */
ENTRY(aesni_cbc_enc)
	cmp $16, LEN
	jb .Lcbc_enc_ret
	mov 480(KEYP), KLEN		# key length stored in the context
	movups (IVP), STATE		# chaining value starts out as the iv
.align 4
.Lcbc_enc_loop:
	movups (INP), IN		# next plaintext block
	pxor IN, STATE			# xor with the previous ciphertext (or iv)
	call _aesni_enc1
	movups STATE, (OUTP)		# ciphertext out; STATE stays as chaining value
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_enc_loop		# unsigned: LEN is a size_t
	movups STATE, (IVP)		# hand the last ciphertext block back as iv
.Lcbc_enc_ret:
	ret
674
/*
 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-decrypts every complete 16-byte block of src into dst, four blocks
 * at a time while at least 64 bytes remain and one at a time afterwards.
 * The chaining value is read from iv on entry and the last ciphertext block
 * consumed is written back to iv on exit.  When len < 16 nothing is read
 * or written, iv included.  A trailing partial block is left untouched.
 *
 * len is unsigned, so every length test uses the unsigned conditions
 * (jb/jae).
 */
ENTRY(aesni_cbc_dec)
	cmp $16, LEN
	jb .Lcbc_dec_just_ret
	mov 480(KEYP), KLEN		# key length stored in the context
	add $240, KEYP			# switch to the decryption round keys
	movups (IVP), IV
	cmp $64, LEN
	jb .Lcbc_dec_loop1
.align 4
.Lcbc_dec_loop4:
	movups (INP), IN1		# keep each ciphertext block in INx:
	movaps IN1, STATE1		# it is the chaining value of the next one
	movups 0x10(INP), IN2
	movaps IN2, STATE2
	movups 0x20(INP), IN3
	movaps IN3, STATE3
	movups 0x30(INP), IN4
	movaps IN4, STATE4
	call _aesni_dec4
	pxor IV, STATE1
	pxor IN1, STATE2
	pxor IN2, STATE3
	pxor IN3, STATE4
	movaps IN4, IV			# last ciphertext block chains onwards
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lcbc_dec_loop4		# unsigned: LEN is a size_t
	cmp $16, LEN
	jb .Lcbc_dec_ret
.align 4
.Lcbc_dec_loop1:
	movups (INP), IN
	movaps IN, STATE
	call _aesni_dec1
	pxor IV, STATE
	movups STATE, (OUTP)
	movaps IN, IV			# this ciphertext block chains onwards
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_dec_loop1		# unsigned: LEN is a size_t
.Lcbc_dec_ret:
	movups IV, (IVP)		# hand the chaining value back to the caller
.Lcbc_dec_just_ret:
	ret
731
/*
 * Byte-shuffle control that reverses the order of all 16 bytes of a
 * register.  16-byte aligned because it is loaded with movaps.
 */
.align 16
.Lbswap_mask:
	.byte 15, 14, 13, 12, 11, 10, 9, 8
	.byte 7, 6, 5, 4, 3, 2, 1, 0
735
/*
 * _aesni_inc_init:	internal ABI
 *	setup registers used by _aesni_inc
 * input:
 *	IV
 * output:
 *	CTR:	== IV, in little endian
 *	TCTR_LOW: == lower qword of CTR
 *	INC:	== 1, in little endian
 *	BSWAP_MASK == endian swapping mask
 */
_aesni_inc_init:
	/* RIP-relative load: no absolute address relocation is needed */
	movaps .Lbswap_mask(%rip), BSWAP_MASK
	movaps IV, CTR
	PSHUFB_XMM BSWAP_MASK CTR	# byte-reverse the IV into CTR
	mov $1, TCTR_LOW
	MOVQ_R64_XMM TCTR_LOW INC	# INC = 1; TCTR_LOW is only a temporary here
	MOVQ_R64_XMM CTR TCTR_LOW	# TCTR_LOW = low qword of CTR
	ret
755
/*
 * _aesni_inc:		internal ABI
 *	Add 1 to the big endian counter held in IV.
 * input:
 *	IV
 *	CTR:	== IV, in little endian
 *	TCTR_LOW: == lower qword of CTR
 *	INC:	== 1, in little endian
 *	BSWAP_MASK == endian swapping mask
 * output:
 *	IV:	increased by 1
 * changed:
 *	CTR:	== output IV, in little endian
 *	TCTR_LOW: == lower qword of CTR
 */
_aesni_inc:
	add $1, TCTR_LOW		# integer shadow copy: CF set when the low qword wraps
	paddq INC, CTR			# low qword += 1 (SSE ops leave the flags alone)
	jnc .Linc_no_carry
	pslldq $8, INC			# move the 1 into the high qword
	paddq INC, CTR			# propagate the carry
	psrldq $8, INC			# restore INC == 1
.Linc_no_carry:
	movaps CTR, IV
	PSHUFB_XMM BSWAP_MASK IV	# back to big endian
	ret
782
/*
 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CTR mode: for every complete 16-byte block of src the current counter
 * block is encrypted and xored with the input, then the counter is
 * increased by one.  Four blocks are handled at a time while at least 64
 * bytes remain, one at a time afterwards.  The counter is read from iv on
 * entry and the next unused counter value is written back to iv on exit.
 * When len < 16 nothing is read or written, iv included.  A trailing
 * partial block is left untouched.
 *
 * len is unsigned, so every length test uses the unsigned conditions
 * (jb/jae).
 */
ENTRY(aesni_ctr_enc)
	cmp $16, LEN
	jb .Lctr_enc_just_ret
	mov 480(KEYP), KLEN		# key length stored in the context
	movups (IVP), IV
	call _aesni_inc_init
	cmp $64, LEN
	jb .Lctr_enc_loop1
.align 4
.Lctr_enc_loop4:
	movaps IV, STATE1		# counter block for each of the four inputs
	call _aesni_inc
	movups (INP), IN1
	movaps IV, STATE2
	call _aesni_inc
	movups 0x10(INP), IN2
	movaps IV, STATE3
	call _aesni_inc
	movups 0x20(INP), IN3
	movaps IV, STATE4
	call _aesni_inc
	movups 0x30(INP), IN4
	call _aesni_enc4
	pxor IN1, STATE1		# keystream xor input
	movups STATE1, (OUTP)
	pxor IN2, STATE2
	movups STATE2, 0x10(OUTP)
	pxor IN3, STATE3
	movups STATE3, 0x20(OUTP)
	pxor IN4, STATE4
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lctr_enc_loop4		# unsigned: LEN is a size_t
	cmp $16, LEN
	jb .Lctr_enc_ret
.align 4
.Lctr_enc_loop1:
	movaps IV, STATE
	call _aesni_inc
	movups (INP), IN
	call _aesni_enc1
	pxor IN, STATE			# keystream xor input
	movups STATE, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lctr_enc_loop1		# unsigned: LEN is a size_t
.Lctr_enc_ret:
	movups IV, (IVP)		# hand the next counter value back to the caller
.Lctr_enc_just_ret:
	ret
842