1/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
2 *
3 * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
4 *
5 * License:
6 * This code can be distributed under the terms of the GNU General Public
7 * License (GPL) Version 2 provided that the above header down to and
8 * including this sentence is retained in full.
9 */
10
11.extern aes_ft_tab
12.extern aes_it_tab
13.extern aes_fl_tab
14.extern aes_il_tab
15
16.text
17
18#include <asm/asm-offsets.h>
19
/*
 * BASE is the byte displacement that prologue() adds to its base pointer
 * (its r8 parameter) to reach the data it reads: a 32-bit value at
 * BASE(r8) and the key words at BASE+KEY+4(r8) onwards.
 * NOTE(review): crypto_tfm_ctx_offset is not defined in this file;
 * presumably it comes from <asm/asm-offsets.h> -- confirm.
 */
20#define BASE crypto_tfm_ctx_offset
21
/*
 * Register aliases.
 *
 * The macros below receive these alias names as arguments and build the
 * sub-register names by token pasting (e.g. "r2 ## H" with r2 = R1 gives
 * R1H, which then expands to %ah).  Suffix meaning for R1..R4:
 *   (none) 64-bit, E 32-bit, X 16-bit, H bits 8..15, L bits 0..7.
 *
 * Only R1..R4 (%rax, %rbx, %rcx, %rdx) define the X/H/L forms, so only
 * they can be passed where round() pastes X, H or L (its r1..r4
 * parameters).  R5..R7 define just the 64-bit and E forms, and R8..R11
 * just the 64-bit form, matching how the macros in this file use them.
 */
22#define R1	%rax
23#define R1E	%eax
24#define R1X	%ax
25#define R1H	%ah
26#define R1L	%al
27#define R2	%rbx
28#define R2E	%ebx
29#define R2X	%bx
30#define R2H	%bh
31#define R2L	%bl
32#define R3	%rcx
33#define R3E	%ecx
34#define R3X	%cx
35#define R3H	%ch
36#define R3L	%cl
37#define R4	%rdx
38#define R4E	%edx
39#define R4X	%dx
40#define R4H	%dh
41#define R4L	%dl
42#define R5	%rsi
43#define R5E	%esi
44#define R6	%rdi
45#define R6E	%edi
46#define R7	%rbp
47#define R7E	%ebp
48#define R8	%r8
49#define R9	%r9
50#define R10	%r10
51#define R11	%r11
52
/*
 * prologue(FUNC,KEY,B128,B192,r1..r11)
 *
 * Emits the global, 8-byte aligned function label FUNC and the code that
 * runs before the first round.  All register parameters are alias names
 * (R1, R2, ...), because "## E" is pasted onto several of them.
 *
 *   FUNC        symbol to define (.global, .type @function)
 *   KEY         extra byte offset added to BASE when forming the key pointer
 *   B128, B192  branch targets, see step 6
 *   r1 -> r2    64-bit copy (step 1); r1 is then reused as a data register
 *   r3 -> r4    64-bit copy (step 1)
 *   r5,r1,r6,r7 receive the four 32-bit words loaded from (r7)..12(r7);
 *               r7 is both the source pointer and the last destination,
 *               so it is loaded last
 *   r8          base pointer for the BASE-relative accesses
 *   r9          key pointer, set to r8 + BASE + KEY + 52
 *   r10 -> r11  64-bit copy (step 3); r10 ## E then holds the value read
 *               from BASE(r8)
 *
 * Sequence:
 *   1. copy r1 to r2 and r3 to r4
 *   2. r9 = r8 + BASE + KEY + 52
 *   3. copy r10 to r11
 *   4. load the four dwords at (r7), 4(r7), 8(r7), 12(r7)
 *   5. load the dword at BASE(r8), then xor the four loaded words with
 *      the dwords at -48(r9)..-36(r9), i.e. BASE+KEY+4 .. BASE+KEY+16
 *      relative to r8
 *   6. compare the BASE(r8) value with 24:
 *        below 24 -> jump to B128 with r9 unchanged
 *        equal    -> r9 += 32, jump to B192
 *        above    -> r9 += 64, fall through
 *      The je after the first leaq still tests the cmpl result because
 *      leaq does not modify the flags.
 *
 * NOTE(review): the value at BASE(r8) is presumably the key length in
 * bytes (16/24/32), and the xor in step 5 presumably applies the first
 * round key -- confirm against the context layout used by the key setup
 * code.
 */
53#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
54	.global	FUNC;			\
55	.type	FUNC,@function;		\
56	.align	8;			\
57FUNC:	movq	r1,r2;			\
58	movq	r3,r4;			\
59	leaq	BASE+KEY+52(r8),r9;	\
60	movq	r10,r11;		\
61	movl	(r7),r5 ## E;		\
62	movl	4(r7),r1 ## E;		\
63	movl	8(r7),r6 ## E;		\
64	movl	12(r7),r7 ## E;		\
65	movl	BASE(r8),r10 ## E;	\
66	xorl	-48(r9),r5 ## E;	\
67	xorl	-44(r9),r1 ## E;	\
68	xorl	-40(r9),r6 ## E;	\
69	xorl	-36(r9),r7 ## E;	\
70	cmpl	$24,r10 ## E;		\
71	jb	B128;			\
72	leaq	32(r9),r9;		\
73	je	B192;			\
74	leaq	32(r9),r9;
75
/*
 * epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9)
 *
 * Function tail: copies r1 to r2 and r3 to r4 (64-bit moves), stores the
 * 32-bit forms of r5, r6, r7, r8 to (r9), 4(r9), 8(r9), 12(r9) in that
 * order, then returns with ret.
 *
 * r5..r8 must be alias names that have an E form, since "## E" is pasted
 * onto them; r9 is used as a 64-bit pointer.
 */
76#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
77	movq	r1,r2;			\
78	movq	r3,r4;			\
79	movl	r5 ## E,(r9);		\
80	movl	r6 ## E,4(r9);		\
81	movl	r7 ## E,8(r9);		\
82	movl	r8 ## E,12(r9);		\
83	ret;
84
/*
 * round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd)
 *
 * One table-driven round over four 32-bit words.
 *
 *   TAB      base symbol of the lookup table.  It is addressed as four
 *            sub-tables T0..T3 at TAB, TAB+1024, TAB+2048, TAB+3072,
 *            each indexed by a zero-extended byte scaled by 4.
 *   OFFSET   byte displacement from r8 of the four key dwords to mix in
 *   r1..r4   input words; must be aliases with X, H and L forms (R1..R4)
 *   r5,r6    receive two of the output words
 *   r7       scratch
 *   r8       key pointer (read only)
 *   ra..rd   registers that get the key dwords at OFFSET, OFFSET+4,
 *            OFFSET+8 and OFFSET+12 xored in
 *
 * Writing x.n for byte n of input word x (byte 0 = least significant),
 * the values produced by the table lookups are:
 *
 *   r5 = T0[r1.0] ^ T1[r2.1] ^ T2[r3.2] ^ T3[r4.3]
 *   r6 = T0[r2.0] ^ T1[r3.1] ^ T2[r4.2] ^ T3[r1.3]
 *   r3 = T0[r3.0] ^ T1[r4.1] ^ T2[r1.2] ^ T3[r2.3]
 *   r4 = T0[r4.0] ^ T1[r1.1] ^ T2[r2.2] ^ T3[r3.3]
 *
 * so the output words are left in (r5, r6, r3, r4).  r1, r2 and r7 end
 * up holding leftover byte values, and the flags are modified.
 *
 * How the bytes are reached: only bits 0..7 and 8..15 of a register can
 * be read with movzbl (the L and H forms).  Once the low two bytes of r2
 * have been consumed, movw copies the low half of r4 into r2 and roll $16
 * swaps the halves, so r2 carries its own upper bytes in L/H now and
 * r4's lower bytes for later (exposed by shrl $16).  r1 and r3 are
 * paired the same way.  r3 and r4 are then free to be reused as output
 * accumulators.
 *
 * Placement of the key xors: ra and rb are xored after r5 and r6 have
 * received their first movl, and rc and rd after r3 and r4 have received
 * theirs.  ra/rb therefore have to name r5/r6 (in either order) and
 * rc/rd have to name r3/r4 (in either order).  They are separate
 * parameters so that callers which permute r1..r6 can keep each key
 * dword tied to a fixed physical register.
 *
 * The statement order is interleaved on purpose; keep it as is.
 */
85#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
86	movzbl	r2 ## H,r5 ## E;	\
87	movzbl	r2 ## L,r6 ## E;	\
88	movl	TAB+1024(,r5,4),r5 ## E;\
89	movw	r4 ## X,r2 ## X;	\
90	movl	TAB(,r6,4),r6 ## E;	\
91	roll	$16,r2 ## E;		\
92	shrl	$16,r4 ## E;		\
93	movzbl	r4 ## H,r7 ## E;	\
94	movzbl	r4 ## L,r4 ## E;	\
95	xorl	OFFSET(r8),ra ## E;	\
96	xorl	OFFSET+4(r8),rb ## E;	\
97	xorl	TAB+3072(,r7,4),r5 ## E;\
98	xorl	TAB+2048(,r4,4),r6 ## E;\
99	movzbl	r1 ## L,r7 ## E;	\
100	movzbl	r1 ## H,r4 ## E;	\
101	movl	TAB+1024(,r4,4),r4 ## E;\
102	movw	r3 ## X,r1 ## X;	\
103	roll	$16,r1 ## E;		\
104	shrl	$16,r3 ## E;		\
105	xorl	TAB(,r7,4),r5 ## E;	\
106	movzbl	r3 ## H,r7 ## E;	\
107	movzbl	r3 ## L,r3 ## E;	\
108	xorl	TAB+3072(,r7,4),r4 ## E;\
109	xorl	TAB+2048(,r3,4),r5 ## E;\
110	movzbl	r1 ## H,r7 ## E;	\
111	movzbl	r1 ## L,r3 ## E;	\
112	shrl	$16,r1 ## E;		\
113	xorl	TAB+3072(,r7,4),r6 ## E;\
114	movl	TAB+2048(,r3,4),r3 ## E;\
115	movzbl	r1 ## H,r7 ## E;	\
116	movzbl	r1 ## L,r1 ## E;	\
117	xorl	TAB+1024(,r7,4),r6 ## E;\
118	xorl	TAB(,r1,4),r3 ## E;	\
119	movzbl	r2 ## H,r1 ## E;	\
120	movzbl	r2 ## L,r7 ## E;	\
121	shrl	$16,r2 ## E;		\
122	xorl	TAB+3072(,r1,4),r3 ## E;\
123	xorl	TAB+2048(,r7,4),r4 ## E;\
124	movzbl	r2 ## H,r1 ## E;	\
125	movzbl	r2 ## L,r2 ## E;	\
126	xorl	OFFSET+8(r8),rc ## E;	\
127	xorl	OFFSET+12(r8),rd ## E;	\
128	xorl	TAB+1024(,r1,4),r3 ## E;\
129	xorl	TAB(,r2,4),r4 ## E;
130
/*
 * move_regs(r1,r2,r3,r4)
 *
 * Copies the 32-bit value of r3 into r1 and of r4 into r2 (AT&T operand
 * order: source first).  All four arguments must be aliases with an E
 * form.  Writing the 32-bit destination also clears bits 32..63 of the
 * corresponding 64-bit register.
 */
131#define move_regs(r1,r2,r3,r4) \
132	movl	r3 ## E,r1 ## E;	\
133	movl	r4 ## E,r2 ## E;
134
/*
 * entry(FUNC,KEY,B128,B192)
 *
 * prologue() with this file's fixed register assignment.  Expanded
 * through the R* aliases it:
 *   - copies %rbx to %r8 and %rbp to %r9
 *   - sets %r10 = %rdi + BASE + KEY + 52 (advanced by 32 or 64 before
 *     reaching B192 or the fall-through path)
 *   - copies %rsi to %r11
 *   - loads the dwords at (%rdx), 4(%rdx), 8(%rdx), 12(%rdx) into
 *     %eax, %ebx, %ecx, %edx and xors the first four key dwords in
 *   - loads the dword at BASE(%rdi) into %esi and branches on it
 *
 * With the (tfm, out, in) prototypes quoted in this file and the
 * System V AMD64 argument registers, %rdi/%rsi/%rdx are tfm/out/in.
 * The copies are needed because the round macros in this file overwrite
 * %rbx, %rbp, %rsi and %rdi.
 */
135#define entry(FUNC,KEY,B128,B192) \
136	prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
137
/*
 * return: epilogue() with this file's fixed register assignment.
 * Copies %r8 back to %rbx and %r9 back to %rbp, stores %esi, %edi, %ecx
 * and %edx to (%r11), 4(%r11), 8(%r11) and 12(%r11), then executes ret.
 * Note that this redefines the token "return" for the preprocessor from
 * this line onwards.
 */
138#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
139
/*
 * encrypt_round(TAB,OFFSET)
 *
 * One round() in the encryption register layout, followed by a copy that
 * restores the layout for the next round.
 *
 * In:   words in %eax, %ebx, %ecx, %edx; key pointer in %r10
 * Out:  round() leaves the result in %esi, %edi, %ecx, %edx (key dwords
 *       at OFFSET, +4, +8, +12 xored into them in that order);
 *       move_regs then copies %esi to %eax and %edi to %ebx, so the
 *       words are again in %eax, %ebx, %ecx, %edx.
 * Scratch: %ebp.
 */
140#define encrypt_round(TAB,OFFSET) \
141	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
142	move_regs(R1,R2,R5,R6)
143
/*
 * encrypt_final(TAB,OFFSET)
 *
 * Same round() invocation as encrypt_round but without the trailing
 * move_regs, so the result stays in %esi, %edi, %ecx, %edx -- the
 * registers that the "return" macro stores to the output buffer.
 */
144#define encrypt_final(TAB,OFFSET) \
145	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
146
/*
 * decrypt_round(TAB,OFFSET)
 *
 * One round() in the decryption register layout, followed by the same
 * move_regs as in encrypt_round.
 *
 * Compared with encrypt_round the pairs R1/R2, R3/R4 and R5/R6 are
 * swapped in round()'s r1..r6 positions, while ra..rd are unchanged.
 * The effect is that each output word takes its bytes 1 and 3 from the
 * input words in the opposite rotation order, and the key dwords at
 * OFFSET, +4, +8, +12 are still xored into %esi, %edi, %ecx, %edx.
 *
 * In:   words in %eax, %ebx, %ecx, %edx; key pointer in %r10
 * Out:  words in %eax, %ebx, %ecx, %edx (after move_regs)
 * Scratch: %ebp.
 */
147#define decrypt_round(TAB,OFFSET) \
148	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
149	move_regs(R1,R2,R5,R6)
150
/*
 * decrypt_final(TAB,OFFSET)
 *
 * Same round() invocation as decrypt_round but without the trailing
 * move_regs, so the result stays in %esi, %edi, %ecx, %edx -- the
 * registers that the "return" macro stores to the output buffer.
 */
151#define decrypt_final(TAB,OFFSET) \
152	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
153
154/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
155
/*
 * Body of aes_enc_blk (KEY offset 0).
 *
 * entry() loads the input block, xors in the first four key dwords and
 * picks the starting point from the 32-bit value at BASE(tfm):
 *   above 24 : falls through here, %r10 advanced by 64 -> 14 rounds
 *   equal 24 : jumps to enc192,    %r10 advanced by 32 -> 12 rounds
 *   below 24 : jumps to enc128,    %r10 not advanced   -> 10 rounds
 * Because %r10 is advanced to match, the first round executed always
 * reads its key dwords at tfm + BASE + KEY + 20, and each following
 * round reads the next 16 bytes.
 *
 * All rounds except the last use aes_ft_tab; the last uses aes_fl_tab
 * and leaves the result where "return" stores it from.
 * NOTE(review): both tables are external; presumably the regular and
 * last-round forward tables -- confirm against their definition.
 */
156	entry(aes_enc_blk,0,enc128,enc192)
157	encrypt_round(aes_ft_tab,-96)
158	encrypt_round(aes_ft_tab,-80)
159enc192:	encrypt_round(aes_ft_tab,-64)
160	encrypt_round(aes_ft_tab,-48)
161enc128:	encrypt_round(aes_ft_tab,-32)
162	encrypt_round(aes_ft_tab,-16)
163	encrypt_round(aes_ft_tab,  0)
164	encrypt_round(aes_ft_tab, 16)
165	encrypt_round(aes_ft_tab, 32)
166	encrypt_round(aes_ft_tab, 48)
167	encrypt_round(aes_ft_tab, 64)
168	encrypt_round(aes_ft_tab, 80)
169	encrypt_round(aes_ft_tab, 96)
170	encrypt_final(aes_fl_tab,112)
171	return
172
173/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
174
/*
 * Body of aes_dec_blk.
 *
 * Same structure as the encryption body, with two differences: the key
 * dwords are read starting 240 bytes further into the area at BASE
 * (KEY = 240), and the rounds use decrypt_round/decrypt_final with
 * aes_it_tab and, for the last round, aes_il_tab.
 *
 * entry() picks the starting point from the 32-bit value at BASE(tfm):
 *   above 24 : falls through here, %r10 advanced by 64 -> 14 rounds
 *   equal 24 : jumps to dec192,    %r10 advanced by 32 -> 12 rounds
 *   below 24 : jumps to dec128,    %r10 not advanced   -> 10 rounds
 *
 * NOTE(review): 240 equals 15 x 16 bytes, which would be the size of the
 * largest encryption key schedule (14 rounds plus the initial xor), with
 * the decryption schedule stored right after it -- confirm against the
 * key setup code.
 */
175	entry(aes_dec_blk,240,dec128,dec192)
176	decrypt_round(aes_it_tab,-96)
177	decrypt_round(aes_it_tab,-80)
178dec192:	decrypt_round(aes_it_tab,-64)
179	decrypt_round(aes_it_tab,-48)
180dec128:	decrypt_round(aes_it_tab,-32)
181	decrypt_round(aes_it_tab,-16)
182	decrypt_round(aes_it_tab,  0)
183	decrypt_round(aes_it_tab, 16)
184	decrypt_round(aes_it_tab, 32)
185	decrypt_round(aes_it_tab, 48)
186	decrypt_round(aes_it_tab, 64)
187	decrypt_round(aes_it_tab, 80)
188	decrypt_round(aes_it_tab, 96)
189	decrypt_final(aes_il_tab,112)
190	return
191