1// -------------------------------------------------------------------------
2// Copyright (c) 2001, Dr Brian Gladman <                 >, Worcester, UK.
3// All rights reserved.
4//
5// LICENSE TERMS
6//
7// The free distribution and use of this software in both source and binary
8// form is allowed (with or without changes) provided that:
9//
10//   1. distributions of this source code include the above copyright
//      notice, this list of conditions and the following disclaimer;
12//
13//   2. distributions in binary form include the above copyright
14//      notice, this list of conditions and the following disclaimer
//      in the documentation and/or other associated materials;
16//
17//   3. the copyright holder's name is not used to endorse products
18//      built using this software without specific written permission.
19//
20//
21// ALTERNATIVELY, provided that this notice is retained in full, this product
22// may be distributed under the terms of the GNU General Public License (GPL),
23// in which case the provisions of the GPL apply INSTEAD OF those given above.
24//
25// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
26// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
27
28// DISCLAIMER
29//
30// This software is provided 'as is' with no explicit or implied warranties
31// in respect of its properties including, but not limited to, correctness
32// and fitness for purpose.
33// -------------------------------------------------------------------------
34// Issue Date: 29/07/2002
35
36.file "aes-i586-asm.S"
37.text
38
39#include <asm/asm-offsets.h>
40
/*
 * Byte size of one lookup table: 256 entries of 32 bits each.  The column
 * macros reach the four tables of a set at table, table+tlen,
 * table+2*tlen and table+3*tlen.
 */
#define tlen 1024

/*
 * Stack offsets of the three C arguments.  The values are valid while
 * exactly one register has been pushed on top of the return address;
 * code that has pushed more must add 4 per extra push.
 */
#define tfm     8
#define out_blk 12
#define in_blk  16

/*
 * Byte offsets of the AES context fields, measured from the crypto_tfm
 * pointer.  crypto_tfm_ctx_offset is not defined in this file; it is
 * expected to come from <asm/asm-offsets.h>.
 */
#define ekey (crypto_tfm_ctx_offset + 0)	/* key schedule used by aes_enc_blk */
#define nrnd (crypto_tfm_ctx_offset + 256)	/* number of rounds */
#define dkey (crypto_tfm_ctx_offset + 260)	/* key schedule used by aes_dec_blk */
52
/*
 * Symbolic register names shared by the round macros and both entry
 * points.  r0..r3 map to the registers that have addressable low and
 * high byte halves; r4 and r5 do not, and the code in this file never
 * passes them as the index register of a column macro.
 */
#define r0  eax
#define r1  ebx
#define r2  ecx
#define r3  edx
#define r4  esi
#define r5  edi

/*
 * Byte halves of the four byte-addressable registers, spelled <reg>l and
 * <reg>h so that the names can be produced by token pasting.
 */
#define eaxl  al
#define ebxl  bl
#define ecxl  cl
#define edxl  dl

#define eaxh  ah
#define ebxh  bh
#define ecxh  ch
#define edxh  dh

/*
 * l(reg) / h(reg): low / high byte register of reg.  The extra level of
 * indirection lets an argument such as r0 be expanded to eax before it
 * is pasted with the l or h suffix.
 */
#define _l(reg) reg##l
#define l(reg) _l(reg)

#define _h(reg) reg##h
#define h(reg) _h(reg)
76
/*
 * do_col(tab, x0,x1,x2,x3, col, scr)
 *
 * Splits the 32-bit column held in col into its four bytes and uses each
 * byte as an index into one of four consecutive tables of 256 32-bit
 * words.  Each looked-up word is xored into one output register:
 *
 *   bits  0-7   of col -> tab          -> xored into x0
 *   bits  8-15  of col -> tab+tlen     -> xored into x1
 *   bits 16-23  of col -> tab+2*tlen   -> xored into x2
 *   bits 24-31  of col -> tab+3*tlen   -> xored into x3
 *
 * Parameters:
 *   tab          base address of the first table
 *   x0,x1,x2,x3  output registers (accumulated into, not initialised)
 *   col          register holding the input column; destroyed.  It must
 *                be a register for which l() and h() are defined.
 *   scr          scratch register; clobbered
 */
#define do_col(tab, x0,x1,x2,x3, col, scr)				\
	movzx   %l(col),%scr;		/* byte 0 */			\
	xor     tab(,%scr,4),%x0;					\
	movzx   %h(col),%scr;		/* byte 1 */			\
	shr     $16,%col;		/* bring bytes 2,3 down */	\
	xor     tab+tlen(,%scr,4),%x1;					\
	movzx   %l(col),%scr;		/* byte 2 */			\
	movzx   %h(col),%col;		/* byte 3; col is consumed */	\
	xor     tab+2*tlen(,%scr,4),%x2;				\
	xor     tab+3*tlen(,%col,4),%x3;
102
/*
 * do_fcol(tab, x0,x1,x2,x3, col, scr, rk)
 *
 * First column step of a forward round.  It performs the same four table
 * lookups on col as do_col, but each output register is loaded from the
 * round key before the lookup is xored in:
 *
 *   x0 = word at  0+rk, then ^= tab         [bits  0-7  of col]
 *   x1 = word at 12+rk, then ^= tab+tlen    [bits  8-15 of col]
 *   x3 = word at  4+rk, then ^= tab+3*tlen  [bits 24-31 of col]
 *   x2 = word at  8+rk, then ^= tab+2*tlen  [bits 16-23 of col]
 *
 * rk is a memory operand such as -64(%ebp); the numeric offset is written
 * directly in front of it, so the assembler adds the two displacements.
 *
 * NB1: the incoming value of x2 is copied into col just before x2 is
 *      reloaded, so on exit col holds the original x2.
 * NB2: the incoming values of x0, x1 and x3 are not used.
 * scr is clobbered.
 */
#define do_fcol(tab, x0,x1,x2,x3, col, scr, rk)			\
	mov     0 rk,%x0;						\
	movzx   %l(col),%scr;						\
	mov     12 rk,%x1;						\
	xor     tab(,%scr,4),%x0;					\
	mov     4 rk,%x3;						\
	movzx   %h(col),%scr;						\
	shr     $16,%col;						\
	xor     tab+tlen(,%scr,4),%x1;					\
	movzx   %l(col),%scr;						\
	movzx   %h(col),%col;						\
	xor     tab+3*tlen(,%col,4),%x3;				\
	mov     %x2,%col;		/* col = incoming x2 */		\
	mov     8 rk,%x2;						\
	xor     tab+2*tlen(,%scr,4),%x2;
121
/*
 * do_icol(tab, x0,x1,x2,x3, col, scr, rk)
 *
 * First column step of an inverse round.  It performs the same four table
 * lookups on col as do_col, but each output register is loaded from the
 * round key before the lookup is xored in:
 *
 *   x0 = word at  0+rk, then ^= tab         [bits  0-7  of col]
 *   x1 = word at  4+rk, then ^= tab+tlen    [bits  8-15 of col]
 *   x3 = word at 12+rk, then ^= tab+3*tlen  [bits 24-31 of col]
 *   x2 = word at  8+rk, then ^= tab+2*tlen  [bits 16-23 of col]
 *
 * rk is a memory operand such as +64(%ebp); the numeric offset is written
 * directly in front of it, so the assembler adds the two displacements.
 *
 * NB1: the incoming value of x2 is copied into col just before x2 is
 *      reloaded, so on exit col holds the original x2.
 * NB2: the incoming values of x0, x1 and x3 are not used.
 * scr is clobbered.
 */
#define do_icol(tab, x0,x1,x2,x3, col, scr, rk)			\
	mov     0 rk,%x0;						\
	movzx   %l(col),%scr;						\
	mov     4 rk,%x1;						\
	xor     tab(,%scr,4),%x0;					\
	mov     12 rk,%x3;						\
	movzx   %h(col),%scr;						\
	shr     $16,%col;						\
	xor     tab+tlen(,%scr,4),%x1;					\
	movzx   %l(col),%scr;						\
	movzx   %h(col),%col;						\
	xor     tab+3*tlen(,%col,4),%x3;				\
	mov     %x2,%col;		/* col = incoming x2 */		\
	mov     8 rk,%x2;						\
	xor     tab+2*tlen(,%scr,4),%x2;
140
141
/*
 * Spill helpers for 4-byte slots addressed from the stack pointer.
 * (The original Gladman code had conditional saves to MMX registers;
 * this version always uses the stack.)
 *
 *   save(slot, reg)     store reg into slot number slot, at 4*slot(%esp)
 *   restore(reg, slot)  load reg back from slot number slot
 *
 * Note that the argument order differs between the two macros.  The
 * caller is responsible for having reserved the slots below %esp.
 */
#define save(slot, reg)		\
	mov     %reg,4*slot(%esp)

#define restore(reg, slot)	\
	mov     4*slot(%esp),%reg
148
/*
 * Forward (encryption) round macros.
 *
 * One round reads the four previous-round columns from cin,r1,r4,r5 and
 * leaves the four new columns in cout,r1,r4,r5.  r3 is the scratch
 * register.  The previous r1 and r5 are spilled to stack slots 0 and 1
 * first, because r1 and r5 are overwritten with new column values before
 * the old ones have been consumed; the caller must therefore have 8
 * bytes reserved at (%esp).
 *
 * The "col =" notes name the previous-round column that the index
 * register cin holds when the step starts.
 *
 *   arg    memory operand addressing the 16-byte round key
 *   table  base address of the four lookup tables
 *
 * fwd_rnd1: on entry r0,r1,r4,r5   on exit r2,r1,r4,r5
 * fwd_rnd2: on entry r2,r1,r4,r5   on exit r0,r1,r4,r5
 *
 * Used in alternation, each fwd_rnd1/fwd_rnd2 pair therefore starts and
 * ends with the state in r0,r1,r4,r5.
 */
#define fwd_rnd(rkey, tab, cin, cout)					\
	save   (0,r1);							\
	save   (1,r5);							\
									\
	do_fcol(tab, cout,r5,r4,r1, cin,r3, rkey); /* col = old cin */	\
	do_col (tab, r4,r1,cout,r5, cin,r3);	   /* col = old r4 */	\
	restore(cin,0);							\
	do_col (tab, r1,cout,r5,r4, cin,r3);	   /* col = old r1 */	\
	restore(cin,1);							\
	do_col (tab, r5,r4,r1,cout, cin,r3);	   /* col = old r5 */

#define fwd_rnd1(arg, table)	fwd_rnd(arg, table, r0, r2)

#define fwd_rnd2(arg, table)	fwd_rnd(arg, table, r2, r0)
183
/*
 * Inverse (decryption) round macros.
 *
 * One round reads the four previous-round columns from cin,r1,r4,r5 and
 * leaves the four new columns in cout,r1,r4,r5.  r3 is the scratch
 * register.  The previous r1 and r5 are spilled to stack slots 0 and 1
 * first, because r1 and r5 are overwritten with new column values before
 * the old ones have been consumed; the caller must therefore have 8
 * bytes reserved at (%esp).
 *
 * The "col =" notes name the previous-round column that the index
 * register cin holds when the step starts.
 *
 *   arg    memory operand addressing the 16-byte round key
 *   table  base address of the four lookup tables
 *
 * inv_rnd1: on entry r0,r1,r4,r5   on exit r2,r1,r4,r5
 * inv_rnd2: on entry r2,r1,r4,r5   on exit r0,r1,r4,r5
 *
 * Used in alternation, each inv_rnd1/inv_rnd2 pair therefore starts and
 * ends with the state in r0,r1,r4,r5.
 */
#define inv_rnd(rkey, tab, cin, cout)					\
	save    (0,r1);							\
	save    (1,r5);							\
									\
	do_icol(tab, cout,r1,r4,r5, cin,r3, rkey); /* col = old cin */	\
	do_col (tab, r4,r5,cout,r1, cin,r3);	   /* col = old r4 */	\
	restore(cin,0);							\
	do_col (tab, r1,r4,r5,cout, cin,r3);	   /* col = old r1 */	\
	restore(cin,1);							\
	do_col (tab, r5,cout,r1,r4, cin,r3);	   /* col = old r5 */

#define inv_rnd1(arg, table)	inv_rnd(arg, table, r0, r2)

#define inv_rnd2(arg, table)	inv_rnd(arg, table, r2, r0)
218
// AES (Rijndael) Encryption Subroutine
/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
//
// Encrypts the 16-byte block at in_blk into out_blk, using the key
// schedule at offset ekey and the round count at offset nrnd of tfm.
//
// In:     all three arguments are read from the stack
// Out:    16 bytes written to out_blk; no return value
// Saved:  ebp, ebx, esi, edi are pushed on entry and popped on exit
// Clobb:  eax, ecx, edx, flags
// Stack:  16 bytes of saved registers plus 8 bytes of spill slots
//
// The round count is not validated: any value below 12 takes the
// 10-round path and any value above 12 takes the 14-round path.

.global  aes_enc_blk
.type    aes_enc_blk, @function

.extern  ft_tab
.extern  fl_tab

.align 4

aes_enc_blk:
	push    %ebp
	mov     tfm(%esp),%ebp		// ebp = tfm

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

	push    %ebx
	mov     in_blk+4(%esp),%r2	// r2 = in_blk (+4: second register pushed)
	push    %esi
	mov     nrnd(%ebp),%r3		// r3 = number of rounds
	push    %edi
#if ekey != 0
	lea     ekey(%ebp),%ebp		// ebp = key pointer
#endif

// input four columns and xor in first round key

	mov     (%r2),%r0
	mov     4(%r2),%r1
	mov     8(%r2),%r4
	mov     12(%r2),%r5
	xor     (%ebp),%r0
	xor     4(%ebp),%r1
	xor     8(%ebp),%r4
	xor     12(%ebp),%r5

	sub     $8,%esp			// space for register saves on stack
	add     $16,%ebp		// increment to next round key

// Bias ebp by 32 bytes per extra round pair so that the last ten rounds
// always use the offsets 0..+144.  lea is used because it leaves the
// flags set by cmp intact for the following je.

	cmp     $12,%r3
	jb      .Laes_enc_rounds_10	// 10 rounds for 128-bit key
	lea     32(%ebp),%ebp
	je      .Laes_enc_rounds_12	// 12 rounds for 192-bit key
	lea     32(%ebp),%ebp

	fwd_rnd1( -64(%ebp) ,ft_tab)	// 14 rounds for 256-bit key
	fwd_rnd2( -48(%ebp) ,ft_tab)
.Laes_enc_rounds_12:
	fwd_rnd1( -32(%ebp) ,ft_tab)	// 12 rounds for 192-bit key
	fwd_rnd2( -16(%ebp) ,ft_tab)
.Laes_enc_rounds_10:
	fwd_rnd1(    (%ebp) ,ft_tab)	// 10 rounds for 128-bit key
	fwd_rnd2( +16(%ebp) ,ft_tab)
	fwd_rnd1( +32(%ebp) ,ft_tab)
	fwd_rnd2( +48(%ebp) ,ft_tab)
	fwd_rnd1( +64(%ebp) ,ft_tab)
	fwd_rnd2( +80(%ebp) ,ft_tab)
	fwd_rnd1( +96(%ebp) ,ft_tab)
	fwd_rnd2(+112(%ebp) ,ft_tab)
	fwd_rnd1(+128(%ebp) ,ft_tab)
	fwd_rnd2(+144(%ebp) ,fl_tab)	// last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns rely on the register mappings

	add     $8,%esp			// drop the spill slots
	mov     out_blk+12(%esp),%ebp	// ebp = out_blk (+12: four registers pushed)
	mov     %r5,12(%ebp)
	pop     %edi
	mov     %r4,8(%ebp)
	pop     %esi
	mov     %r1,4(%ebp)
	pop     %ebx
	mov     %r0,(%ebp)
	pop     %ebp
	ret				// void: no value is returned in eax

.size    aes_enc_blk, .-aes_enc_blk
294
// AES (Rijndael) Decryption Subroutine
/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
//
// Decrypts the 16-byte block at in_blk into out_blk, using the key
// schedule at offset dkey and the round count at offset nrnd of tfm.
// The schedule is consumed from its highest round key downwards.
//
// In:     all three arguments are read from the stack
// Out:    16 bytes written to out_blk; no return value
// Saved:  ebp, ebx, esi, edi are pushed on entry and popped on exit
// Clobb:  eax, ecx, edx, flags
// Stack:  16 bytes of saved registers plus 8 bytes of spill slots
//
// The round count is not validated: any value below 12 takes the
// 10-round path and any value above 12 takes the 14-round path.

.global  aes_dec_blk
.type    aes_dec_blk, @function

.extern  it_tab
.extern  il_tab

.align 4

aes_dec_blk:
	push    %ebp
	mov     tfm(%esp),%ebp		// ebp = tfm

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

	push    %ebx
	mov     in_blk+4(%esp),%r2	// r2 = in_blk (+4: second register pushed)
	push    %esi
	mov     nrnd(%ebp),%r3		// r3 = number of rounds
	push    %edi
#if dkey != 0
	lea     dkey(%ebp),%ebp		// ebp = key pointer
#endif
	mov     %r3,%r0
	shl     $4,%r0			// r0 = 16 * number of rounds
	add     %r0,%ebp		// ebp = round key with the highest index

// input four columns and xor in first round key

	mov     (%r2),%r0
	mov     4(%r2),%r1
	mov     8(%r2),%r4
	mov     12(%r2),%r5
	xor     (%ebp),%r0
	xor     4(%ebp),%r1
	xor     8(%ebp),%r4
	xor     12(%ebp),%r5

	sub     $8,%esp			// space for register saves on stack
	sub     $16,%ebp		// step down to the next round key

// Bias ebp by -32 bytes per extra round pair so that the last ten rounds
// always use the offsets 0..-144.  lea is used because it leaves the
// flags set by cmp intact for the following je.

	cmp     $12,%r3
	jb      .Laes_dec_rounds_10	// 10 rounds for 128-bit key
	lea     -32(%ebp),%ebp
	je      .Laes_dec_rounds_12	// 12 rounds for 192-bit key
	lea     -32(%ebp),%ebp

	inv_rnd1( +64(%ebp), it_tab)	// 14 rounds for 256-bit key
	inv_rnd2( +48(%ebp), it_tab)
.Laes_dec_rounds_12:
	inv_rnd1( +32(%ebp), it_tab)	// 12 rounds for 192-bit key
	inv_rnd2( +16(%ebp), it_tab)
.Laes_dec_rounds_10:
	inv_rnd1(    (%ebp), it_tab)	// 10 rounds for 128-bit key
	inv_rnd2( -16(%ebp), it_tab)
	inv_rnd1( -32(%ebp), it_tab)
	inv_rnd2( -48(%ebp), it_tab)
	inv_rnd1( -64(%ebp), it_tab)
	inv_rnd2( -80(%ebp), it_tab)
	inv_rnd1( -96(%ebp), it_tab)
	inv_rnd2(-112(%ebp), it_tab)
	inv_rnd1(-128(%ebp), it_tab)
	inv_rnd2(-144(%ebp), il_tab)	// last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns rely on the register mappings

	add     $8,%esp			// drop the spill slots
	mov     out_blk+12(%esp),%ebp	// ebp = out_blk (+12: four registers pushed)
	mov     %r5,12(%ebp)
	pop     %edi
	mov     %r4,8(%ebp)
	pop     %esi
	mov     %r1,4(%ebp)
	pop     %ebx
	mov     %r0,(%ebp)
	pop     %ebp
	ret				// void: no value is returned in eax

.size    aes_dec_blk, .-aes_dec_blk
373