1/*	$NetBSD: rijndael-alg-fst.c,v 1.4 2006/09/09 16:22:36 manu Exp $	*/
2
3/*	$KAME: rijndael-alg-fst.c,v 1.1.1.1 2001/08/08 09:56:23 sakane Exp $	*/
4
5/*
6 * rijndael-alg-fst.c   v2.3   April '2000
7 *
8 * Optimised ANSI C code
9 *
10 * authors: v1.0: Antoon Bosselaers
11 *          v2.0: Vincent Rijmen
12 *          v2.3: Paulo Barreto
13 *
14 * This code is placed in the public domain.
15 */
16
17#include "config.h"
18
19#include <sys/cdefs.h>
20#include <sys/types.h>
21#ifdef _KERNEL
22#include <sys/systm.h>
23#else
24#include <string.h>
25#endif
26#include <crypto/rijndael/rijndael-alg-fst.h>
27#include <crypto/rijndael/rijndael_local.h>
28
29#include <crypto/rijndael/boxes-fst.dat>
30
31#include <err.h>
/*
 * Compatibility macros: bcopy() and bzero() are expressed with memcpy()
 * and memset() (note the swapped source/destination order of bcopy),
 * and panic() reports its message through err(3) with exit status 1.
 */
#define bcopy(a, b, c) memcpy((b), (a), (c))
#define bzero(a, b) memset((a), 0, (b))
/* The message is passed as an argument so that it is never used as a format. */
#define panic(a) err(1, "%s", (a))
35
36int rijndaelKeySched(word8 k[MAXKC][4], word8 W[MAXROUNDS+1][4][4], int ROUNDS) {
37	/* Calculate the necessary round keys
38	 * The number of calculations depends on keyBits and blockBits
39	 */
40	int j, r, t, rconpointer = 0;
41	union {
42		word8	x8[MAXKC][4];
43		word32	x32[MAXKC];
44	} xtk;
45#define	tk	xtk.x8
46	int KC = ROUNDS - 6;
47
48	for (j = KC-1; j >= 0; j--) {
49		*((word32*)tk[j]) = *((word32*)k[j]);
50	}
51	r = 0;
52	t = 0;
53	/* copy values into round key array */
54	for (j = 0; (j < KC) && (r < ROUNDS + 1); ) {
55		for (; (j < KC) && (t < 4); j++, t++) {
56			*((word32*)W[r][t]) = *((word32*)tk[j]);
57		}
58		if (t == 4) {
59			r++;
60			t = 0;
61		}
62	}
63
64	while (r < ROUNDS + 1) { /* while not enough round key material calculated */
65		/* calculate new values */
66		tk[0][0] ^= S[tk[KC-1][1]];
67		tk[0][1] ^= S[tk[KC-1][2]];
68		tk[0][2] ^= S[tk[KC-1][3]];
69		tk[0][3] ^= S[tk[KC-1][0]];
70		tk[0][0] ^= rcon[rconpointer++];
71
72		if (KC != 8) {
73			for (j = 1; j < KC; j++) {
74				*((word32*)tk[j]) ^= *((word32*)tk[j-1]);
75			}
76		} else {
77			for (j = 1; j < KC/2; j++) {
78				*((word32*)tk[j]) ^= *((word32*)tk[j-1]);
79			}
80			tk[KC/2][0] ^= S[tk[KC/2 - 1][0]];
81			tk[KC/2][1] ^= S[tk[KC/2 - 1][1]];
82			tk[KC/2][2] ^= S[tk[KC/2 - 1][2]];
83			tk[KC/2][3] ^= S[tk[KC/2 - 1][3]];
84			for (j = KC/2 + 1; j < KC; j++) {
85				*((word32*)tk[j]) ^= *((word32*)tk[j-1]);
86			}
87		}
88		/* copy values into round key array */
89		for (j = 0; (j < KC) && (r < ROUNDS + 1); ) {
90			for (; (j < KC) && (t < 4); j++, t++) {
91				*((word32*)W[r][t]) = *((word32*)tk[j]);
92			}
93			if (t == 4) {
94				r++;
95				t = 0;
96			}
97		}
98	}
99	return 0;
100#undef tk
101}
102
103int rijndaelKeyEncToDec(word8 W[MAXROUNDS+1][4][4], int ROUNDS) {
104	int r;
105	word8 *w;
106
107	for (r = 1; r < ROUNDS; r++) {
108		w = W[r][0];
109		*((word32*)w) =
110			  *((const word32*)U1[w[0]])
111			^ *((const word32*)U2[w[1]])
112			^ *((const word32*)U3[w[2]])
113			^ *((const word32*)U4[w[3]]);
114
115		w = W[r][1];
116		*((word32*)w) =
117			  *((const word32*)U1[w[0]])
118			^ *((const word32*)U2[w[1]])
119			^ *((const word32*)U3[w[2]])
120			^ *((const word32*)U4[w[3]]);
121
122		w = W[r][2];
123		*((word32*)w) =
124			  *((const word32*)U1[w[0]])
125			^ *((const word32*)U2[w[1]])
126			^ *((const word32*)U3[w[2]])
127			^ *((const word32*)U4[w[3]]);
128
129		w = W[r][3];
130		*((word32*)w) =
131			  *((const word32*)U1[w[0]])
132			^ *((const word32*)U2[w[1]])
133			^ *((const word32*)U3[w[2]])
134			^ *((const word32*)U4[w[3]]);
135	}
136	return 0;
137}
138
139/**
140 * Encrypt a single block.
141 */
142int rijndaelEncrypt(word8 in[16], word8 out[16], word8 rk[MAXROUNDS+1][4][4], int ROUNDS) {
143	int r;
144	union {
145		word8	x8[16];
146		word32	x32[4];
147	} xa, xb;
148#define	a	xa.x8
149#define	b	xb.x8
150	union {
151		word8	x8[4][4];
152		word32	x32[4];
153	} xtemp;
154#define	temp	xtemp.x8
155
156    memcpy(a, in, sizeof a);
157
158    *((word32*)temp[0]) = *((word32*)(a   )) ^ *((word32*)rk[0][0]);
159    *((word32*)temp[1]) = *((word32*)(a+ 4)) ^ *((word32*)rk[0][1]);
160    *((word32*)temp[2]) = *((word32*)(a+ 8)) ^ *((word32*)rk[0][2]);
161    *((word32*)temp[3]) = *((word32*)(a+12)) ^ *((word32*)rk[0][3]);
162    *((word32*)(b    )) = *((const word32*)T1[temp[0][0]])
163					^ *((const word32*)T2[temp[1][1]])
164					^ *((const word32*)T3[temp[2][2]])
165					^ *((const word32*)T4[temp[3][3]]);
166    *((word32*)(b + 4)) = *((const word32*)T1[temp[1][0]])
167					^ *((const word32*)T2[temp[2][1]])
168					^ *((const word32*)T3[temp[3][2]])
169					^ *((const word32*)T4[temp[0][3]]);
170    *((word32*)(b + 8)) = *((const word32*)T1[temp[2][0]])
171					^ *((const word32*)T2[temp[3][1]])
172					^ *((const word32*)T3[temp[0][2]])
173					^ *((const word32*)T4[temp[1][3]]);
174    *((word32*)(b +12)) = *((const word32*)T1[temp[3][0]])
175					^ *((const word32*)T2[temp[0][1]])
176					^ *((const word32*)T3[temp[1][2]])
177					^ *((const word32*)T4[temp[2][3]]);
178	for (r = 1; r < ROUNDS-1; r++) {
179		*((word32*)temp[0]) = *((word32*)(b   )) ^ *((word32*)rk[r][0]);
180		*((word32*)temp[1]) = *((word32*)(b+ 4)) ^ *((word32*)rk[r][1]);
181		*((word32*)temp[2]) = *((word32*)(b+ 8)) ^ *((word32*)rk[r][2]);
182		*((word32*)temp[3]) = *((word32*)(b+12)) ^ *((word32*)rk[r][3]);
183
184		*((word32*)(b    )) = *((const word32*)T1[temp[0][0]])
185					^ *((const word32*)T2[temp[1][1]])
186					^ *((const word32*)T3[temp[2][2]])
187					^ *((const word32*)T4[temp[3][3]]);
188		*((word32*)(b + 4)) = *((const word32*)T1[temp[1][0]])
189					^ *((const word32*)T2[temp[2][1]])
190					^ *((const word32*)T3[temp[3][2]])
191					^ *((const word32*)T4[temp[0][3]]);
192		*((word32*)(b + 8)) = *((const word32*)T1[temp[2][0]])
193					^ *((const word32*)T2[temp[3][1]])
194					^ *((const word32*)T3[temp[0][2]])
195					^ *((const word32*)T4[temp[1][3]]);
196		*((word32*)(b +12)) = *((const word32*)T1[temp[3][0]])
197					^ *((const word32*)T2[temp[0][1]])
198					^ *((const word32*)T3[temp[1][2]])
199					^ *((const word32*)T4[temp[2][3]]);
200	}
201	/* last round is special */
202	*((word32*)temp[0]) = *((word32*)(b   )) ^ *((word32*)rk[ROUNDS-1][0]);
203	*((word32*)temp[1]) = *((word32*)(b+ 4)) ^ *((word32*)rk[ROUNDS-1][1]);
204	*((word32*)temp[2]) = *((word32*)(b+ 8)) ^ *((word32*)rk[ROUNDS-1][2]);
205	*((word32*)temp[3]) = *((word32*)(b+12)) ^ *((word32*)rk[ROUNDS-1][3]);
206	b[ 0] = T1[temp[0][0]][1];
207	b[ 1] = T1[temp[1][1]][1];
208	b[ 2] = T1[temp[2][2]][1];
209	b[ 3] = T1[temp[3][3]][1];
210	b[ 4] = T1[temp[1][0]][1];
211	b[ 5] = T1[temp[2][1]][1];
212	b[ 6] = T1[temp[3][2]][1];
213	b[ 7] = T1[temp[0][3]][1];
214	b[ 8] = T1[temp[2][0]][1];
215	b[ 9] = T1[temp[3][1]][1];
216	b[10] = T1[temp[0][2]][1];
217	b[11] = T1[temp[1][3]][1];
218	b[12] = T1[temp[3][0]][1];
219	b[13] = T1[temp[0][1]][1];
220	b[14] = T1[temp[1][2]][1];
221	b[15] = T1[temp[2][3]][1];
222	*((word32*)(b   )) ^= *((word32*)rk[ROUNDS][0]);
223	*((word32*)(b+ 4)) ^= *((word32*)rk[ROUNDS][1]);
224	*((word32*)(b+ 8)) ^= *((word32*)rk[ROUNDS][2]);
225	*((word32*)(b+12)) ^= *((word32*)rk[ROUNDS][3]);
226
227	memcpy(out, b, sizeof b /* XXX out */);
228
229	return 0;
230#undef a
231#undef b
232#undef temp
233}
234
235#ifdef INTERMEDIATE_VALUE_KAT
236/**
237 * Encrypt only a certain number of rounds.
238 * Only used in the Intermediate Value Known Answer Test.
239 */
240int rijndaelEncryptRound(word8 a[4][4], word8 rk[MAXROUNDS+1][4][4], int ROUNDS, int rounds) {
241	int r;
242	word8 temp[4][4];
243
244	/* make number of rounds sane */
245	if (rounds > ROUNDS) {
246		rounds = ROUNDS;
247	}
248
249	*((word32*)a[0]) = *((word32*)a[0]) ^ *((word32*)rk[0][0]);
250	*((word32*)a[1]) = *((word32*)a[1]) ^ *((word32*)rk[0][1]);
251	*((word32*)a[2]) = *((word32*)a[2]) ^ *((word32*)rk[0][2]);
252	*((word32*)a[3]) = *((word32*)a[3]) ^ *((word32*)rk[0][3]);
253
254	for (r = 1; (r <= rounds) && (r < ROUNDS); r++) {
255		*((word32*)temp[0]) = *((const word32*)T1[a[0][0]])
256					   ^ *((const word32*)T2[a[1][1]])
257					   ^ *((const word32*)T3[a[2][2]])
258					   ^ *((const word32*)T4[a[3][3]]);
259		*((word32*)temp[1]) = *((const word32*)T1[a[1][0]])
260					   ^ *((const word32*)T2[a[2][1]])
261					   ^ *((const word32*)T3[a[3][2]])
262					   ^ *((const word32*)T4[a[0][3]]);
263		*((word32*)temp[2]) = *((const word32*)T1[a[2][0]])
264					   ^ *((const word32*)T2[a[3][1]])
265					   ^ *((const word32*)T3[a[0][2]])
266					   ^ *((const word32*)T4[a[1][3]]);
267		*((word32*)temp[3]) = *((const word32*)T1[a[3][0]])
268					   ^ *((const word32*)T2[a[0][1]])
269					   ^ *((const word32*)T3[a[1][2]])
270					   ^ *((const word32*)T4[a[2][3]]);
271		*((word32*)a[0]) = *((word32*)temp[0]) ^ *((word32*)rk[r][0]);
272		*((word32*)a[1]) = *((word32*)temp[1]) ^ *((word32*)rk[r][1]);
273		*((word32*)a[2]) = *((word32*)temp[2]) ^ *((word32*)rk[r][2]);
274		*((word32*)a[3]) = *((word32*)temp[3]) ^ *((word32*)rk[r][3]);
275	}
276	if (rounds == ROUNDS) {
277	   	/* last round is special */
278	   	temp[0][0] = T1[a[0][0]][1];
279	   	temp[0][1] = T1[a[1][1]][1];
280	   	temp[0][2] = T1[a[2][2]][1];
281	   	temp[0][3] = T1[a[3][3]][1];
282	   	temp[1][0] = T1[a[1][0]][1];
283	   	temp[1][1] = T1[a[2][1]][1];
284	   	temp[1][2] = T1[a[3][2]][1];
285	   	temp[1][3] = T1[a[0][3]][1];
286	   	temp[2][0] = T1[a[2][0]][1];
287	   	temp[2][1] = T1[a[3][1]][1];
288	   	temp[2][2] = T1[a[0][2]][1];
289	   	temp[2][3] = T1[a[1][3]][1];
290	   	temp[3][0] = T1[a[3][0]][1];
291	   	temp[3][1] = T1[a[0][1]][1];
292	   	temp[3][2] = T1[a[1][2]][1];
293	   	temp[3][3] = T1[a[2][3]][1];
294		*((word32*)a[0]) = *((word32*)temp[0]) ^ *((word32*)rk[ROUNDS][0]);
295		*((word32*)a[1]) = *((word32*)temp[1]) ^ *((word32*)rk[ROUNDS][1]);
296		*((word32*)a[2]) = *((word32*)temp[2]) ^ *((word32*)rk[ROUNDS][2]);
297		*((word32*)a[3]) = *((word32*)temp[3]) ^ *((word32*)rk[ROUNDS][3]);
298	}
299
300	return 0;
301}
302#endif /* INTERMEDIATE_VALUE_KAT */
303
304/**
305 * Decrypt a single block.
306 */
307int rijndaelDecrypt(word8 in[16], word8 out[16], word8 rk[MAXROUNDS+1][4][4], int ROUNDS) {
308	int r;
309	union {
310		word8	x8[16];
311		word32	x32[4];
312	} xa, xb;
313#define	a	xa.x8
314#define	b	xb.x8
315	union {
316		word8	x8[4][4];
317		word32	x32[4];
318	} xtemp;
319#define	temp	xtemp.x8
320
321    memcpy(a, in, sizeof a);
322
323    *((word32*)temp[0]) = *((word32*)(a   )) ^ *((word32*)rk[ROUNDS][0]);
324    *((word32*)temp[1]) = *((word32*)(a+ 4)) ^ *((word32*)rk[ROUNDS][1]);
325    *((word32*)temp[2]) = *((word32*)(a+ 8)) ^ *((word32*)rk[ROUNDS][2]);
326    *((word32*)temp[3]) = *((word32*)(a+12)) ^ *((word32*)rk[ROUNDS][3]);
327
328    *((word32*)(b   )) = *((const word32*)T5[temp[0][0]])
329           ^ *((const word32*)T6[temp[3][1]])
330           ^ *((const word32*)T7[temp[2][2]])
331           ^ *((const word32*)T8[temp[1][3]]);
332	*((word32*)(b+ 4)) = *((const word32*)T5[temp[1][0]])
333           ^ *((const word32*)T6[temp[0][1]])
334           ^ *((const word32*)T7[temp[3][2]])
335           ^ *((const word32*)T8[temp[2][3]]);
336	*((word32*)(b+ 8)) = *((const word32*)T5[temp[2][0]])
337           ^ *((const word32*)T6[temp[1][1]])
338           ^ *((const word32*)T7[temp[0][2]])
339           ^ *((const word32*)T8[temp[3][3]]);
340	*((word32*)(b+12)) = *((const word32*)T5[temp[3][0]])
341           ^ *((const word32*)T6[temp[2][1]])
342           ^ *((const word32*)T7[temp[1][2]])
343           ^ *((const word32*)T8[temp[0][3]]);
344	for (r = ROUNDS-1; r > 1; r--) {
345		*((word32*)temp[0]) = *((word32*)(b   )) ^ *((word32*)rk[r][0]);
346		*((word32*)temp[1]) = *((word32*)(b+ 4)) ^ *((word32*)rk[r][1]);
347		*((word32*)temp[2]) = *((word32*)(b+ 8)) ^ *((word32*)rk[r][2]);
348		*((word32*)temp[3]) = *((word32*)(b+12)) ^ *((word32*)rk[r][3]);
349		*((word32*)(b   )) = *((const word32*)T5[temp[0][0]])
350		   ^ *((const word32*)T6[temp[3][1]])
351		   ^ *((const word32*)T7[temp[2][2]])
352		   ^ *((const word32*)T8[temp[1][3]]);
353		*((word32*)(b+ 4)) = *((const word32*)T5[temp[1][0]])
354		   ^ *((const word32*)T6[temp[0][1]])
355		   ^ *((const word32*)T7[temp[3][2]])
356		   ^ *((const word32*)T8[temp[2][3]]);
357		*((word32*)(b+ 8)) = *((const word32*)T5[temp[2][0]])
358		   ^ *((const word32*)T6[temp[1][1]])
359		   ^ *((const word32*)T7[temp[0][2]])
360		   ^ *((const word32*)T8[temp[3][3]]);
361		*((word32*)(b+12)) = *((const word32*)T5[temp[3][0]])
362		   ^ *((const word32*)T6[temp[2][1]])
363		   ^ *((const word32*)T7[temp[1][2]])
364		   ^ *((const word32*)T8[temp[0][3]]);
365	}
366	/* last round is special */
367	*((word32*)temp[0]) = *((word32*)(b   )) ^ *((word32*)rk[1][0]);
368	*((word32*)temp[1]) = *((word32*)(b+ 4)) ^ *((word32*)rk[1][1]);
369	*((word32*)temp[2]) = *((word32*)(b+ 8)) ^ *((word32*)rk[1][2]);
370	*((word32*)temp[3]) = *((word32*)(b+12)) ^ *((word32*)rk[1][3]);
371	b[ 0] = S5[temp[0][0]];
372	b[ 1] = S5[temp[3][1]];
373	b[ 2] = S5[temp[2][2]];
374	b[ 3] = S5[temp[1][3]];
375	b[ 4] = S5[temp[1][0]];
376	b[ 5] = S5[temp[0][1]];
377	b[ 6] = S5[temp[3][2]];
378	b[ 7] = S5[temp[2][3]];
379	b[ 8] = S5[temp[2][0]];
380	b[ 9] = S5[temp[1][1]];
381	b[10] = S5[temp[0][2]];
382	b[11] = S5[temp[3][3]];
383	b[12] = S5[temp[3][0]];
384	b[13] = S5[temp[2][1]];
385	b[14] = S5[temp[1][2]];
386	b[15] = S5[temp[0][3]];
387	*((word32*)(b   )) ^= *((word32*)rk[0][0]);
388	*((word32*)(b+ 4)) ^= *((word32*)rk[0][1]);
389	*((word32*)(b+ 8)) ^= *((word32*)rk[0][2]);
390	*((word32*)(b+12)) ^= *((word32*)rk[0][3]);
391
392	memcpy(out, b, sizeof b /* XXX out */);
393
394	return 0;
395#undef a
396#undef b
397#undef temp
398}
399
400
401#ifdef INTERMEDIATE_VALUE_KAT
402/**
403 * Decrypt only a certain number of rounds.
404 * Only used in the Intermediate Value Known Answer Test.
405 * Operations rearranged such that the intermediate values
406 * of decryption correspond with the intermediate values
407 * of encryption.
408 */
409int rijndaelDecryptRound(word8 a[4][4], word8 rk[MAXROUNDS+1][4][4], int ROUNDS, int rounds) {
410	int r, i;
411	word8 temp[4], shift;
412
413	/* make number of rounds sane */
414	if (rounds > ROUNDS) {
415		rounds = ROUNDS;
416	}
417    /* first round is special: */
418	*(word32 *)a[0] ^= *(word32 *)rk[ROUNDS][0];
419	*(word32 *)a[1] ^= *(word32 *)rk[ROUNDS][1];
420	*(word32 *)a[2] ^= *(word32 *)rk[ROUNDS][2];
421	*(word32 *)a[3] ^= *(word32 *)rk[ROUNDS][3];
422	for (i = 0; i < 4; i++) {
423		a[i][0] = Si[a[i][0]];
424		a[i][1] = Si[a[i][1]];
425		a[i][2] = Si[a[i][2]];
426		a[i][3] = Si[a[i][3]];
427	}
428	for (i = 1; i < 4; i++) {
429		shift = (4 - i) & 3;
430		temp[0] = a[(0 + shift) & 3][i];
431		temp[1] = a[(1 + shift) & 3][i];
432		temp[2] = a[(2 + shift) & 3][i];
433		temp[3] = a[(3 + shift) & 3][i];
434		a[0][i] = temp[0];
435		a[1][i] = temp[1];
436		a[2][i] = temp[2];
437		a[3][i] = temp[3];
438	}
439	/* ROUNDS-1 ordinary rounds */
440	for (r = ROUNDS-1; r > rounds; r--) {
441		*(word32 *)a[0] ^= *(word32 *)rk[r][0];
442		*(word32 *)a[1] ^= *(word32 *)rk[r][1];
443		*(word32 *)a[2] ^= *(word32 *)rk[r][2];
444		*(word32 *)a[3] ^= *(word32 *)rk[r][3];
445
446		*((word32*)a[0]) =
447			  *((const word32*)U1[a[0][0]])
448			^ *((const word32*)U2[a[0][1]])
449			^ *((const word32*)U3[a[0][2]])
450			^ *((const word32*)U4[a[0][3]]);
451
452		*((word32*)a[1]) =
453			  *((const word32*)U1[a[1][0]])
454			^ *((const word32*)U2[a[1][1]])
455			^ *((const word32*)U3[a[1][2]])
456			^ *((const word32*)U4[a[1][3]]);
457
458		*((word32*)a[2]) =
459			  *((const word32*)U1[a[2][0]])
460			^ *((const word32*)U2[a[2][1]])
461			^ *((const word32*)U3[a[2][2]])
462			^ *((const word32*)U4[a[2][3]]);
463
464		*((word32*)a[3]) =
465			  *((const word32*)U1[a[3][0]])
466			^ *((const word32*)U2[a[3][1]])
467			^ *((const word32*)U3[a[3][2]])
468			^ *((const word32*)U4[a[3][3]]);
469		for (i = 0; i < 4; i++) {
470			a[i][0] = Si[a[i][0]];
471			a[i][1] = Si[a[i][1]];
472			a[i][2] = Si[a[i][2]];
473			a[i][3] = Si[a[i][3]];
474		}
475		for (i = 1; i < 4; i++) {
476			shift = (4 - i) & 3;
477			temp[0] = a[(0 + shift) & 3][i];
478			temp[1] = a[(1 + shift) & 3][i];
479			temp[2] = a[(2 + shift) & 3][i];
480			temp[3] = a[(3 + shift) & 3][i];
481			a[0][i] = temp[0];
482			a[1][i] = temp[1];
483			a[2][i] = temp[2];
484			a[3][i] = temp[3];
485		}
486	}
487	if (rounds == 0) {
488		/* End with the extra key addition */
489		*(word32 *)a[0] ^= *(word32 *)rk[0][0];
490		*(word32 *)a[1] ^= *(word32 *)rk[0][1];
491		*(word32 *)a[2] ^= *(word32 *)rk[0][2];
492		*(word32 *)a[3] ^= *(word32 *)rk[0][3];
493	}
494	return 0;
495}
496#endif /* INTERMEDIATE_VALUE_KAT */
497