1/*
2 * Copyright (c) 2000-2001,2011-2012,2014 Apple Inc. All Rights Reserved.
3 *
4 * The contents of this file constitute Original Code as defined in and are
5 * subject to the Apple Public Source License Version 1.2 (the 'License').
6 * You may not use this file except in compliance with the License. Please obtain
7 * a copy of the License at http://www.apple.com/publicsource and read it before
8 * using this file.
9 *
10 * This Original Code and all software distributed under the License are
11 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
12 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
13 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
14 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
15 * specific language governing rights and limitations under the License.
16 */
17
18
19/* rijndael-alg-ref.c   v2.0   August '99
20 * Reference ANSI C code
21 * authors: Paulo Barreto
22 *          Vincent Rijmen
23 *
24 */
25
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29
30#include "rijndael-alg-ref.h"
31#include <cspdebugging.h>
32
33#define SC	((BC - 4) >> 1)
34
35#include "boxes-ref.h"
36
37static const word8 shifts[3][4][2] = {
38 { { 0, 0 },
39   { 1, 3 },
40   { 2, 2 },
41   { 3, 1 }
42 },
43 { { 0, 0 },
44   { 1, 5 },
45   { 2, 4 },
46   { 3, 3 }
47 },
48 { { 0, 0 },
49   { 1, 7 },
50   { 3, 5 },
51   { 4, 4 }
52 }
53};
54
55#if 	!GLADMAN_AES_128_ENABLE
56
57/* 128 bit key/word shift table in bits */
58static const word8 shifts128[4][2] = {
59 { 0,  0 },
60 { 8,  24 },
61 { 16, 16 },
62 { 24, 8 }
63};
64
65#endif	/* GLADMAN_AES_128_ENABLE */
66
67#if		!AES_MUL_BY_LOOKUP
68/*
69 * Profiling measurements showed that the mul routine is where a large propertion of
70 * the time is spent. Since the first argument to  mul is always one of six
71 * constants (2, 3, 0xe, etc.), we implement six 256x256 byte lookup tables to
72 * do the multiplies. This eliminates the need for the log/antilog tables, so
73 * it's only adding one kilobyte of const data. Throughput improvement for this
74 * mod is a factor of 3.3 for encrypt and 4.1 for decrypt in the 128-bit optimized
75 * case. Improvement for the general case (with a 256 bit key) is 1.46 for encrypt
76 * and 1.88 for decrypt. (Decrypt wins more for this enhancement because the
77 * InvMixColumn does four muls, vs. 2 muls for MixColumn). Measurements taken
78 * on a 500 MHz G4 with 1 MB of L2 cache.
79 */
80
81/*
82 * The mod 255 op in mul is really expensive...
83 *
84 * We know that b <= (254 * 2), so there are only two cases. Either return b,
85 * or return b-255.
86 *
87 * On a G4 this single optimization results in a 24% speedup for encrypt and
88 * a 25% speedup for decrypt.
89 */
90static inline word8 mod255(word32 b)
91{
92	if(b >= 255) {
93		b -= 255;
94	}
95	return b;
96}
97
98word8 mul(word8 a, word8 b) {
99   /* multiply two elements of GF(2^m)
100    * needed for MixColumn and InvMixColumn
101    */
102	if (a && b) return Alogtable[mod255(Logtable[a] + Logtable[b])];
103	else return 0;
104}
105#endif	/* !AES_MUL_BY_LOOKUP */
106
107static
108void KeyAddition(word8 a[4][MAXBC], word8 rk[4][MAXBC], word8 BC) {
109	/* Exor corresponding text input and round key input bytes
110	 */
111	int i, j;
112
113	for(i = 0; i < 4; i++)
114   		for(j = 0; j < BC; j++) a[i][j] ^= rk[i][j];
115}
116
117static
118void ShiftRow(word8 a[4][MAXBC], word8 d, word8 BC) {
119	/* Row 0 remains unchanged
120	 * The other three rows are shifted a variable amount
121	 */
122	word8 tmp[MAXBC];
123	int i, j;
124
125	for(i = 1; i < 4; i++) {
126		for(j = 0; j < BC; j++) tmp[j] = a[i][(j + shifts[SC][i][d]) % BC];
127		for(j = 0; j < BC; j++) a[i][j] = tmp[j];
128	}
129}
130
131static
132void Substitution(word8 a[4][MAXBC], const word8 box[256], word8 BC) {
133	/* Replace every byte of the input by the byte at that place
134	 * in the nonlinear S-box
135	 */
136	int i, j;
137
138	for(i = 0; i < 4; i++)
139		for(j = 0; j < BC; j++) a[i][j] = box[a[i][j]] ;
140}
141
142static
143void MixColumn(word8 a[4][MAXBC], word8 BC) {
144	/* Mix the four bytes of every column in a linear way
145	 */
146	word8 b[4][MAXBC];
147	int i, j;
148
149	for(j = 0; j < BC; j++) {
150		for(i = 0; i < 4; i++) {
151			#if		AES_MUL_BY_LOOKUP
152			b[i][j] = mulBy0x02[a[i][j]]
153				^ mulBy0x03[a[(i + 1) % 4][j]]
154				^ a[(i + 2) % 4][j]
155				^ a[(i + 3) % 4][j];
156			#else
157			b[i][j] = mul(2,a[i][j])
158				^ mul(3,a[(i + 1) % 4][j])
159				^ a[(i + 2) % 4][j]
160				^ a[(i + 3) % 4][j];
161			#endif
162		}
163	}
164	for(i = 0; i < 4; i++) {
165		for(j = 0; j < BC; j++) a[i][j] = b[i][j];
166	}
167}
168
169static
170void InvMixColumn(word8 a[4][MAXBC], word8 BC) {
171	/* Mix the four bytes of every column in a linear way
172	 * This is the opposite operation of Mixcolumn
173	 */
174	word8 b[4][MAXBC];
175	int i, j;
176
177	for(j = 0; j < BC; j++) {
178		for(i = 0; i < 4; i++) {
179			#if		AES_MUL_BY_LOOKUP
180			b[i][j] = mulBy0x0e[a[i][j]]
181				^ mulBy0x0b[a[(i + 1) % 4][j]]
182				^ mulBy0x0d[a[(i + 2) % 4][j]]
183				^ mulBy0x09[a[(i + 3) % 4][j]];
184			#else
185			b[i][j] = mul(0xe,a[i][j])
186				^ mul(0xb,a[(i + 1) % 4][j])
187				^ mul(0xd,a[(i + 2) % 4][j])
188				^ mul(0x9,a[(i + 3) % 4][j]);
189			#endif
190		}
191	}
192	for(i = 0; i < 4; i++) {
193		for(j = 0; j < BC; j++) a[i][j] = b[i][j];
194	}
195}
196
197int rijndaelKeySched (
198	word8 k[4][MAXKC],
199	int keyBits,
200	int blockBits,
201	word8 W[MAXROUNDS+1][4][MAXBC]) {
202
203	/* Calculate the necessary round keys
204	 * The number of calculations depends on keyBits and blockBits
205	 */
206	int KC, BC, ROUNDS;
207	int i, j, t, rconpointer = 0;
208	word8 tk[4][MAXKC];
209
210	switch (keyBits) {
211	case 128: KC = 4; break;
212	case 192: KC = 6; break;
213	case 256: KC = 8; break;
214	default : return (-1);
215	}
216
217	switch (blockBits) {
218	case 128: BC = 4; break;
219	case 192: BC = 6; break;
220	case 256: BC = 8; break;
221	default : return (-2);
222	}
223
224	switch (keyBits >= blockBits ? keyBits : blockBits) {
225	case 128: ROUNDS = 10; break;
226	case 192: ROUNDS = 12; break;
227	case 256: ROUNDS = 14; break;
228	default : return (-3); /* this cannot happen */
229	}
230
231
232	for(j = 0; j < KC; j++)
233		for(i = 0; i < 4; i++)
234			tk[i][j] = k[i][j];
235	t = 0;
236	/* copy values into round key array */
237	for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
238		for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];
239
240	while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */
241		/* calculate new values */
242		for(i = 0; i < 4; i++)
243			tk[i][0] ^= S[tk[(i+1)%4][KC-1]];
244		tk[0][0] ^= rcon[rconpointer++];
245
246		if (KC != 8)
247			for(j = 1; j < KC; j++)
248				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
249		else {
250			for(j = 1; j < KC/2; j++)
251				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
252			for(i = 0; i < 4; i++) tk[i][KC/2] ^= S[tk[i][KC/2 - 1]];
253			for(j = KC/2 + 1; j < KC; j++)
254				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
255	}
256	/* copy values into round key array */
257	for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
258		for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];
259	}
260
261	return 0;
262}
263
264int rijndaelEncrypt (
265	word8 a[4][MAXBC],
266	int keyBits,
267	int blockBits,
268	word8 rk[MAXROUNDS+1][4][MAXBC])
269{
270	/* Encryption of one block, general case.
271	 */
272	int r, BC, ROUNDS;
273
274	switch (blockBits) {
275	case 128: BC = 4; break;
276	case 192: BC = 6; break;
277	case 256: BC = 8; break;
278	default : return (-2);
279	}
280
281	switch (keyBits >= blockBits ? keyBits : blockBits) {
282	case 128: ROUNDS = 10; break;
283	case 192: ROUNDS = 12; break;
284	case 256: ROUNDS = 14; break;
285	default : return (-3); /* this cannot happen */
286	}
287
288	/* begin with a key addition
289	 */
290	KeyAddition(a,rk[0],BC);
291
292	/* ROUNDS-1 ordinary rounds
293	 */
294	for(r = 1; r < ROUNDS; r++) {
295		Substitution(a,S,BC);
296		ShiftRow(a,0,BC);
297		MixColumn(a,BC);
298		KeyAddition(a,rk[r],BC);
299	}
300
301	/* Last round is special: there is no MixColumn
302	 */
303	Substitution(a,S,BC);
304	ShiftRow(a,0,BC);
305	KeyAddition(a,rk[ROUNDS],BC);
306
307	return 0;
308}
309
310int rijndaelDecrypt (
311	word8 a[4][MAXBC],
312	int keyBits,
313	int blockBits,
314	word8 rk[MAXROUNDS+1][4][MAXBC])
315{
316	int r, BC, ROUNDS;
317
318	switch (blockBits) {
319	case 128: BC = 4; break;
320	case 192: BC = 6; break;
321	case 256: BC = 8; break;
322	default : return (-2);
323	}
324
325	switch (keyBits >= blockBits ? keyBits : blockBits) {
326	case 128: ROUNDS = 10; break;
327	case 192: ROUNDS = 12; break;
328	case 256: ROUNDS = 14; break;
329	default : return (-3); /* this cannot happen */
330	}
331
332	/* To decrypt: apply the inverse operations of the encrypt routine,
333	 *             in opposite order
334	 *
335	 * (KeyAddition is an involution: it 's equal to its inverse)
336	 * (the inverse of Substitution with table S is Substitution with the
337	 *  inverse table of S)
338	 * (the inverse of Shiftrow is Shiftrow over a suitable distance)
339	 */
340
341	/* First the special round:
342	 *   without InvMixColumn
343	 *   with extra KeyAddition
344	 */
345	KeyAddition(a,rk[ROUNDS],BC);
346	Substitution(a,Si,BC);
347	ShiftRow(a,1,BC);
348
349	/* ROUNDS-1 ordinary rounds
350	 */
351	for(r = ROUNDS-1; r > 0; r--) {
352		KeyAddition(a,rk[r],BC);
353		InvMixColumn(a,BC);
354		Substitution(a,Si,BC);
355		ShiftRow(a,1,BC);
356	}
357
358	/* End with the extra key addition
359	 */
360
361	KeyAddition(a,rk[0],BC);
362
363	return 0;
364}
365
366#if		!GLADMAN_AES_128_ENABLE
367
368/*
369 * All of these 128-bit-key-and-block routines require 32-bit word-aligned
370 * char array pointers.�The key schedule arrays are easy; they come from
371 * keyInstance which has a 4-byte-aligned element preceeding the key schedule.
372 * Others require manual alignment of a local variable by the caller.
373 */
374
375static inline void KeyAddition128(
376	word8 a[4][BC_128_OPT],
377	word8 rk[4][MAXBC]) {
378
379	/* these casts are endian-independent */
380	((word32 *)a)[0] ^= *((word32 *)(&rk[0]));
381	((word32 *)a)[1] ^= *((word32 *)(&rk[1]));
382	((word32 *)a)[2] ^= *((word32 *)(&rk[2]));
383	((word32 *)a)[3] ^= *((word32 *)(&rk[3]));
384}
385
386static void Substitution128(
387	word8 a[4][BC_128_OPT],
388	const word8 box[256]) {
389	/* Replace every byte of the input by the byte at that place
390	 * in the nonlinear S-box
391	 */
392	int i, j;
393
394	/* still to be optimized - larger S boxes? */
395	for(i = 0; i < 4; i++) {
396		for(j = 0; j < BC_128_OPT; j++) {
397			a[i][j] = box[a[i][j]];
398		}
399	}
400}
401
402#if	defined(__ppc__) && defined(__GNUC__)
403
404static inline void rotateWordLeft(
405	word8 *word,			// known to be word aligned
406	unsigned rotCount)		// in bits
407{
408	word32 lword = *((word32 *)word);
409	asm("rlwnm %0,%1,%2,0,31" : "=r"(lword) : "0"(lword), "r"(rotCount));
410	*((word32 *)word) = lword;
411}
412
413#else
414
415/*
416 * Insert your machine/compiler dependent code here,
417 * or just use this, which works on any platform and compiler
418 * which supports the __attribute__((aligned(4))) directive.
419 */
420static void rotateWordLeft(
421	word8 *word,			// known to be word aligned
422	unsigned rotCount)		// in bits
423{
424	word8 tmp[BC_128_OPT] __attribute__((aligned(4)));
425	unsigned bytes = rotCount / 8;
426
427	tmp[0] = word[bytes     & (BC_128_OPT-1)];
428	tmp[1] = word[(1+bytes) & (BC_128_OPT-1)];
429	tmp[2] = word[(2+bytes) & (BC_128_OPT-1)];
430	tmp[3] = word[(3+bytes) & (BC_128_OPT-1)];
431	*((word32 *)word) = *((word32 *)tmp);
432}
433#endif
434
435static inline void ShiftRow128(
436	word8 a[4][BC_128_OPT],
437	word8 d) {
438	/* Row 0 remains unchanged
439	 * The other three rows are shifted (actually rotated) a variable amount
440	 */
441	int i;
442
443	for(i = 1; i < 4; i++) {
444		rotateWordLeft(a[i], shifts128[i][d]);
445	}
446}
447
448/*
449 * The following two routines are where most of the time is spent in this
450 * module. Further optimization would have to focus here.
451 */
452static void MixColumn128(word8 a[4][BC_128_OPT]) {
453	/* Mix the four bytes of every column in a linear way
454	 */
455	word8 b[4][BC_128_OPT];
456	int i, j;
457
458	for(j = 0; j < BC_128_OPT; j++) {
459		for(i = 0; i < 4; i++) {
460			#if		AES_MUL_BY_LOOKUP
461			b[i][j] = mulBy0x02[a[i][j]]
462				^ mulBy0x03[a[(i + 1) % 4][j]]
463				^ a[(i + 2) % 4][j]
464				^ a[(i + 3) % 4][j];
465			#else
466			b[i][j] = mul(2,a[i][j])
467				^ mul(3,a[(i + 1) % 4][j])
468				^ a[(i + 2) % 4][j]
469				^ a[(i + 3) % 4][j];
470			#endif
471		}
472	}
473	memmove(a, b, 4 * BC_128_OPT);
474}
475
476static void InvMixColumn128(word8 a[4][BC_128_OPT]) {
477	/* Mix the four bytes of every column in a linear way
478	 * This is the opposite operation of Mixcolumn
479	 */
480	word8 b[4][BC_128_OPT];
481	int i, j;
482
483	for(j = 0; j < BC_128_OPT; j++) {
484		for(i = 0; i < 4; i++) {
485			#if		AES_MUL_BY_LOOKUP
486			b[i][j] = mulBy0x0e[a[i][j]]
487				^ mulBy0x0b[a[(i + 1) % 4][j]]
488				^ mulBy0x0d[a[(i + 2) % 4][j]]
489				^ mulBy0x09[a[(i + 3) % 4][j]];
490			#else
491			b[i][j] = mul(0xe,a[i][j])
492				^ mul(0xb,a[(i + 1) % 4][j])
493				^ mul(0xd,a[(i + 2) % 4][j])
494				^ mul(0x9,a[(i + 3) % 4][j]);
495			#endif
496		}
497	}
498	memmove(a, b, 4 * BC_128_OPT);
499}
500
501int rijndaelKeySched128 (
502	word8 k[4][KC_128_OPT],
503	word8 W[MAXROUNDS+1][4][MAXBC]) {
504
505	/* Calculate the necessary round keys
506	 * The number of calculations depends on keyBits and blockBits
507	 */
508	int i, j, t, rconpointer = 0;
509	word8 tk[4][KC_128_OPT];
510	unsigned numSchedRows = (ROUNDS_128_OPT + 1) * BC_128_OPT;
511
512	for(j = 0; j < KC_128_OPT; j++)
513		for(i = 0; i < 4; i++)
514			tk[i][j] = k[i][j];
515	t = 0;
516	/* copy values into round key array */
517	for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) {
518		for(i = 0; i < 4; i++) {
519			W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j];
520		}
521	}
522
523	while (t < numSchedRows) {
524		/* while not enough round key material calculated */
525		/* calculate new values */
526		for(i = 0; i < 4; i++) {
527			tk[i][0] ^= S[tk[(i+1)%4][KC_128_OPT-1]];
528		}
529		tk[0][0] ^= rcon[rconpointer++];
530
531		for(j = 1; j < KC_128_OPT; j++) {
532			for(i = 0; i < 4; i++) {
533				tk[i][j] ^= tk[i][j-1];
534			}
535		}
536
537		/* copy values into round key array */
538		for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) {
539			for(i = 0; i < 4; i++) {
540				W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j];
541			}
542		}
543	}
544
545	return 0;
546}
547
548int rijndaelEncrypt128 (
549	word8 a[4][BC_128_OPT],
550	word8 rk[MAXROUNDS+1][4][MAXBC])
551{
552	/* Encryption of one block.
553	 */
554	int r;
555
556	/* begin with a key addition
557	 */
558	KeyAddition128(a,rk[0]);
559
560	/* ROUNDS-1 ordinary rounds
561	 */
562	for(r = 1; r < ROUNDS_128_OPT; r++) {
563		Substitution128(a,S);
564		ShiftRow128(a,0);
565		MixColumn128(a);
566		KeyAddition128(a,rk[r]);
567	}
568
569	/* Last round is special: there is no MixColumn
570	 */
571	Substitution128(a,S);
572	ShiftRow128(a,0);
573	KeyAddition128(a,rk[ROUNDS_128_OPT]);
574
575	return 0;
576}
577
578int rijndaelDecrypt128 (
579	word8 a[4][BC_128_OPT],
580	word8 rk[MAXROUNDS+1][4][MAXBC])
581{
582	int r;
583
584	/* To decrypt: apply the inverse operations of the encrypt routine,
585	 *             in opposite order
586	 *
587	 * (KeyAddition is an involution: it 's equal to its inverse)
588	 * (the inverse of Substitution with table S is Substitution with the
589	 *  inverse table of S)
590	 * (the inverse of Shiftrow is Shiftrow over a suitable distance)
591	 */
592
593	/* First the special round:
594	 *   without InvMixColumn
595	 *   with extra KeyAddition
596	 */
597	KeyAddition128(a,rk[ROUNDS_128_OPT]);
598	Substitution128(a,Si);
599	ShiftRow128(a,1);
600
601	/* ROUNDS-1 ordinary rounds
602	 */
603	for(r = ROUNDS_128_OPT-1; r > 0; r--) {
604		KeyAddition128(a,rk[r]);
605		InvMixColumn128(a);
606		Substitution128(a,Si);
607		ShiftRow128(a,1);
608	}
609
610	/* End with the extra key addition
611	 */
612
613	KeyAddition128(a,rk[0]);
614
615	return 0;
616}
617
618#endif		/* !GLADMAN_AES_128_ENABLE */
619
620