1/* 2 * Copyright (c) 2000-2001,2011-2012,2014 Apple Inc. All Rights Reserved. 3 * 4 * The contents of this file constitute Original Code as defined in and are 5 * subject to the Apple Public Source License Version 1.2 (the 'License'). 6 * You may not use this file except in compliance with the License. Please obtain 7 * a copy of the License at http://www.apple.com/publicsource and read it before 8 * using this file. 9 * 10 * This Original Code and all software distributed under the License are 11 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS 12 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT 13 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 14 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the 15 * specific language governing rights and limitations under the License. 16 */ 17 18 19/* rijndael-alg-ref.c v2.0 August '99 20 * Reference ANSI C code 21 * authors: Paulo Barreto 22 * Vincent Rijmen 23 * 24 */ 25 26#include <stdio.h> 27#include <stdlib.h> 28#include <string.h> 29 30#include "rijndael-alg-ref.h" 31#include <cspdebugging.h> 32 33#define SC ((BC - 4) >> 1) 34 35#include "boxes-ref.h" 36 37static const word8 shifts[3][4][2] = { 38 { { 0, 0 }, 39 { 1, 3 }, 40 { 2, 2 }, 41 { 3, 1 } 42 }, 43 { { 0, 0 }, 44 { 1, 5 }, 45 { 2, 4 }, 46 { 3, 3 } 47 }, 48 { { 0, 0 }, 49 { 1, 7 }, 50 { 3, 5 }, 51 { 4, 4 } 52 } 53}; 54 55#if !GLADMAN_AES_128_ENABLE 56 57/* 128 bit key/word shift table in bits */ 58static const word8 shifts128[4][2] = { 59 { 0, 0 }, 60 { 8, 24 }, 61 { 16, 16 }, 62 { 24, 8 } 63}; 64 65#endif /* GLADMAN_AES_128_ENABLE */ 66 67#if !AES_MUL_BY_LOOKUP 68/* 69 * Profiling measurements showed that the mul routine is where a large propertion of 70 * the time is spent. Since the first argument to mul is always one of six 71 * constants (2, 3, 0xe, etc.), we implement six 256x256 byte lookup tables to 72 * do the multiplies. This eliminates the need for the log/antilog tables, so 73 * it's only adding one kilobyte of const data. Throughput improvement for this 74 * mod is a factor of 3.3 for encrypt and 4.1 for decrypt in the 128-bit optimized 75 * case. Improvement for the general case (with a 256 bit key) is 1.46 for encrypt 76 * and 1.88 for decrypt. (Decrypt wins more for this enhancement because the 77 * InvMixColumn does four muls, vs. 2 muls for MixColumn). Measurements taken 78 * on a 500 MHz G4 with 1 MB of L2 cache. 79 */ 80 81/* 82 * The mod 255 op in mul is really expensive... 83 * 84 * We know that b <= (254 * 2), so there are only two cases. Either return b, 85 * or return b-255. 86 * 87 * On a G4 this single optimization results in a 24% speedup for encrypt and 88 * a 25% speedup for decrypt. 89 */ 90static inline word8 mod255(word32 b) 91{ 92 if(b >= 255) { 93 b -= 255; 94 } 95 return b; 96} 97 98word8 mul(word8 a, word8 b) { 99 /* multiply two elements of GF(2^m) 100 * needed for MixColumn and InvMixColumn 101 */ 102 if (a && b) return Alogtable[mod255(Logtable[a] + Logtable[b])]; 103 else return 0; 104} 105#endif /* !AES_MUL_BY_LOOKUP */ 106 107static 108void KeyAddition(word8 a[4][MAXBC], word8 rk[4][MAXBC], word8 BC) { 109 /* Exor corresponding text input and round key input bytes 110 */ 111 int i, j; 112 113 for(i = 0; i < 4; i++) 114 for(j = 0; j < BC; j++) a[i][j] ^= rk[i][j]; 115} 116 117static 118void ShiftRow(word8 a[4][MAXBC], word8 d, word8 BC) { 119 /* Row 0 remains unchanged 120 * The other three rows are shifted a variable amount 121 */ 122 word8 tmp[MAXBC]; 123 int i, j; 124 125 for(i = 1; i < 4; i++) { 126 for(j = 0; j < BC; j++) tmp[j] = a[i][(j + shifts[SC][i][d]) % BC]; 127 for(j = 0; j < BC; j++) a[i][j] = tmp[j]; 128 } 129} 130 131static 132void Substitution(word8 a[4][MAXBC], const word8 box[256], word8 BC) { 133 /* Replace every byte of the input by the byte at that place 134 * in the nonlinear S-box 135 */ 136 int i, j; 137 138 for(i = 0; i < 4; i++) 139 for(j = 0; j < BC; j++) a[i][j] = box[a[i][j]] ; 140} 141 142static 143void MixColumn(word8 a[4][MAXBC], word8 BC) { 144 /* Mix the four bytes of every column in a linear way 145 */ 146 word8 b[4][MAXBC]; 147 int i, j; 148 149 for(j = 0; j < BC; j++) { 150 for(i = 0; i < 4; i++) { 151 #if AES_MUL_BY_LOOKUP 152 b[i][j] = mulBy0x02[a[i][j]] 153 ^ mulBy0x03[a[(i + 1) % 4][j]] 154 ^ a[(i + 2) % 4][j] 155 ^ a[(i + 3) % 4][j]; 156 #else 157 b[i][j] = mul(2,a[i][j]) 158 ^ mul(3,a[(i + 1) % 4][j]) 159 ^ a[(i + 2) % 4][j] 160 ^ a[(i + 3) % 4][j]; 161 #endif 162 } 163 } 164 for(i = 0; i < 4; i++) { 165 for(j = 0; j < BC; j++) a[i][j] = b[i][j]; 166 } 167} 168 169static 170void InvMixColumn(word8 a[4][MAXBC], word8 BC) { 171 /* Mix the four bytes of every column in a linear way 172 * This is the opposite operation of Mixcolumn 173 */ 174 word8 b[4][MAXBC]; 175 int i, j; 176 177 for(j = 0; j < BC; j++) { 178 for(i = 0; i < 4; i++) { 179 #if AES_MUL_BY_LOOKUP 180 b[i][j] = mulBy0x0e[a[i][j]] 181 ^ mulBy0x0b[a[(i + 1) % 4][j]] 182 ^ mulBy0x0d[a[(i + 2) % 4][j]] 183 ^ mulBy0x09[a[(i + 3) % 4][j]]; 184 #else 185 b[i][j] = mul(0xe,a[i][j]) 186 ^ mul(0xb,a[(i + 1) % 4][j]) 187 ^ mul(0xd,a[(i + 2) % 4][j]) 188 ^ mul(0x9,a[(i + 3) % 4][j]); 189 #endif 190 } 191 } 192 for(i = 0; i < 4; i++) { 193 for(j = 0; j < BC; j++) a[i][j] = b[i][j]; 194 } 195} 196 197int rijndaelKeySched ( 198 word8 k[4][MAXKC], 199 int keyBits, 200 int blockBits, 201 word8 W[MAXROUNDS+1][4][MAXBC]) { 202 203 /* Calculate the necessary round keys 204 * The number of calculations depends on keyBits and blockBits 205 */ 206 int KC, BC, ROUNDS; 207 int i, j, t, rconpointer = 0; 208 word8 tk[4][MAXKC]; 209 210 switch (keyBits) { 211 case 128: KC = 4; break; 212 case 192: KC = 6; break; 213 case 256: KC = 8; break; 214 default : return (-1); 215 } 216 217 switch (blockBits) { 218 case 128: BC = 4; break; 219 case 192: BC = 6; break; 220 case 256: BC = 8; break; 221 default : return (-2); 222 } 223 224 switch (keyBits >= blockBits ? keyBits : blockBits) { 225 case 128: ROUNDS = 10; break; 226 case 192: ROUNDS = 12; break; 227 case 256: ROUNDS = 14; break; 228 default : return (-3); /* this cannot happen */ 229 } 230 231 232 for(j = 0; j < KC; j++) 233 for(i = 0; i < 4; i++) 234 tk[i][j] = k[i][j]; 235 t = 0; 236 /* copy values into round key array */ 237 for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++) 238 for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j]; 239 240 while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */ 241 /* calculate new values */ 242 for(i = 0; i < 4; i++) 243 tk[i][0] ^= S[tk[(i+1)%4][KC-1]]; 244 tk[0][0] ^= rcon[rconpointer++]; 245 246 if (KC != 8) 247 for(j = 1; j < KC; j++) 248 for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1]; 249 else { 250 for(j = 1; j < KC/2; j++) 251 for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1]; 252 for(i = 0; i < 4; i++) tk[i][KC/2] ^= S[tk[i][KC/2 - 1]]; 253 for(j = KC/2 + 1; j < KC; j++) 254 for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1]; 255 } 256 /* copy values into round key array */ 257 for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++) 258 for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j]; 259 } 260 261 return 0; 262} 263 264int rijndaelEncrypt ( 265 word8 a[4][MAXBC], 266 int keyBits, 267 int blockBits, 268 word8 rk[MAXROUNDS+1][4][MAXBC]) 269{ 270 /* Encryption of one block, general case. 271 */ 272 int r, BC, ROUNDS; 273 274 switch (blockBits) { 275 case 128: BC = 4; break; 276 case 192: BC = 6; break; 277 case 256: BC = 8; break; 278 default : return (-2); 279 } 280 281 switch (keyBits >= blockBits ? keyBits : blockBits) { 282 case 128: ROUNDS = 10; break; 283 case 192: ROUNDS = 12; break; 284 case 256: ROUNDS = 14; break; 285 default : return (-3); /* this cannot happen */ 286 } 287 288 /* begin with a key addition 289 */ 290 KeyAddition(a,rk[0],BC); 291 292 /* ROUNDS-1 ordinary rounds 293 */ 294 for(r = 1; r < ROUNDS; r++) { 295 Substitution(a,S,BC); 296 ShiftRow(a,0,BC); 297 MixColumn(a,BC); 298 KeyAddition(a,rk[r],BC); 299 } 300 301 /* Last round is special: there is no MixColumn 302 */ 303 Substitution(a,S,BC); 304 ShiftRow(a,0,BC); 305 KeyAddition(a,rk[ROUNDS],BC); 306 307 return 0; 308} 309 310int rijndaelDecrypt ( 311 word8 a[4][MAXBC], 312 int keyBits, 313 int blockBits, 314 word8 rk[MAXROUNDS+1][4][MAXBC]) 315{ 316 int r, BC, ROUNDS; 317 318 switch (blockBits) { 319 case 128: BC = 4; break; 320 case 192: BC = 6; break; 321 case 256: BC = 8; break; 322 default : return (-2); 323 } 324 325 switch (keyBits >= blockBits ? keyBits : blockBits) { 326 case 128: ROUNDS = 10; break; 327 case 192: ROUNDS = 12; break; 328 case 256: ROUNDS = 14; break; 329 default : return (-3); /* this cannot happen */ 330 } 331 332 /* To decrypt: apply the inverse operations of the encrypt routine, 333 * in opposite order 334 * 335 * (KeyAddition is an involution: it 's equal to its inverse) 336 * (the inverse of Substitution with table S is Substitution with the 337 * inverse table of S) 338 * (the inverse of Shiftrow is Shiftrow over a suitable distance) 339 */ 340 341 /* First the special round: 342 * without InvMixColumn 343 * with extra KeyAddition 344 */ 345 KeyAddition(a,rk[ROUNDS],BC); 346 Substitution(a,Si,BC); 347 ShiftRow(a,1,BC); 348 349 /* ROUNDS-1 ordinary rounds 350 */ 351 for(r = ROUNDS-1; r > 0; r--) { 352 KeyAddition(a,rk[r],BC); 353 InvMixColumn(a,BC); 354 Substitution(a,Si,BC); 355 ShiftRow(a,1,BC); 356 } 357 358 /* End with the extra key addition 359 */ 360 361 KeyAddition(a,rk[0],BC); 362 363 return 0; 364} 365 366#if !GLADMAN_AES_128_ENABLE 367 368/* 369 * All of these 128-bit-key-and-block routines require 32-bit word-aligned 370 * char array pointers.�The key schedule arrays are easy; they come from 371 * keyInstance which has a 4-byte-aligned element preceeding the key schedule. 372 * Others require manual alignment of a local variable by the caller. 373 */ 374 375static inline void KeyAddition128( 376 word8 a[4][BC_128_OPT], 377 word8 rk[4][MAXBC]) { 378 379 /* these casts are endian-independent */ 380 ((word32 *)a)[0] ^= *((word32 *)(&rk[0])); 381 ((word32 *)a)[1] ^= *((word32 *)(&rk[1])); 382 ((word32 *)a)[2] ^= *((word32 *)(&rk[2])); 383 ((word32 *)a)[3] ^= *((word32 *)(&rk[3])); 384} 385 386static void Substitution128( 387 word8 a[4][BC_128_OPT], 388 const word8 box[256]) { 389 /* Replace every byte of the input by the byte at that place 390 * in the nonlinear S-box 391 */ 392 int i, j; 393 394 /* still to be optimized - larger S boxes? */ 395 for(i = 0; i < 4; i++) { 396 for(j = 0; j < BC_128_OPT; j++) { 397 a[i][j] = box[a[i][j]]; 398 } 399 } 400} 401 402#if defined(__ppc__) && defined(__GNUC__) 403 404static inline void rotateWordLeft( 405 word8 *word, // known to be word aligned 406 unsigned rotCount) // in bits 407{ 408 word32 lword = *((word32 *)word); 409 asm("rlwnm %0,%1,%2,0,31" : "=r"(lword) : "0"(lword), "r"(rotCount)); 410 *((word32 *)word) = lword; 411} 412 413#else 414 415/* 416 * Insert your machine/compiler dependent code here, 417 * or just use this, which works on any platform and compiler 418 * which supports the __attribute__((aligned(4))) directive. 419 */ 420static void rotateWordLeft( 421 word8 *word, // known to be word aligned 422 unsigned rotCount) // in bits 423{ 424 word8 tmp[BC_128_OPT] __attribute__((aligned(4))); 425 unsigned bytes = rotCount / 8; 426 427 tmp[0] = word[bytes & (BC_128_OPT-1)]; 428 tmp[1] = word[(1+bytes) & (BC_128_OPT-1)]; 429 tmp[2] = word[(2+bytes) & (BC_128_OPT-1)]; 430 tmp[3] = word[(3+bytes) & (BC_128_OPT-1)]; 431 *((word32 *)word) = *((word32 *)tmp); 432} 433#endif 434 435static inline void ShiftRow128( 436 word8 a[4][BC_128_OPT], 437 word8 d) { 438 /* Row 0 remains unchanged 439 * The other three rows are shifted (actually rotated) a variable amount 440 */ 441 int i; 442 443 for(i = 1; i < 4; i++) { 444 rotateWordLeft(a[i], shifts128[i][d]); 445 } 446} 447 448/* 449 * The following two routines are where most of the time is spent in this 450 * module. Further optimization would have to focus here. 451 */ 452static void MixColumn128(word8 a[4][BC_128_OPT]) { 453 /* Mix the four bytes of every column in a linear way 454 */ 455 word8 b[4][BC_128_OPT]; 456 int i, j; 457 458 for(j = 0; j < BC_128_OPT; j++) { 459 for(i = 0; i < 4; i++) { 460 #if AES_MUL_BY_LOOKUP 461 b[i][j] = mulBy0x02[a[i][j]] 462 ^ mulBy0x03[a[(i + 1) % 4][j]] 463 ^ a[(i + 2) % 4][j] 464 ^ a[(i + 3) % 4][j]; 465 #else 466 b[i][j] = mul(2,a[i][j]) 467 ^ mul(3,a[(i + 1) % 4][j]) 468 ^ a[(i + 2) % 4][j] 469 ^ a[(i + 3) % 4][j]; 470 #endif 471 } 472 } 473 memmove(a, b, 4 * BC_128_OPT); 474} 475 476static void InvMixColumn128(word8 a[4][BC_128_OPT]) { 477 /* Mix the four bytes of every column in a linear way 478 * This is the opposite operation of Mixcolumn 479 */ 480 word8 b[4][BC_128_OPT]; 481 int i, j; 482 483 for(j = 0; j < BC_128_OPT; j++) { 484 for(i = 0; i < 4; i++) { 485 #if AES_MUL_BY_LOOKUP 486 b[i][j] = mulBy0x0e[a[i][j]] 487 ^ mulBy0x0b[a[(i + 1) % 4][j]] 488 ^ mulBy0x0d[a[(i + 2) % 4][j]] 489 ^ mulBy0x09[a[(i + 3) % 4][j]]; 490 #else 491 b[i][j] = mul(0xe,a[i][j]) 492 ^ mul(0xb,a[(i + 1) % 4][j]) 493 ^ mul(0xd,a[(i + 2) % 4][j]) 494 ^ mul(0x9,a[(i + 3) % 4][j]); 495 #endif 496 } 497 } 498 memmove(a, b, 4 * BC_128_OPT); 499} 500 501int rijndaelKeySched128 ( 502 word8 k[4][KC_128_OPT], 503 word8 W[MAXROUNDS+1][4][MAXBC]) { 504 505 /* Calculate the necessary round keys 506 * The number of calculations depends on keyBits and blockBits 507 */ 508 int i, j, t, rconpointer = 0; 509 word8 tk[4][KC_128_OPT]; 510 unsigned numSchedRows = (ROUNDS_128_OPT + 1) * BC_128_OPT; 511 512 for(j = 0; j < KC_128_OPT; j++) 513 for(i = 0; i < 4; i++) 514 tk[i][j] = k[i][j]; 515 t = 0; 516 /* copy values into round key array */ 517 for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) { 518 for(i = 0; i < 4; i++) { 519 W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j]; 520 } 521 } 522 523 while (t < numSchedRows) { 524 /* while not enough round key material calculated */ 525 /* calculate new values */ 526 for(i = 0; i < 4; i++) { 527 tk[i][0] ^= S[tk[(i+1)%4][KC_128_OPT-1]]; 528 } 529 tk[0][0] ^= rcon[rconpointer++]; 530 531 for(j = 1; j < KC_128_OPT; j++) { 532 for(i = 0; i < 4; i++) { 533 tk[i][j] ^= tk[i][j-1]; 534 } 535 } 536 537 /* copy values into round key array */ 538 for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) { 539 for(i = 0; i < 4; i++) { 540 W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j]; 541 } 542 } 543 } 544 545 return 0; 546} 547 548int rijndaelEncrypt128 ( 549 word8 a[4][BC_128_OPT], 550 word8 rk[MAXROUNDS+1][4][MAXBC]) 551{ 552 /* Encryption of one block. 553 */ 554 int r; 555 556 /* begin with a key addition 557 */ 558 KeyAddition128(a,rk[0]); 559 560 /* ROUNDS-1 ordinary rounds 561 */ 562 for(r = 1; r < ROUNDS_128_OPT; r++) { 563 Substitution128(a,S); 564 ShiftRow128(a,0); 565 MixColumn128(a); 566 KeyAddition128(a,rk[r]); 567 } 568 569 /* Last round is special: there is no MixColumn 570 */ 571 Substitution128(a,S); 572 ShiftRow128(a,0); 573 KeyAddition128(a,rk[ROUNDS_128_OPT]); 574 575 return 0; 576} 577 578int rijndaelDecrypt128 ( 579 word8 a[4][BC_128_OPT], 580 word8 rk[MAXROUNDS+1][4][MAXBC]) 581{ 582 int r; 583 584 /* To decrypt: apply the inverse operations of the encrypt routine, 585 * in opposite order 586 * 587 * (KeyAddition is an involution: it 's equal to its inverse) 588 * (the inverse of Substitution with table S is Substitution with the 589 * inverse table of S) 590 * (the inverse of Shiftrow is Shiftrow over a suitable distance) 591 */ 592 593 /* First the special round: 594 * without InvMixColumn 595 * with extra KeyAddition 596 */ 597 KeyAddition128(a,rk[ROUNDS_128_OPT]); 598 Substitution128(a,Si); 599 ShiftRow128(a,1); 600 601 /* ROUNDS-1 ordinary rounds 602 */ 603 for(r = ROUNDS_128_OPT-1; r > 0; r--) { 604 KeyAddition128(a,rk[r]); 605 InvMixColumn128(a); 606 Substitution128(a,Si); 607 ShiftRow128(a,1); 608 } 609 610 /* End with the extra key addition 611 */ 612 613 KeyAddition128(a,rk[0]); 614 615 return 0; 616} 617 618#endif /* !GLADMAN_AES_128_ENABLE */ 619 620