/* Rijndael (AES) for GnuPG
 * Copyright (C) 2000, 2001, 2002, 2003, 2007,
 *               2008, 2011 Free Software Foundation, Inc.
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 *******************************************************************
 * The code here is based on the optimized implementation taken from
 * http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ on Oct 2, 2000,
 * which carries this notice:
 *------------------------------------------
 * rijndael-alg-fst.c   v2.3   April '2000
 *
 * Optimised ANSI C code
 *
 * authors: v1.0: Antoon Bosselaers
 *          v2.0: Vincent Rijmen
 *          v2.3: Paulo Barreto
 *
 * This code is placed in the public domain.
 *------------------------------------------
 *
 * The SP800-38a document is available at:
 *   http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf
 *
 */

#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h> /* for memcmp() */

#include "types.h"  /* for byte and u32 typedefs */
#include "g10lib.h"
#include "cipher.h"

/* AES geometry: at most 256/32 = 8 key columns, at most 14 rounds
   (AES-256), and a fixed 128-bit block. */
#define MAXKC			(256/32)
#define MAXROUNDS		14
#define BLOCKSIZE		(128/8)


/* Helper macro to force alignment to 16 bytes.  */
#ifdef __GNUC__
# define ATTR_ALIGNED_16  __attribute__ ((aligned (16)))
#else
# define ATTR_ALIGNED_16
#endif


/* USE_PADLOCK indicates whether to compile the padlock specific
   code.  */
#undef USE_PADLOCK
#ifdef ENABLE_PADLOCK_SUPPORT
# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
#  define USE_PADLOCK 1
# endif
#endif /*ENABLE_PADLOCK_SUPPORT*/

/* USE_AESNI indicates whether to compile with Intel AES-NI code.  We
   need the vector-size attribute which seems to be available since
   gcc 3.  However, to be on the safe side we require at least gcc 4.  */
#undef USE_AESNI
#ifdef ENABLE_AESNI_SUPPORT
# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ >= 4
#  define USE_AESNI 1
# endif
#endif /* ENABLE_AESNI_SUPPORT */

#ifdef USE_AESNI
  /* 128-bit SSE register sized vector type used by the AES-NI code. */
  typedef int m128i_t __attribute__ ((__vector_size__ (16)));
#endif /*USE_AESNI*/

/* Define an u32 variant for the sake of gcc 4.4's strict aliasing.  */
#if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )
typedef u32           __attribute__ ((__may_alias__)) u32_a_t;
#else
typedef u32           u32_a_t;
#endif



/* Our context object.  */
typedef struct
{
  /* The first fields are the keyschedule arrays.  This is so that
     they are aligned on a 16 byte boundary if using gcc.  This
     alignment is required for the AES-NI code and a good idea in any
     case.  The alignment is guaranteed due to the way cipher.c
     allocates the space for the context.  The PROPERLY_ALIGNED_TYPE
     hack is used to force a minimal alignment if not using gcc or if
     the alignment requirement is higher than 16 bytes.  */
  union
  {
    PROPERLY_ALIGNED_TYPE dummy;
    byte keyschedule[MAXROUNDS+1][4][4];   /* Encryption key schedule. */
#ifdef USE_PADLOCK
    /* The key as passed to the padlock engine.  It is only used if
       the padlock engine is used (USE_PADLOCK, below).  */
    unsigned char padlock_key[16] __attribute__ ((aligned (16)));
#endif /*USE_PADLOCK*/
  } u1;
  union
  {
    PROPERLY_ALIGNED_TYPE dummy;
    byte keyschedule[MAXROUNDS+1][4][4];   /* Decryption key schedule. */
  } u2;
  int rounds;               /* Key-length-dependent number of rounds.  */
  int decryption_prepared;  /* The decryption key schedule is available.  */
#ifdef USE_PADLOCK
  int use_padlock;          /* Padlock shall be used.  */
#endif /*USE_PADLOCK*/
#ifdef USE_AESNI
  int use_aesni;            /* AES-NI shall be used.  */
#endif /*USE_AESNI*/
} RIJNDAEL_context;

/* Macros defining alias for the keyschedules.  */
#define keyschenc   u1.keyschedule
#define keyschdec   u2.keyschedule
#define padlockkey  u1.padlock_key

/* Two macros to be called prior and after the use of AESNI
   instructions.  There should be no external function calls between
   the use of these macros.  Their purpose is to make sure that the
   SSE registers are cleared and won't reveal any information about
   the key or the data.  */
#ifdef USE_AESNI
# define aesni_prepare() do { } while (0)
# define aesni_cleanup()                                                \
   do { asm volatile ("pxor %%xmm0, %%xmm0\n\t"                         \
                      "pxor %%xmm1, %%xmm1\n" :: );                     \
   } while (0)
# define aesni_cleanup_2_4()                                            \
   do { asm volatile ("pxor %%xmm2, %%xmm2\n\t"                         \
                      "pxor %%xmm3, %%xmm3\n"                           \
                      "pxor %%xmm4, %%xmm4\n":: );                      \
   } while (0)
#else
# define aesni_prepare() do { } while (0)
# define aesni_cleanup() do { } while (0)
#endif


/* All the numbers.  */
#include "rijndael-tables.h"



/* Function prototypes.  */
#ifdef USE_AESNI
/* We don't want to inline these functions to help gcc allocate enough
   registers.
*/ 165static void do_aesni_ctr (const RIJNDAEL_context *ctx, unsigned char *ctr, 166 unsigned char *b, const unsigned char *a) 167 __attribute__ ((__noinline__)); 168static void do_aesni_ctr_4 (const RIJNDAEL_context *ctx, unsigned char *ctr, 169 unsigned char *b, const unsigned char *a) 170 __attribute__ ((__noinline__)); 171#endif /*USE_AESNI*/ 172 173static const char *selftest(void); 174 175 176 177/* Perform the key setup. */ 178static gcry_err_code_t 179do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) 180{ 181 static int initialized = 0; 182 static const char *selftest_failed=0; 183 int rounds; 184 int i,j, r, t, rconpointer = 0; 185 int KC; 186 union 187 { 188 PROPERLY_ALIGNED_TYPE dummy; 189 byte k[MAXKC][4]; 190 } k; 191#define k k.k 192 union 193 { 194 PROPERLY_ALIGNED_TYPE dummy; 195 byte tk[MAXKC][4]; 196 } tk; 197#define tk tk.tk 198 199 /* The on-the-fly self tests are only run in non-fips mode. In fips 200 mode explicit self-tests are required. Actually the on-the-fly 201 self-tests are not fully thread-safe and it might happen that a 202 failed self-test won't get noticed in another thread. 203 204 FIXME: We might want to have a central registry of succeeded 205 self-tests. 
*/ 206 if (!fips_mode () && !initialized) 207 { 208 initialized = 1; 209 selftest_failed = selftest (); 210 if (selftest_failed) 211 log_error ("%s\n", selftest_failed ); 212 } 213 if (selftest_failed) 214 return GPG_ERR_SELFTEST_FAILED; 215 216 ctx->decryption_prepared = 0; 217#ifdef USE_PADLOCK 218 ctx->use_padlock = 0; 219#endif 220#ifdef USE_AESNI 221 ctx->use_aesni = 0; 222#endif 223 224 if( keylen == 128/8 ) 225 { 226 rounds = 10; 227 KC = 4; 228 229 if (0) 230 ; 231#ifdef USE_PADLOCK 232 else if ((_gcry_get_hw_features () & HWF_PADLOCK_AES)) 233 { 234 ctx->use_padlock = 1; 235 memcpy (ctx->padlockkey, key, keylen); 236 } 237#endif 238#ifdef USE_AESNI 239 else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI)) 240 { 241 ctx->use_aesni = 1; 242 } 243#endif 244 } 245 else if ( keylen == 192/8 ) 246 { 247 rounds = 12; 248 KC = 6; 249 250 if (0) 251 { 252 ; 253 } 254#ifdef USE_AESNI 255 else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI)) 256 { 257 ctx->use_aesni = 1; 258 } 259#endif 260 } 261 else if ( keylen == 256/8 ) 262 { 263 rounds = 14; 264 KC = 8; 265 266 if (0) 267 { 268 ; 269 } 270#ifdef USE_AESNI 271 else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI)) 272 { 273 ctx->use_aesni = 1; 274 } 275#endif 276 } 277 else 278 return GPG_ERR_INV_KEYLEN; 279 280 ctx->rounds = rounds; 281 282 if (0) 283 ; 284#ifdef USE_PADLOCK 285 else if (ctx->use_padlock) 286 { 287 /* Nothing to do as we support only hardware key generation for 288 now. */ 289 } 290#endif /*USE_PADLOCK*/ 291#ifdef USE_AESNI_is_disabled_here 292 else if (ctx->use_aesni && ctx->rounds == 10) 293 { 294 /* Note: This code works for AES-128 but it is not much better 295 than using the standard key schedule. We disable it for 296 now and don't put any effort into implementing this for 297 AES-192 and AES-256. 
*/ 298 asm volatile ("movl %[key], %%esi\n\t" 299 "movdqu (%%esi), %%xmm1\n\t" /* xmm1 := key */ 300 "movl %[ksch], %%esi\n\t" 301 "movdqa %%xmm1, (%%esi)\n\t" /* ksch[0] := xmm1 */ 302 "aeskeygenassist $0x01, %%xmm1, %%xmm2\n\t" 303 "call .Lexpand128_%=\n\t" 304 "movdqa %%xmm1, 0x10(%%esi)\n\t" /* ksch[1] := xmm1 */ 305 "aeskeygenassist $0x02, %%xmm1, %%xmm2\n\t" 306 "call .Lexpand128_%=\n\t" 307 "movdqa %%xmm1, 0x20(%%esi)\n\t" /* ksch[2] := xmm1 */ 308 "aeskeygenassist $0x04, %%xmm1, %%xmm2\n\t" 309 "call .Lexpand128_%=\n\t" 310 "movdqa %%xmm1, 0x30(%%esi)\n\t" /* ksch[3] := xmm1 */ 311 "aeskeygenassist $0x08, %%xmm1, %%xmm2\n\t" 312 "call .Lexpand128_%=\n\t" 313 "movdqa %%xmm1, 0x40(%%esi)\n\t" /* ksch[4] := xmm1 */ 314 "aeskeygenassist $0x10, %%xmm1, %%xmm2\n\t" 315 "call .Lexpand128_%=\n\t" 316 "movdqa %%xmm1, 0x50(%%esi)\n\t" /* ksch[5] := xmm1 */ 317 "aeskeygenassist $0x20, %%xmm1, %%xmm2\n\t" 318 "call .Lexpand128_%=\n\t" 319 "movdqa %%xmm1, 0x60(%%esi)\n\t" /* ksch[6] := xmm1 */ 320 "aeskeygenassist $0x40, %%xmm1, %%xmm2\n\t" 321 "call .Lexpand128_%=\n\t" 322 "movdqa %%xmm1, 0x70(%%esi)\n\t" /* ksch[7] := xmm1 */ 323 "aeskeygenassist $0x80, %%xmm1, %%xmm2\n\t" 324 "call .Lexpand128_%=\n\t" 325 "movdqa %%xmm1, 0x80(%%esi)\n\t" /* ksch[8] := xmm1 */ 326 "aeskeygenassist $0x1b, %%xmm1, %%xmm2\n\t" 327 "call .Lexpand128_%=\n\t" 328 "movdqa %%xmm1, 0x90(%%esi)\n\t" /* ksch[9] := xmm1 */ 329 "aeskeygenassist $0x36, %%xmm1, %%xmm2\n\t" 330 "call .Lexpand128_%=\n\t" 331 "movdqa %%xmm1, 0xa0(%%esi)\n\t" /* ksch[10] := xmm1 */ 332 "jmp .Lleave%=\n" 333 334 ".Lexpand128_%=:\n\t" 335 "pshufd $0xff, %%xmm2, %%xmm2\n\t" 336 "movdqa %%xmm1, %%xmm3\n\t" 337 "pslldq $4, %%xmm3\n\t" 338 "pxor %%xmm3, %%xmm1\n\t" 339 "pslldq $4, %%xmm3\n\t" 340 "pxor %%xmm3, %%xmm1\n\t" 341 "pslldq $4, %%xmm3\n\t" 342 "pxor %%xmm3, %%xmm2\n\t" 343 "pxor %%xmm2, %%xmm1\n\t" 344 "ret\n" 345 346 ".Lleave%=:\n\t" 347 "pxor %%xmm1, %%xmm1\n\t" 348 "pxor %%xmm2, %%xmm2\n\t" 349 "pxor %%xmm3, 
%%xmm3\n" 350 : 351 : [key] "g" (key), [ksch] "g" (ctx->keyschenc) 352 : "%esi", "cc", "memory" ); 353 } 354#endif /*USE_AESNI*/ 355 else 356 { 357#define W (ctx->keyschenc) 358 for (i = 0; i < keylen; i++) 359 { 360 k[i >> 2][i & 3] = key[i]; 361 } 362 363 for (j = KC-1; j >= 0; j--) 364 { 365 *((u32*)tk[j]) = *((u32*)k[j]); 366 } 367 r = 0; 368 t = 0; 369 /* Copy values into round key array. */ 370 for (j = 0; (j < KC) && (r < rounds + 1); ) 371 { 372 for (; (j < KC) && (t < 4); j++, t++) 373 { 374 *((u32*)W[r][t]) = *((u32*)tk[j]); 375 } 376 if (t == 4) 377 { 378 r++; 379 t = 0; 380 } 381 } 382 383 while (r < rounds + 1) 384 { 385 /* While not enough round key material calculated calculate 386 new values. */ 387 tk[0][0] ^= S[tk[KC-1][1]]; 388 tk[0][1] ^= S[tk[KC-1][2]]; 389 tk[0][2] ^= S[tk[KC-1][3]]; 390 tk[0][3] ^= S[tk[KC-1][0]]; 391 tk[0][0] ^= rcon[rconpointer++]; 392 393 if (KC != 8) 394 { 395 for (j = 1; j < KC; j++) 396 { 397 *((u32*)tk[j]) ^= *((u32*)tk[j-1]); 398 } 399 } 400 else 401 { 402 for (j = 1; j < KC/2; j++) 403 { 404 *((u32*)tk[j]) ^= *((u32*)tk[j-1]); 405 } 406 tk[KC/2][0] ^= S[tk[KC/2 - 1][0]]; 407 tk[KC/2][1] ^= S[tk[KC/2 - 1][1]]; 408 tk[KC/2][2] ^= S[tk[KC/2 - 1][2]]; 409 tk[KC/2][3] ^= S[tk[KC/2 - 1][3]]; 410 for (j = KC/2 + 1; j < KC; j++) 411 { 412 *((u32*)tk[j]) ^= *((u32*)tk[j-1]); 413 } 414 } 415 416 /* Copy values into round key array. */ 417 for (j = 0; (j < KC) && (r < rounds + 1); ) 418 { 419 for (; (j < KC) && (t < 4); j++, t++) 420 { 421 *((u32*)W[r][t]) = *((u32*)tk[j]); 422 } 423 if (t == 4) 424 { 425 r++; 426 t = 0; 427 } 428 } 429 } 430#undef W 431 } 432 433 return 0; 434#undef tk 435#undef k 436} 437 438 439static gcry_err_code_t 440rijndael_setkey (void *context, const byte *key, const unsigned keylen) 441{ 442 RIJNDAEL_context *ctx = context; 443 444 int rc = do_setkey (ctx, key, keylen); 445 _gcry_burn_stack ( 100 + 16*sizeof(int)); 446 return rc; 447} 448 449 450/* Make a decryption key from an encryption key. 
*/ 451static void 452prepare_decryption( RIJNDAEL_context *ctx ) 453{ 454 int r; 455 456#ifdef USE_AESNI 457 if (ctx->use_aesni) 458 { 459 /* The AES-NI decrypt instructions use the Equivalent Inverse 460 Cipher, thus we can't use the the standard decrypt key 461 preparation. */ 462 m128i_t *ekey = (m128i_t*)ctx->keyschenc; 463 m128i_t *dkey = (m128i_t*)ctx->keyschdec; 464 int rr; 465 466 dkey[0] = ekey[ctx->rounds]; 467 for (r=1, rr=ctx->rounds-1; r < ctx->rounds; r++, rr--) 468 { 469 asm volatile 470 ("movdqu %[ekey], %%xmm1\n\t" 471 /*"aesimc %%xmm1, %%xmm1\n\t"*/ 472 ".byte 0x66, 0x0f, 0x38, 0xdb, 0xc9\n\t" 473 "movdqu %%xmm1, %[dkey]" 474 : [dkey] "=m" (dkey[r]) 475 : [ekey] "m" (ekey[rr]) ); 476 } 477 dkey[r] = ekey[0]; 478 } 479 else 480#endif /*USE_AESNI*/ 481 { 482 union 483 { 484 PROPERLY_ALIGNED_TYPE dummy; 485 byte *w; 486 } w; 487#define w w.w 488 489 for (r=0; r < MAXROUNDS+1; r++ ) 490 { 491 *((u32*)ctx->keyschdec[r][0]) = *((u32*)ctx->keyschenc[r][0]); 492 *((u32*)ctx->keyschdec[r][1]) = *((u32*)ctx->keyschenc[r][1]); 493 *((u32*)ctx->keyschdec[r][2]) = *((u32*)ctx->keyschenc[r][2]); 494 *((u32*)ctx->keyschdec[r][3]) = *((u32*)ctx->keyschenc[r][3]); 495 } 496#define W (ctx->keyschdec) 497 for (r = 1; r < ctx->rounds; r++) 498 { 499 w = W[r][0]; 500 *((u32*)w) = *((u32*)U1[w[0]]) ^ *((u32*)U2[w[1]]) 501 ^ *((u32*)U3[w[2]]) ^ *((u32*)U4[w[3]]); 502 503 w = W[r][1]; 504 *((u32*)w) = *((u32*)U1[w[0]]) ^ *((u32*)U2[w[1]]) 505 ^ *((u32*)U3[w[2]]) ^ *((u32*)U4[w[3]]); 506 507 w = W[r][2]; 508 *((u32*)w) = *((u32*)U1[w[0]]) ^ *((u32*)U2[w[1]]) 509 ^ *((u32*)U3[w[2]]) ^ *((u32*)U4[w[3]]); 510 511 w = W[r][3]; 512 *((u32*)w) = *((u32*)U1[w[0]]) ^ *((u32*)U2[w[1]]) 513 ^ *((u32*)U3[w[2]]) ^ *((u32*)U4[w[3]]); 514 } 515#undef W 516#undef w 517 } 518} 519 520 521/* Encrypt one block. A and B need to be aligned on a 4 byte 522 boundary. A and B may be the same. 
 */
static void
do_encrypt_aligned (const RIJNDAEL_context *ctx,
                    unsigned char *b, const unsigned char *a)
{
#define rk (ctx->keyschenc)
  int rounds = ctx->rounds;
  int r;
  union
  {
    u32  tempu32[4];  /* Force correct alignment. */
    byte temp[4][4];
  } u;

  /* Initial whitening with round key 0, combined with the first
     T-table round.  */
  *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(a   )) ^ *((u32_a_t*)rk[0][0]);
  *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(a+ 4)) ^ *((u32_a_t*)rk[0][1]);
  *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(a+ 8)) ^ *((u32_a_t*)rk[0][2]);
  *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(a+12)) ^ *((u32_a_t*)rk[0][3]);
  *((u32_a_t*)(b    ))   = (*((u32_a_t*)T1[u.temp[0][0]])
                            ^ *((u32_a_t*)T2[u.temp[1][1]])
                            ^ *((u32_a_t*)T3[u.temp[2][2]])
                            ^ *((u32_a_t*)T4[u.temp[3][3]]));
  *((u32_a_t*)(b + 4))   = (*((u32_a_t*)T1[u.temp[1][0]])
                            ^ *((u32_a_t*)T2[u.temp[2][1]])
                            ^ *((u32_a_t*)T3[u.temp[3][2]])
                            ^ *((u32_a_t*)T4[u.temp[0][3]]));
  *((u32_a_t*)(b + 8))   = (*((u32_a_t*)T1[u.temp[2][0]])
                            ^ *((u32_a_t*)T2[u.temp[3][1]])
                            ^ *((u32_a_t*)T3[u.temp[0][2]])
                            ^ *((u32_a_t*)T4[u.temp[1][3]]));
  *((u32_a_t*)(b +12))   = (*((u32_a_t*)T1[u.temp[3][0]])
                            ^ *((u32_a_t*)T2[u.temp[0][1]])
                            ^ *((u32_a_t*)T3[u.temp[1][2]])
                            ^ *((u32_a_t*)T4[u.temp[2][3]]));

  /* Middle rounds: AddRoundKey followed by the combined
     SubBytes/ShiftRows/MixColumns table lookups.  */
  for (r = 1; r < rounds-1; r++)
    {
      *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b   )) ^ *((u32_a_t*)rk[r][0]);
      *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[r][1]);
      *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[r][2]);
      *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[r][3]);

      *((u32_a_t*)(b    ))   = (*((u32_a_t*)T1[u.temp[0][0]])
                                ^ *((u32_a_t*)T2[u.temp[1][1]])
                                ^ *((u32_a_t*)T3[u.temp[2][2]])
                                ^ *((u32_a_t*)T4[u.temp[3][3]]));
      *((u32_a_t*)(b + 4))   = (*((u32_a_t*)T1[u.temp[1][0]])
                                ^ *((u32_a_t*)T2[u.temp[2][1]])
                                ^ *((u32_a_t*)T3[u.temp[3][2]])
                                ^ *((u32_a_t*)T4[u.temp[0][3]]));
      *((u32_a_t*)(b + 8))   = (*((u32_a_t*)T1[u.temp[2][0]])
                                ^ *((u32_a_t*)T2[u.temp[3][1]])
                                ^ *((u32_a_t*)T3[u.temp[0][2]])
                                ^ *((u32_a_t*)T4[u.temp[1][3]]));
      *((u32_a_t*)(b +12))   = (*((u32_a_t*)T1[u.temp[3][0]])
                                ^ *((u32_a_t*)T2[u.temp[0][1]])
                                ^ *((u32_a_t*)T3[u.temp[1][2]])
                                ^ *((u32_a_t*)T4[u.temp[2][3]]));
    }

  /* Last round is special: MixColumns is omitted; T1[x][1] here
     serves as the plain S-box lookup (table layout: see
     rijndael-tables.h).  */
  *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b   )) ^ *((u32_a_t*)rk[rounds-1][0]);
  *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[rounds-1][1]);
  *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[rounds-1][2]);
  *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[rounds-1][3]);
  b[ 0] = T1[u.temp[0][0]][1];
  b[ 1] = T1[u.temp[1][1]][1];
  b[ 2] = T1[u.temp[2][2]][1];
  b[ 3] = T1[u.temp[3][3]][1];
  b[ 4] = T1[u.temp[1][0]][1];
  b[ 5] = T1[u.temp[2][1]][1];
  b[ 6] = T1[u.temp[3][2]][1];
  b[ 7] = T1[u.temp[0][3]][1];
  b[ 8] = T1[u.temp[2][0]][1];
  b[ 9] = T1[u.temp[3][1]][1];
  b[10] = T1[u.temp[0][2]][1];
  b[11] = T1[u.temp[1][3]][1];
  b[12] = T1[u.temp[3][0]][1];
  b[13] = T1[u.temp[0][1]][1];
  b[14] = T1[u.temp[1][2]][1];
  b[15] = T1[u.temp[2][3]][1];
  *((u32_a_t*)(b   )) ^= *((u32_a_t*)rk[rounds][0]);
  *((u32_a_t*)(b+ 4)) ^= *((u32_a_t*)rk[rounds][1]);
  *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[rounds][2]);
  *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[rounds][3]);
#undef rk
}


/* Encrypt one block; copies unaligned buffers into aligned
   temporaries before calling do_encrypt_aligned.  */
static void
do_encrypt (const RIJNDAEL_context *ctx,
            unsigned char *bx, const unsigned char *ax)
{
  /* BX and AX are not necessarily correctly aligned.  Thus we might
     need to copy them here.  We try to align to 16 bytes.  */
  if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f))
    {
      union
      {
        u32  dummy[4];
        byte a[16] ATTR_ALIGNED_16;
      } a;
      union
      {
        u32  dummy[4];
        byte b[16] ATTR_ALIGNED_16;
      } b;

      memcpy (a.a, ax, 16);
      do_encrypt_aligned (ctx, b.b, a.a);
      memcpy (bx, b.b, 16);
    }
  else
    {
      do_encrypt_aligned (ctx, bx, ax);
    }
}


/* Encrypt or decrypt one block using the padlock engine.  A and B may
   be the same. */
#ifdef USE_PADLOCK
static void
do_padlock (const RIJNDAEL_context *ctx, int decrypt_flag,
            unsigned char *bx, const unsigned char *ax)
{
  /* BX and AX are not necessarily correctly aligned.  Thus we need to
     copy them here. */
  unsigned char a[16] __attribute__ ((aligned (16)));
  unsigned char b[16] __attribute__ ((aligned (16)));
  unsigned int cword[4] __attribute__ ((aligned (16)));

  /* The control word fields are:

      127:12   11:10 9     8     7     6     5     4     3:0
      RESERVED KSIZE CRYPT INTER KEYGN CIPHR ALIGN DGEST ROUND  */
  cword[0] = (ctx->rounds & 15);  /* (The mask is just a safeguard.)  */
  cword[1] = 0;
  cword[2] = 0;
  cword[3] = 0;
  if (decrypt_flag)
    cword[0] |= 0x00000200;   /* Set the CRYPT (decrypt) bit.  */

  memcpy (a, ax, 16);

  asm volatile
    ("pushfl\n\t"          /* Force key reload.  */
     "popfl\n\t"
     "xchg %3, %%ebx\n\t"  /* Load key.  */
     "movl $1, %%ecx\n\t"  /* Init counter for just one block.  */
     ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XSTORE ECB. */
     "xchg %3, %%ebx\n"    /* Restore GOT register.  */
     : /* No output */
     : "S" (a), "D" (b), "d" (cword), "r" (ctx->padlockkey)
     : "%ecx", "cc", "memory"
     );

  memcpy (bx, b, 16);

}
#endif /*USE_PADLOCK*/


#ifdef USE_AESNI
/* Encrypt one block using the Intel AES-NI instructions.  A and B may
   be the same; they need to be properly aligned to 16 bytes.

   Our problem here is that gcc does not allow the "x" constraint for
   SSE registers in asm unless you compile with -msse.  The common
   wisdom is to use a separate file for SSE instructions and build it
   separately.  This would require a lot of extra build system stuff,
   similar to what we do in mpi/ for the asm stuff.  What we do
   instead is to use standard registers and a bit more of plain asm
   which copies the data and key stuff to the SSE registers and later
   back.  If we decide to implement some block modes with parallelized
   AES instructions, it might indeed be better to use plain asm ala
   mpi/.  */
static void
do_aesni_enc_aligned (const RIJNDAEL_context *ctx,
                      unsigned char *b, const unsigned char *a)
{
  /* AESENC/AESENCLAST xmm1,xmm0 encoded as raw bytes so the code
     assembles with pre-AES-NI binutils.  */
#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
  /* Note: For now we relax the alignment requirement for A and B: It
     does not make much difference because in many case we would need
     to memcpy them to an extra buffer; using the movdqu is much faster
     that memcpy and movdqa.  For CFB we know that the IV is properly
     aligned but that is a special case.  We should better implement
     CFB direct in asm.  */
  asm volatile ("movdqu %[src], %%xmm0\n\t"     /* xmm0 := *a     */
                "movl   %[key], %%esi\n\t"      /* esi  := keyschenc */
                "movdqa (%%esi), %%xmm1\n\t"    /* xmm1 := key[0] */
                "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0] */
                "movdqa 0x10(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x20(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x30(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x40(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x50(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x60(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x70(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x80(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x90(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xa0(%%esi), %%xmm1\n\t"
                "cmp $10, %[rounds]\n\t"        /* AES-128: 10 rounds.  */
                "jz .Lenclast%=\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xb0(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xc0(%%esi), %%xmm1\n\t"
                "cmp $12, %[rounds]\n\t"        /* AES-192: 12 rounds.  */
                "jz .Lenclast%=\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xd0(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xe0(%%esi), %%xmm1\n"  /* AES-256: 14 rounds.  */

                ".Lenclast%=:\n\t"
                aesenclast_xmm1_xmm0
                "movdqu %%xmm0, %[dst]\n"
                : [dst] "=m" (*b)
                : [src] "m" (*a),
                  [key] "r" (ctx->keyschenc),
                  [rounds] "r" (ctx->rounds)
                : "%esi", "cc", "memory");
#undef aesenc_xmm1_xmm0
#undef aesenclast_xmm1_xmm0
}


/* Decrypt one block using AES-NI.  Requires the decryption key
   schedule (see prepare_decryption); same alignment remarks as for
   do_aesni_enc_aligned apply.  */
static void
do_aesni_dec_aligned (const RIJNDAEL_context *ctx,
                      unsigned char *b, const unsigned char *a)
{
#define aesdec_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xde, 0xc1\n\t"
#define aesdeclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdf, 0xc1\n\t"
  asm volatile ("movdqu %[src], %%xmm0\n\t"     /* xmm0 := *a     */
                "movl   %[key], %%esi\n\t"
                "movdqa (%%esi), %%xmm1\n\t"
                "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0] */
                "movdqa 0x10(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0x20(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0x30(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0x40(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0x50(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0x60(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0x70(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0x80(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0x90(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0xa0(%%esi), %%xmm1\n\t"
                "cmp $10, %[rounds]\n\t"
                "jz .Ldeclast%=\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0xb0(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0xc0(%%esi), %%xmm1\n\t"
                "cmp $12, %[rounds]\n\t"
                "jz .Ldeclast%=\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0xd0(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0xe0(%%esi), %%xmm1\n"

                ".Ldeclast%=:\n\t"
                aesdeclast_xmm1_xmm0
                "movdqu %%xmm0, %[dst]\n"
                : [dst] "=m" (*b)
                : [src] "m" (*a),
                  [key] "r" (ctx->keyschdec),
                  [rounds] "r" (ctx->rounds)
                : "%esi", "cc", "memory");
#undef aesdec_xmm1_xmm0
#undef aesdeclast_xmm1_xmm0
}


/* Perform a CFB encryption or decryption round using the
   initialization vector IV and the input block A.  Write the result
   to the output block B and update IV.  IV needs to be 16 byte
   aligned.
 */
static void
do_aesni_cfb (const RIJNDAEL_context *ctx, int decrypt_flag,
              unsigned char *iv, unsigned char *b, const unsigned char *a)
{
#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
  /* CFB always encrypts the IV regardless of direction; the decrypt
     flag only selects whether the ciphertext input (xmm1) or the
     XOR result (xmm0) becomes the next IV.  */
  asm volatile ("movdqa %[iv], %%xmm0\n\t"      /* xmm0 := IV     */
                "movl   %[key], %%esi\n\t"      /* esi  := keyschenc */
                "movdqa (%%esi), %%xmm1\n\t"    /* xmm1 := key[0] */
                "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0] */
                "movdqa 0x10(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x20(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x30(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x40(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x50(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x60(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x70(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x80(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x90(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xa0(%%esi), %%xmm1\n\t"
                "cmp $10, %[rounds]\n\t"
                "jz .Lenclast%=\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xb0(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xc0(%%esi), %%xmm1\n\t"
                "cmp $12, %[rounds]\n\t"
                "jz .Lenclast%=\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xd0(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xe0(%%esi), %%xmm1\n"

                ".Lenclast%=:\n\t"
                aesenclast_xmm1_xmm0
                "movdqu %[src], %%xmm1\n\t"     /* Save input.  */
                "pxor %%xmm1, %%xmm0\n\t"       /* xmm0 = input ^ IV  */

                "cmp $1, %[decrypt]\n\t"
                "jz .Ldecrypt_%=\n\t"
                "movdqa %%xmm0, %[iv]\n\t"      /* [encrypt] Store IV.  */
                "jmp .Lleave_%=\n"
                ".Ldecrypt_%=:\n\t"
                "movdqa %%xmm1, %[iv]\n"        /* [decrypt] Store IV.  */
                ".Lleave_%=:\n\t"
                "movdqu %%xmm0, %[dst]\n"       /* Store output.  */
                : [iv] "+m" (*iv), [dst] "=m" (*b)
                : [src] "m" (*a),
                  [key] "g" (ctx->keyschenc),
                  [rounds] "g" (ctx->rounds),
                  [decrypt] "m" (decrypt_flag)
                : "%esi", "cc", "memory");
#undef aesenc_xmm1_xmm0
#undef aesenclast_xmm1_xmm0
}

/* Perform a CTR encryption round using the counter CTR and the input
   block A.  Write the result to the output block B and update CTR.
   CTR needs to be a 16 byte aligned little-endian value.  */
static void
do_aesni_ctr (const RIJNDAEL_context *ctx,
              unsigned char *ctr, unsigned char *b, const unsigned char *a)
{
#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
  /* Byte-reversal mask for pshufb: converts between the big-endian
     counter in memory and the little-endian form paddq works on.  */
  static unsigned char be_mask[16] __attribute__ ((aligned (16))) =
    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };

  /* NOTE(review): "paddq" adds 1 to the low 64-bit lane only; a carry
     out of the low quadword of the byte-swapped counter is lost, so
     the counter effectively wraps within 64 bits — confirm this
     matches the generic CTR handling in cipher.c.  */
  asm volatile ("movdqa %[ctr], %%xmm0\n\t"   /* xmm0, xmm2 := CTR  */
                "movaps %%xmm0, %%xmm2\n\t"
                "mov    $1, %%esi\n\t"        /* xmm2++ (big-endian)  */
                "movd   %%esi, %%xmm1\n\t"
                "pshufb %[mask], %%xmm2\n\t"
                "paddq  %%xmm1, %%xmm2\n\t"
                "pshufb %[mask], %%xmm2\n\t"
                "movdqa %%xmm2, %[ctr]\n"     /* Update CTR.  */

                "movl   %[key], %%esi\n\t"    /* esi  := keyschenc */
                "movdqa (%%esi), %%xmm1\n\t"  /* xmm1 := key[0]    */
                "pxor   %%xmm1, %%xmm0\n\t"   /* xmm0 ^= key[0]    */
                "movdqa 0x10(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x20(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x30(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x40(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x50(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x60(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x70(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x80(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x90(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xa0(%%esi), %%xmm1\n\t"
                "cmp $10, %[rounds]\n\t"
                "jz .Lenclast%=\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xb0(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xc0(%%esi), %%xmm1\n\t"
                "cmp $12, %[rounds]\n\t"
                "jz .Lenclast%=\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xd0(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xe0(%%esi), %%xmm1\n"

                ".Lenclast%=:\n\t"
                aesenclast_xmm1_xmm0
                "movdqu %[src], %%xmm1\n\t"    /* xmm1 := input   */
                "pxor %%xmm1, %%xmm0\n\t"      /* EncCTR ^= input  */
                "movdqu %%xmm0, %[dst]"        /* Store EncCTR.    */

                : [ctr] "+m" (*ctr), [dst] "=m" (*b)
                : [src] "m" (*a),
                  [key] "g" (ctx->keyschenc),
                  [rounds] "g" (ctx->rounds),
                  [mask] "m" (*be_mask)
                : "%esi", "cc", "memory");
#undef aesenc_xmm1_xmm0
#undef aesenclast_xmm1_xmm0
}


/* Four blocks at a time variant of do_aesni_ctr.
 */
static void
do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
                unsigned char *ctr, unsigned char *b, const unsigned char *a)
{
#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
#define aesenc_xmm1_xmm2      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd1\n\t"
#define aesenc_xmm1_xmm3      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd9\n\t"
#define aesenc_xmm1_xmm4      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xe1\n\t"
#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
#define aesenclast_xmm1_xmm2  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd1\n\t"
#define aesenclast_xmm1_xmm3  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd9\n\t"
#define aesenclast_xmm1_xmm4  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xe1\n\t"

  /* Byte-reversal mask for pshufb (see do_aesni_ctr).  */
  static unsigned char be_mask[16] __attribute__ ((aligned (16))) =
    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };

  /* Register usage:
      esi   keyschedule
      xmm0  CTR-0
      xmm1  temp / round key
      xmm2  CTR-1
      xmm3  CTR-2
      xmm4  CTR-3
      xmm5  temp
   */

  /* NOTE(review): as in do_aesni_ctr, "paddq" increments only the low
     64-bit lane; carries out of the low quadword are lost — confirm
     against the generic CTR handling in cipher.c.  */
  asm volatile ("movdqa %[ctr], %%xmm0\n\t"   /* xmm0, xmm2 := CTR  */
                "movaps %%xmm0, %%xmm2\n\t"
                "mov    $1, %%esi\n\t"        /* xmm1 := 1          */
                "movd   %%esi, %%xmm1\n\t"
                "pshufb %[mask], %%xmm2\n\t"  /* xmm2 := le(xmm2)   */
                "paddq  %%xmm1, %%xmm2\n\t"   /* xmm2++             */
                "movaps %%xmm2, %%xmm3\n\t"   /* xmm3 := xmm2       */
                "paddq  %%xmm1, %%xmm3\n\t"   /* xmm3++             */
                "movaps %%xmm3, %%xmm4\n\t"   /* xmm4 := xmm3       */
                "paddq  %%xmm1, %%xmm4\n\t"   /* xmm4++             */
                "movaps %%xmm4, %%xmm5\n\t"   /* xmm5 := xmm4       */
                "paddq  %%xmm1, %%xmm5\n\t"   /* xmm5++             */
                "pshufb %[mask], %%xmm2\n\t"  /* xmm2 := be(xmm2)   */
                "pshufb %[mask], %%xmm3\n\t"  /* xmm3 := be(xmm3)   */
                "pshufb %[mask], %%xmm4\n\t"  /* xmm4 := be(xmm4)   */
                "pshufb %[mask], %%xmm5\n\t"  /* xmm5 := be(xmm5)   */
                "movdqa %%xmm5, %[ctr]\n"     /* Update CTR.        */

                "movl   %[key], %%esi\n\t"    /* esi  := keyschenc */
                "movdqa (%%esi), %%xmm1\n\t"  /* xmm1 := key[0]    */
                "pxor   %%xmm1, %%xmm0\n\t"   /* xmm0 ^= key[0]    */
                "pxor   %%xmm1, %%xmm2\n\t"   /* xmm2 ^= key[0]    */
                "pxor   %%xmm1, %%xmm3\n\t"   /* xmm3 ^= key[0]    */
                "pxor   %%xmm1, %%xmm4\n\t"   /* xmm4 ^= key[0]    */
                "movdqa 0x10(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0x20(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0x30(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0x40(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0x50(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0x60(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0x70(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0x80(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0x90(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0xa0(%%esi), %%xmm1\n\t"
                "cmp $10, %[rounds]\n\t"
                "jz .Lenclast%=\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0xb0(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0xc0(%%esi), %%xmm1\n\t"
                "cmp $12, %[rounds]\n\t"
                "jz .Lenclast%=\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0xd0(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0xe0(%%esi), %%xmm1\n"

                ".Lenclast%=:\n\t"
                aesenclast_xmm1_xmm0
                aesenclast_xmm1_xmm2
                aesenclast_xmm1_xmm3
                aesenclast_xmm1_xmm4

                "movdqu %[src], %%xmm1\n\t"      /* Get block 1.      */
                "pxor %%xmm1, %%xmm0\n\t"        /* EncCTR-1 ^= input */
                "movdqu %%xmm0, %[dst]\n\t"      /* Store block 1     */

                "movdqu (16)%[src], %%xmm1\n\t"  /* Get block 2.      */
                "pxor %%xmm1, %%xmm2\n\t"        /* EncCTR-2 ^= input */
                "movdqu %%xmm2, (16)%[dst]\n\t"  /* Store block 2.    */

                "movdqu (32)%[src], %%xmm1\n\t"  /* Get block 3.      */
                "pxor %%xmm1, %%xmm3\n\t"        /* EncCTR-3 ^= input */
                "movdqu %%xmm3, (32)%[dst]\n\t"  /* Store block 3.    */

                "movdqu (48)%[src], %%xmm1\n\t"  /* Get block 4.      */
                "pxor %%xmm1, %%xmm4\n\t"        /* EncCTR-4 ^= input */
                "movdqu %%xmm4, (48)%[dst]"      /* Store block 4.    */

                : [ctr] "+m" (*ctr), [dst] "=m" (*b)
                : [src] "m" (*a),
                  [key] "g" (ctx->keyschenc),
                  [rounds] "g" (ctx->rounds),
                  [mask] "m" (*be_mask)
                : "%esi", "cc", "memory");
#undef aesenc_xmm1_xmm0
#undef aesenc_xmm1_xmm2
#undef aesenc_xmm1_xmm3
#undef aesenc_xmm1_xmm4
#undef aesenclast_xmm1_xmm0
#undef aesenclast_xmm1_xmm2
#undef aesenclast_xmm1_xmm3
#undef aesenclast_xmm1_xmm4
}


/* Dispatch one AES-NI block operation; lazily prepares the
   decryption key schedule on first decrypt.  */
static void
do_aesni (RIJNDAEL_context *ctx, int decrypt_flag,
          unsigned char *bx, const unsigned char *ax)
{

  if (decrypt_flag)
    {
      if (!ctx->decryption_prepared )
        {
          prepare_decryption ( ctx );
          ctx->decryption_prepared = 1;
        }
      do_aesni_dec_aligned (ctx, bx, ax);
    }
  else
    do_aesni_enc_aligned (ctx, bx, ax);
}
#endif /*USE_AESNI*/


/* Encrypt one block (cipher spec entry point); selects the Padlock,
   AES-NI or software implementation and wipes temporary state.  */
static void
rijndael_encrypt (void *context, byte *b, const byte *a)
{
  RIJNDAEL_context *ctx = context;

  if (0)
    ;
#ifdef USE_PADLOCK
  else if (ctx->use_padlock)
    {
      do_padlock (ctx, 0, b, a);
      _gcry_burn_stack (48 + 15 /* possible padding for alignment */);
    }
#endif /*USE_PADLOCK*/
#ifdef USE_AESNI
  else if (ctx->use_aesni)
    {
      aesni_prepare ();
      do_aesni (ctx, 0, b, a);
      aesni_cleanup ();
    }
#endif /*USE_AESNI*/
  else
    {
      do_encrypt (ctx, b, a);
      _gcry_burn_stack (56 + 2*sizeof(int));
    }
}


/* Bulk encryption of complete blocks in CFB mode.  Caller needs to
   make sure that IV is aligned on an unsigned long boundary.  This
   function is only intended for the bulk encryption feature of
   cipher.c. */
void
_gcry_aes_cfb_enc (void *context, unsigned char *iv,
                   void *outbuf_arg, const void *inbuf_arg,
                   unsigned int nblocks)
{
  RIJNDAEL_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned char *ivp;
  int i;

  if (0)
    ;
#ifdef USE_PADLOCK
  else if (ctx->use_padlock)
    {
      /* Fixme: Let Padlock do the CFBing. */
      for ( ;nblocks; nblocks-- )
        {
          /* Encrypt the IV. */
          do_padlock (ctx, 0, iv, iv);
          /* XOR the input with the IV and store input into IV. */
          for (ivp=iv,i=0; i < BLOCKSIZE; i++ )
            *outbuf++ = (*ivp++ ^= *inbuf++);
        }
    }
#endif /*USE_PADLOCK*/
#ifdef USE_AESNI
  else if (ctx->use_aesni)
    {
      aesni_prepare ();
      for ( ;nblocks; nblocks-- )
        {
          do_aesni_cfb (ctx, 0, iv, outbuf, inbuf);
          outbuf += BLOCKSIZE;
          inbuf  += BLOCKSIZE;
        }
      aesni_cleanup ();
    }
#endif /*USE_AESNI*/
  else
    {
      for ( ;nblocks; nblocks-- )
        {
          /* Encrypt the IV. */
          do_encrypt_aligned (ctx, iv, iv);
          /* XOR the input with the IV and store input into IV.
*/ 1218 for (ivp=iv,i=0; i < BLOCKSIZE; i++ ) 1219 *outbuf++ = (*ivp++ ^= *inbuf++); 1220 } 1221 } 1222 1223 _gcry_burn_stack (48 + 2*sizeof(int)); 1224} 1225 1226 1227/* Bulk encryption of complete blocks in CBC mode. Caller needs to 1228 make sure that IV is aligned on an unsigned long boundary. This 1229 function is only intended for the bulk encryption feature of 1230 cipher.c. */ 1231void 1232_gcry_aes_cbc_enc (void *context, unsigned char *iv, 1233 void *outbuf_arg, const void *inbuf_arg, 1234 unsigned int nblocks, int cbc_mac) 1235{ 1236 RIJNDAEL_context *ctx = context; 1237 unsigned char *outbuf = outbuf_arg; 1238 const unsigned char *inbuf = inbuf_arg; 1239 unsigned char *ivp; 1240 int i; 1241 1242 aesni_prepare (); 1243 for ( ;nblocks; nblocks-- ) 1244 { 1245 for (ivp=iv, i=0; i < BLOCKSIZE; i++ ) 1246 outbuf[i] = inbuf[i] ^ *ivp++; 1247 1248 if (0) 1249 ; 1250#ifdef USE_PADLOCK 1251 else if (ctx->use_padlock) 1252 do_padlock (ctx, 0, outbuf, outbuf); 1253#endif /*USE_PADLOCK*/ 1254#ifdef USE_AESNI 1255 else if (ctx->use_aesni) 1256 do_aesni (ctx, 0, outbuf, outbuf); 1257#endif /*USE_AESNI*/ 1258 else 1259 do_encrypt (ctx, outbuf, outbuf ); 1260 1261 memcpy (iv, outbuf, BLOCKSIZE); 1262 inbuf += BLOCKSIZE; 1263 if (!cbc_mac) 1264 outbuf += BLOCKSIZE; 1265 } 1266 aesni_cleanup (); 1267 1268 _gcry_burn_stack (48 + 2*sizeof(int)); 1269} 1270 1271 1272/* Bulk encryption of complete blocks in CTR mode. Caller needs to 1273 make sure that CTR is aligned on a 16 byte boundary if AESNI; the 1274 minimum alignment is for an u32. This function is only intended 1275 for the bulk encryption feature of cipher.c. CTR is expected to be 1276 of size BLOCKSIZE. 
*/ 1277void 1278_gcry_aes_ctr_enc (void *context, unsigned char *ctr, 1279 void *outbuf_arg, const void *inbuf_arg, 1280 unsigned int nblocks) 1281{ 1282 RIJNDAEL_context *ctx = context; 1283 unsigned char *outbuf = outbuf_arg; 1284 const unsigned char *inbuf = inbuf_arg; 1285 unsigned char *p; 1286 int i; 1287 1288 if (0) 1289 ; 1290#ifdef USE_AESNI 1291 else if (ctx->use_aesni) 1292 { 1293 aesni_prepare (); 1294 for ( ;nblocks > 3 ; nblocks -= 4 ) 1295 { 1296 do_aesni_ctr_4 (ctx, ctr, outbuf, inbuf); 1297 outbuf += 4*BLOCKSIZE; 1298 inbuf += 4*BLOCKSIZE; 1299 } 1300 for ( ;nblocks; nblocks-- ) 1301 { 1302 do_aesni_ctr (ctx, ctr, outbuf, inbuf); 1303 outbuf += BLOCKSIZE; 1304 inbuf += BLOCKSIZE; 1305 } 1306 aesni_cleanup (); 1307 aesni_cleanup_2_4 (); 1308 } 1309#endif /*USE_AESNI*/ 1310 else 1311 { 1312 union { unsigned char x1[16]; u32 x32[4]; } tmp; 1313 1314 for ( ;nblocks; nblocks-- ) 1315 { 1316 /* Encrypt the counter. */ 1317 do_encrypt_aligned (ctx, tmp.x1, ctr); 1318 /* XOR the input with the encrypted counter and store in output. */ 1319 for (p=tmp.x1, i=0; i < BLOCKSIZE; i++) 1320 *outbuf++ = (*p++ ^= *inbuf++); 1321 /* Increment the counter. */ 1322 for (i = BLOCKSIZE; i > 0; i--) 1323 { 1324 ctr[i-1]++; 1325 if (ctr[i-1]) 1326 break; 1327 } 1328 } 1329 } 1330 1331 _gcry_burn_stack (48 + 2*sizeof(int)); 1332} 1333 1334 1335 1336/* Decrypt one block. A and B need to be aligned on a 4 byte boundary 1337 and the decryption must have been prepared. A and B may be the 1338 same. */ 1339static void 1340do_decrypt_aligned (RIJNDAEL_context *ctx, 1341 unsigned char *b, const unsigned char *a) 1342{ 1343#define rk (ctx->keyschdec) 1344 int rounds = ctx->rounds; 1345 int r; 1346 union 1347 { 1348 u32 tempu32[4]; /* Force correct alignment. 
*/ 1349 byte temp[4][4]; 1350 } u; 1351 1352 1353 *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(a )) ^ *((u32_a_t*)rk[rounds][0]); 1354 *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(a+ 4)) ^ *((u32_a_t*)rk[rounds][1]); 1355 *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(a+ 8)) ^ *((u32_a_t*)rk[rounds][2]); 1356 *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(a+12)) ^ *((u32_a_t*)rk[rounds][3]); 1357 1358 *((u32_a_t*)(b )) = (*((u32_a_t*)T5[u.temp[0][0]]) 1359 ^ *((u32_a_t*)T6[u.temp[3][1]]) 1360 ^ *((u32_a_t*)T7[u.temp[2][2]]) 1361 ^ *((u32_a_t*)T8[u.temp[1][3]])); 1362 *((u32_a_t*)(b+ 4)) = (*((u32_a_t*)T5[u.temp[1][0]]) 1363 ^ *((u32_a_t*)T6[u.temp[0][1]]) 1364 ^ *((u32_a_t*)T7[u.temp[3][2]]) 1365 ^ *((u32_a_t*)T8[u.temp[2][3]])); 1366 *((u32_a_t*)(b+ 8)) = (*((u32_a_t*)T5[u.temp[2][0]]) 1367 ^ *((u32_a_t*)T6[u.temp[1][1]]) 1368 ^ *((u32_a_t*)T7[u.temp[0][2]]) 1369 ^ *((u32_a_t*)T8[u.temp[3][3]])); 1370 *((u32_a_t*)(b+12)) = (*((u32_a_t*)T5[u.temp[3][0]]) 1371 ^ *((u32_a_t*)T6[u.temp[2][1]]) 1372 ^ *((u32_a_t*)T7[u.temp[1][2]]) 1373 ^ *((u32_a_t*)T8[u.temp[0][3]])); 1374 1375 for (r = rounds-1; r > 1; r--) 1376 { 1377 *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b )) ^ *((u32_a_t*)rk[r][0]); 1378 *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[r][1]); 1379 *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[r][2]); 1380 *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[r][3]); 1381 *((u32_a_t*)(b )) = (*((u32_a_t*)T5[u.temp[0][0]]) 1382 ^ *((u32_a_t*)T6[u.temp[3][1]]) 1383 ^ *((u32_a_t*)T7[u.temp[2][2]]) 1384 ^ *((u32_a_t*)T8[u.temp[1][3]])); 1385 *((u32_a_t*)(b+ 4)) = (*((u32_a_t*)T5[u.temp[1][0]]) 1386 ^ *((u32_a_t*)T6[u.temp[0][1]]) 1387 ^ *((u32_a_t*)T7[u.temp[3][2]]) 1388 ^ *((u32_a_t*)T8[u.temp[2][3]])); 1389 *((u32_a_t*)(b+ 8)) = (*((u32_a_t*)T5[u.temp[2][0]]) 1390 ^ *((u32_a_t*)T6[u.temp[1][1]]) 1391 ^ *((u32_a_t*)T7[u.temp[0][2]]) 1392 ^ *((u32_a_t*)T8[u.temp[3][3]])); 1393 *((u32_a_t*)(b+12)) = (*((u32_a_t*)T5[u.temp[3][0]]) 1394 ^ 
*((u32_a_t*)T6[u.temp[2][1]]) 1395 ^ *((u32_a_t*)T7[u.temp[1][2]]) 1396 ^ *((u32_a_t*)T8[u.temp[0][3]])); 1397 } 1398 1399 /* Last round is special. */ 1400 *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b )) ^ *((u32_a_t*)rk[1][0]); 1401 *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[1][1]); 1402 *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[1][2]); 1403 *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[1][3]); 1404 b[ 0] = S5[u.temp[0][0]]; 1405 b[ 1] = S5[u.temp[3][1]]; 1406 b[ 2] = S5[u.temp[2][2]]; 1407 b[ 3] = S5[u.temp[1][3]]; 1408 b[ 4] = S5[u.temp[1][0]]; 1409 b[ 5] = S5[u.temp[0][1]]; 1410 b[ 6] = S5[u.temp[3][2]]; 1411 b[ 7] = S5[u.temp[2][3]]; 1412 b[ 8] = S5[u.temp[2][0]]; 1413 b[ 9] = S5[u.temp[1][1]]; 1414 b[10] = S5[u.temp[0][2]]; 1415 b[11] = S5[u.temp[3][3]]; 1416 b[12] = S5[u.temp[3][0]]; 1417 b[13] = S5[u.temp[2][1]]; 1418 b[14] = S5[u.temp[1][2]]; 1419 b[15] = S5[u.temp[0][3]]; 1420 *((u32_a_t*)(b )) ^= *((u32_a_t*)rk[0][0]); 1421 *((u32_a_t*)(b+ 4)) ^= *((u32_a_t*)rk[0][1]); 1422 *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[0][2]); 1423 *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[0][3]); 1424#undef rk 1425} 1426 1427 1428/* Decrypt one block. AX and BX may be the same. */ 1429static void 1430do_decrypt (RIJNDAEL_context *ctx, byte *bx, const byte *ax) 1431{ 1432 if ( !ctx->decryption_prepared ) 1433 { 1434 prepare_decryption ( ctx ); 1435 _gcry_burn_stack (64); 1436 ctx->decryption_prepared = 1; 1437 } 1438 1439 /* BX and AX are not necessary correctly aligned. Thus we might 1440 need to copy them here. We try to align to a 16 bytes. 
*/ 1441 if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f)) 1442 { 1443 union 1444 { 1445 u32 dummy[4]; 1446 byte a[16] ATTR_ALIGNED_16; 1447 } a; 1448 union 1449 { 1450 u32 dummy[4]; 1451 byte b[16] ATTR_ALIGNED_16; 1452 } b; 1453 1454 memcpy (a.a, ax, 16); 1455 do_decrypt_aligned (ctx, b.b, a.a); 1456 memcpy (bx, b.b, 16); 1457 } 1458 else 1459 { 1460 do_decrypt_aligned (ctx, bx, ax); 1461 } 1462} 1463 1464 1465 1466 1467static void 1468rijndael_decrypt (void *context, byte *b, const byte *a) 1469{ 1470 RIJNDAEL_context *ctx = context; 1471 1472 if (0) 1473 ; 1474#ifdef USE_PADLOCK 1475 else if (ctx->use_padlock) 1476 { 1477 do_padlock (ctx, 1, b, a); 1478 _gcry_burn_stack (48 + 2*sizeof(int) /* FIXME */); 1479 } 1480#endif /*USE_PADLOCK*/ 1481#ifdef USE_AESNI 1482 else if (ctx->use_aesni) 1483 { 1484 aesni_prepare (); 1485 do_aesni (ctx, 1, b, a); 1486 aesni_cleanup (); 1487 } 1488#endif /*USE_AESNI*/ 1489 else 1490 { 1491 do_decrypt (ctx, b, a); 1492 _gcry_burn_stack (56+2*sizeof(int)); 1493 } 1494} 1495 1496 1497/* Bulk decryption of complete blocks in CFB mode. Caller needs to 1498 make sure that IV is aligned on an unisgned lonhg boundary. This 1499 function is only intended for the bulk encryption feature of 1500 cipher.c. */ 1501void 1502_gcry_aes_cfb_dec (void *context, unsigned char *iv, 1503 void *outbuf_arg, const void *inbuf_arg, 1504 unsigned int nblocks) 1505{ 1506 RIJNDAEL_context *ctx = context; 1507 unsigned char *outbuf = outbuf_arg; 1508 const unsigned char *inbuf = inbuf_arg; 1509 unsigned char *ivp; 1510 unsigned char temp; 1511 int i; 1512 1513 if (0) 1514 ; 1515#ifdef USE_PADLOCK 1516 else if (ctx->use_padlock) 1517 { 1518 /* Fixme: Let Padlock do the CFBing. 
*/ 1519 for ( ;nblocks; nblocks-- ) 1520 { 1521 do_padlock (ctx, 0, iv, iv); 1522 for (ivp=iv,i=0; i < BLOCKSIZE; i++ ) 1523 { 1524 temp = *inbuf++; 1525 *outbuf++ = *ivp ^ temp; 1526 *ivp++ = temp; 1527 } 1528 } 1529 } 1530#endif /*USE_PADLOCK*/ 1531#ifdef USE_AESNI 1532 else if (ctx->use_aesni) 1533 { 1534 aesni_prepare (); 1535 for ( ;nblocks; nblocks-- ) 1536 { 1537 do_aesni_cfb (ctx, 1, iv, outbuf, inbuf); 1538 outbuf += BLOCKSIZE; 1539 inbuf += BLOCKSIZE; 1540 } 1541 aesni_cleanup (); 1542 } 1543#endif /*USE_AESNI*/ 1544 else 1545 { 1546 for ( ;nblocks; nblocks-- ) 1547 { 1548 do_encrypt_aligned (ctx, iv, iv); 1549 for (ivp=iv,i=0; i < BLOCKSIZE; i++ ) 1550 { 1551 temp = *inbuf++; 1552 *outbuf++ = *ivp ^ temp; 1553 *ivp++ = temp; 1554 } 1555 } 1556 } 1557 1558 _gcry_burn_stack (48 + 2*sizeof(int)); 1559} 1560 1561 1562/* Bulk decryption of complete blocks in CBC mode. Caller needs to 1563 make sure that IV is aligned on an unsigned long boundary. This 1564 function is only intended for the bulk encryption feature of 1565 cipher.c. */ 1566void 1567_gcry_aes_cbc_dec (void *context, unsigned char *iv, 1568 void *outbuf_arg, const void *inbuf_arg, 1569 unsigned int nblocks) 1570{ 1571 RIJNDAEL_context *ctx = context; 1572 unsigned char *outbuf = outbuf_arg; 1573 const unsigned char *inbuf = inbuf_arg; 1574 unsigned char *ivp; 1575 int i; 1576 unsigned char savebuf[BLOCKSIZE]; 1577 1578 aesni_prepare (); 1579 for ( ;nblocks; nblocks-- ) 1580 { 1581 /* We need to save INBUF away because it may be identical to 1582 OUTBUF. 
*/ 1583 memcpy (savebuf, inbuf, BLOCKSIZE); 1584 1585 if (0) 1586 ; 1587#ifdef USE_PADLOCK 1588 else if (ctx->use_padlock) 1589 do_padlock (ctx, 1, outbuf, inbuf); 1590#endif /*USE_PADLOCK*/ 1591#ifdef USE_AESNI 1592 else if (ctx->use_aesni) 1593 do_aesni (ctx, 1, outbuf, inbuf); 1594#endif /*USE_AESNI*/ 1595 else 1596 do_decrypt (ctx, outbuf, inbuf); 1597 1598 for (ivp=iv, i=0; i < BLOCKSIZE; i++ ) 1599 outbuf[i] ^= *ivp++; 1600 memcpy (iv, savebuf, BLOCKSIZE); 1601 inbuf += BLOCKSIZE; 1602 outbuf += BLOCKSIZE; 1603 } 1604 aesni_cleanup (); 1605 1606 _gcry_burn_stack (48 + 2*sizeof(int) + BLOCKSIZE + 4*sizeof (char*)); 1607} 1608 1609 1610 1611 1612/* Run the self-tests for AES 128. Returns NULL on success. */ 1613static const char* 1614selftest_basic_128 (void) 1615{ 1616 RIJNDAEL_context ctx; 1617 unsigned char scratch[16]; 1618 1619 /* The test vectors are from the AES supplied ones; more or less 1620 randomly taken from ecb_tbl.txt (I=42,81,14) */ 1621#if 1 1622 static const unsigned char plaintext_128[16] = 1623 { 1624 0x01,0x4B,0xAF,0x22,0x78,0xA6,0x9D,0x33, 1625 0x1D,0x51,0x80,0x10,0x36,0x43,0xE9,0x9A 1626 }; 1627 static const unsigned char key_128[16] = 1628 { 1629 0xE8,0xE9,0xEA,0xEB,0xED,0xEE,0xEF,0xF0, 1630 0xF2,0xF3,0xF4,0xF5,0xF7,0xF8,0xF9,0xFA 1631 }; 1632 static const unsigned char ciphertext_128[16] = 1633 { 1634 0x67,0x43,0xC3,0xD1,0x51,0x9A,0xB4,0xF2, 1635 0xCD,0x9A,0x78,0xAB,0x09,0xA5,0x11,0xBD 1636 }; 1637#else 1638 /* Test vectors from fips-197, appendix C. 
*/ 1639# warning debug test vectors in use 1640 static const unsigned char plaintext_128[16] = 1641 { 1642 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77, 1643 0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff 1644 }; 1645 static const unsigned char key_128[16] = 1646 { 1647 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 1648 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f 1649 /* 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, */ 1650 /* 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c */ 1651 }; 1652 static const unsigned char ciphertext_128[16] = 1653 { 1654 0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30, 1655 0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a 1656 }; 1657#endif 1658 1659 rijndael_setkey (&ctx, key_128, sizeof (key_128)); 1660 rijndael_encrypt (&ctx, scratch, plaintext_128); 1661 if (memcmp (scratch, ciphertext_128, sizeof (ciphertext_128))) 1662 return "AES-128 test encryption failed."; 1663 rijndael_decrypt (&ctx, scratch, scratch); 1664 if (memcmp (scratch, plaintext_128, sizeof (plaintext_128))) 1665 return "AES-128 test decryption failed."; 1666 1667 return NULL; 1668} 1669 1670/* Run the self-tests for AES 192. Returns NULL on success. 
*/ 1671static const char* 1672selftest_basic_192 (void) 1673{ 1674 RIJNDAEL_context ctx; 1675 unsigned char scratch[16]; 1676 1677 static unsigned char plaintext_192[16] = 1678 { 1679 0x76,0x77,0x74,0x75,0xF1,0xF2,0xF3,0xF4, 1680 0xF8,0xF9,0xE6,0xE7,0x77,0x70,0x71,0x72 1681 }; 1682 static unsigned char key_192[24] = 1683 { 1684 0x04,0x05,0x06,0x07,0x09,0x0A,0x0B,0x0C, 1685 0x0E,0x0F,0x10,0x11,0x13,0x14,0x15,0x16, 1686 0x18,0x19,0x1A,0x1B,0x1D,0x1E,0x1F,0x20 1687 }; 1688 static const unsigned char ciphertext_192[16] = 1689 { 1690 0x5D,0x1E,0xF2,0x0D,0xCE,0xD6,0xBC,0xBC, 1691 0x12,0x13,0x1A,0xC7,0xC5,0x47,0x88,0xAA 1692 }; 1693 1694 rijndael_setkey (&ctx, key_192, sizeof(key_192)); 1695 rijndael_encrypt (&ctx, scratch, plaintext_192); 1696 if (memcmp (scratch, ciphertext_192, sizeof (ciphertext_192))) 1697 return "AES-192 test encryption failed."; 1698 rijndael_decrypt (&ctx, scratch, scratch); 1699 if (memcmp (scratch, plaintext_192, sizeof (plaintext_192))) 1700 return "AES-192 test decryption failed."; 1701 1702 return NULL; 1703} 1704 1705 1706/* Run the self-tests for AES 256. Returns NULL on success. 
*/ 1707static const char* 1708selftest_basic_256 (void) 1709{ 1710 RIJNDAEL_context ctx; 1711 unsigned char scratch[16]; 1712 1713 static unsigned char plaintext_256[16] = 1714 { 1715 0x06,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F, 1716 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x21 1717 }; 1718 static unsigned char key_256[32] = 1719 { 1720 0x08,0x09,0x0A,0x0B,0x0D,0x0E,0x0F,0x10, 1721 0x12,0x13,0x14,0x15,0x17,0x18,0x19,0x1A, 1722 0x1C,0x1D,0x1E,0x1F,0x21,0x22,0x23,0x24, 1723 0x26,0x27,0x28,0x29,0x2B,0x2C,0x2D,0x2E 1724 }; 1725 static const unsigned char ciphertext_256[16] = 1726 { 1727 0x08,0x0E,0x95,0x17,0xEB,0x16,0x77,0x71, 1728 0x9A,0xCF,0x72,0x80,0x86,0x04,0x0A,0xE3 1729 }; 1730 1731 rijndael_setkey (&ctx, key_256, sizeof(key_256)); 1732 rijndael_encrypt (&ctx, scratch, plaintext_256); 1733 if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256))) 1734 return "AES-256 test encryption failed."; 1735 rijndael_decrypt (&ctx, scratch, scratch); 1736 if (memcmp (scratch, plaintext_256, sizeof (plaintext_256))) 1737 return "AES-256 test decryption failed."; 1738 1739 return NULL; 1740} 1741 1742/* Run all the self-tests and return NULL on success. This function 1743 is used for the on-the-fly self-tests. */ 1744static const char * 1745selftest (void) 1746{ 1747 const char *r; 1748 1749 if ( (r = selftest_basic_128 ()) 1750 || (r = selftest_basic_192 ()) 1751 || (r = selftest_basic_256 ()) ) 1752 return r; 1753 1754 return r; 1755} 1756 1757 1758/* SP800-38a.pdf for AES-128. 
*/ 1759static const char * 1760selftest_fips_128_38a (int requested_mode) 1761{ 1762 struct tv 1763 { 1764 int mode; 1765 const unsigned char key[16]; 1766 const unsigned char iv[16]; 1767 struct 1768 { 1769 const unsigned char input[16]; 1770 const unsigned char output[16]; 1771 } data[4]; 1772 } tv[2] = 1773 { 1774 { 1775 GCRY_CIPHER_MODE_CFB, /* F.3.13, CFB128-AES128 */ 1776 { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 1777 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c }, 1778 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 1779 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, 1780 { 1781 { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 1782 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a }, 1783 { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20, 1784 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } }, 1785 1786 { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 1787 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 }, 1788 { 0xc8, 0xa6, 0x45, 0x37, 0xa0, 0xb3, 0xa9, 0x3f, 1789 0xcd, 0xe3, 0xcd, 0xad, 0x9f, 0x1c, 0xe5, 0x8b } }, 1790 1791 { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 1792 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef }, 1793 { 0x26, 0x75, 0x1f, 0x67, 0xa3, 0xcb, 0xb1, 0x40, 1794 0xb1, 0x80, 0x8c, 0xf1, 0x87, 0xa4, 0xf4, 0xdf } }, 1795 1796 { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 1797 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 }, 1798 { 0xc0, 0x4b, 0x05, 0x35, 0x7c, 0x5d, 0x1c, 0x0e, 1799 0xea, 0xc4, 0xc6, 0x6f, 0x9f, 0xf7, 0xf2, 0xe6 } } 1800 } 1801 }, 1802 { 1803 GCRY_CIPHER_MODE_OFB, 1804 { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 1805 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c }, 1806 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 1807 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, 1808 { 1809 { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 1810 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a }, 1811 { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20, 1812 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 
0xfb, 0x4a } }, 1813 1814 { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 1815 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 }, 1816 { 0x77, 0x89, 0x50, 0x8d, 0x16, 0x91, 0x8f, 0x03, 1817 0xf5, 0x3c, 0x52, 0xda, 0xc5, 0x4e, 0xd8, 0x25 } }, 1818 1819 { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 1820 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef }, 1821 { 0x97, 0x40, 0x05, 0x1e, 0x9c, 0x5f, 0xec, 0xf6, 1822 0x43, 0x44, 0xf7, 0xa8, 0x22, 0x60, 0xed, 0xcc } }, 1823 1824 { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 1825 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 }, 1826 { 0x30, 0x4c, 0x65, 0x28, 0xf6, 0x59, 0xc7, 0x78, 1827 0x66, 0xa5, 0x10, 0xd9, 0xc1, 0xd6, 0xae, 0x5e } }, 1828 } 1829 } 1830 }; 1831 unsigned char scratch[16]; 1832 gpg_error_t err; 1833 int tvi, idx; 1834 gcry_cipher_hd_t hdenc = NULL; 1835 gcry_cipher_hd_t hddec = NULL; 1836 1837#define Fail(a) do { \ 1838 _gcry_cipher_close (hdenc); \ 1839 _gcry_cipher_close (hddec); \ 1840 return a; \ 1841 } while (0) 1842 1843 gcry_assert (sizeof tv[0].data[0].input == sizeof scratch); 1844 gcry_assert (sizeof tv[0].data[0].output == sizeof scratch); 1845 1846 for (tvi=0; tvi < DIM (tv); tvi++) 1847 if (tv[tvi].mode == requested_mode) 1848 break; 1849 if (tvi == DIM (tv)) 1850 Fail ("no test data for this mode"); 1851 1852 err = _gcry_cipher_open (&hdenc, GCRY_CIPHER_AES, tv[tvi].mode, 0); 1853 if (err) 1854 Fail ("open"); 1855 err = _gcry_cipher_open (&hddec, GCRY_CIPHER_AES, tv[tvi].mode, 0); 1856 if (err) 1857 Fail ("open"); 1858 err = _gcry_cipher_setkey (hdenc, tv[tvi].key, sizeof tv[tvi].key); 1859 if (!err) 1860 err = _gcry_cipher_setkey (hddec, tv[tvi].key, sizeof tv[tvi].key); 1861 if (err) 1862 Fail ("set key"); 1863 err = _gcry_cipher_setiv (hdenc, tv[tvi].iv, sizeof tv[tvi].iv); 1864 if (!err) 1865 err = _gcry_cipher_setiv (hddec, tv[tvi].iv, sizeof tv[tvi].iv); 1866 if (err) 1867 Fail ("set IV"); 1868 for (idx=0; idx < DIM (tv[tvi].data); idx++) 1869 { 1870 err = 
_gcry_cipher_encrypt (hdenc, scratch, sizeof scratch, 1871 tv[tvi].data[idx].input, 1872 sizeof tv[tvi].data[idx].input); 1873 if (err) 1874 Fail ("encrypt command"); 1875 if (memcmp (scratch, tv[tvi].data[idx].output, sizeof scratch)) 1876 Fail ("encrypt mismatch"); 1877 err = _gcry_cipher_decrypt (hddec, scratch, sizeof scratch, 1878 tv[tvi].data[idx].output, 1879 sizeof tv[tvi].data[idx].output); 1880 if (err) 1881 Fail ("decrypt command"); 1882 if (memcmp (scratch, tv[tvi].data[idx].input, sizeof scratch)) 1883 Fail ("decrypt mismatch"); 1884 } 1885 1886#undef Fail 1887 _gcry_cipher_close (hdenc); 1888 _gcry_cipher_close (hddec); 1889 return NULL; 1890} 1891 1892 1893/* Complete selftest for AES-128 with all modes and driver code. */ 1894static gpg_err_code_t 1895selftest_fips_128 (int extended, selftest_report_func_t report) 1896{ 1897 const char *what; 1898 const char *errtxt; 1899 1900 what = "low-level"; 1901 errtxt = selftest_basic_128 (); 1902 if (errtxt) 1903 goto failed; 1904 1905 if (extended) 1906 { 1907 what = "cfb"; 1908 errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_CFB); 1909 if (errtxt) 1910 goto failed; 1911 1912 what = "ofb"; 1913 errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_OFB); 1914 if (errtxt) 1915 goto failed; 1916 } 1917 1918 return 0; /* Succeeded. */ 1919 1920 failed: 1921 if (report) 1922 report ("cipher", GCRY_CIPHER_AES128, what, errtxt); 1923 return GPG_ERR_SELFTEST_FAILED; 1924} 1925 1926/* Complete selftest for AES-192. */ 1927static gpg_err_code_t 1928selftest_fips_192 (int extended, selftest_report_func_t report) 1929{ 1930 const char *what; 1931 const char *errtxt; 1932 1933 (void)extended; /* No extended tests available. */ 1934 1935 what = "low-level"; 1936 errtxt = selftest_basic_192 (); 1937 if (errtxt) 1938 goto failed; 1939 1940 1941 return 0; /* Succeeded. 
*/ 1942 1943 failed: 1944 if (report) 1945 report ("cipher", GCRY_CIPHER_AES192, what, errtxt); 1946 return GPG_ERR_SELFTEST_FAILED; 1947} 1948 1949 1950/* Complete selftest for AES-256. */ 1951static gpg_err_code_t 1952selftest_fips_256 (int extended, selftest_report_func_t report) 1953{ 1954 const char *what; 1955 const char *errtxt; 1956 1957 (void)extended; /* No extended tests available. */ 1958 1959 what = "low-level"; 1960 errtxt = selftest_basic_256 (); 1961 if (errtxt) 1962 goto failed; 1963 1964 return 0; /* Succeeded. */ 1965 1966 failed: 1967 if (report) 1968 report ("cipher", GCRY_CIPHER_AES256, what, errtxt); 1969 return GPG_ERR_SELFTEST_FAILED; 1970} 1971 1972 1973 1974/* Run a full self-test for ALGO and return 0 on success. */ 1975static gpg_err_code_t 1976run_selftests (int algo, int extended, selftest_report_func_t report) 1977{ 1978 gpg_err_code_t ec; 1979 1980 switch (algo) 1981 { 1982 case GCRY_CIPHER_AES128: 1983 ec = selftest_fips_128 (extended, report); 1984 break; 1985 case GCRY_CIPHER_AES192: 1986 ec = selftest_fips_192 (extended, report); 1987 break; 1988 case GCRY_CIPHER_AES256: 1989 ec = selftest_fips_256 (extended, report); 1990 break; 1991 default: 1992 ec = GPG_ERR_CIPHER_ALGO; 1993 break; 1994 1995 } 1996 return ec; 1997} 1998 1999 2000 2001 2002static const char *rijndael_names[] = 2003 { 2004 "RIJNDAEL", 2005 "AES128", 2006 "AES-128", 2007 NULL 2008 }; 2009 2010static gcry_cipher_oid_spec_t rijndael_oids[] = 2011 { 2012 { "2.16.840.1.101.3.4.1.1", GCRY_CIPHER_MODE_ECB }, 2013 { "2.16.840.1.101.3.4.1.2", GCRY_CIPHER_MODE_CBC }, 2014 { "2.16.840.1.101.3.4.1.3", GCRY_CIPHER_MODE_OFB }, 2015 { "2.16.840.1.101.3.4.1.4", GCRY_CIPHER_MODE_CFB }, 2016 { NULL } 2017 }; 2018 2019gcry_cipher_spec_t _gcry_cipher_spec_aes = 2020 { 2021 "AES", rijndael_names, rijndael_oids, 16, 128, sizeof (RIJNDAEL_context), 2022 rijndael_setkey, rijndael_encrypt, rijndael_decrypt 2023 }; 2024cipher_extra_spec_t _gcry_cipher_extraspec_aes = 2025 { 2026 
run_selftests 2027 }; 2028 2029static const char *rijndael192_names[] = 2030 { 2031 "RIJNDAEL192", 2032 "AES-192", 2033 NULL 2034 }; 2035 2036static gcry_cipher_oid_spec_t rijndael192_oids[] = 2037 { 2038 { "2.16.840.1.101.3.4.1.21", GCRY_CIPHER_MODE_ECB }, 2039 { "2.16.840.1.101.3.4.1.22", GCRY_CIPHER_MODE_CBC }, 2040 { "2.16.840.1.101.3.4.1.23", GCRY_CIPHER_MODE_OFB }, 2041 { "2.16.840.1.101.3.4.1.24", GCRY_CIPHER_MODE_CFB }, 2042 { NULL } 2043 }; 2044 2045gcry_cipher_spec_t _gcry_cipher_spec_aes192 = 2046 { 2047 "AES192", rijndael192_names, rijndael192_oids, 16, 192, sizeof (RIJNDAEL_context), 2048 rijndael_setkey, rijndael_encrypt, rijndael_decrypt 2049 }; 2050cipher_extra_spec_t _gcry_cipher_extraspec_aes192 = 2051 { 2052 run_selftests 2053 }; 2054 2055static const char *rijndael256_names[] = 2056 { 2057 "RIJNDAEL256", 2058 "AES-256", 2059 NULL 2060 }; 2061 2062static gcry_cipher_oid_spec_t rijndael256_oids[] = 2063 { 2064 { "2.16.840.1.101.3.4.1.41", GCRY_CIPHER_MODE_ECB }, 2065 { "2.16.840.1.101.3.4.1.42", GCRY_CIPHER_MODE_CBC }, 2066 { "2.16.840.1.101.3.4.1.43", GCRY_CIPHER_MODE_OFB }, 2067 { "2.16.840.1.101.3.4.1.44", GCRY_CIPHER_MODE_CFB }, 2068 { NULL } 2069 }; 2070 2071gcry_cipher_spec_t _gcry_cipher_spec_aes256 = 2072 { 2073 "AES256", rijndael256_names, rijndael256_oids, 16, 256, 2074 sizeof (RIJNDAEL_context), 2075 rijndael_setkey, rijndael_encrypt, rijndael_decrypt 2076 }; 2077 2078cipher_extra_spec_t _gcry_cipher_extraspec_aes256 = 2079 { 2080 run_selftests 2081 }; 2082