/* Rijndael (AES) for GnuPG
 * Copyright (C) 2000, 2001, 2002, 2003, 2007,
 *               2008, 2011 Free Software Foundation, Inc.
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 *******************************************************************
 * The code here is based on the optimized implementation taken from
 * http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ on Oct 2, 2000,
 * which carries this notice:
 *------------------------------------------
 * rijndael-alg-fst.c   v2.3   April '2000
 *
 * Optimised ANSI C code
 *
 * authors: v1.0: Antoon Bosselaers
 *          v2.0: Vincent Rijmen
 *          v2.3: Paulo Barreto
 *
 * This code is placed in the public domain.
 *------------------------------------------
 *
 * The SP800-38a document is available at:
 * http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf
 *
 */

#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h> /* for memcmp() */

#include "types.h"  /* for byte and u32 typedefs */
#include "g10lib.h"
#include "cipher.h"

#define MAXKC      (256/32)
#define MAXROUNDS  14
#define BLOCKSIZE  (128/8)


/* Helper macro to force alignment to 16 bytes.  */
#ifdef __GNUC__
# define ATTR_ALIGNED_16  __attribute__ ((aligned (16)))
#else
# define ATTR_ALIGNED_16
#endif


/* USE_PADLOCK indicates whether to compile the padlock specific
   code.  */
#undef USE_PADLOCK
#ifdef ENABLE_PADLOCK_SUPPORT
# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
#  define USE_PADLOCK 1
# endif
#endif /*ENABLE_PADLOCK_SUPPORT*/

/* USE_AESNI indicates whether to compile with Intel AES-NI code.  We
   need the vector-size attribute which seems to be available since
   gcc 3.  However, to be on the safe side we require at least gcc 4.  */
#undef USE_AESNI
#ifdef ENABLE_AESNI_SUPPORT
# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ >= 4
#  define USE_AESNI 1
# endif
#endif /* ENABLE_AESNI_SUPPORT */

#ifdef USE_AESNI
  typedef int m128i_t __attribute__ ((__vector_size__ (16)));
#endif /*USE_AESNI*/

/* Define an u32 variant for the sake of gcc 4.4's strict aliasing.  */
#if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )
typedef u32 __attribute__ ((__may_alias__)) u32_a_t;
#else
typedef u32 u32_a_t;
#endif


/* Our context object.  */
typedef struct
{
  /* The first fields are the keyschedule arrays.  This is so that
     they are aligned on a 16 byte boundary if using gcc.  This
     alignment is required for the AES-NI code and a good idea in any
     case.  The alignment is guaranteed due to the way cipher.c
     allocates the space for the context.  The PROPERLY_ALIGNED_TYPE
     hack is used to force a minimal alignment if not using gcc or if
     the alignment requirement is higher than 16 bytes.  */
  union
  {
    PROPERLY_ALIGNED_TYPE dummy;
    byte keyschedule[MAXROUNDS+1][4][4];
#ifdef USE_PADLOCK
    /* The key as passed to the padlock engine.  It is only used if
       the padlock engine is used (USE_PADLOCK, below).  */
    unsigned char padlock_key[16] __attribute__ ((aligned (16)));
#endif /*USE_PADLOCK*/
  } u1;
  union
  {
    PROPERLY_ALIGNED_TYPE dummy;
    byte keyschedule[MAXROUNDS+1][4][4];
  } u2;
  int rounds;               /* Key-length-dependent number of rounds.  */
  int decryption_prepared;  /* The decryption key schedule is available.  */
#ifdef USE_PADLOCK
  int use_padlock;          /* Padlock shall be used.  */
#endif /*USE_PADLOCK*/
#ifdef USE_AESNI
  int use_aesni;            /* AES-NI shall be used.  */
#endif /*USE_AESNI*/
} RIJNDAEL_context ATTR_ALIGNED_16;

/* Macros defining aliases for the keyschedules.  */
#define keyschenc  u1.keyschedule
#define keyschdec  u2.keyschedule
#define padlockkey u1.padlock_key

/* Two macros to be called before and after the use of AESNI
   instructions.  There should be no external function calls between
   the use of these macros.  Their purpose is to make sure that the
   SSE registers are cleared and won't reveal any information about
   the key or the data.  */
#ifdef USE_AESNI
# define aesni_prepare() do { } while (0)
# define aesni_cleanup() \
   do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \
                      "pxor %%xmm1, %%xmm1\n" :: ); \
   } while (0)
# define aesni_cleanup_2_4() \
   do { asm volatile ("pxor %%xmm2, %%xmm2\n\t" \
                      "pxor %%xmm3, %%xmm3\n" \
                      "pxor %%xmm4, %%xmm4\n":: ); \
   } while (0)
#else
# define aesni_prepare() do { } while (0)
# define aesni_cleanup() do { } while (0)
#endif


/* All the numbers.  */
#include "rijndael-tables.h"


/* Function prototypes.  */
#ifdef USE_AESNI
/* We don't want to inline these functions to help gcc allocate enough
   registers.  */
static void do_aesni_ctr (const RIJNDAEL_context *ctx, unsigned char *ctr,
                          unsigned char *b, const unsigned char *a)
  __attribute__ ((__noinline__));
static void do_aesni_ctr_4 (const RIJNDAEL_context *ctx, unsigned char *ctr,
                            unsigned char *b, const unsigned char *a)
  __attribute__ ((__noinline__));
#endif /*USE_AESNI*/

static const char *selftest(void);


/* Perform the key setup.  */
static gcry_err_code_t
do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen)
{
  static int initialized = 0;
  static const char *selftest_failed=0;
  int rounds;
  int i,j, r, t, rconpointer = 0;
  int KC;
  union
  {
    PROPERLY_ALIGNED_TYPE dummy;
    byte k[MAXKC][4];
  } k;
#define k k.k
  union
  {
    PROPERLY_ALIGNED_TYPE dummy;
    byte tk[MAXKC][4];
  } tk;
#define tk tk.tk

  /* The on-the-fly self tests are only run in non-fips mode.  In fips
     mode explicit self-tests are required.  Actually the on-the-fly
     self-tests are not fully thread-safe and it might happen that a
     failed self-test won't get noticed in another thread.

     FIXME: We might want to have a central registry of succeeded
     self-tests.
*/ 206 if (!fips_mode () && !initialized) 207 { 208 initialized = 1; 209 selftest_failed = selftest (); 210 if (selftest_failed) 211 log_error ("%s\n", selftest_failed ); 212 } 213 if (selftest_failed) 214 return GPG_ERR_SELFTEST_FAILED; 215 216 ctx->decryption_prepared = 0; 217#ifdef USE_PADLOCK 218 ctx->use_padlock = 0; 219#endif 220#ifdef USE_AESNI 221 ctx->use_aesni = 0; 222#endif 223 224 if( keylen == 128/8 ) 225 { 226 rounds = 10; 227 KC = 4; 228 229 if (0) 230 ; 231#ifdef USE_PADLOCK 232 else if ((_gcry_get_hw_features () & HWF_PADLOCK_AES)) 233 { 234 ctx->use_padlock = 1; 235 memcpy (ctx->padlockkey, key, keylen); 236 } 237#endif 238#ifdef USE_AESNI 239 else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI)) 240 { 241 ctx->use_aesni = 1; 242 } 243#endif 244 } 245 else if ( keylen == 192/8 ) 246 { 247 rounds = 12; 248 KC = 6; 249 250 if (0) 251 { 252 ; 253 } 254#ifdef USE_AESNI 255 else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI)) 256 { 257 ctx->use_aesni = 1; 258 } 259#endif 260 } 261 else if ( keylen == 256/8 ) 262 { 263 rounds = 14; 264 KC = 8; 265 266 if (0) 267 { 268 ; 269 } 270#ifdef USE_AESNI 271 else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI)) 272 { 273 ctx->use_aesni = 1; 274 } 275#endif 276 } 277 else 278 return GPG_ERR_INV_KEYLEN; 279 280 ctx->rounds = rounds; 281 282 if (0) 283 ; 284#ifdef USE_PADLOCK 285 else if (ctx->use_padlock) 286 { 287 /* Nothing to do as we support only hardware key generation for 288 now. */ 289 } 290#endif /*USE_PADLOCK*/ 291#ifdef USE_AESNI_is_disabled_here 292 else if (ctx->use_aesni && ctx->rounds == 10) 293 { 294 /* Note: This code works for AES-128 but it is not much better 295 than using the standard key schedule. We disable it for 296 now and don't put any effort into implementing this for 297 AES-192 and AES-256. 
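          (For reference: each AESKEYGENASSIST leaves SubWord(RotWord(w3))
          XORed with the round constant in the top dword of xmm2; the
          .Lexpand128 helper broadcasts that dword via the 0xff shuffle
          and XORs it, together with the byte-shifted copies of the
          previous round key produced by the PSLLDQ/PXOR sequence, into
          xmm1, which is the standard AES-128 key expansion.)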
*/ 298 asm volatile ("movl %[key], %%esi\n\t" 299 "movdqu (%%esi), %%xmm1\n\t" /* xmm1 := key */ 300 "movl %[ksch], %%esi\n\t" 301 "movdqa %%xmm1, (%%esi)\n\t" /* ksch[0] := xmm1 */ 302 "aeskeygenassist $0x01, %%xmm1, %%xmm2\n\t" 303 "call .Lexpand128_%=\n\t" 304 "movdqa %%xmm1, 0x10(%%esi)\n\t" /* ksch[1] := xmm1 */ 305 "aeskeygenassist $0x02, %%xmm1, %%xmm2\n\t" 306 "call .Lexpand128_%=\n\t" 307 "movdqa %%xmm1, 0x20(%%esi)\n\t" /* ksch[2] := xmm1 */ 308 "aeskeygenassist $0x04, %%xmm1, %%xmm2\n\t" 309 "call .Lexpand128_%=\n\t" 310 "movdqa %%xmm1, 0x30(%%esi)\n\t" /* ksch[3] := xmm1 */ 311 "aeskeygenassist $0x08, %%xmm1, %%xmm2\n\t" 312 "call .Lexpand128_%=\n\t" 313 "movdqa %%xmm1, 0x40(%%esi)\n\t" /* ksch[4] := xmm1 */ 314 "aeskeygenassist $0x10, %%xmm1, %%xmm2\n\t" 315 "call .Lexpand128_%=\n\t" 316 "movdqa %%xmm1, 0x50(%%esi)\n\t" /* ksch[5] := xmm1 */ 317 "aeskeygenassist $0x20, %%xmm1, %%xmm2\n\t" 318 "call .Lexpand128_%=\n\t" 319 "movdqa %%xmm1, 0x60(%%esi)\n\t" /* ksch[6] := xmm1 */ 320 "aeskeygenassist $0x40, %%xmm1, %%xmm2\n\t" 321 "call .Lexpand128_%=\n\t" 322 "movdqa %%xmm1, 0x70(%%esi)\n\t" /* ksch[7] := xmm1 */ 323 "aeskeygenassist $0x80, %%xmm1, %%xmm2\n\t" 324 "call .Lexpand128_%=\n\t" 325 "movdqa %%xmm1, 0x80(%%esi)\n\t" /* ksch[8] := xmm1 */ 326 "aeskeygenassist $0x1b, %%xmm1, %%xmm2\n\t" 327 "call .Lexpand128_%=\n\t" 328 "movdqa %%xmm1, 0x90(%%esi)\n\t" /* ksch[9] := xmm1 */ 329 "aeskeygenassist $0x36, %%xmm1, %%xmm2\n\t" 330 "call .Lexpand128_%=\n\t" 331 "movdqa %%xmm1, 0xa0(%%esi)\n\t" /* ksch[10] := xmm1 */ 332 "jmp .Lleave%=\n" 333 334 ".Lexpand128_%=:\n\t" 335 "pshufd $0xff, %%xmm2, %%xmm2\n\t" 336 "movdqa %%xmm1, %%xmm3\n\t" 337 "pslldq $4, %%xmm3\n\t" 338 "pxor %%xmm3, %%xmm1\n\t" 339 "pslldq $4, %%xmm3\n\t" 340 "pxor %%xmm3, %%xmm1\n\t" 341 "pslldq $4, %%xmm3\n\t" 342 "pxor %%xmm3, %%xmm2\n\t" 343 "pxor %%xmm2, %%xmm1\n\t" 344 "ret\n" 345 346 ".Lleave%=:\n\t" 347 "pxor %%xmm1, %%xmm1\n\t" 348 "pxor %%xmm2, %%xmm2\n\t" 349 "pxor %%xmm3, %%xmm3\n" 350 : 351 : [key] "g" (key), [ksch] "g" (ctx->keyschenc) 352 : "%esi", "cc", "memory" ); 353 } 354#endif /*USE_AESNI*/ 355 else 356 { 357#define W (ctx->keyschenc) 358 for (i = 0; i < keylen; i++) 359 { 360 k[i >> 2][i & 3] = key[i]; 361 } 362 363 for (j = KC-1; j >= 0; j--) 364 { 365 *((u32*)tk[j]) = *((u32*)k[j]); 366 } 367 r = 0; 368 t = 0; 369 /* Copy values into round key array. */ 370 for (j = 0; (j < KC) && (r < rounds + 1); ) 371 { 372 for (; (j < KC) && (t < 4); j++, t++) 373 { 374 *((u32*)W[r][t]) = *((u32*)tk[j]); 375 } 376 if (t == 4) 377 { 378 r++; 379 t = 0; 380 } 381 } 382 383 while (r < rounds + 1) 384 { 385 /* While not enough round key material calculated calculate 386 new values. */ 387 tk[0][0] ^= S[tk[KC-1][1]]; 388 tk[0][1] ^= S[tk[KC-1][2]]; 389 tk[0][2] ^= S[tk[KC-1][3]]; 390 tk[0][3] ^= S[tk[KC-1][0]]; 391 tk[0][0] ^= rcon[rconpointer++]; 392 393 if (KC != 8) 394 { 395 for (j = 1; j < KC; j++) 396 { 397 *((u32*)tk[j]) ^= *((u32*)tk[j-1]); 398 } 399 } 400 else 401 { 402 for (j = 1; j < KC/2; j++) 403 { 404 *((u32*)tk[j]) ^= *((u32*)tk[j-1]); 405 } 406 tk[KC/2][0] ^= S[tk[KC/2 - 1][0]]; 407 tk[KC/2][1] ^= S[tk[KC/2 - 1][1]]; 408 tk[KC/2][2] ^= S[tk[KC/2 - 1][2]]; 409 tk[KC/2][3] ^= S[tk[KC/2 - 1][3]]; 410 for (j = KC/2 + 1; j < KC; j++) 411 { 412 *((u32*)tk[j]) ^= *((u32*)tk[j-1]); 413 } 414 } 415 416 /* Copy values into round key array. 
*/ 417 for (j = 0; (j < KC) && (r < rounds + 1); ) 418 { 419 for (; (j < KC) && (t < 4); j++, t++) 420 { 421 *((u32*)W[r][t]) = *((u32*)tk[j]); 422 } 423 if (t == 4) 424 { 425 r++; 426 t = 0; 427 } 428 } 429 } 430#undef W 431 } 432 433 return 0; 434#undef tk 435#undef k 436} 437 438 439static gcry_err_code_t 440rijndael_setkey (void *context, const byte *key, const unsigned keylen) 441{ 442 RIJNDAEL_context *ctx = context; 443 444 int rc = do_setkey (ctx, key, keylen); 445 _gcry_burn_stack ( 100 + 16*sizeof(int)); 446 return rc; 447} 448 449 450/* Make a decryption key from an encryption key. */ 451static void 452prepare_decryption( RIJNDAEL_context *ctx ) 453{ 454 int r; 455 456#ifdef USE_AESNI 457 if (ctx->use_aesni) 458 { 459 /* The AES-NI decrypt instructions use the Equivalent Inverse 460 Cipher, thus we can't use the the standard decrypt key 461 preparation. */ 462 m128i_t *ekey = (m128i_t*)ctx->keyschenc; 463 m128i_t *dkey = (m128i_t*)ctx->keyschdec; 464 int rr; 465 466 dkey[0] = ekey[ctx->rounds]; 467 for (r=1, rr=ctx->rounds-1; r < ctx->rounds; r++, rr--) 468 { 469 asm volatile 470 ("movdqu %[ekey], %%xmm1\n\t" 471 /*"aesimc %%xmm1, %%xmm1\n\t"*/ 472 ".byte 0x66, 0x0f, 0x38, 0xdb, 0xc9\n\t" 473 "movdqu %%xmm1, %[dkey]" 474 : [dkey] "=m" (dkey[r]) 475 : [ekey] "m" (ekey[rr]) ); 476 } 477 dkey[r] = ekey[0]; 478 } 479 else 480#endif /*USE_AESNI*/ 481 { 482 union 483 { 484 PROPERLY_ALIGNED_TYPE dummy; 485 byte *w; 486 } w; 487#define w w.w 488 489 for (r=0; r < MAXROUNDS+1; r++ ) 490 { 491 *((u32*)ctx->keyschdec[r][0]) = *((u32*)ctx->keyschenc[r][0]); 492 *((u32*)ctx->keyschdec[r][1]) = *((u32*)ctx->keyschenc[r][1]); 493 *((u32*)ctx->keyschdec[r][2]) = *((u32*)ctx->keyschenc[r][2]); 494 *((u32*)ctx->keyschdec[r][3]) = *((u32*)ctx->keyschenc[r][3]); 495 } 496#define W (ctx->keyschdec) 497 for (r = 1; r < ctx->rounds; r++) 498 { 499 w = W[r][0]; 500 *((u32*)w) = *((u32*)U1[w[0]]) ^ *((u32*)U2[w[1]]) 501 ^ *((u32*)U3[w[2]]) ^ *((u32*)U4[w[3]]); 502 503 w = W[r][1]; 504 *((u32*)w) = *((u32*)U1[w[0]]) ^ *((u32*)U2[w[1]]) 505 ^ *((u32*)U3[w[2]]) ^ *((u32*)U4[w[3]]); 506 507 w = W[r][2]; 508 *((u32*)w) = *((u32*)U1[w[0]]) ^ *((u32*)U2[w[1]]) 509 ^ *((u32*)U3[w[2]]) ^ *((u32*)U4[w[3]]); 510 511 w = W[r][3]; 512 *((u32*)w) = *((u32*)U1[w[0]]) ^ *((u32*)U2[w[1]]) 513 ^ *((u32*)U3[w[2]]) ^ *((u32*)U4[w[3]]); 514 } 515#undef W 516#undef w 517 } 518} 519 520 521/* Encrypt one block. A and B need to be aligned on a 4 byte 522 boundary. A and B may be the same. */ 523static void 524do_encrypt_aligned (const RIJNDAEL_context *ctx, 525 unsigned char *b, const unsigned char *a) 526{ 527#define rk (ctx->keyschenc) 528 int rounds = ctx->rounds; 529 int r; 530 union 531 { 532 u32 tempu32[4]; /* Force correct alignment. 
*/ 533 byte temp[4][4]; 534 } u; 535 536 *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(a )) ^ *((u32_a_t*)rk[0][0]); 537 *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(a+ 4)) ^ *((u32_a_t*)rk[0][1]); 538 *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(a+ 8)) ^ *((u32_a_t*)rk[0][2]); 539 *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(a+12)) ^ *((u32_a_t*)rk[0][3]); 540 *((u32_a_t*)(b )) = (*((u32_a_t*)T1[u.temp[0][0]]) 541 ^ *((u32_a_t*)T2[u.temp[1][1]]) 542 ^ *((u32_a_t*)T3[u.temp[2][2]]) 543 ^ *((u32_a_t*)T4[u.temp[3][3]])); 544 *((u32_a_t*)(b + 4)) = (*((u32_a_t*)T1[u.temp[1][0]]) 545 ^ *((u32_a_t*)T2[u.temp[2][1]]) 546 ^ *((u32_a_t*)T3[u.temp[3][2]]) 547 ^ *((u32_a_t*)T4[u.temp[0][3]])); 548 *((u32_a_t*)(b + 8)) = (*((u32_a_t*)T1[u.temp[2][0]]) 549 ^ *((u32_a_t*)T2[u.temp[3][1]]) 550 ^ *((u32_a_t*)T3[u.temp[0][2]]) 551 ^ *((u32_a_t*)T4[u.temp[1][3]])); 552 *((u32_a_t*)(b +12)) = (*((u32_a_t*)T1[u.temp[3][0]]) 553 ^ *((u32_a_t*)T2[u.temp[0][1]]) 554 ^ *((u32_a_t*)T3[u.temp[1][2]]) 555 ^ *((u32_a_t*)T4[u.temp[2][3]])); 556 557 for (r = 1; r < rounds-1; r++) 558 { 559 *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b )) ^ *((u32_a_t*)rk[r][0]); 560 *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[r][1]); 561 *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[r][2]); 562 *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[r][3]); 563 564 *((u32_a_t*)(b )) = (*((u32_a_t*)T1[u.temp[0][0]]) 565 ^ *((u32_a_t*)T2[u.temp[1][1]]) 566 ^ *((u32_a_t*)T3[u.temp[2][2]]) 567 ^ *((u32_a_t*)T4[u.temp[3][3]])); 568 *((u32_a_t*)(b + 4)) = (*((u32_a_t*)T1[u.temp[1][0]]) 569 ^ *((u32_a_t*)T2[u.temp[2][1]]) 570 ^ *((u32_a_t*)T3[u.temp[3][2]]) 571 ^ *((u32_a_t*)T4[u.temp[0][3]])); 572 *((u32_a_t*)(b + 8)) = (*((u32_a_t*)T1[u.temp[2][0]]) 573 ^ *((u32_a_t*)T2[u.temp[3][1]]) 574 ^ *((u32_a_t*)T3[u.temp[0][2]]) 575 ^ *((u32_a_t*)T4[u.temp[1][3]])); 576 *((u32_a_t*)(b +12)) = (*((u32_a_t*)T1[u.temp[3][0]]) 577 ^ *((u32_a_t*)T2[u.temp[0][1]]) 578 ^ *((u32_a_t*)T3[u.temp[1][2]]) 579 ^ *((u32_a_t*)T4[u.temp[2][3]])); 580 } 581 582 /* Last round is special. */ 583 *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b )) ^ *((u32_a_t*)rk[rounds-1][0]); 584 *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[rounds-1][1]); 585 *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[rounds-1][2]); 586 *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[rounds-1][3]); 587 b[ 0] = T1[u.temp[0][0]][1]; 588 b[ 1] = T1[u.temp[1][1]][1]; 589 b[ 2] = T1[u.temp[2][2]][1]; 590 b[ 3] = T1[u.temp[3][3]][1]; 591 b[ 4] = T1[u.temp[1][0]][1]; 592 b[ 5] = T1[u.temp[2][1]][1]; 593 b[ 6] = T1[u.temp[3][2]][1]; 594 b[ 7] = T1[u.temp[0][3]][1]; 595 b[ 8] = T1[u.temp[2][0]][1]; 596 b[ 9] = T1[u.temp[3][1]][1]; 597 b[10] = T1[u.temp[0][2]][1]; 598 b[11] = T1[u.temp[1][3]][1]; 599 b[12] = T1[u.temp[3][0]][1]; 600 b[13] = T1[u.temp[0][1]][1]; 601 b[14] = T1[u.temp[1][2]][1]; 602 b[15] = T1[u.temp[2][3]][1]; 603 *((u32_a_t*)(b )) ^= *((u32_a_t*)rk[rounds][0]); 604 *((u32_a_t*)(b+ 4)) ^= *((u32_a_t*)rk[rounds][1]); 605 *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[rounds][2]); 606 *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[rounds][3]); 607#undef rk 608} 609 610 611static void 612do_encrypt (const RIJNDAEL_context *ctx, 613 unsigned char *bx, const unsigned char *ax) 614{ 615 /* BX and AX are not necessary correctly aligned. Thus we might 616 need to copy them here. We try to align to a 16 bytes. 
 */
  if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f))
    {
      union
      {
        u32  dummy[4];
        byte a[16] ATTR_ALIGNED_16;
      } a;
      union
      {
        u32  dummy[4];
        byte b[16] ATTR_ALIGNED_16;
      } b;

      memcpy (a.a, ax, 16);
      do_encrypt_aligned (ctx, b.b, a.a);
      memcpy (bx, b.b, 16);
    }
  else
    {
      do_encrypt_aligned (ctx, bx, ax);
    }
}


/* Encrypt or decrypt one block using the padlock engine.  A and B may
   be the same.  */
#ifdef USE_PADLOCK
static void
do_padlock (const RIJNDAEL_context *ctx, int decrypt_flag,
            unsigned char *bx, const unsigned char *ax)
{
  /* BX and AX are not necessarily correctly aligned.  Thus we need to
     copy them here.  */
  unsigned char a[16] __attribute__ ((aligned (16)));
  unsigned char b[16] __attribute__ ((aligned (16)));
  unsigned int cword[4] __attribute__ ((aligned (16)));

  /* The control word fields are:
      127:12   11:10 9     8     7     6     5     4     3:0
      RESERVED KSIZE CRYPT INTER KEYGN CIPHR ALIGN DGEST ROUND  */
  cword[0] = (ctx->rounds & 15);  /* (The mask is just a safeguard.)  */
  cword[1] = 0;
  cword[2] = 0;
  cword[3] = 0;
  if (decrypt_flag)
    cword[0] |= 0x00000200;

  memcpy (a, ax, 16);

  asm volatile
    ("pushfl\n\t"           /* Force key reload.  */
     "popfl\n\t"
     "xchg %3, %%ebx\n\t"   /* Load key.  */
     "movl $1, %%ecx\n\t"   /* Init counter for just one block.  */
     ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XSTORE ECB. */
     "xchg %3, %%ebx\n"     /* Restore GOT register.  */
      : /* No output */
      : "S" (a), "D" (b), "d" (cword), "r" (ctx->padlockkey)
      : "%ecx", "cc", "memory"
      );

  memcpy (bx, b, 16);

}
#endif /*USE_PADLOCK*/


#ifdef USE_AESNI
/* Encrypt one block using the Intel AES-NI instructions.  A and B may
   be the same; they need to be properly aligned to 16 bytes.

   Our problem here is that gcc does not allow the "x" constraint for
   SSE registers in asm unless you compile with -msse.  The common
   wisdom is to use a separate file for SSE instructions and build it
   separately.  This would require a lot of extra build system stuff,
   similar to what we do in mpi/ for the asm stuff.  What we do
   instead is to use standard registers and a bit more of plain asm
   which copies the data and key stuff to the SSE registers and later
   back.  If we decide to implement some block modes with parallelized
   AES instructions, it might indeed be better to use plain asm ala
   mpi/.  */
static void
do_aesni_enc_aligned (const RIJNDAEL_context *ctx,
                      unsigned char *b, const unsigned char *a)
{
#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
  /* Note: For now we relax the alignment requirement for A and B: It
     does not make much difference because in many cases we would need
     to memcpy them to an extra buffer; using the movdqu is much faster
     than memcpy and movdqa.  For CFB we know that the IV is properly
     aligned but that is a special case.  We should better implement
     CFB directly in asm.
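     (The AES-NI round instructions below are emitted as raw .byte
     sequences, e.g. 0x66 0x0f 0x38 0xdc 0xc1 for "aesenc %xmm1, %xmm0"
     in AT&T syntax, presumably so that the file also assembles with
     binutils versions that do not yet know the AES-NI mnemonics.)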
*/ 710 asm volatile ("movdqu %[src], %%xmm0\n\t" /* xmm0 := *a */ 711 "movl %[key], %%esi\n\t" /* esi := keyschenc */ 712 "movdqa (%%esi), %%xmm1\n\t" /* xmm1 := key[0] */ 713 "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ 714 "movdqa 0x10(%%esi), %%xmm1\n\t" 715 aesenc_xmm1_xmm0 716 "movdqa 0x20(%%esi), %%xmm1\n\t" 717 aesenc_xmm1_xmm0 718 "movdqa 0x30(%%esi), %%xmm1\n\t" 719 aesenc_xmm1_xmm0 720 "movdqa 0x40(%%esi), %%xmm1\n\t" 721 aesenc_xmm1_xmm0 722 "movdqa 0x50(%%esi), %%xmm1\n\t" 723 aesenc_xmm1_xmm0 724 "movdqa 0x60(%%esi), %%xmm1\n\t" 725 aesenc_xmm1_xmm0 726 "movdqa 0x70(%%esi), %%xmm1\n\t" 727 aesenc_xmm1_xmm0 728 "movdqa 0x80(%%esi), %%xmm1\n\t" 729 aesenc_xmm1_xmm0 730 "movdqa 0x90(%%esi), %%xmm1\n\t" 731 aesenc_xmm1_xmm0 732 "movdqa 0xa0(%%esi), %%xmm1\n\t" 733 "cmp $10, %[rounds]\n\t" 734 "jz .Lenclast%=\n\t" 735 aesenc_xmm1_xmm0 736 "movdqa 0xb0(%%esi), %%xmm1\n\t" 737 aesenc_xmm1_xmm0 738 "movdqa 0xc0(%%esi), %%xmm1\n\t" 739 "cmp $12, %[rounds]\n\t" 740 "jz .Lenclast%=\n\t" 741 aesenc_xmm1_xmm0 742 "movdqa 0xd0(%%esi), %%xmm1\n\t" 743 aesenc_xmm1_xmm0 744 "movdqa 0xe0(%%esi), %%xmm1\n" 745 746 ".Lenclast%=:\n\t" 747 aesenclast_xmm1_xmm0 748 "movdqu %%xmm0, %[dst]\n" 749 : [dst] "=m" (*b) 750 : [src] "m" (*a), 751 [key] "r" (ctx->keyschenc), 752 [rounds] "r" (ctx->rounds) 753 : "%esi", "cc", "memory"); 754#undef aesenc_xmm1_xmm0 755#undef aesenclast_xmm1_xmm0 756} 757 758 759static void 760do_aesni_dec_aligned (const RIJNDAEL_context *ctx, 761 unsigned char *b, const unsigned char *a) 762{ 763#define aesdec_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xde, 0xc1\n\t" 764#define aesdeclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xc1\n\t" 765 asm volatile ("movdqu %[src], %%xmm0\n\t" /* xmm0 := *a */ 766 "movl %[key], %%esi\n\t" 767 "movdqa (%%esi), %%xmm1\n\t" 768 "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ 769 "movdqa 0x10(%%esi), %%xmm1\n\t" 770 aesdec_xmm1_xmm0 771 "movdqa 0x20(%%esi), %%xmm1\n\t" 772 aesdec_xmm1_xmm0 773 "movdqa 0x30(%%esi), %%xmm1\n\t" 774 aesdec_xmm1_xmm0 775 "movdqa 0x40(%%esi), %%xmm1\n\t" 776 aesdec_xmm1_xmm0 777 "movdqa 0x50(%%esi), %%xmm1\n\t" 778 aesdec_xmm1_xmm0 779 "movdqa 0x60(%%esi), %%xmm1\n\t" 780 aesdec_xmm1_xmm0 781 "movdqa 0x70(%%esi), %%xmm1\n\t" 782 aesdec_xmm1_xmm0 783 "movdqa 0x80(%%esi), %%xmm1\n\t" 784 aesdec_xmm1_xmm0 785 "movdqa 0x90(%%esi), %%xmm1\n\t" 786 aesdec_xmm1_xmm0 787 "movdqa 0xa0(%%esi), %%xmm1\n\t" 788 "cmp $10, %[rounds]\n\t" 789 "jz .Ldeclast%=\n\t" 790 aesdec_xmm1_xmm0 791 "movdqa 0xb0(%%esi), %%xmm1\n\t" 792 aesdec_xmm1_xmm0 793 "movdqa 0xc0(%%esi), %%xmm1\n\t" 794 "cmp $12, %[rounds]\n\t" 795 "jz .Ldeclast%=\n\t" 796 aesdec_xmm1_xmm0 797 "movdqa 0xd0(%%esi), %%xmm1\n\t" 798 aesdec_xmm1_xmm0 799 "movdqa 0xe0(%%esi), %%xmm1\n" 800 801 ".Ldeclast%=:\n\t" 802 aesdeclast_xmm1_xmm0 803 "movdqu %%xmm0, %[dst]\n" 804 : [dst] "=m" (*b) 805 : [src] "m" (*a), 806 [key] "r" (ctx->keyschdec), 807 [rounds] "r" (ctx->rounds) 808 : "%esi", "cc", "memory"); 809#undef aesdec_xmm1_xmm0 810#undef aesdeclast_xmm1_xmm0 811} 812 813 814/* Perform a CFB encryption or decryption round using the 815 initialization vector IV and the input block A. Write the result 816 to the output block B and update IV. IV needs to be 16 byte 817 aligned. 
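   Note that CFB uses the block cipher in the encryption direction for
   both operations: the current IV is encrypted and XORed with the
   input block; DECRYPT_FLAG only selects whether the resulting output
   block (encryption) or the original input block (decryption) is
   stored back as the new IV.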
*/ 818static void 819do_aesni_cfb (const RIJNDAEL_context *ctx, int decrypt_flag, 820 unsigned char *iv, unsigned char *b, const unsigned char *a) 821{ 822#define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" 823#define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" 824 asm volatile ("movdqa %[iv], %%xmm0\n\t" /* xmm0 := IV */ 825 "movl %[key], %%esi\n\t" /* esi := keyschenc */ 826 "movdqa (%%esi), %%xmm1\n\t" /* xmm1 := key[0] */ 827 "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ 828 "movdqa 0x10(%%esi), %%xmm1\n\t" 829 aesenc_xmm1_xmm0 830 "movdqa 0x20(%%esi), %%xmm1\n\t" 831 aesenc_xmm1_xmm0 832 "movdqa 0x30(%%esi), %%xmm1\n\t" 833 aesenc_xmm1_xmm0 834 "movdqa 0x40(%%esi), %%xmm1\n\t" 835 aesenc_xmm1_xmm0 836 "movdqa 0x50(%%esi), %%xmm1\n\t" 837 aesenc_xmm1_xmm0 838 "movdqa 0x60(%%esi), %%xmm1\n\t" 839 aesenc_xmm1_xmm0 840 "movdqa 0x70(%%esi), %%xmm1\n\t" 841 aesenc_xmm1_xmm0 842 "movdqa 0x80(%%esi), %%xmm1\n\t" 843 aesenc_xmm1_xmm0 844 "movdqa 0x90(%%esi), %%xmm1\n\t" 845 aesenc_xmm1_xmm0 846 "movdqa 0xa0(%%esi), %%xmm1\n\t" 847 "cmp $10, %[rounds]\n\t" 848 "jz .Lenclast%=\n\t" 849 aesenc_xmm1_xmm0 850 "movdqa 0xb0(%%esi), %%xmm1\n\t" 851 aesenc_xmm1_xmm0 852 "movdqa 0xc0(%%esi), %%xmm1\n\t" 853 "cmp $12, %[rounds]\n\t" 854 "jz .Lenclast%=\n\t" 855 aesenc_xmm1_xmm0 856 "movdqa 0xd0(%%esi), %%xmm1\n\t" 857 aesenc_xmm1_xmm0 858 "movdqa 0xe0(%%esi), %%xmm1\n" 859 860 ".Lenclast%=:\n\t" 861 aesenclast_xmm1_xmm0 862 "movdqu %[src], %%xmm1\n\t" /* Save input. */ 863 "pxor %%xmm1, %%xmm0\n\t" /* xmm0 = input ^ IV */ 864 865 "cmp $1, %[decrypt]\n\t" 866 "jz .Ldecrypt_%=\n\t" 867 "movdqa %%xmm0, %[iv]\n\t" /* [encrypt] Store IV. */ 868 "jmp .Lleave_%=\n" 869 ".Ldecrypt_%=:\n\t" 870 "movdqa %%xmm1, %[iv]\n" /* [decrypt] Store IV. */ 871 ".Lleave_%=:\n\t" 872 "movdqu %%xmm0, %[dst]\n" /* Store output. */ 873 : [iv] "+m" (*iv), [dst] "=m" (*b) 874 : [src] "m" (*a), 875 [key] "g" (ctx->keyschenc), 876 [rounds] "g" (ctx->rounds), 877 [decrypt] "m" (decrypt_flag) 878 : "%esi", "cc", "memory"); 879#undef aesenc_xmm1_xmm0 880#undef aesenclast_xmm1_xmm0 881} 882 883/* Perform a CTR encryption round using the counter CTR and the input 884 block A. Write the result to the output block B and update CTR. 885 CTR needs to be a 16 byte aligned little-endian value. */ 886static void 887do_aesni_ctr (const RIJNDAEL_context *ctx, 888 unsigned char *ctr, unsigned char *b, const unsigned char *a) 889{ 890#define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" 891#define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" 892 static unsigned char be_mask[16] __attribute__ ((aligned (16))) = 893 { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; 894 895 asm volatile ("movdqa %[ctr], %%xmm0\n\t" /* xmm0, xmm2 := CTR */ 896 "movaps %%xmm0, %%xmm2\n\t" 897 "mov $1, %%esi\n\t" /* xmm2++ (big-endian) */ 898 "movd %%esi, %%xmm1\n\t" 899 "pshufb %[mask], %%xmm2\n\t" 900 "paddq %%xmm1, %%xmm2\n\t" 901 "pshufb %[mask], %%xmm2\n\t" 902 "movdqa %%xmm2, %[ctr]\n" /* Update CTR. 
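                 (The counter is kept as a big-endian value: PSHUFB with
                  BE_MASK byte-swaps it, PADDQ adds one to its low
                  quadword, and a second PSHUFB restores the big-endian
                  byte order before the updated value is written back for
                  the next call.)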
*/ 903 904 "movl %[key], %%esi\n\t" /* esi := keyschenc */ 905 "movdqa (%%esi), %%xmm1\n\t" /* xmm1 := key[0] */ 906 "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ 907 "movdqa 0x10(%%esi), %%xmm1\n\t" 908 aesenc_xmm1_xmm0 909 "movdqa 0x20(%%esi), %%xmm1\n\t" 910 aesenc_xmm1_xmm0 911 "movdqa 0x30(%%esi), %%xmm1\n\t" 912 aesenc_xmm1_xmm0 913 "movdqa 0x40(%%esi), %%xmm1\n\t" 914 aesenc_xmm1_xmm0 915 "movdqa 0x50(%%esi), %%xmm1\n\t" 916 aesenc_xmm1_xmm0 917 "movdqa 0x60(%%esi), %%xmm1\n\t" 918 aesenc_xmm1_xmm0 919 "movdqa 0x70(%%esi), %%xmm1\n\t" 920 aesenc_xmm1_xmm0 921 "movdqa 0x80(%%esi), %%xmm1\n\t" 922 aesenc_xmm1_xmm0 923 "movdqa 0x90(%%esi), %%xmm1\n\t" 924 aesenc_xmm1_xmm0 925 "movdqa 0xa0(%%esi), %%xmm1\n\t" 926 "cmp $10, %[rounds]\n\t" 927 "jz .Lenclast%=\n\t" 928 aesenc_xmm1_xmm0 929 "movdqa 0xb0(%%esi), %%xmm1\n\t" 930 aesenc_xmm1_xmm0 931 "movdqa 0xc0(%%esi), %%xmm1\n\t" 932 "cmp $12, %[rounds]\n\t" 933 "jz .Lenclast%=\n\t" 934 aesenc_xmm1_xmm0 935 "movdqa 0xd0(%%esi), %%xmm1\n\t" 936 aesenc_xmm1_xmm0 937 "movdqa 0xe0(%%esi), %%xmm1\n" 938 939 ".Lenclast%=:\n\t" 940 aesenclast_xmm1_xmm0 941 "movdqu %[src], %%xmm1\n\t" /* xmm1 := input */ 942 "pxor %%xmm1, %%xmm0\n\t" /* EncCTR ^= input */ 943 "movdqu %%xmm0, %[dst]" /* Store EncCTR. */ 944 945 : [ctr] "+m" (*ctr), [dst] "=m" (*b) 946 : [src] "m" (*a), 947 [key] "g" (ctx->keyschenc), 948 [rounds] "g" (ctx->rounds), 949 [mask] "m" (*be_mask) 950 : "%esi", "cc", "memory"); 951#undef aesenc_xmm1_xmm0 952#undef aesenclast_xmm1_xmm0 953} 954 955 956/* Four blocks at a time variant of do_aesni_ctr. */ 957static void 958do_aesni_ctr_4 (const RIJNDAEL_context *ctx, 959 unsigned char *ctr, unsigned char *b, const unsigned char *a) 960{ 961#define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" 962#define aesenc_xmm1_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd1\n\t" 963#define aesenc_xmm1_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd9\n\t" 964#define aesenc_xmm1_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xe1\n\t" 965#define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" 966#define aesenclast_xmm1_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd1\n\t" 967#define aesenclast_xmm1_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd9\n\t" 968#define aesenclast_xmm1_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xe1\n\t" 969 970 static unsigned char be_mask[16] __attribute__ ((aligned (16))) = 971 { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; 972 973 /* Register usage: 974 esi keyschedule 975 xmm0 CTR-0 976 xmm1 temp / round key 977 xmm2 CTR-1 978 xmm3 CTR-2 979 xmm4 CTR-3 980 xmm5 temp 981 */ 982 983 asm volatile ("movdqa %[ctr], %%xmm0\n\t" /* xmm0, xmm2 := CTR */ 984 "movaps %%xmm0, %%xmm2\n\t" 985 "mov $1, %%esi\n\t" /* xmm1 := 1 */ 986 "movd %%esi, %%xmm1\n\t" 987 "pshufb %[mask], %%xmm2\n\t" /* xmm2 := le(xmm2) */ 988 "paddq %%xmm1, %%xmm2\n\t" /* xmm2++ */ 989 "movaps %%xmm2, %%xmm3\n\t" /* xmm3 := xmm2 */ 990 "paddq %%xmm1, %%xmm3\n\t" /* xmm3++ */ 991 "movaps %%xmm3, %%xmm4\n\t" /* xmm4 := xmm3 */ 992 "paddq %%xmm1, %%xmm4\n\t" /* xmm4++ */ 993 "movaps %%xmm4, %%xmm5\n\t" /* xmm5 := xmm4 */ 994 "paddq %%xmm1, %%xmm5\n\t" /* xmm5++ */ 995 "pshufb %[mask], %%xmm2\n\t" /* xmm2 := be(xmm2) */ 996 "pshufb %[mask], %%xmm3\n\t" /* xmm3 := be(xmm3) */ 997 "pshufb %[mask], %%xmm4\n\t" /* xmm4 := be(xmm4) */ 998 "pshufb %[mask], %%xmm5\n\t" /* xmm5 := be(xmm5) */ 999 "movdqa %%xmm5, %[ctr]\n" /* Update CTR. 
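                 (At this point xmm0 still holds the original CTR while
                  xmm2, xmm3 and xmm4 hold CTR+1, CTR+2 and CTR+3; xmm5 =
                  CTR+4 is written back so that the next call continues
                  with the first unused counter value.)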
*/ 1000 1001 "movl %[key], %%esi\n\t" /* esi := keyschenc */ 1002 "movdqa (%%esi), %%xmm1\n\t" /* xmm1 := key[0] */ 1003 "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ 1004 "pxor %%xmm1, %%xmm2\n\t" /* xmm2 ^= key[0] */ 1005 "pxor %%xmm1, %%xmm3\n\t" /* xmm3 ^= key[0] */ 1006 "pxor %%xmm1, %%xmm4\n\t" /* xmm4 ^= key[0] */ 1007 "movdqa 0x10(%%esi), %%xmm1\n\t" 1008 aesenc_xmm1_xmm0 1009 aesenc_xmm1_xmm2 1010 aesenc_xmm1_xmm3 1011 aesenc_xmm1_xmm4 1012 "movdqa 0x20(%%esi), %%xmm1\n\t" 1013 aesenc_xmm1_xmm0 1014 aesenc_xmm1_xmm2 1015 aesenc_xmm1_xmm3 1016 aesenc_xmm1_xmm4 1017 "movdqa 0x30(%%esi), %%xmm1\n\t" 1018 aesenc_xmm1_xmm0 1019 aesenc_xmm1_xmm2 1020 aesenc_xmm1_xmm3 1021 aesenc_xmm1_xmm4 1022 "movdqa 0x40(%%esi), %%xmm1\n\t" 1023 aesenc_xmm1_xmm0 1024 aesenc_xmm1_xmm2 1025 aesenc_xmm1_xmm3 1026 aesenc_xmm1_xmm4 1027 "movdqa 0x50(%%esi), %%xmm1\n\t" 1028 aesenc_xmm1_xmm0 1029 aesenc_xmm1_xmm2 1030 aesenc_xmm1_xmm3 1031 aesenc_xmm1_xmm4 1032 "movdqa 0x60(%%esi), %%xmm1\n\t" 1033 aesenc_xmm1_xmm0 1034 aesenc_xmm1_xmm2 1035 aesenc_xmm1_xmm3 1036 aesenc_xmm1_xmm4 1037 "movdqa 0x70(%%esi), %%xmm1\n\t" 1038 aesenc_xmm1_xmm0 1039 aesenc_xmm1_xmm2 1040 aesenc_xmm1_xmm3 1041 aesenc_xmm1_xmm4 1042 "movdqa 0x80(%%esi), %%xmm1\n\t" 1043 aesenc_xmm1_xmm0 1044 aesenc_xmm1_xmm2 1045 aesenc_xmm1_xmm3 1046 aesenc_xmm1_xmm4 1047 "movdqa 0x90(%%esi), %%xmm1\n\t" 1048 aesenc_xmm1_xmm0 1049 aesenc_xmm1_xmm2 1050 aesenc_xmm1_xmm3 1051 aesenc_xmm1_xmm4 1052 "movdqa 0xa0(%%esi), %%xmm1\n\t" 1053 "cmp $10, %[rounds]\n\t" 1054 "jz .Lenclast%=\n\t" 1055 aesenc_xmm1_xmm0 1056 aesenc_xmm1_xmm2 1057 aesenc_xmm1_xmm3 1058 aesenc_xmm1_xmm4 1059 "movdqa 0xb0(%%esi), %%xmm1\n\t" 1060 aesenc_xmm1_xmm0 1061 aesenc_xmm1_xmm2 1062 aesenc_xmm1_xmm3 1063 aesenc_xmm1_xmm4 1064 "movdqa 0xc0(%%esi), %%xmm1\n\t" 1065 "cmp $12, %[rounds]\n\t" 1066 "jz .Lenclast%=\n\t" 1067 aesenc_xmm1_xmm0 1068 aesenc_xmm1_xmm2 1069 aesenc_xmm1_xmm3 1070 aesenc_xmm1_xmm4 1071 "movdqa 0xd0(%%esi), %%xmm1\n\t" 1072 aesenc_xmm1_xmm0 1073 aesenc_xmm1_xmm2 1074 aesenc_xmm1_xmm3 1075 aesenc_xmm1_xmm4 1076 "movdqa 0xe0(%%esi), %%xmm1\n" 1077 1078 ".Lenclast%=:\n\t" 1079 aesenclast_xmm1_xmm0 1080 aesenclast_xmm1_xmm2 1081 aesenclast_xmm1_xmm3 1082 aesenclast_xmm1_xmm4 1083 1084 "movdqu %[src], %%xmm1\n\t" /* Get block 1. */ 1085 "pxor %%xmm1, %%xmm0\n\t" /* EncCTR-1 ^= input */ 1086 "movdqu %%xmm0, %[dst]\n\t" /* Store block 1 */ 1087 1088 "movdqu (16)%[src], %%xmm1\n\t" /* Get block 2. */ 1089 "pxor %%xmm1, %%xmm2\n\t" /* EncCTR-2 ^= input */ 1090 "movdqu %%xmm2, (16)%[dst]\n\t" /* Store block 2. */ 1091 1092 "movdqu (32)%[src], %%xmm1\n\t" /* Get block 3. */ 1093 "pxor %%xmm1, %%xmm3\n\t" /* EncCTR-3 ^= input */ 1094 "movdqu %%xmm3, (32)%[dst]\n\t" /* Store block 3. */ 1095 1096 "movdqu (48)%[src], %%xmm1\n\t" /* Get block 4. */ 1097 "pxor %%xmm1, %%xmm4\n\t" /* EncCTR-4 ^= input */ 1098 "movdqu %%xmm4, (48)%[dst]" /* Store block 4. 
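                 (All four keystream blocks are combined with the input
                  using unaligned movdqu loads and stores, so SRC and DST
                  need no particular alignment.)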
*/ 1099 1100 : [ctr] "+m" (*ctr), [dst] "=m" (*b) 1101 : [src] "m" (*a), 1102 [key] "g" (ctx->keyschenc), 1103 [rounds] "g" (ctx->rounds), 1104 [mask] "m" (*be_mask) 1105 : "%esi", "cc", "memory"); 1106#undef aesenc_xmm1_xmm0 1107#undef aesenc_xmm1_xmm2 1108#undef aesenc_xmm1_xmm3 1109#undef aesenc_xmm1_xmm4 1110#undef aesenclast_xmm1_xmm0 1111#undef aesenclast_xmm1_xmm2 1112#undef aesenclast_xmm1_xmm3 1113#undef aesenclast_xmm1_xmm4 1114} 1115 1116 1117static void 1118do_aesni (RIJNDAEL_context *ctx, int decrypt_flag, 1119 unsigned char *bx, const unsigned char *ax) 1120{ 1121 1122 if (decrypt_flag) 1123 { 1124 if (!ctx->decryption_prepared ) 1125 { 1126 prepare_decryption ( ctx ); 1127 ctx->decryption_prepared = 1; 1128 } 1129 do_aesni_dec_aligned (ctx, bx, ax); 1130 } 1131 else 1132 do_aesni_enc_aligned (ctx, bx, ax); 1133} 1134#endif /*USE_AESNI*/ 1135 1136 1137static void 1138rijndael_encrypt (void *context, byte *b, const byte *a) 1139{ 1140 RIJNDAEL_context *ctx = context; 1141 1142 if (0) 1143 ; 1144#ifdef USE_PADLOCK 1145 else if (ctx->use_padlock) 1146 { 1147 do_padlock (ctx, 0, b, a); 1148 _gcry_burn_stack (48 + 15 /* possible padding for alignment */); 1149 } 1150#endif /*USE_PADLOCK*/ 1151#ifdef USE_AESNI 1152 else if (ctx->use_aesni) 1153 { 1154 aesni_prepare (); 1155 do_aesni (ctx, 0, b, a); 1156 aesni_cleanup (); 1157 } 1158#endif /*USE_AESNI*/ 1159 else 1160 { 1161 do_encrypt (ctx, b, a); 1162 _gcry_burn_stack (56 + 2*sizeof(int)); 1163 } 1164} 1165 1166 1167/* Bulk encryption of complete blocks in CFB mode. Caller needs to 1168 make sure that IV is aligned on an unsigned long boundary. This 1169 function is only intended for the bulk encryption feature of 1170 cipher.c. */ 1171void 1172_gcry_aes_cfb_enc (void *context, unsigned char *iv, 1173 void *outbuf_arg, const void *inbuf_arg, 1174 unsigned int nblocks) 1175{ 1176 RIJNDAEL_context *ctx = context; 1177 unsigned char *outbuf = outbuf_arg; 1178 const unsigned char *inbuf = inbuf_arg; 1179 unsigned char *ivp; 1180 int i; 1181 1182 if (0) 1183 ; 1184#ifdef USE_PADLOCK 1185 else if (ctx->use_padlock) 1186 { 1187 /* Fixme: Let Padlock do the CFBing. */ 1188 for ( ;nblocks; nblocks-- ) 1189 { 1190 /* Encrypt the IV. */ 1191 do_padlock (ctx, 0, iv, iv); 1192 /* XOR the input with the IV and store input into IV. */ 1193 for (ivp=iv,i=0; i < BLOCKSIZE; i++ ) 1194 *outbuf++ = (*ivp++ ^= *inbuf++); 1195 } 1196 } 1197#endif /*USE_PADLOCK*/ 1198#ifdef USE_AESNI 1199 else if (ctx->use_aesni) 1200 { 1201 aesni_prepare (); 1202 for ( ;nblocks; nblocks-- ) 1203 { 1204 do_aesni_cfb (ctx, 0, iv, outbuf, inbuf); 1205 outbuf += BLOCKSIZE; 1206 inbuf += BLOCKSIZE; 1207 } 1208 aesni_cleanup (); 1209 } 1210#endif /*USE_AESNI*/ 1211 else 1212 { 1213 for ( ;nblocks; nblocks-- ) 1214 { 1215 /* Encrypt the IV. */ 1216 do_encrypt_aligned (ctx, iv, iv); 1217 /* XOR the input with the IV and store input into IV. */ 1218 for (ivp=iv,i=0; i < BLOCKSIZE; i++ ) 1219 *outbuf++ = (*ivp++ ^= *inbuf++); 1220 } 1221 } 1222 1223 _gcry_burn_stack (48 + 2*sizeof(int)); 1224} 1225 1226 1227/* Bulk encryption of complete blocks in CBC mode. Caller needs to 1228 make sure that IV is aligned on an unsigned long boundary. This 1229 function is only intended for the bulk encryption feature of 1230 cipher.c. 
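   If CBC_MAC is set the output pointer is not advanced, so on return
   OUTBUF holds only the last ciphertext block, i.e. the CBC-MAC of the
   input.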
*/ 1231void 1232_gcry_aes_cbc_enc (void *context, unsigned char *iv, 1233 void *outbuf_arg, const void *inbuf_arg, 1234 unsigned int nblocks, int cbc_mac) 1235{ 1236 RIJNDAEL_context *ctx = context; 1237 unsigned char *outbuf = outbuf_arg; 1238 const unsigned char *inbuf = inbuf_arg; 1239 unsigned char *ivp; 1240 int i; 1241 1242#ifdef USE_AESNI 1243 if (ctx->use_aesni) 1244 aesni_prepare (); 1245#endif /*USE_AESNI*/ 1246 1247 for ( ;nblocks; nblocks-- ) 1248 { 1249 for (ivp=iv, i=0; i < BLOCKSIZE; i++ ) 1250 outbuf[i] = inbuf[i] ^ *ivp++; 1251 1252 if (0) 1253 ; 1254#ifdef USE_PADLOCK 1255 else if (ctx->use_padlock) 1256 do_padlock (ctx, 0, outbuf, outbuf); 1257#endif /*USE_PADLOCK*/ 1258#ifdef USE_AESNI 1259 else if (ctx->use_aesni) 1260 do_aesni (ctx, 0, outbuf, outbuf); 1261#endif /*USE_AESNI*/ 1262 else 1263 do_encrypt (ctx, outbuf, outbuf ); 1264 1265 memcpy (iv, outbuf, BLOCKSIZE); 1266 inbuf += BLOCKSIZE; 1267 if (!cbc_mac) 1268 outbuf += BLOCKSIZE; 1269 } 1270 1271#ifdef USE_AESNI 1272 if (ctx->use_aesni) 1273 aesni_cleanup (); 1274#endif /*USE_AESNI*/ 1275 1276 _gcry_burn_stack (48 + 2*sizeof(int)); 1277} 1278 1279 1280/* Bulk encryption of complete blocks in CTR mode. Caller needs to 1281 make sure that CTR is aligned on a 16 byte boundary if AESNI; the 1282 minimum alignment is for an u32. This function is only intended 1283 for the bulk encryption feature of cipher.c. CTR is expected to be 1284 of size BLOCKSIZE. */ 1285void 1286_gcry_aes_ctr_enc (void *context, unsigned char *ctr, 1287 void *outbuf_arg, const void *inbuf_arg, 1288 unsigned int nblocks) 1289{ 1290 RIJNDAEL_context *ctx = context; 1291 unsigned char *outbuf = outbuf_arg; 1292 const unsigned char *inbuf = inbuf_arg; 1293 unsigned char *p; 1294 int i; 1295 1296 if (0) 1297 ; 1298#ifdef USE_AESNI 1299 else if (ctx->use_aesni) 1300 { 1301 aesni_prepare (); 1302 for ( ;nblocks > 3 ; nblocks -= 4 ) 1303 { 1304 do_aesni_ctr_4 (ctx, ctr, outbuf, inbuf); 1305 outbuf += 4*BLOCKSIZE; 1306 inbuf += 4*BLOCKSIZE; 1307 } 1308 for ( ;nblocks; nblocks-- ) 1309 { 1310 do_aesni_ctr (ctx, ctr, outbuf, inbuf); 1311 outbuf += BLOCKSIZE; 1312 inbuf += BLOCKSIZE; 1313 } 1314 aesni_cleanup (); 1315 aesni_cleanup_2_4 (); 1316 } 1317#endif /*USE_AESNI*/ 1318 else 1319 { 1320 union { unsigned char x1[16]; u32 x32[4]; } tmp; 1321 1322 for ( ;nblocks; nblocks-- ) 1323 { 1324 /* Encrypt the counter. */ 1325 do_encrypt_aligned (ctx, tmp.x1, ctr); 1326 /* XOR the input with the encrypted counter and store in output. */ 1327 for (p=tmp.x1, i=0; i < BLOCKSIZE; i++) 1328 *outbuf++ = (*p++ ^= *inbuf++); 1329 /* Increment the counter. */ 1330 for (i = BLOCKSIZE; i > 0; i--) 1331 { 1332 ctr[i-1]++; 1333 if (ctr[i-1]) 1334 break; 1335 } 1336 } 1337 } 1338 1339 _gcry_burn_stack (48 + 2*sizeof(int)); 1340} 1341 1342 1343 1344/* Decrypt one block. A and B need to be aligned on a 4 byte boundary 1345 and the decryption must have been prepared. A and B may be the 1346 same. */ 1347static void 1348do_decrypt_aligned (RIJNDAEL_context *ctx, 1349 unsigned char *b, const unsigned char *a) 1350{ 1351#define rk (ctx->keyschdec) 1352 int rounds = ctx->rounds; 1353 int r; 1354 union 1355 { 1356 u32 tempu32[4]; /* Force correct alignment. 
*/ 1357 byte temp[4][4]; 1358 } u; 1359 1360 1361 *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(a )) ^ *((u32_a_t*)rk[rounds][0]); 1362 *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(a+ 4)) ^ *((u32_a_t*)rk[rounds][1]); 1363 *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(a+ 8)) ^ *((u32_a_t*)rk[rounds][2]); 1364 *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(a+12)) ^ *((u32_a_t*)rk[rounds][3]); 1365 1366 *((u32_a_t*)(b )) = (*((u32_a_t*)T5[u.temp[0][0]]) 1367 ^ *((u32_a_t*)T6[u.temp[3][1]]) 1368 ^ *((u32_a_t*)T7[u.temp[2][2]]) 1369 ^ *((u32_a_t*)T8[u.temp[1][3]])); 1370 *((u32_a_t*)(b+ 4)) = (*((u32_a_t*)T5[u.temp[1][0]]) 1371 ^ *((u32_a_t*)T6[u.temp[0][1]]) 1372 ^ *((u32_a_t*)T7[u.temp[3][2]]) 1373 ^ *((u32_a_t*)T8[u.temp[2][3]])); 1374 *((u32_a_t*)(b+ 8)) = (*((u32_a_t*)T5[u.temp[2][0]]) 1375 ^ *((u32_a_t*)T6[u.temp[1][1]]) 1376 ^ *((u32_a_t*)T7[u.temp[0][2]]) 1377 ^ *((u32_a_t*)T8[u.temp[3][3]])); 1378 *((u32_a_t*)(b+12)) = (*((u32_a_t*)T5[u.temp[3][0]]) 1379 ^ *((u32_a_t*)T6[u.temp[2][1]]) 1380 ^ *((u32_a_t*)T7[u.temp[1][2]]) 1381 ^ *((u32_a_t*)T8[u.temp[0][3]])); 1382 1383 for (r = rounds-1; r > 1; r--) 1384 { 1385 *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b )) ^ *((u32_a_t*)rk[r][0]); 1386 *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[r][1]); 1387 *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[r][2]); 1388 *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[r][3]); 1389 *((u32_a_t*)(b )) = (*((u32_a_t*)T5[u.temp[0][0]]) 1390 ^ *((u32_a_t*)T6[u.temp[3][1]]) 1391 ^ *((u32_a_t*)T7[u.temp[2][2]]) 1392 ^ *((u32_a_t*)T8[u.temp[1][3]])); 1393 *((u32_a_t*)(b+ 4)) = (*((u32_a_t*)T5[u.temp[1][0]]) 1394 ^ *((u32_a_t*)T6[u.temp[0][1]]) 1395 ^ *((u32_a_t*)T7[u.temp[3][2]]) 1396 ^ *((u32_a_t*)T8[u.temp[2][3]])); 1397 *((u32_a_t*)(b+ 8)) = (*((u32_a_t*)T5[u.temp[2][0]]) 1398 ^ *((u32_a_t*)T6[u.temp[1][1]]) 1399 ^ *((u32_a_t*)T7[u.temp[0][2]]) 1400 ^ *((u32_a_t*)T8[u.temp[3][3]])); 1401 *((u32_a_t*)(b+12)) = (*((u32_a_t*)T5[u.temp[3][0]]) 1402 ^ *((u32_a_t*)T6[u.temp[2][1]]) 1403 ^ *((u32_a_t*)T7[u.temp[1][2]]) 1404 ^ *((u32_a_t*)T8[u.temp[0][3]])); 1405 } 1406 1407 /* Last round is special. */ 1408 *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b )) ^ *((u32_a_t*)rk[1][0]); 1409 *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[1][1]); 1410 *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[1][2]); 1411 *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[1][3]); 1412 b[ 0] = S5[u.temp[0][0]]; 1413 b[ 1] = S5[u.temp[3][1]]; 1414 b[ 2] = S5[u.temp[2][2]]; 1415 b[ 3] = S5[u.temp[1][3]]; 1416 b[ 4] = S5[u.temp[1][0]]; 1417 b[ 5] = S5[u.temp[0][1]]; 1418 b[ 6] = S5[u.temp[3][2]]; 1419 b[ 7] = S5[u.temp[2][3]]; 1420 b[ 8] = S5[u.temp[2][0]]; 1421 b[ 9] = S5[u.temp[1][1]]; 1422 b[10] = S5[u.temp[0][2]]; 1423 b[11] = S5[u.temp[3][3]]; 1424 b[12] = S5[u.temp[3][0]]; 1425 b[13] = S5[u.temp[2][1]]; 1426 b[14] = S5[u.temp[1][2]]; 1427 b[15] = S5[u.temp[0][3]]; 1428 *((u32_a_t*)(b )) ^= *((u32_a_t*)rk[0][0]); 1429 *((u32_a_t*)(b+ 4)) ^= *((u32_a_t*)rk[0][1]); 1430 *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[0][2]); 1431 *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[0][3]); 1432#undef rk 1433} 1434 1435 1436/* Decrypt one block. AX and BX may be the same. */ 1437static void 1438do_decrypt (RIJNDAEL_context *ctx, byte *bx, const byte *ax) 1439{ 1440 if ( !ctx->decryption_prepared ) 1441 { 1442 prepare_decryption ( ctx ); 1443 _gcry_burn_stack (64); 1444 ctx->decryption_prepared = 1; 1445 } 1446 1447 /* BX and AX are not necessary correctly aligned. Thus we might 1448 need to copy them here. 
We try to align to a 16 bytes. */ 1449 if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f)) 1450 { 1451 union 1452 { 1453 u32 dummy[4]; 1454 byte a[16] ATTR_ALIGNED_16; 1455 } a; 1456 union 1457 { 1458 u32 dummy[4]; 1459 byte b[16] ATTR_ALIGNED_16; 1460 } b; 1461 1462 memcpy (a.a, ax, 16); 1463 do_decrypt_aligned (ctx, b.b, a.a); 1464 memcpy (bx, b.b, 16); 1465 } 1466 else 1467 { 1468 do_decrypt_aligned (ctx, bx, ax); 1469 } 1470} 1471 1472 1473 1474 1475static void 1476rijndael_decrypt (void *context, byte *b, const byte *a) 1477{ 1478 RIJNDAEL_context *ctx = context; 1479 1480 if (0) 1481 ; 1482#ifdef USE_PADLOCK 1483 else if (ctx->use_padlock) 1484 { 1485 do_padlock (ctx, 1, b, a); 1486 _gcry_burn_stack (48 + 2*sizeof(int) /* FIXME */); 1487 } 1488#endif /*USE_PADLOCK*/ 1489#ifdef USE_AESNI 1490 else if (ctx->use_aesni) 1491 { 1492 aesni_prepare (); 1493 do_aesni (ctx, 1, b, a); 1494 aesni_cleanup (); 1495 } 1496#endif /*USE_AESNI*/ 1497 else 1498 { 1499 do_decrypt (ctx, b, a); 1500 _gcry_burn_stack (56+2*sizeof(int)); 1501 } 1502} 1503 1504 1505/* Bulk decryption of complete blocks in CFB mode. Caller needs to 1506 make sure that IV is aligned on an unisgned lonhg boundary. This 1507 function is only intended for the bulk encryption feature of 1508 cipher.c. */ 1509void 1510_gcry_aes_cfb_dec (void *context, unsigned char *iv, 1511 void *outbuf_arg, const void *inbuf_arg, 1512 unsigned int nblocks) 1513{ 1514 RIJNDAEL_context *ctx = context; 1515 unsigned char *outbuf = outbuf_arg; 1516 const unsigned char *inbuf = inbuf_arg; 1517 unsigned char *ivp; 1518 unsigned char temp; 1519 int i; 1520 1521 if (0) 1522 ; 1523#ifdef USE_PADLOCK 1524 else if (ctx->use_padlock) 1525 { 1526 /* Fixme: Let Padlock do the CFBing. */ 1527 for ( ;nblocks; nblocks-- ) 1528 { 1529 do_padlock (ctx, 0, iv, iv); 1530 for (ivp=iv,i=0; i < BLOCKSIZE; i++ ) 1531 { 1532 temp = *inbuf++; 1533 *outbuf++ = *ivp ^ temp; 1534 *ivp++ = temp; 1535 } 1536 } 1537 } 1538#endif /*USE_PADLOCK*/ 1539#ifdef USE_AESNI 1540 else if (ctx->use_aesni) 1541 { 1542 aesni_prepare (); 1543 for ( ;nblocks; nblocks-- ) 1544 { 1545 do_aesni_cfb (ctx, 1, iv, outbuf, inbuf); 1546 outbuf += BLOCKSIZE; 1547 inbuf += BLOCKSIZE; 1548 } 1549 aesni_cleanup (); 1550 } 1551#endif /*USE_AESNI*/ 1552 else 1553 { 1554 for ( ;nblocks; nblocks-- ) 1555 { 1556 do_encrypt_aligned (ctx, iv, iv); 1557 for (ivp=iv,i=0; i < BLOCKSIZE; i++ ) 1558 { 1559 temp = *inbuf++; 1560 *outbuf++ = *ivp ^ temp; 1561 *ivp++ = temp; 1562 } 1563 } 1564 } 1565 1566 _gcry_burn_stack (48 + 2*sizeof(int)); 1567} 1568 1569 1570/* Bulk decryption of complete blocks in CBC mode. Caller needs to 1571 make sure that IV is aligned on an unsigned long boundary. This 1572 function is only intended for the bulk encryption feature of 1573 cipher.c. */ 1574void 1575_gcry_aes_cbc_dec (void *context, unsigned char *iv, 1576 void *outbuf_arg, const void *inbuf_arg, 1577 unsigned int nblocks) 1578{ 1579 RIJNDAEL_context *ctx = context; 1580 unsigned char *outbuf = outbuf_arg; 1581 const unsigned char *inbuf = inbuf_arg; 1582 unsigned char *ivp; 1583 int i; 1584 unsigned char savebuf[BLOCKSIZE]; 1585 1586#ifdef USE_AESNI 1587 if (ctx->use_aesni) 1588 aesni_prepare (); 1589#endif /*USE_AESNI*/ 1590 1591 for ( ;nblocks; nblocks-- ) 1592 { 1593 /* We need to save INBUF away because it may be identical to 1594 OUTBUF. 
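            (This covers in-place CBC decryption: the original ciphertext
             block must still be available as the next IV after OUTBUF has
             been overwritten with the plaintext.)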
*/ 1595 memcpy (savebuf, inbuf, BLOCKSIZE); 1596 1597 if (0) 1598 ; 1599#ifdef USE_PADLOCK 1600 else if (ctx->use_padlock) 1601 do_padlock (ctx, 1, outbuf, inbuf); 1602#endif /*USE_PADLOCK*/ 1603#ifdef USE_AESNI 1604 else if (ctx->use_aesni) 1605 do_aesni (ctx, 1, outbuf, inbuf); 1606#endif /*USE_AESNI*/ 1607 else 1608 do_decrypt (ctx, outbuf, inbuf); 1609 1610 for (ivp=iv, i=0; i < BLOCKSIZE; i++ ) 1611 outbuf[i] ^= *ivp++; 1612 memcpy (iv, savebuf, BLOCKSIZE); 1613 inbuf += BLOCKSIZE; 1614 outbuf += BLOCKSIZE; 1615 } 1616 1617#ifdef USE_AESNI 1618 if (ctx->use_aesni) 1619 aesni_cleanup (); 1620#endif /*USE_AESNI*/ 1621 1622 _gcry_burn_stack (48 + 2*sizeof(int) + BLOCKSIZE + 4*sizeof (char*)); 1623} 1624 1625 1626 1627 1628/* Run the self-tests for AES 128. Returns NULL on success. */ 1629static const char* 1630selftest_basic_128 (void) 1631{ 1632 RIJNDAEL_context ctx; 1633 unsigned char scratch[16]; 1634 1635 /* The test vectors are from the AES supplied ones; more or less 1636 randomly taken from ecb_tbl.txt (I=42,81,14) */ 1637#if 1 1638 static const unsigned char plaintext_128[16] = 1639 { 1640 0x01,0x4B,0xAF,0x22,0x78,0xA6,0x9D,0x33, 1641 0x1D,0x51,0x80,0x10,0x36,0x43,0xE9,0x9A 1642 }; 1643 static const unsigned char key_128[16] = 1644 { 1645 0xE8,0xE9,0xEA,0xEB,0xED,0xEE,0xEF,0xF0, 1646 0xF2,0xF3,0xF4,0xF5,0xF7,0xF8,0xF9,0xFA 1647 }; 1648 static const unsigned char ciphertext_128[16] = 1649 { 1650 0x67,0x43,0xC3,0xD1,0x51,0x9A,0xB4,0xF2, 1651 0xCD,0x9A,0x78,0xAB,0x09,0xA5,0x11,0xBD 1652 }; 1653#else 1654 /* Test vectors from fips-197, appendix C. */ 1655# warning debug test vectors in use 1656 static const unsigned char plaintext_128[16] = 1657 { 1658 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77, 1659 0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff 1660 }; 1661 static const unsigned char key_128[16] = 1662 { 1663 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 1664 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f 1665 /* 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, */ 1666 /* 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c */ 1667 }; 1668 static const unsigned char ciphertext_128[16] = 1669 { 1670 0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30, 1671 0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a 1672 }; 1673#endif 1674 1675 rijndael_setkey (&ctx, key_128, sizeof (key_128)); 1676 rijndael_encrypt (&ctx, scratch, plaintext_128); 1677 if (memcmp (scratch, ciphertext_128, sizeof (ciphertext_128))) 1678 return "AES-128 test encryption failed."; 1679 rijndael_decrypt (&ctx, scratch, scratch); 1680 if (memcmp (scratch, plaintext_128, sizeof (plaintext_128))) 1681 return "AES-128 test decryption failed."; 1682 1683 return NULL; 1684} 1685 1686/* Run the self-tests for AES 192. Returns NULL on success. 
*/ 1687static const char* 1688selftest_basic_192 (void) 1689{ 1690 RIJNDAEL_context ctx; 1691 unsigned char scratch[16]; 1692 1693 static unsigned char plaintext_192[16] = 1694 { 1695 0x76,0x77,0x74,0x75,0xF1,0xF2,0xF3,0xF4, 1696 0xF8,0xF9,0xE6,0xE7,0x77,0x70,0x71,0x72 1697 }; 1698 static unsigned char key_192[24] = 1699 { 1700 0x04,0x05,0x06,0x07,0x09,0x0A,0x0B,0x0C, 1701 0x0E,0x0F,0x10,0x11,0x13,0x14,0x15,0x16, 1702 0x18,0x19,0x1A,0x1B,0x1D,0x1E,0x1F,0x20 1703 }; 1704 static const unsigned char ciphertext_192[16] = 1705 { 1706 0x5D,0x1E,0xF2,0x0D,0xCE,0xD6,0xBC,0xBC, 1707 0x12,0x13,0x1A,0xC7,0xC5,0x47,0x88,0xAA 1708 }; 1709 1710 rijndael_setkey (&ctx, key_192, sizeof(key_192)); 1711 rijndael_encrypt (&ctx, scratch, plaintext_192); 1712 if (memcmp (scratch, ciphertext_192, sizeof (ciphertext_192))) 1713 return "AES-192 test encryption failed."; 1714 rijndael_decrypt (&ctx, scratch, scratch); 1715 if (memcmp (scratch, plaintext_192, sizeof (plaintext_192))) 1716 return "AES-192 test decryption failed."; 1717 1718 return NULL; 1719} 1720 1721 1722/* Run the self-tests for AES 256. Returns NULL on success. */ 1723static const char* 1724selftest_basic_256 (void) 1725{ 1726 RIJNDAEL_context ctx; 1727 unsigned char scratch[16]; 1728 1729 static unsigned char plaintext_256[16] = 1730 { 1731 0x06,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F, 1732 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x21 1733 }; 1734 static unsigned char key_256[32] = 1735 { 1736 0x08,0x09,0x0A,0x0B,0x0D,0x0E,0x0F,0x10, 1737 0x12,0x13,0x14,0x15,0x17,0x18,0x19,0x1A, 1738 0x1C,0x1D,0x1E,0x1F,0x21,0x22,0x23,0x24, 1739 0x26,0x27,0x28,0x29,0x2B,0x2C,0x2D,0x2E 1740 }; 1741 static const unsigned char ciphertext_256[16] = 1742 { 1743 0x08,0x0E,0x95,0x17,0xEB,0x16,0x77,0x71, 1744 0x9A,0xCF,0x72,0x80,0x86,0x04,0x0A,0xE3 1745 }; 1746 1747 rijndael_setkey (&ctx, key_256, sizeof(key_256)); 1748 rijndael_encrypt (&ctx, scratch, plaintext_256); 1749 if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256))) 1750 return "AES-256 test encryption failed."; 1751 rijndael_decrypt (&ctx, scratch, scratch); 1752 if (memcmp (scratch, plaintext_256, sizeof (plaintext_256))) 1753 return "AES-256 test decryption failed."; 1754 1755 return NULL; 1756} 1757 1758/* Run all the self-tests and return NULL on success. This function 1759 is used for the on-the-fly self-tests. */ 1760static const char * 1761selftest (void) 1762{ 1763 const char *r; 1764 1765 if ( (r = selftest_basic_128 ()) 1766 || (r = selftest_basic_192 ()) 1767 || (r = selftest_basic_256 ()) ) 1768 return r; 1769 1770 return r; 1771} 1772 1773 1774/* SP800-38a.pdf for AES-128. 
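   Known-answer tests for AES-128 in CFB and OFB mode taken from NIST
   SP 800-38A; REQUESTED_MODE selects which of the two vector sets is
   used, and every vector is checked in both the encrypt and the
   decrypt direction.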
*/ 1775static const char * 1776selftest_fips_128_38a (int requested_mode) 1777{ 1778 struct tv 1779 { 1780 int mode; 1781 const unsigned char key[16]; 1782 const unsigned char iv[16]; 1783 struct 1784 { 1785 const unsigned char input[16]; 1786 const unsigned char output[16]; 1787 } data[4]; 1788 } tv[2] = 1789 { 1790 { 1791 GCRY_CIPHER_MODE_CFB, /* F.3.13, CFB128-AES128 */ 1792 { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 1793 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c }, 1794 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 1795 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, 1796 { 1797 { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 1798 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a }, 1799 { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20, 1800 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } }, 1801 1802 { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 1803 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 }, 1804 { 0xc8, 0xa6, 0x45, 0x37, 0xa0, 0xb3, 0xa9, 0x3f, 1805 0xcd, 0xe3, 0xcd, 0xad, 0x9f, 0x1c, 0xe5, 0x8b } }, 1806 1807 { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 1808 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef }, 1809 { 0x26, 0x75, 0x1f, 0x67, 0xa3, 0xcb, 0xb1, 0x40, 1810 0xb1, 0x80, 0x8c, 0xf1, 0x87, 0xa4, 0xf4, 0xdf } }, 1811 1812 { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 1813 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 }, 1814 { 0xc0, 0x4b, 0x05, 0x35, 0x7c, 0x5d, 0x1c, 0x0e, 1815 0xea, 0xc4, 0xc6, 0x6f, 0x9f, 0xf7, 0xf2, 0xe6 } } 1816 } 1817 }, 1818 { 1819 GCRY_CIPHER_MODE_OFB, 1820 { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 1821 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c }, 1822 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 1823 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, 1824 { 1825 { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 1826 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a }, 1827 { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20, 1828 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } }, 1829 1830 { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 1831 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 }, 1832 { 0x77, 0x89, 0x50, 0x8d, 0x16, 0x91, 0x8f, 0x03, 1833 0xf5, 0x3c, 0x52, 0xda, 0xc5, 0x4e, 0xd8, 0x25 } }, 1834 1835 { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 1836 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef }, 1837 { 0x97, 0x40, 0x05, 0x1e, 0x9c, 0x5f, 0xec, 0xf6, 1838 0x43, 0x44, 0xf7, 0xa8, 0x22, 0x60, 0xed, 0xcc } }, 1839 1840 { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 1841 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 }, 1842 { 0x30, 0x4c, 0x65, 0x28, 0xf6, 0x59, 0xc7, 0x78, 1843 0x66, 0xa5, 0x10, 0xd9, 0xc1, 0xd6, 0xae, 0x5e } }, 1844 } 1845 } 1846 }; 1847 unsigned char scratch[16]; 1848 gpg_error_t err; 1849 int tvi, idx; 1850 gcry_cipher_hd_t hdenc = NULL; 1851 gcry_cipher_hd_t hddec = NULL; 1852 1853#define Fail(a) do { \ 1854 _gcry_cipher_close (hdenc); \ 1855 _gcry_cipher_close (hddec); \ 1856 return a; \ 1857 } while (0) 1858 1859 gcry_assert (sizeof tv[0].data[0].input == sizeof scratch); 1860 gcry_assert (sizeof tv[0].data[0].output == sizeof scratch); 1861 1862 for (tvi=0; tvi < DIM (tv); tvi++) 1863 if (tv[tvi].mode == requested_mode) 1864 break; 1865 if (tvi == DIM (tv)) 1866 Fail ("no test data for this mode"); 1867 1868 err = _gcry_cipher_open (&hdenc, GCRY_CIPHER_AES, tv[tvi].mode, 0); 1869 if (err) 1870 Fail ("open"); 1871 err = _gcry_cipher_open (&hddec, GCRY_CIPHER_AES, tv[tvi].mode, 0); 1872 if (err) 1873 Fail ("open"); 1874 err = _gcry_cipher_setkey 
(hdenc, tv[tvi].key, sizeof tv[tvi].key); 1875 if (!err) 1876 err = _gcry_cipher_setkey (hddec, tv[tvi].key, sizeof tv[tvi].key); 1877 if (err) 1878 Fail ("set key"); 1879 err = _gcry_cipher_setiv (hdenc, tv[tvi].iv, sizeof tv[tvi].iv); 1880 if (!err) 1881 err = _gcry_cipher_setiv (hddec, tv[tvi].iv, sizeof tv[tvi].iv); 1882 if (err) 1883 Fail ("set IV"); 1884 for (idx=0; idx < DIM (tv[tvi].data); idx++) 1885 { 1886 err = _gcry_cipher_encrypt (hdenc, scratch, sizeof scratch, 1887 tv[tvi].data[idx].input, 1888 sizeof tv[tvi].data[idx].input); 1889 if (err) 1890 Fail ("encrypt command"); 1891 if (memcmp (scratch, tv[tvi].data[idx].output, sizeof scratch)) 1892 Fail ("encrypt mismatch"); 1893 err = _gcry_cipher_decrypt (hddec, scratch, sizeof scratch, 1894 tv[tvi].data[idx].output, 1895 sizeof tv[tvi].data[idx].output); 1896 if (err) 1897 Fail ("decrypt command"); 1898 if (memcmp (scratch, tv[tvi].data[idx].input, sizeof scratch)) 1899 Fail ("decrypt mismatch"); 1900 } 1901 1902#undef Fail 1903 _gcry_cipher_close (hdenc); 1904 _gcry_cipher_close (hddec); 1905 return NULL; 1906} 1907 1908 1909/* Complete selftest for AES-128 with all modes and driver code. */ 1910static gpg_err_code_t 1911selftest_fips_128 (int extended, selftest_report_func_t report) 1912{ 1913 const char *what; 1914 const char *errtxt; 1915 1916 what = "low-level"; 1917 errtxt = selftest_basic_128 (); 1918 if (errtxt) 1919 goto failed; 1920 1921 if (extended) 1922 { 1923 what = "cfb"; 1924 errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_CFB); 1925 if (errtxt) 1926 goto failed; 1927 1928 what = "ofb"; 1929 errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_OFB); 1930 if (errtxt) 1931 goto failed; 1932 } 1933 1934 return 0; /* Succeeded. */ 1935 1936 failed: 1937 if (report) 1938 report ("cipher", GCRY_CIPHER_AES128, what, errtxt); 1939 return GPG_ERR_SELFTEST_FAILED; 1940} 1941 1942/* Complete selftest for AES-192. */ 1943static gpg_err_code_t 1944selftest_fips_192 (int extended, selftest_report_func_t report) 1945{ 1946 const char *what; 1947 const char *errtxt; 1948 1949 (void)extended; /* No extended tests available. */ 1950 1951 what = "low-level"; 1952 errtxt = selftest_basic_192 (); 1953 if (errtxt) 1954 goto failed; 1955 1956 1957 return 0; /* Succeeded. */ 1958 1959 failed: 1960 if (report) 1961 report ("cipher", GCRY_CIPHER_AES192, what, errtxt); 1962 return GPG_ERR_SELFTEST_FAILED; 1963} 1964 1965 1966/* Complete selftest for AES-256. */ 1967static gpg_err_code_t 1968selftest_fips_256 (int extended, selftest_report_func_t report) 1969{ 1970 const char *what; 1971 const char *errtxt; 1972 1973 (void)extended; /* No extended tests available. */ 1974 1975 what = "low-level"; 1976 errtxt = selftest_basic_256 (); 1977 if (errtxt) 1978 goto failed; 1979 1980 return 0; /* Succeeded. */ 1981 1982 failed: 1983 if (report) 1984 report ("cipher", GCRY_CIPHER_AES256, what, errtxt); 1985 return GPG_ERR_SELFTEST_FAILED; 1986} 1987 1988 1989 1990/* Run a full self-test for ALGO and return 0 on success. 
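   ALGO is one of GCRY_CIPHER_AES128, GCRY_CIPHER_AES192 or
   GCRY_CIPHER_AES256; if EXTENDED is set, the SP800-38A CFB/OFB
   known-answer tests are run in addition to the basic encrypt/decrypt
   test (currently only implemented for AES-128).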
*/ 1991static gpg_err_code_t 1992run_selftests (int algo, int extended, selftest_report_func_t report) 1993{ 1994 gpg_err_code_t ec; 1995 1996 switch (algo) 1997 { 1998 case GCRY_CIPHER_AES128: 1999 ec = selftest_fips_128 (extended, report); 2000 break; 2001 case GCRY_CIPHER_AES192: 2002 ec = selftest_fips_192 (extended, report); 2003 break; 2004 case GCRY_CIPHER_AES256: 2005 ec = selftest_fips_256 (extended, report); 2006 break; 2007 default: 2008 ec = GPG_ERR_CIPHER_ALGO; 2009 break; 2010 2011 } 2012 return ec; 2013} 2014 2015 2016 2017 2018static const char *rijndael_names[] = 2019 { 2020 "RIJNDAEL", 2021 "AES128", 2022 "AES-128", 2023 NULL 2024 }; 2025 2026static gcry_cipher_oid_spec_t rijndael_oids[] = 2027 { 2028 { "2.16.840.1.101.3.4.1.1", GCRY_CIPHER_MODE_ECB }, 2029 { "2.16.840.1.101.3.4.1.2", GCRY_CIPHER_MODE_CBC }, 2030 { "2.16.840.1.101.3.4.1.3", GCRY_CIPHER_MODE_OFB }, 2031 { "2.16.840.1.101.3.4.1.4", GCRY_CIPHER_MODE_CFB }, 2032 { NULL } 2033 }; 2034 2035gcry_cipher_spec_t _gcry_cipher_spec_aes = 2036 { 2037 "AES", rijndael_names, rijndael_oids, 16, 128, sizeof (RIJNDAEL_context), 2038 rijndael_setkey, rijndael_encrypt, rijndael_decrypt 2039 }; 2040cipher_extra_spec_t _gcry_cipher_extraspec_aes = 2041 { 2042 run_selftests 2043 }; 2044 2045static const char *rijndael192_names[] = 2046 { 2047 "RIJNDAEL192", 2048 "AES-192", 2049 NULL 2050 }; 2051 2052static gcry_cipher_oid_spec_t rijndael192_oids[] = 2053 { 2054 { "2.16.840.1.101.3.4.1.21", GCRY_CIPHER_MODE_ECB }, 2055 { "2.16.840.1.101.3.4.1.22", GCRY_CIPHER_MODE_CBC }, 2056 { "2.16.840.1.101.3.4.1.23", GCRY_CIPHER_MODE_OFB }, 2057 { "2.16.840.1.101.3.4.1.24", GCRY_CIPHER_MODE_CFB }, 2058 { NULL } 2059 }; 2060 2061gcry_cipher_spec_t _gcry_cipher_spec_aes192 = 2062 { 2063 "AES192", rijndael192_names, rijndael192_oids, 16, 192, sizeof (RIJNDAEL_context), 2064 rijndael_setkey, rijndael_encrypt, rijndael_decrypt 2065 }; 2066cipher_extra_spec_t _gcry_cipher_extraspec_aes192 = 2067 { 2068 run_selftests 2069 }; 2070 2071static const char *rijndael256_names[] = 2072 { 2073 "RIJNDAEL256", 2074 "AES-256", 2075 NULL 2076 }; 2077 2078static gcry_cipher_oid_spec_t rijndael256_oids[] = 2079 { 2080 { "2.16.840.1.101.3.4.1.41", GCRY_CIPHER_MODE_ECB }, 2081 { "2.16.840.1.101.3.4.1.42", GCRY_CIPHER_MODE_CBC }, 2082 { "2.16.840.1.101.3.4.1.43", GCRY_CIPHER_MODE_OFB }, 2083 { "2.16.840.1.101.3.4.1.44", GCRY_CIPHER_MODE_CFB }, 2084 { NULL } 2085 }; 2086 2087gcry_cipher_spec_t _gcry_cipher_spec_aes256 = 2088 { 2089 "AES256", rijndael256_names, rijndael256_oids, 16, 256, 2090 sizeof (RIJNDAEL_context), 2091 rijndael_setkey, rijndael_encrypt, rijndael_decrypt 2092 }; 2093 2094cipher_extra_spec_t _gcry_cipher_extraspec_aes256 = 2095 { 2096 run_selftests 2097 }; 2098
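

/* Illustrative usage sketch (not part of the library build): how an
   application would exercise this AES implementation through the
   public libgcrypt cipher API in CFB mode, which for complete blocks
   ends up in the bulk _gcry_aes_cfb_enc/_gcry_aes_cfb_dec functions
   above.  The key, IV and plaintext are the first SP800-38A CFB128
   vectors also used by selftest_fips_128_38a; error handling is
   reduced to asserts to keep the sketch short.  */
#if 0
#include <assert.h>
#include <string.h>
#include <gcrypt.h>

static void
example_aes128_cfb (void)
{
  static const unsigned char key[16] =
    { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
      0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c };
  static const unsigned char iv[16] =
    { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
      0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f };
  unsigned char buf[16] =
    { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96,
      0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a };
  gcry_cipher_hd_t hd;
  gcry_error_t err;

  err = gcry_cipher_open (&hd, GCRY_CIPHER_AES128, GCRY_CIPHER_MODE_CFB, 0);
  assert (!err);
  err = gcry_cipher_setkey (hd, key, sizeof key);
  assert (!err);
  err = gcry_cipher_setiv (hd, iv, sizeof iv);
  assert (!err);
  /* In-place encryption of one complete block.  */
  err = gcry_cipher_encrypt (hd, buf, sizeof buf, NULL, 0);
  assert (!err);
  /* BUF now holds 3b 3f d9 2e ... as listed in the F.3.13 test
     vector above.  */
  gcry_cipher_close (hd);
}
#endif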