1/* 2 --------------------------------------------------------------------------- 3 Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. 4 5 LICENSE TERMS 6 7 The free distribution and use of this software in both source and binary 8 form is allowed (with or without changes) provided that: 9 10 1. distributions of this source code include the above copyright 11 notice, this list of conditions and the following disclaimer; 12 13 2. distributions in binary form include the above copyright 14 notice, this list of conditions and the following disclaimer 15 in the documentation and/or other associated materials; 16 17 3. the copyright holder's name is not used to endorse products 18 built using this software without specific written permission. 19 20 ALTERNATIVELY, provided that this notice is retained in full, this product 21 may be distributed under the terms of the GNU General Public License (GPL), 22 in which case the provisions of the GPL apply INSTEAD OF those given above. 23 24 DISCLAIMER 25 26 This software is provided 'as is' with no explicit or implied warranties 27 in respect of its properties, including, but not limited to, correctness 28 and/or fitness for purpose. 29 --------------------------------------------------------------------------- 30 Issue 31/01/2006 31 32 This file contains the compilation options for AES (Rijndael) and code 33 that is common across encryption, key scheduling and table generation. 34 35 OPERATION 36 37 These source code files implement the AES algorithm Rijndael designed by 38 Joan Daemen and Vincent Rijmen. This version is designed for the standard 39 block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24 40 and 32 bytes). 41 42 This version is designed for flexibility and speed using operations on 43 32-bit words rather than operations on bytes. 
It can be compiled with 44 either big or little endian internal byte order but is faster when the 45 native byte order for the processor is used. 46 47 THE CIPHER INTERFACE 48 49 The cipher interface is implemented as an array of bytes in which lower 50 AES bit sequence indexes map to higher numeric significance within bytes. 51 52 uint_8t (an unsigned 8-bit type) 53 uint_32t (an unsigned 32-bit type) 54 struct aes_encrypt_ctx (structure for the cipher encryption context) 55 struct aes_decrypt_ctx (structure for the cipher decryption context) 56 aes_rval the function return type 57 58 C subroutine calls: 59 60 aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]); 61 aes_rval aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]); 62 aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]); 63 aes_rval aes_encrypt(const unsigned char *in, unsigned char *out, 64 const aes_encrypt_ctx cx[1]); 65 66 aes_rval aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]); 67 aes_rval aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]); 68 aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]); 69 aes_rval aes_decrypt(const unsigned char *in, unsigned char *out, 70 const aes_decrypt_ctx cx[1]); 71 72 IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that 73 you call gen_tabs() before AES is used so that the tables are initialised. 
C++ aes class subroutines:

     Class AESencrypt  for encryption

      Constructors:
          AESencrypt(void)
          AESencrypt(const unsigned char *key) - 128 bit key
      Members:
          aes_rval key128(const unsigned char *key)
          aes_rval key192(const unsigned char *key)
          aes_rval key256(const unsigned char *key)
          aes_rval encrypt(const unsigned char *in, unsigned char *out) const

      Class AESdecrypt  for decryption
      Constructors:
          AESdecrypt(void)
          AESdecrypt(const unsigned char *key) - 128 bit key
      Members:
          aes_rval key128(const unsigned char *key)
          aes_rval key192(const unsigned char *key)
          aes_rval key256(const unsigned char *key)
          aes_rval decrypt(const unsigned char *in, unsigned char *out) const
*/

/* Include guard: the matching #endif is the last directive in this file. */
#if !defined( _AESOPT_H )
#define _AESOPT_H

/* C++ builds pull in the class wrapper header, C builds the plain C header. */
#if defined( __cplusplus )
#include "aescpp.h"
#else
#include "crypto/aes.h"
#endif

/*  PLATFORM SPECIFIC INCLUDES */

#include "edefs.h"

/*  CONFIGURATION - THE USE OF DEFINES

    Later in this section there are a number of defines that control the
    operation of the code.  In each section, the purpose of each define is
    explained so that the relevant form can be included or excluded by
    setting either 1's or 0's respectively on the branches of the related
    #if clauses.  The following local defines should not be changed.
*/

/* Bit flags: they are OR-ed together into EFUNCS_IN_C, DFUNCS_IN_C and
   FUNCS_IN_C further down and tested there with '&', so each value must
   remain a distinct power of two. */
#define ENCRYPTION_IN_C 1
#define DECRYPTION_IN_C 2
#define ENC_KEYING_IN_C 4
#define DEC_KEYING_IN_C 8

/* Values assigned to ENC_ROUND, LAST_ENC_ROUND, DEC_ROUND, LAST_DEC_ROUND
   and KEY_SCHED in the table options below. */
#define NO_TABLES 0
#define ONE_TABLE 1
#define FOUR_TABLES 4
/* Values assigned to ENC_UNROLL and DEC_UNROLL in the loop unrolling
   options below. */
#define NONE 0
#define PARTIAL 1
#define FULL 2

/* --- START OF USER CONFIGURED OPTIONS --- */

/*  1. BYTE ORDER WITHIN 32 BIT WORDS

    The fundamental data processing units in Rijndael are 8-bit bytes. The
    input, output and key input are all enumerated arrays of bytes in which
    bytes are numbered starting at zero and increasing to one less than the
    number of bytes in the array in question.
This enumeration is only used
    for naming bytes and does not imply any adjacency or order relationship
    from one byte to another. When these inputs and outputs are considered
    as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
    byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
    In this implementation bits are numbered from 0 to 7 starting at the
    numerically least significant end of each byte (bit n represents 2^n).

    However, Rijndael can be implemented more efficiently using 32-bit
    words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
    into word[n]. While in principle these bytes can be assembled into words
    in any positions, this implementation only supports the two formats in
    which bytes in adjacent positions within words also have adjacent byte
    numbers. This order is called big-endian if the lowest numbered bytes
    in words have the highest numeric significance and little-endian if the
    opposite applies.

    This code can work in either order irrespective of the order used by the
    machine on which it runs. Normally the internal byte order will be set
    to the order of the processor on which the code is to be run but this
    define can be used to reverse this in special situations.

    WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set.
    This define will hence be redefined later (after the user configured
    options) if necessary.
*/

/* Exactly one branch is meant to be enabled; the first branch whose
   condition is 1 wins.  PLATFORM_BYTE_ORDER, IS_LITTLE_ENDIAN and
   IS_BIG_ENDIAN are not defined in this file - presumably they come from
   edefs.h (TODO confirm). */
#if 1
#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
#elif 0
#define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif 0
#define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN
#else
#error The algorithm byte order is not defined
#endif

/*  2. VIA ACE SUPPORT

    Define this option if support for the VIA ACE is required. This uses
    inline assembler instructions and is only implemented for the Microsoft,
    Intel and GCC compilers.
If VIA ACE is known to be present, then defining
    ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption
    code.  If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if
    it is detected (both present and enabled) but the normal AES code will
    also be present.

    When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte
    aligned; other input/output buffers do not need to be 16 byte aligned
    but there are very large performance gains if this can be arranged.
    VIA ACE also requires the decryption key schedule to be in reverse
    order (which the defines after the user configured options ensure by
    setting AES_REV_DKS).
*/

/* Disabled by the leading 0.  When changed to 1 the option is defined
   unless _WIN64 is defined or the option was already defined (for example
   on the compiler command line). */
#if 0 && !defined( _WIN64 ) && !defined( USE_VIA_ACE_IF_PRESENT )
#define USE_VIA_ACE_IF_PRESENT
#endif

/* Disabled by the leading 0; same conditions as the switch above. */
#if 0 && !defined( _WIN64 ) && !defined( ASSUME_VIA_ACE_PRESENT )
#define ASSUME_VIA_ACE_PRESENT
#endif

/*  3. ASSEMBLER SUPPORT

    This define (which can be on the command line) enables the use of the
    assembler code routines for encryption, decryption and key scheduling
    as follows:

    ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for
                encryption and decryption but with key scheduling in C
    ASM_X86_V2  uses assembler (aes_x86_v2.asm) with compressed tables for
                encryption, decryption and key scheduling
    ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for
                encryption and decryption but with key scheduling in C
    ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for
                encryption and decryption but with key scheduling in C

    Change one 'if 0' below to 'if 1' to select the version or define
    as a compilation option.
218*/ 219 220#if defined ( ASM_X86_V1C ) || defined( ASM_X86_V2 ) || defined( ASM_X86_V2C ) 221# if defined( _M_IX86 ) 222# if 0 && !defined( ASM_X86_V1C ) 223# define ASM_X86_V1C 224# elif 0 && !defined( ASM_X86_V2 ) 225# define ASM_X86_V2 226# elif 0 && !defined( ASM_X86_V2C ) 227# define ASM_X86_V2C 228# endif 229# else 230# error Assembler code is only available for x86 and AMD64 systems 231# endif 232#elif defined( ASM_AMD64_C ) 233# if defined( _M_X64 ) 234# if 0 && !defined( ASM_AMD64_C ) 235# define ASM_AMD64_C 236# endif 237# else 238# error Assembler code is only available for x86 and AMD64 systems 239# endif 240#endif 241 242/* 4. FAST INPUT/OUTPUT OPERATIONS. 243 244 On some machines it is possible to improve speed by transferring the 245 bytes in the input and output arrays to and from the internal 32-bit 246 variables by addressing these arrays as if they are arrays of 32-bit 247 words. On some machines this will always be possible but there may 248 be a large performance penalty if the byte arrays are not aligned on 249 the normal word boundaries. On other machines this technique will 250 lead to memory access errors when such 32-bit word accesses are not 251 properly aligned. The option SAFE_IO avoids such problems but will 252 often be slower on those machines that support misaligned access 253 (especially so if care is taken to align the input and output byte 254 arrays on 32-bit word boundaries). If SAFE_IO is not defined it is 255 assumed that access to byte arrays as if they are arrays of 32-bit 256 words will not cause problems when such accesses are misaligned. 257*/ 258#if 1 && !defined( _MSC_VER ) 259#define SAFE_IO 260#endif 261 262/* 5. LOOP UNROLLING 263 264 The code for encryption and decrytpion cycles through a number of rounds 265 that can be implemented either in a loop or by expanding the code into a 266 long sequence of instructions, the latter producing a larger program but 267 one that will often be much faster. 
The latter is called loop unrolling.
    There are also potential speed advantages in expanding two iterations in
    a loop with half the number of iterations, which is called partial loop
    unrolling.  The following options allow partial or full loop unrolling
    to be set independently for encryption and decryption.
*/
/* First branch whose condition is 1 wins; with both set to 0 the #else
   (NONE) applies. */
#if 1
#define ENC_UNROLL FULL
#elif 0
#define ENC_UNROLL PARTIAL
#else
#define ENC_UNROLL NONE
#endif

/* Same ladder as above, for decryption. */
#if 1
#define DEC_UNROLL FULL
#elif 0
#define DEC_UNROLL PARTIAL
#else
#define DEC_UNROLL NONE
#endif

/*  6. FAST FINITE FIELD OPERATIONS

    If this section is included, tables are used to provide faster finite
    field arithmetic (this has no effect if FIXED_TABLES is defined).
*/
#if 1
#define FF_TABLES
#endif

/*  7. INTERNAL STATE VARIABLE FORMAT

    The internal state of Rijndael is stored in a number of local 32-bit
    word variables which can be defined either as an array or as individually
    named variables. Include this section if you want to store these local
    variables in arrays. Otherwise individual local variables will be used.
*/
/* ARRAYS selects the x[c] form of the s(x,c) state accessor defined after
   the user configured options; without it s(x,c) pastes x and c together. */
#if 1
#define ARRAYS
#endif

/*  8. FIXED OR DYNAMIC TABLES

    When this section is included the tables used by the code are compiled
    statically into the binary file.  Otherwise the subroutine gen_tabs()
    must be called to compute them before the code is first used.
*/
/* Disabled by the leading 0.  When enabled it is still skipped for
   Microsoft compilers with _MSC_VER <= 800. */
#if 0 && !(defined( _MSC_VER ) && ( _MSC_VER <= 800 ))
#define FIXED_TABLES
#endif

/*  9. TABLE ALIGNMENT

    On some systems speed will be improved by aligning the AES large lookup
    tables on particular boundaries. This define should be set to a power of
    two giving the desired alignment. It can be left undefined if alignment
    is not needed.  This option is specific to the Microsoft VC++ compiler -
    it seems to sometimes cause trouble for the VC++ version 6 compiler.
*/

/* Only defined for Microsoft compilers reporting _MSC_VER >= 1300. */
#if 1 && defined( _MSC_VER ) && ( _MSC_VER >= 1300 )
#define TABLE_ALIGN 32
#endif

/*  10. TABLE OPTIONS

    This cipher proceeds by repeating in a number of cycles known as 'rounds'
    which are implemented by a round function which can optionally be speeded
    up using tables.  The basic tables are each 256 32-bit words, with either
    one or four tables being required for each round function depending on
    how much speed is required. The encryption and decryption round functions
    are different and the last encryption and decryption round functions are
    different again making four different round functions in all.

    This means that:
      1. Normal encryption and decryption rounds can each use either 0, 1
         or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
      2. The last encryption and decryption rounds can also use either 0, 1
         or 4 tables and table spaces of 0, 1024 or 4096 bytes each.

    Include or exclude the appropriate definitions below to set the number
    of tables used by this implementation.

    In each ladder the first branch whose condition is 1 wins and the #else
    branch (NO_TABLES) applies when both are 0.  Some combinations chosen
    here are adjusted after the user configured options (for example a
    last-round setting larger than the normal-round setting is reduced).
*/

#if 1 /* set tables for the normal encryption round */
#define ENC_ROUND FOUR_TABLES
#elif 0
#define ENC_ROUND ONE_TABLE
#else
#define ENC_ROUND NO_TABLES
#endif

#if 1 /* set tables for the last encryption round */
#define LAST_ENC_ROUND FOUR_TABLES
#elif 0
#define LAST_ENC_ROUND ONE_TABLE
#else
#define LAST_ENC_ROUND NO_TABLES
#endif

#if 1 /* set tables for the normal decryption round */
#define DEC_ROUND FOUR_TABLES
#elif 0
#define DEC_ROUND ONE_TABLE
#else
#define DEC_ROUND NO_TABLES
#endif

#if 1 /* set tables for the last decryption round */
#define LAST_DEC_ROUND FOUR_TABLES
#elif 0
#define LAST_DEC_ROUND ONE_TABLE
#else
#define LAST_DEC_ROUND NO_TABLES
#endif

/*  The decryption key schedule can be speeded up with tables in the same
    way that the round functions can.
Include or exclude the following
    defines to set this requirement.
*/
/* Same ladder convention as the round table options: first branch set to
   1 wins, #else gives NO_TABLES. */
#if 1
#define KEY_SCHED FOUR_TABLES
#elif 0
#define KEY_SCHED ONE_TABLE
#else
#define KEY_SCHED NO_TABLES
#endif

/* ---- END OF USER CONFIGURED OPTIONS ---- */

/* VIA ACE support is only available for VC++ and GCC */

/* For any other compiler both VIA ACE options are withdrawn, even if they
   were requested above or on the command line. */
#if !defined( _MSC_VER ) && !defined( __GNUC__ )
# if defined( ASSUME_VIA_ACE_PRESENT )
# undef ASSUME_VIA_ACE_PRESENT
# endif
# if defined( USE_VIA_ACE_IF_PRESENT )
# undef USE_VIA_ACE_IF_PRESENT
# endif
#endif

/* ASSUME_VIA_ACE_PRESENT implies USE_VIA_ACE_IF_PRESENT. */
#if defined( ASSUME_VIA_ACE_PRESENT ) && !defined( USE_VIA_ACE_IF_PRESENT )
#define USE_VIA_ACE_IF_PRESENT
#endif

/* Any use of VIA ACE turns on AES_REV_DKS (the reversed decryption key
   schedule that the VIA ACE section says the hardware requires). */
#if defined( USE_VIA_ACE_IF_PRESENT ) && !defined ( AES_REV_DKS )
#define AES_REV_DKS
#endif

/* Assembler support requires the use of platform byte order */

/* Overrides the user's ALGORITHM_BYTE_ORDER choice for the ASM_X86_V1C,
   ASM_X86_V2C and ASM_AMD64_C variants.  ASM_X86_V2 is not in this list. */
#if ( defined( ASM_X86_V1C ) || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) ) && (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER)
#undef ALGORITHM_BYTE_ORDER
#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
#endif

/*  In this implementation the columns of the state array are each held in
    32-bit words. The state array can be held in various ways: in an array
    of words, in a number of individual word variables or in a number of
    processor registers. The following define maps a variable name x and
    a column number c to the way the state array variable is to be held.
    The first define below maps the state into an array x[c] whereas the
    second form maps the state into a number of individual variables x0,
    x1, etc.  Another form could map individual state columns to machine
    register names.
*/

/* In the token-pasting form c must be a literal column number, because
   x##c is formed before any evaluation takes place. */
#if defined( ARRAYS )
#define s(x,c) x[c]
#else
#define s(x,c) x##c
#endif

/*  This implementation provides subroutines for encryption, decryption
    and for setting the three key lengths (separately) for encryption
    and decryption.
Since not all functions are needed, masks are set 444 up here to determine which will be implemented in C 445*/ 446 447#if !defined( AES_ENCRYPT ) 448# define EFUNCS_IN_C 0 449#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) 450 || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) 451# define EFUNCS_IN_C ENC_KEYING_IN_C 452#elif !defined( ASM_X86_V2 ) 453# define EFUNCS_IN_C ( ENCRYPTION_IN_C | ENC_KEYING_IN_C ) 454#else 455# define EFUNCS_IN_C 0 456#endif 457 458#if !defined( AES_DECRYPT ) 459# define DFUNCS_IN_C 0 460#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) 461 || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) 462# define DFUNCS_IN_C DEC_KEYING_IN_C 463#elif !defined( ASM_X86_V2 ) 464# define DFUNCS_IN_C ( DECRYPTION_IN_C | DEC_KEYING_IN_C ) 465#else 466# define DFUNCS_IN_C 0 467#endif 468 469#define FUNCS_IN_C ( EFUNCS_IN_C | DFUNCS_IN_C ) 470 471/* END OF CONFIGURATION OPTIONS */ 472 473#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2)) 474 475/* Disable or report errors on some combinations of options */ 476 477#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES 478#undef LAST_ENC_ROUND 479#define LAST_ENC_ROUND NO_TABLES 480#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES 481#undef LAST_ENC_ROUND 482#define LAST_ENC_ROUND ONE_TABLE 483#endif 484 485#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE 486#undef ENC_UNROLL 487#define ENC_UNROLL NONE 488#endif 489 490#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES 491#undef LAST_DEC_ROUND 492#define LAST_DEC_ROUND NO_TABLES 493#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES 494#undef LAST_DEC_ROUND 495#define LAST_DEC_ROUND ONE_TABLE 496#endif 497 498#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE 499#undef DEC_UNROLL 500#define DEC_UNROLL NONE 501#endif 502 503#if defined( bswap32 ) 504#define aes_sw32 bswap32 505#elif defined( bswap_32 ) 506#define aes_sw32 bswap_32 507#else 508#define brot(x,n) (((uint_32t)(x) << n) | 
((uint_32t)(x) >> (32 - n))) 509#define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00)) 510#endif 511 512/* upr(x,n): rotates bytes within words by n positions, moving bytes to 513 higher index positions with wrap around into low positions 514 ups(x,n): moves bytes by n positions to higher index positions in 515 words but without wrap around 516 bval(x,n): extracts a byte from a word 517 518 WARNING: The definitions given here are intended only for use with 519 unsigned variables and with shift counts that are compile 520 time constants 521*/ 522 523#if ( ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN ) 524#define upr(x,n) (((uint_32t)(x) << (8 * (n))) | ((uint_32t)(x) >> (32 - 8 * (n)))) 525#define ups(x,n) ((uint_32t) (x) << (8 * (n))) 526#define bval(x,n) ((uint_8t)((x) >> (8 * (n)))) 527#define bytes2word(b0, b1, b2, b3) \ 528 (((uint_32t)(b3) << 24) | ((uint_32t)(b2) << 16) | ((uint_32t)(b1) << 8) | (b0)) 529#endif 530 531#if ( ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN ) 532#define upr(x,n) (((uint_32t)(x) >> (8 * (n))) | ((uint_32t)(x) << (32 - 8 * (n)))) 533#define ups(x,n) ((uint_32t) (x) >> (8 * (n))) 534#define bval(x,n) ((uint_8t)((x) >> (24 - 8 * (n)))) 535#define bytes2word(b0, b1, b2, b3) \ 536 (((uint_32t)(b0) << 24) | ((uint_32t)(b1) << 16) | ((uint_32t)(b2) << 8) | (b3)) 537#endif 538 539#if defined( SAFE_IO ) 540 541#define word_in(x,c) bytes2word(((const uint_8t*)(x)+4*c)[0], ((const uint_8t*)(x)+4*c)[1], \ 542 ((const uint_8t*)(x)+4*c)[2], ((const uint_8t*)(x)+4*c)[3]) 543#define word_out(x,c,v) { ((uint_8t*)(x)+4*c)[0] = bval(v,0); ((uint_8t*)(x)+4*c)[1] = bval(v,1); \ 544 ((uint_8t*)(x)+4*c)[2] = bval(v,2); ((uint_8t*)(x)+4*c)[3] = bval(v,3); } 545 546#elif ( ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER ) 547 548#define word_in(x,c) (*((uint_32t*)(x)+(c))) 549#define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = (v)) 550 551#else 552 553#define word_in(x,c) aes_sw32(*((uint_32t*)(x)+(c))) 554#define word_out(x,c,v) 
(*((uint_32t*)(x)+(c)) = aes_sw32(v)) 555 556#endif 557 558/* the finite field modular polynomial and elements */ 559 560#define WPOLY 0x011b 561#define BPOLY 0x1b 562 563/* multiply four bytes in GF(2^8) by 'x' {02} in parallel */ 564 565#define m1 0x80808080 566#define m2 0x7f7f7f7f 567#define gf_mulx(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY)) 568 569/* The following defines provide alternative definitions of gf_mulx that might 570 give improved performance if a fast 32-bit multiply is not available. Note 571 that a temporary variable u needs to be defined where gf_mulx is used. 572 573#define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6)) 574#define m4 (0x01010101 * BPOLY) 575#define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4) 576*/ 577 578/* Work out which tables are needed for the different options */ 579 580#if defined( ASM_X86_V1C ) 581#if defined( ENC_ROUND ) 582#undef ENC_ROUND 583#endif 584#define ENC_ROUND FOUR_TABLES 585#if defined( LAST_ENC_ROUND ) 586#undef LAST_ENC_ROUND 587#endif 588#define LAST_ENC_ROUND FOUR_TABLES 589#if defined( DEC_ROUND ) 590#undef DEC_ROUND 591#endif 592#define DEC_ROUND FOUR_TABLES 593#if defined( LAST_DEC_ROUND ) 594#undef LAST_DEC_ROUND 595#endif 596#define LAST_DEC_ROUND FOUR_TABLES 597#if defined( KEY_SCHED ) 598#undef KEY_SCHED 599#define KEY_SCHED FOUR_TABLES 600#endif 601#endif 602 603#if ( FUNCS_IN_C & ENCRYPTION_IN_C ) || defined( ASM_X86_V1C ) 604#if ENC_ROUND == ONE_TABLE 605#define FT1_SET 606#elif ENC_ROUND == FOUR_TABLES 607#define FT4_SET 608#else 609#define SBX_SET 610#endif 611#if LAST_ENC_ROUND == ONE_TABLE 612#define FL1_SET 613#elif LAST_ENC_ROUND == FOUR_TABLES 614#define FL4_SET 615#elif !defined( SBX_SET ) 616#define SBX_SET 617#endif 618#endif 619 620#if ( FUNCS_IN_C & DECRYPTION_IN_C ) || defined( ASM_X86_V1C ) 621#if DEC_ROUND == ONE_TABLE 622#define IT1_SET 623#elif DEC_ROUND == FOUR_TABLES 624#define IT4_SET 625#else 626#define 
ISB_SET 627#endif 628#if LAST_DEC_ROUND == ONE_TABLE 629#define IL1_SET 630#elif LAST_DEC_ROUND == FOUR_TABLES 631#define IL4_SET 632#elif !defined(ISB_SET) 633#define ISB_SET 634#endif 635#endif 636 637#if (FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C) 638#if KEY_SCHED == ONE_TABLE 639#define LS1_SET 640#elif KEY_SCHED == FOUR_TABLES 641#define LS4_SET 642#elif !defined( SBX_SET ) 643#define SBX_SET 644#endif 645#endif 646 647#if (FUNCS_IN_C & DEC_KEYING_IN_C) 648#if KEY_SCHED == ONE_TABLE 649#define IM1_SET 650#elif KEY_SCHED == FOUR_TABLES 651#define IM4_SET 652#elif !defined( SBX_SET ) 653#define SBX_SET 654#endif 655#endif 656 657/* generic definitions of Rijndael macros that use tables */ 658 659#define no_table(x,box,vf,rf,c) bytes2word( \ 660 box[bval(vf(x,0,c),rf(0,c))], \ 661 box[bval(vf(x,1,c),rf(1,c))], \ 662 box[bval(vf(x,2,c),rf(2,c))], \ 663 box[bval(vf(x,3,c),rf(3,c))]) 664 665#define one_table(x,op,tab,vf,rf,c) \ 666 ( tab[bval(vf(x,0,c),rf(0,c))] \ 667 ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \ 668 ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \ 669 ^ op(tab[bval(vf(x,3,c),rf(3,c))],3)) 670 671#define four_tables(x,tab,vf,rf,c) \ 672 ( tab[0][bval(vf(x,0,c),rf(0,c))] \ 673 ^ tab[1][bval(vf(x,1,c),rf(1,c))] \ 674 ^ tab[2][bval(vf(x,2,c),rf(2,c))] \ 675 ^ tab[3][bval(vf(x,3,c),rf(3,c))]) 676 677#define vf1(x,r,c) (x) 678#define rf1(r,c) (r) 679#define rf2(r,c) ((8+r-c)&3) 680 681/* perform forward and inverse column mix operation on four bytes in long word x in */ 682/* parallel. NOTE: x must be a simple variable, NOT an expression in these macros. 
*/

/* fwd_mcol(x): forward column mix of the word x.  The table forms apply
   when FM4_SET or FM1_SET is defined; nothing in this file defines them.
   The fallback computes the result arithmetically and assigns to a
   variable named g2, which must be in scope at the point of use -
   dec_fmvars expands to a suitable declaration.  t_use is not defined in
   this file. */
#if defined( FM4_SET ) /* not currently used */
#define fwd_mcol(x) four_tables(x,t_use(f,m),vf1,rf1,0)
#elif defined( FM1_SET ) /* not currently used */
#define fwd_mcol(x) one_table(x,upr,t_use(f,m),vf1,rf1,0)
#else
#define dec_fmvars uint_32t g2
#define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1))
#endif

/* inv_mcol(x): inverse column mix of the word x, from the IM4/IM1 tables
   when requested above, otherwise arithmetically.  The fallback is a
   comma expression that assigns g2, g4 and g9 in sequence (g4 is updated
   again before the final term), so those three variables must be in scope
   - dec_imvars expands to a suitable declaration - and x is evaluated
   several times. */
#if defined( IM4_SET )
#define inv_mcol(x) four_tables(x,t_use(i,m),vf1,rf1,0)
#elif defined( IM1_SET )
#define inv_mcol(x) one_table(x,upr,t_use(i,m),vf1,rf1,0)
#else
#define dec_imvars uint_32t g2, g4, g9
#define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \
 (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1))
#endif

/* ls_box(x,c): table lookup on the four bytes of x using the rf2 byte
   selector for column c.  The first available table set is used, in the
   order FL4, LS4, FL1, LS1; with none of them the lookup goes through
   no_table on t_use(s,box). */
#if defined( FL4_SET )
#define ls_box(x,c) four_tables(x,t_use(f,l),vf1,rf2,c)
#elif defined( LS4_SET )
#define ls_box(x,c) four_tables(x,t_use(l,s),vf1,rf2,c)
#elif defined( FL1_SET )
#define ls_box(x,c) one_table(x,upr,t_use(f,l),vf1,rf2,c)
#elif defined( LS1_SET )
#define ls_box(x,c) one_table(x,upr,t_use(l,s),vf1,rf2,c)
#else
#define ls_box(x,c) no_table(x,t_use(s,box),vf1,rf2,c)
#endif

/* ASM_X86_V1C builds with decryption enabled always request ISB_SET. */
#if defined( ASM_X86_V1C ) && defined( AES_DECRYPT ) && !defined( ISB_SET )
#define ISB_SET
#endif

/* closes the _AESOPT_H include guard */
#endif