/*-
 * Support for VIA PadLock Advanced Cryptography Engine (ACE)
 * Written by Michal Ludvig <michal@logix.cz>
 *            http://www.logix.cz/michal
 *
 * Big thanks to Andy Polyakov for a help with optimization,
 * assembler fixes, port to MS Windows and a lot of other
 * valuable work on this engine!
 */

/* ====================================================================
 * Copyright (c) 1999-2001 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    licensing@OpenSSL.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 *
 * This product includes cryptographic software written by Eric Young
 * (eay@cryptsoft.com). This product includes software written by Tim
 * Hudson (tjh@cryptsoft.com).
 *
 */

#include <stdio.h>
#include <string.h>

#include <openssl/opensslconf.h>
#include <openssl/crypto.h>
#include <openssl/dso.h>
#include <openssl/engine.h>
#include <openssl/evp.h>
#ifndef OPENSSL_NO_AES
# include <openssl/aes.h>
#endif
#include <openssl/rand.h>
#include <openssl/err.h>

#ifndef OPENSSL_NO_HW
# ifndef OPENSSL_NO_HW_PADLOCK

/* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
#  if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
#   ifndef OPENSSL_NO_DYNAMIC_ENGINE
#    define DYNAMIC_ENGINE
#   endif
#  elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
#   ifdef ENGINE_DYNAMIC_SUPPORT
#    define DYNAMIC_ENGINE
#   endif
#  else
#   error "Only OpenSSL >= 0.9.7 is supported"
#  endif

/*
 * VIA PadLock AES is available *ONLY* on some x86 CPUs. Not only that it
 * doesn't exist elsewhere, but it even can't be compiled on other platforms!
 *
 * In addition, because of the heavy use of inline assembler, compiler choice
 * is limited to GCC and Microsoft C.
 */
#  undef COMPILE_HW_PADLOCK
#  if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
#   if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
       (defined(_MSC_VER) && defined(_M_IX86))
#    define COMPILE_HW_PADLOCK
static ENGINE *ENGINE_padlock(void);
#   endif
#  endif

/*
 * Public entry point: construct the PadLock engine and register it with
 * OpenSSL's internal ENGINE list.
 */
void ENGINE_load_padlock(void)
{
/* On non-x86 CPUs it just returns. */
#  ifdef COMPILE_HW_PADLOCK
    ENGINE *toadd = ENGINE_padlock();
    if (!toadd)
        return;
    ENGINE_add(toadd);
    /* ENGINE_add() grabbed its own reference, so drop ours. */
    ENGINE_free(toadd);
    ERR_clear_error();
#  endif
}

#  ifdef COMPILE_HW_PADLOCK
/*
 * We do these includes here to avoid header problems on platforms that do
 * not have the VIA padlock anyway...
 */
#   ifdef _MSC_VER
#    include <malloc.h>
#    define alloca _alloca
#   elif defined(NETWARE_CLIB) && defined(__GNUC__)
void *alloca(size_t);
#    define alloca(s) __builtin_alloca(s)
#   else
#    include <stdlib.h>
#   endif

/* Function for ENGINE detection and control */
static int padlock_available(void);
static int padlock_init(ENGINE *e);

/* RNG Stuff */
static RAND_METHOD padlock_rand;

/* Cipher Stuff */
#   ifndef OPENSSL_NO_AES
static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher,
                           const int **nids, int nid);
#   endif

/* Engine names */
static const char *padlock_id = "padlock";
static char padlock_name[100];

/* Available features (filled in by padlock_available()) */
static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
static int padlock_use_rng = 0; /* Random Number Generator */
#   ifndef OPENSSL_NO_AES
static int padlock_aes_align_required = 1;
#   endif

/* ===== Engine "management" functions ===== */

/* Prepare the ENGINE structure for registration */
static int padlock_bind_helper(ENGINE *e)
{
    /* Check available features */
    padlock_available();

#   if 1                        /* disable RNG for now, see commentary in
                                 * vicinity of RNG code */
    padlock_use_rng = 0;
#   endif

    /* Generate a nice engine name with available features */
    BIO_snprintf(padlock_name, sizeof(padlock_name),
                 "VIA PadLock (%s, %s)",
                 padlock_use_rng ? "RNG" : "no-RNG",
                 padlock_use_ace ? "ACE" : "no-ACE");

    /* Register everything or return with an error */
    if (!ENGINE_set_id(e, padlock_id) ||
        !ENGINE_set_name(e, padlock_name) ||
        !ENGINE_set_init_function(e, padlock_init) ||
#   ifndef OPENSSL_NO_AES
        (padlock_use_ace && !ENGINE_set_ciphers(e, padlock_ciphers)) ||
#   endif
        (padlock_use_rng && !ENGINE_set_RAND(e, &padlock_rand))) {
        return 0;
    }

    /* Everything looks good */
    return 1;
}

/* Constructor */
static ENGINE *ENGINE_padlock(void)
{
    ENGINE *eng = ENGINE_new();

    if (!eng) {
        return NULL;
    }

    if (!padlock_bind_helper(eng)) {
        ENGINE_free(eng);
        return NULL;
    }

    return eng;
}

/* Check availability of the engine */
static int padlock_init(ENGINE *e)
{
    return (padlock_use_rng || padlock_use_ace);
}

/*
 * This stuff is needed if this ENGINE is being compiled into a
 * self-contained shared-library.
 */
#   ifdef DYNAMIC_ENGINE
static int padlock_bind_fn(ENGINE *e, const char *id)
{
    if (id && (strcmp(id, padlock_id) != 0)) {
        return 0;
    }

    if (!padlock_bind_helper(e)) {
        return 0;
    }

    return 1;
}

IMPLEMENT_DYNAMIC_CHECK_FN()
    IMPLEMENT_DYNAMIC_BIND_FN(padlock_bind_fn)
#   endif                       /* DYNAMIC_ENGINE */
/* ===== Here comes the "real" engine ===== */
#   ifndef OPENSSL_NO_AES
/* Some AES-related constants */
#    define AES_BLOCK_SIZE          16
#    define AES_KEY_SIZE_128        16
#    define AES_KEY_SIZE_192        24
#    define AES_KEY_SIZE_256        32
    /*
     * Here we store the status information relevant to the current context.
     */
    /*
     * BIG FAT WARNING: Inline assembler in PADLOCK_XCRYPT_ASM() depends on
     * the order of items in this structure.  Don't blindly modify, reorder,
     * etc!
     */
struct padlock_cipher_data {
    unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */
    union {
        unsigned int pad[4];
        struct {
            int rounds:4;
            int dgst:1;         /* n/a in C3 */
            int align:1;        /* n/a in C3 */
            int ciphr:1;        /* n/a in C3 */
            unsigned int keygen:1;
            int interm:1;
            unsigned int encdec:1;
            int ksize:2;
        } b;
    } cword;                    /* Control word */
    AES_KEY ks;                 /* Encryption key */
};

/*
 * Essentially this variable belongs in thread local storage.
 * Having this variable global on the other hand can only cause
 * few bogus key reloads [if any at all on single-CPU system],
 * so we accept the penalty...
 */
static volatile struct padlock_cipher_data *padlock_saved_context;
#   endif

/*-
 * =======================================================
 * Inline assembler section(s).
 * =======================================================
 * Order of arguments is chosen to facilitate Windows port
 * using __fastcall calling convention. If you wish to add
 * more routines, keep in mind that first __fastcall
 * argument is passed in %ecx and second - in %edx.
 * =======================================================
 */
#   if defined(__GNUC__) && __GNUC__>=2
/*
 * As for excessive "push %ebx"/"pop %ebx" found all over.
 * When generating position-independent code GCC won't let
 * us use "b" in assembler templates nor even respect "ebx"
 * in "clobber description." Therefore the trouble...
 */

/*
 * Helper function - check if a CPUID instruction is available on this CPU
 */
static int padlock_insn_cpuid_available(void)
{
    int result = -1;

    /*
     * We're checking if the bit #21 of EFLAGS can be toggled. If yes =
     * CPUID is available.
     */
    asm volatile ("pushf\n"
                  "popl %%eax\n"
                  "xorl $0x200000, %%eax\n"
                  "movl %%eax, %%ecx\n"
                  "andl $0x200000, %%ecx\n"
                  "pushl %%eax\n"
                  "popf\n"
                  "pushf\n"
                  "popl %%eax\n"
                  "andl $0x200000, %%eax\n"
                  "xorl %%eax, %%ecx\n"
                  "movl %%ecx, %0\n":"=r" (result)::"eax", "ecx");

    return (result == 0);
}

/*
 * Load supported features of the CPU to see if the PadLock is available.
 */
static int padlock_available(void)
{
    char vendor_string[16];
    unsigned int eax, edx;

    /* First check if the CPUID instruction is available at all... */
    if (!padlock_insn_cpuid_available())
        return 0;

    /* Are we running on the Centaur (VIA) CPU? */
    eax = 0x00000000;
    vendor_string[12] = 0;
    asm volatile ("pushl %%ebx\n"
                  "cpuid\n"
                  "movl %%ebx,(%%edi)\n"
                  "movl %%edx,4(%%edi)\n"
                  "movl %%ecx,8(%%edi)\n"
                  "popl %%ebx":"+a" (eax):"D"(vendor_string):"ecx", "edx");
    if (strcmp(vendor_string, "CentaurHauls") != 0)
        return 0;

    /* Check for Centaur Extended Feature Flags presence */
    eax = 0xC0000000;
    asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax)::"ecx", "edx");
    if (eax < 0xC0000001)
        return 0;

    /* Read the Centaur Extended Feature Flags */
    eax = 0xC0000001;
    asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax),
                  "=d"(edx)::"ecx");

    /*
     * Fill up some flags.  Each feature needs two bits set: "exists"
     * and "enabled" (hence the 0x3 masks).
     */
    padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6));
    padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2));

    return padlock_use_ace + padlock_use_rng;
}

#    ifndef OPENSSL_NO_AES
/* Our own htonl()/ntohl() */
static inline void padlock_bswapl(AES_KEY *ks)
{
    size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]);
    unsigned int *key = ks->rd_key;

    while (i--) {
        asm volatile ("bswapl %0":"+r" (*key));
        key++;
    }
}
#    endif

/*
 * Force key reload from memory to the CPU microcode.  Loading EFLAGS from the
 * stack clears EFLAGS[30] which does the trick.
 */
static inline void padlock_reload_key(void)
{
    asm volatile ("pushfl; popfl");
}

#    ifndef OPENSSL_NO_AES
/*
 * This is heuristic key context tracing. At first one
 * believes that one should use atomic swap instructions,
 * but it's not actually necessary. Point is that if
 * padlock_saved_context was changed by another thread
 * after we've read it and before we compare it with cdata,
 * our key *shall* be reloaded upon thread context switch
 * and we are therefore set in either case...
 */
static inline void padlock_verify_context(struct padlock_cipher_data *cdata)
{
    asm volatile ("pushfl\n"
                  "       btl     $30,(%%esp)\n"
                  "       jnc     1f\n"
                  "       cmpl    %2,%1\n"
                  "       je      1f\n"
                  "       popfl\n"
                  "       subl    $4,%%esp\n"
                  "1:     addl    $4,%%esp\n"
                  "       movl    %2,%0":"+m" (padlock_saved_context)
                  :"r"(padlock_saved_context), "r"(cdata):"cc");
}

/* Template for padlock_xcrypt_* modes */
/*
 * BIG FAT WARNING: The offsets used with 'leal' instructions describe items
 * of the 'padlock_cipher_data' structure.
 */
#    define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)     \
static inline void *name(size_t cnt,                \
        struct padlock_cipher_data *cdata,          \
        void *out, const void *inp)                 \
{       void *iv;                                   \
        asm volatile ( "pushl   %%ebx\n"            \
                "       leal    16(%0),%%edx\n"     \
                "       leal    32(%0),%%ebx\n"     \
                        rep_xcrypt "\n"             \
                "       popl    %%ebx"              \
                : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
                : "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
                : "edx", "cc", "memory");           \
        return iv;                                  \
}

/* Generate all functions with appropriate opcodes */
/* rep xcryptecb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")
/* rep xcryptcbc */
    PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")
/* rep xcryptcfb */
    PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")
/* rep xcryptofb */
    PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")
#    endif
/* The RNG call itself */
static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in)
{
    unsigned int eax_out;

    asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
                  :"=a" (eax_out), "=m"(*(unsigned *)addr)
                  :"D"(addr), "d"(edx_in)
        );

    return eax_out;
}

/*
 * Why not inline 'rep movsd'? I failed to find information on what value in
 * Direction Flag one can expect and consequently have to apply
 * "better-safe-than-sorry" approach and assume "undefined." I could
 * explicitly clear it and restore the original value upon return from
 * padlock_aes_cipher, but it's presumably too much trouble for too little
 * gain... In case you wonder 'rep xcrypt*' instructions above are *not*
 * affected by the Direction Flag and pointers advance toward larger
 * addresses unconditionally.
 */
static inline unsigned char *padlock_memcpy(void *dst, const void *src,
                                            size_t n)
{
    long *d = dst;
    const long *s = src;

    n /= sizeof(*d);
    do {
        *d++ = *s++;
    } while (--n);

    return dst;
}

#   elif defined(_MSC_VER)
/*
 * Unlike GCC these are real functions. In order to minimize impact
 * on performance we adhere to __fastcall calling convention in
 * order to get two first arguments passed through %ecx and %edx.
 * Which kind of suits very well, as instructions in question use
 * both %ecx and %edx as input:-)
 */
#    define REP_XCRYPT(code)                \
        _asm _emit 0xf3                     \
        _asm _emit 0x0f _asm _emit 0xa7     \
        _asm _emit code

/*
 * BIG FAT WARNING: The offsets used with 'lea' instructions describe items
 * of the 'padlock_cipher_data' structure.
 */
#    define PADLOCK_XCRYPT_ASM(name,code)   \
static void * __fastcall                    \
        name (size_t cnt, void *cdata,      \
        void *outp, const void *inp)        \
{       _asm    mov     eax,edx             \
        _asm    lea     edx,[eax+16]        \
        _asm    lea     ebx,[eax+32]        \
        _asm    mov     edi,outp            \
        _asm    mov     esi,inp             \
        REP_XCRYPT(code)                    \
}

PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, 0xc8)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, 0xd0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, 0xe0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, 0xe8)

static int __fastcall padlock_xstore(void *outp, unsigned int code)
{
    _asm    mov     edi,ecx
    _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
}

static void __fastcall padlock_reload_key(void)
{
    _asm pushfd
    _asm popfd
}

static void __fastcall padlock_verify_context(void *cdata)
{
    _asm {
        pushfd
        bt      DWORD PTR[esp],30
        jnc     skip
        cmp     ecx,padlock_saved_context
        je      skip
        popfd
        sub     esp,4
skip:   add     esp,4
        mov     padlock_saved_context,ecx
    }
}

static int
padlock_available(void)
{
    _asm {
        pushfd
        pop     eax
        mov     ecx,eax
        xor     eax,1<<21
        push    eax
        popfd
        pushfd
        pop     eax
        xor     eax,ecx
        bt      eax,21
        jnc     noluck
        mov     eax,0
        cpuid
        xor     eax,eax
        cmp     ebx,'tneC'
        jne     noluck
        cmp     edx,'Hrua'
        jne     noluck
        cmp     ecx,'slua'
        jne     noluck
        mov     eax,0xC0000000
        cpuid
        mov     edx,eax
        xor     eax,eax
        cmp     edx,0xC0000001
        jb      noluck
        mov     eax,0xC0000001
        cpuid
        xor     eax,eax
        bt      edx,6
        jnc     skip_a
        bt      edx,7
        jnc     skip_a
        mov     padlock_use_ace,1
        inc     eax
skip_a: bt      edx,2
        jnc     skip_r
        bt      edx,3
        jnc     skip_r
        mov     padlock_use_rng,1
        inc     eax
skip_r:
noluck:
    }
}

static void __fastcall padlock_bswapl(void *key)
{
    _asm {
        pushfd
        cld
        mov     esi,ecx
        mov     edi,ecx
        mov     ecx,60
up:     lodsd
        bswap   eax
        stosd
        loop    up
        popfd
    }
}

/*
 * MS actually specifies status of Direction Flag and compiler even manages
 * to compile following as 'rep movsd' all by itself...
 */
#    define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
#   endif
/* ===== AES encryption/decryption ===== */
#   ifndef OPENSSL_NO_AES
#    if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
#     define NID_aes_128_cfb NID_aes_128_cfb128
#    endif
#    if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
#     define NID_aes_128_ofb NID_aes_128_ofb128
#    endif
#    if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
#     define NID_aes_192_cfb NID_aes_192_cfb128
#    endif
#    if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
#     define NID_aes_192_ofb NID_aes_192_ofb128
#    endif
#    if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
#     define NID_aes_256_cfb NID_aes_256_cfb128
#    endif
#    if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
#     define NID_aes_256_ofb NID_aes_256_ofb128
#    endif
/*
 * List of supported ciphers.
 */
static int padlock_cipher_nids[] = {
    NID_aes_128_ecb,
    NID_aes_128_cbc,
    NID_aes_128_cfb,
    NID_aes_128_ofb,

    NID_aes_192_ecb,
    NID_aes_192_cbc,
    NID_aes_192_cfb,
    NID_aes_192_ofb,

    NID_aes_256_ecb,
    NID_aes_256_cbc,
    NID_aes_256_cfb,
    NID_aes_256_ofb,
};

static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids) /
                                      sizeof(padlock_cipher_nids[0]));

/* Function prototypes ... */
static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                                const unsigned char *iv, int enc);
static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                              const unsigned char *in, size_t nbytes);

/* Round a pointer up to the next 16-byte boundary (PadLock requirement). */
#    define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) +         \
        ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F )      )
#    define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
        NEAREST_ALIGNED(ctx->cipher_data))

#    define EVP_CIPHER_block_size_ECB       AES_BLOCK_SIZE
#    define EVP_CIPHER_block_size_CBC       AES_BLOCK_SIZE
#    define EVP_CIPHER_block_size_OFB       1
#    define EVP_CIPHER_block_size_CFB       1

/*
 * Declaring so many ciphers by hand would be a pain.  Instead introduce a bit
 * of preprocessor magic :-)
 */
#    define DECLARE_AES_EVP(ksize,lmode,umode)      \
static const EVP_CIPHER padlock_aes_##ksize##_##lmode = {       \
        NID_aes_##ksize##_##lmode,              \
        EVP_CIPHER_block_size_##umode,          \
        AES_KEY_SIZE_##ksize,                   \
        AES_BLOCK_SIZE,                         \
        0 | EVP_CIPH_##umode##_MODE,            \
        padlock_aes_init_key,                   \
        padlock_aes_cipher,                     \
        NULL,                                   \
        sizeof(struct padlock_cipher_data) + 16,        \
        EVP_CIPHER_set_asn1_iv,                 \
        EVP_CIPHER_get_asn1_iv,                 \
        NULL,                                   \
        NULL                                    \
}

DECLARE_AES_EVP(128, ecb, ECB);
DECLARE_AES_EVP(128, cbc, CBC);
DECLARE_AES_EVP(128, cfb, CFB);
DECLARE_AES_EVP(128, ofb, OFB);

DECLARE_AES_EVP(192, ecb, ECB);
DECLARE_AES_EVP(192, cbc, CBC);
DECLARE_AES_EVP(192, cfb, CFB);
DECLARE_AES_EVP(192, ofb, OFB);

DECLARE_AES_EVP(256, ecb, ECB);
DECLARE_AES_EVP(256, cbc, CBC);
DECLARE_AES_EVP(256, cfb, CFB);
DECLARE_AES_EVP(256, ofb, OFB);

static int
padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids,
                int nid)
{
    /* No specific cipher => return a list of supported nids ... */
    if (!cipher) {
        *nids = padlock_cipher_nids;
        return padlock_cipher_nids_num;
    }

    /* ... or the requested "cipher" otherwise */
    switch (nid) {
    case NID_aes_128_ecb:
        *cipher = &padlock_aes_128_ecb;
        break;
    case NID_aes_128_cbc:
        *cipher = &padlock_aes_128_cbc;
        break;
    case NID_aes_128_cfb:
        *cipher = &padlock_aes_128_cfb;
        break;
    case NID_aes_128_ofb:
        *cipher = &padlock_aes_128_ofb;
        break;

    case NID_aes_192_ecb:
        *cipher = &padlock_aes_192_ecb;
        break;
    case NID_aes_192_cbc:
        *cipher = &padlock_aes_192_cbc;
        break;
    case NID_aes_192_cfb:
        *cipher = &padlock_aes_192_cfb;
        break;
    case NID_aes_192_ofb:
        *cipher = &padlock_aes_192_ofb;
        break;

    case NID_aes_256_ecb:
        *cipher = &padlock_aes_256_ecb;
        break;
    case NID_aes_256_cbc:
        *cipher = &padlock_aes_256_cbc;
        break;
    case NID_aes_256_cfb:
        *cipher = &padlock_aes_256_cfb;
        break;
    case NID_aes_256_ofb:
        *cipher = &padlock_aes_256_ofb;
        break;

    default:
        /* Sorry, we don't support this NID */
        *cipher = NULL;
        return 0;
    }

    return 1;
}

/* Prepare the encryption key for PadLock usage */
static int
padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                     const unsigned char *iv, int enc)
{
    struct padlock_cipher_data *cdata;
    int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;

    if (key == NULL)
        return 0;               /* ERROR */

    cdata = ALIGNED_CIPHER_DATA(ctx);
    memset(cdata, 0, sizeof(struct padlock_cipher_data));

    /* Prepare Control word. */
    if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
        cdata->cword.b.encdec = 0;
    else
        cdata->cword.b.encdec = (ctx->encrypt == 0);
    cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
    cdata->cword.b.ksize = (key_len - 128) / 64;

    switch (key_len) {
    case 128:
        /*
         * PadLock can generate an extended key for AES128 in hardware
         */
        memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
        cdata->cword.b.keygen = 0;
        break;

    case 192:
    case 256:
        /*
         * Generate an extended AES key in software. Needed for AES192/AES256
         */
        /*
         * Well, the above applies to Stepping 8 CPUs and is listed as
         * hardware errata. They most likely will fix it at some point and
         * then a check for stepping would be due here.
         */
        if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
            EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE || enc)
            AES_set_encrypt_key(key, key_len, &cdata->ks);
        else
            AES_set_decrypt_key(key, key_len, &cdata->ks);
#    ifndef AES_ASM
        /*
         * OpenSSL C functions use byte-swapped extended key.
         */
        padlock_bswapl(&cdata->ks);
#    endif
        cdata->cword.b.keygen = 1;
        break;

    default:
        /* ERROR */
        return 0;
    }

    /*
     * This is done to cover for cases when user reuses the
     * context for new key. The catch is that if we don't do
     * this, padlock_aes_cipher might proceed with old key...
     */
    padlock_reload_key();

    return 1;
}

/*-
 * Simplified version of padlock_aes_cipher() used when
 * 1) both input and output buffers are at aligned addresses.
 * or when
 * 2) running on a newer CPU that doesn't require aligned buffers.
 */
static int
padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
                              const unsigned char *in_arg, size_t nbytes)
{
    struct padlock_cipher_data *cdata;
    void *iv;

    cdata = ALIGNED_CIPHER_DATA(ctx);
    padlock_verify_context(cdata);

    switch (EVP_CIPHER_CTX_mode(ctx)) {
    case EVP_CIPH_ECB_MODE:
        padlock_xcrypt_ecb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
        break;

    case EVP_CIPH_CBC_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        iv = padlock_xcrypt_cbc(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
                                in_arg);
        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_CFB_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        iv = padlock_xcrypt_cfb(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
                                in_arg);
        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_OFB_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        padlock_xcrypt_ofb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
        memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
        break;

    default:
        return 0;
    }

    /* Don't leave IV material behind in the context scratch area. */
    memset(cdata->iv, 0, AES_BLOCK_SIZE);

    return 1;
}

#    ifndef PADLOCK_CHUNK
#     define PADLOCK_CHUNK  512 /* Must be a power of 2 larger than 16 */
#    endif
#    if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
#     error "insane PADLOCK_CHUNK..."
#    endif

/*
 * Re-align the arguments to 16-Bytes boundaries and run the encryption
 * function itself. This function is not AES-specific.
 */
static int
padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
                   const unsigned char *in_arg, size_t nbytes)
{
    struct padlock_cipher_data *cdata;
    const void *inp;
    unsigned char *out;
    void *iv;
    int inp_misaligned, out_misaligned, realign_in_loop;
    size_t chunk, allocated = 0;

    /*
     * ctx->num is maintained in byte-oriented modes, such as CFB and OFB...
     */
    if ((chunk = ctx->num)) {   /* borrow chunk variable */
        unsigned char *ivp = ctx->iv;

        switch (EVP_CIPHER_CTX_mode(ctx)) {
        case EVP_CIPH_CFB_MODE:
            if (chunk >= AES_BLOCK_SIZE)
                return 0;       /* bogus value */

            if (ctx->encrypt)
                while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
                    ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
                    chunk++, nbytes--;
            } else
                while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
                    unsigned char c = *(in_arg++);
                    *(out_arg++) = c ^ ivp[chunk];
                    ivp[chunk++] = c, nbytes--;
                }

            ctx->num = chunk % AES_BLOCK_SIZE;
            break;
        case EVP_CIPH_OFB_MODE:
            if (chunk >= AES_BLOCK_SIZE)
                return 0;       /* bogus value */

            while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
                *(out_arg++) = *(in_arg++) ^ ivp[chunk];
                chunk++, nbytes--;
            }

            ctx->num = chunk % AES_BLOCK_SIZE;
            break;
        }
    }

    if (nbytes == 0)
        return 1;
#    if 0
    if (nbytes % AES_BLOCK_SIZE)
        return 0;               /* are we expected to do tail processing? */
#    else
    /*
     * nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC modes and
     * arbitrary value in byte-oriented modes, such as CFB and OFB...
     */
#    endif

    /*
     * VIA promises CPUs that won't require alignment in the future. For now
     * padlock_aes_align_required is initialized to 1 and the condition is
     * never met...
     */
    /*
     * C7 core is capable to manage unaligned input in non-ECB[!] mode, but
     * performance penalties appear to be approximately same as for software
     * alignment below or ~3x. They promise to improve it in the future, but
     * for now we can just as well pretend that it can only handle aligned
     * input...
     */
    if (!padlock_aes_align_required && (nbytes % AES_BLOCK_SIZE) == 0)
        return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

    inp_misaligned = (((size_t)in_arg) & 0x0F);
    out_misaligned = (((size_t)out_arg) & 0x0F);

    /*
     * Note that even if output is aligned and input not, I still prefer to
     * loop instead of copy the whole input and then encrypt in one stroke.
     * This is done in order to improve L1 cache utilization...
     */
    realign_in_loop = out_misaligned | inp_misaligned;

    if (!realign_in_loop && (nbytes % AES_BLOCK_SIZE) == 0)
        return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

    /* this takes one "if" out of the loops */
    chunk = nbytes;
    chunk %= PADLOCK_CHUNK;
    if (chunk == 0)
        chunk = PADLOCK_CHUNK;

    if (out_misaligned) {
        /* optimize for small input */
        allocated = (chunk < nbytes ? PADLOCK_CHUNK : nbytes);
        out = alloca(0x10 + allocated);
        out = NEAREST_ALIGNED(out);
    } else
        out = out_arg;

    cdata = ALIGNED_CIPHER_DATA(ctx);
    padlock_verify_context(cdata);

    switch (EVP_CIPHER_CTX_mode(ctx)) {
    case EVP_CIPH_ECB_MODE:
        do {
            if (inp_misaligned)
                inp = padlock_memcpy(out, in_arg, chunk);
            else
                inp = in_arg;
            in_arg += chunk;

            padlock_xcrypt_ecb(chunk / AES_BLOCK_SIZE, cdata, out, inp);

            if (out_misaligned)
                out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
            else
                out = out_arg += chunk;

            nbytes -= chunk;
            chunk = PADLOCK_CHUNK;
        } while (nbytes);
        break;

    case EVP_CIPH_CBC_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        goto cbc_shortcut;
        do {
            if (iv != cdata->iv)
                memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
            chunk = PADLOCK_CHUNK;
 cbc_shortcut:                 /* optimize for small input */
            if (inp_misaligned)
                inp = padlock_memcpy(out, in_arg, chunk);
            else
                inp = in_arg;
            in_arg += chunk;

            iv = padlock_xcrypt_cbc(chunk / AES_BLOCK_SIZE, cdata, out, inp);

            if (out_misaligned)
                out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
            else
                out = out_arg += chunk;

        } while (nbytes -= chunk);
        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_CFB_MODE:
        memcpy(iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        chunk &= ~(AES_BLOCK_SIZE - 1);
        if (chunk)
            goto cfb_shortcut;
        else
            goto cfb_skiploop;
        do {
            if (iv != cdata->iv)
                memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
            chunk = PADLOCK_CHUNK;
 cfb_shortcut:                 /* optimize for small input */
            if (inp_misaligned)
                inp = padlock_memcpy(out, in_arg, chunk);
            else
                inp = in_arg;
            in_arg += chunk;

            iv = padlock_xcrypt_cfb(chunk / AES_BLOCK_SIZE, cdata, out, inp);

            if (out_misaligned)
                out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
            else
                out = out_arg += chunk;

            nbytes -= chunk;
        } while (nbytes >= AES_BLOCK_SIZE);

 cfb_skiploop:
        if (nbytes) {
            unsigned char *ivp = cdata->iv;

            if (iv != ivp) {
                memcpy(ivp, iv, AES_BLOCK_SIZE);
                iv = ivp;
            }
            ctx->num = nbytes;
            if (cdata->cword.b.encdec) {
                cdata->cword.b.encdec = 0;
                padlock_reload_key();
                padlock_xcrypt_ecb(1, cdata, ivp, ivp);
                cdata->cword.b.encdec = 1;
                padlock_reload_key();
                while (nbytes) {
                    unsigned char c = *(in_arg++);
                    *(out_arg++) = c ^ *ivp;
                    *(ivp++) = c, nbytes--;
                }
            } else {
                padlock_reload_key();
                padlock_xcrypt_ecb(1, cdata, ivp, ivp);
                padlock_reload_key();
                while (nbytes) {
                    *ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
                    ivp++, nbytes--;
                }
            }
        }

        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_OFB_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        chunk &= ~(AES_BLOCK_SIZE - 1);
        if (chunk)
            do {
                if (inp_misaligned)
                    inp = padlock_memcpy(out, in_arg, chunk);
                else
                    inp = in_arg;
                in_arg += chunk;

                padlock_xcrypt_ofb(chunk / AES_BLOCK_SIZE, cdata, out, inp);

                if (out_misaligned)
                    out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
                else
                    out = out_arg += chunk;

                nbytes -= chunk;
                chunk = PADLOCK_CHUNK;
            } while (nbytes >= AES_BLOCK_SIZE);

        if (nbytes) {
            unsigned char *ivp = cdata->iv;

            ctx->num = nbytes;
            padlock_reload_key(); /* empirically found */
            padlock_xcrypt_ecb(1, cdata, ivp, ivp);
            padlock_reload_key(); /* empirically found */
            while (nbytes) {
                *(out_arg++) = *(in_arg++) ^ *ivp;
                ivp++, nbytes--;
            }
        }

        memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
        break;

    default:
        return 0;
    }

    /* Clean the realign buffer if it was used */
    if (out_misaligned) {
        volatile unsigned long *p = (void *)out;
        size_t n = allocated / sizeof(*p);
        while (n--)
            *p++ = 0;
    }

    memset(cdata->iv, 0, AES_BLOCK_SIZE);

    return 1;
}

#   endif                       /* OPENSSL_NO_AES */

/* ===== Random Number Generator ===== */
/*
 * This code is not engaged. The reason is that it does not comply
 * with recommendations for VIA RNG usage for secure applications
 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
 * provide meaningful error control...
 */
/*
 * Wrapper that provides an interface between the API and the raw PadLock
 * RNG
 */
static int padlock_rand_bytes(unsigned char *output, int count)
{
    unsigned int eax, buf;

    while (count >= 8) {
        eax = padlock_xstore(output, 0);
        if (!(eax & (1 << 6)))
            return 0;           /* RNG disabled */
        /* this ---vv--- covers DC bias, Raw Bits and String Filter */
        if (eax & (0x1F << 10))
            return 0;
        if ((eax & 0x1F) == 0)
            continue;           /* no data, retry... */
        if ((eax & 0x1F) != 8)
            return 0;           /* fatal failure...  */
        output += 8;
        count -= 8;
    }
    while (count > 0) {
        eax = padlock_xstore(&buf, 3);
        if (!(eax & (1 << 6)))
            return 0;           /* RNG disabled */
        /* this ---vv--- covers DC bias, Raw Bits and String Filter */
        if (eax & (0x1F << 10))
            return 0;
        if ((eax & 0x1F) == 0)
            continue;           /* no data, retry... */
        if ((eax & 0x1F) != 1)
            return 0;           /* fatal failure...  */
        *output++ = (unsigned char)buf;
        count--;
    }
    /* volatile qualifier keeps this scrub from being optimized away */
    *(volatile unsigned int *)&buf = 0;

    return 1;
}

/* Dummy but necessary function */
static int padlock_rand_status(void)
{
    return 1;
}

/* Prepare structure for registration */
static RAND_METHOD padlock_rand = {
    NULL,                       /* seed */
    padlock_rand_bytes,         /* bytes */
    NULL,                       /* cleanup */
    NULL,                       /* add */
    padlock_rand_bytes,         /* pseudorand */
    padlock_rand_status,        /* rand status */
};

#  endif                        /* COMPILE_HW_PADLOCK */

# endif                         /* !OPENSSL_NO_HW_PADLOCK */
#endif                          /* !OPENSSL_NO_HW */