1/* 2 * Support for VIA PadLock Advanced Cryptography Engine (ACE) 3 * Written by Michal Ludvig <michal@logix.cz> 4 * http://www.logix.cz/michal 5 * 6 * Big thanks to Andy Polyakov for a help with optimization, 7 * assembler fixes, port to MS Windows and a lot of other 8 * valuable work on this engine! 9 */ 10 11/* ==================================================================== 12 * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 21 * 2. Redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in 23 * the documentation and/or other materials provided with the 24 * distribution. 25 * 26 * 3. All advertising materials mentioning features or use of this 27 * software must display the following acknowledgment: 28 * "This product includes software developed by the OpenSSL Project 29 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" 30 * 31 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 32 * endorse or promote products derived from this software without 33 * prior written permission. For written permission, please contact 34 * licensing@OpenSSL.org. 35 * 36 * 5. Products derived from this software may not be called "OpenSSL" 37 * nor may "OpenSSL" appear in their names without prior written 38 * permission of the OpenSSL Project. 39 * 40 * 6. 
Redistributions of any form whatsoever must retain the following 41 * acknowledgment: 42 * "This product includes software developed by the OpenSSL Project 43 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" 44 * 45 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 46 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 48 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 49 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 50 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 51 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 52 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 54 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 55 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 56 * OF THE POSSIBILITY OF SUCH DAMAGE. 57 * ==================================================================== 58 * 59 * This product includes cryptographic software written by Eric Young 60 * (eay@cryptsoft.com). This product includes software written by Tim 61 * Hudson (tjh@cryptsoft.com). 
 */

#include <stdio.h>
#include <string.h>

#include <openssl/opensslconf.h>
#include <openssl/crypto.h>
#include <openssl/dso.h>
#include <openssl/engine.h>
#include <openssl/evp.h>
#ifndef OPENSSL_NO_AES
#include <openssl/aes.h>
#endif
#include <openssl/rand.h>
#include <openssl/err.h>

#ifndef OPENSSL_NO_HW
#ifndef OPENSSL_NO_HW_PADLOCK

/* Attempt to have a single source for both 0.9.7 and 0.9.8 :-)
   0.9.8 spells the dynamic-engine opt-out OPENSSL_NO_DYNAMIC_ENGINE,
   while 0.9.7 used an opt-in macro ENGINE_DYNAMIC_SUPPORT instead. */
#if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
# ifndef OPENSSL_NO_DYNAMIC_ENGINE
#  define DYNAMIC_ENGINE
# endif
#elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
# ifdef ENGINE_DYNAMIC_SUPPORT
#  define DYNAMIC_ENGINE
# endif
#else
# error "Only OpenSSL >= 0.9.7 is supported"
#endif

/* VIA PadLock AES is available *ONLY* on some x86 CPUs.
   Not only that it doesn't exist elsewhere, but it
   even can't be compiled on other platforms!

   In addition, because of the heavy use of inline assembler,
   compiler choice is limited to GCC and Microsoft C. */
#undef COMPILE_HW_PADLOCK
#if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
     (defined(_MSC_VER) && defined(_M_IX86))
#  define COMPILE_HW_PADLOCK
static ENGINE *ENGINE_padlock (void);
# endif
#endif

#ifdef OPENSSL_NO_DYNAMIC_ENGINE

/* Statically-linked entry point: construct the PadLock engine and
 * register it with the global ENGINE list.  Errors are deliberately
 * discarded (ERR_clear_error) so that a missing PadLock unit does not
 * pollute the error queue of applications that merely load all
 * built-in engines. */
void ENGINE_load_padlock (void)
{
/* On non-x86 CPUs it just returns. */
#ifdef COMPILE_HW_PADLOCK
	ENGINE *toadd = ENGINE_padlock ();
	if (!toadd) return;
	ENGINE_add (toadd);
	/* ENGINE_add took its own structural reference; drop ours. */
	ENGINE_free (toadd);
	ERR_clear_error ();
#endif
}

#endif

#ifdef COMPILE_HW_PADLOCK
/* We do these includes here to avoid header problems on platforms that
   do not have the VIA padlock anyway...
 */
#include <stdlib.h>
#ifdef _WIN32
# include <malloc.h>
# ifndef alloca
#  define alloca _alloca
# endif
#elif defined(__GNUC__)
# ifndef alloca
#  define alloca(s) __builtin_alloca(s)
# endif
#endif

/* Function for ENGINE detection and control */
static int padlock_available(void);
static int padlock_init(ENGINE *e);

/* RNG Stuff */
static RAND_METHOD padlock_rand;

/* Cipher Stuff */
#ifndef OPENSSL_NO_AES
static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid);
#endif

/* Engine names */
static const char *padlock_id = "padlock";
static char padlock_name[100];

/* Available features, filled in by padlock_available() at bind time. */
static int padlock_use_ace = 0;	/* Advanced Cryptography Engine */
static int padlock_use_rng = 0;	/* Random Number Generator */
#ifndef OPENSSL_NO_AES
/* Older PadLock cores require 16-byte aligned buffers; kept at 1 so the
   re-aligning code path in padlock_aes_cipher() is always taken. */
static int padlock_aes_align_required = 1;
#endif

/* ===== Engine "management" functions ===== */

/* Prepare the ENGINE structure for registration: probe the CPU,
 * build a descriptive name, and install the callbacks for whichever
 * hardware features were detected.  Returns 1 on success, 0 on any
 * ENGINE_set_* failure. */
static int
padlock_bind_helper(ENGINE *e)
{
	/* Check available features */
	padlock_available();

#if 1	/* disable RNG for now, see commentary in vicinity of RNG code */
	padlock_use_rng=0;
#endif

	/* Generate a nice engine name with available features */
	BIO_snprintf(padlock_name, sizeof(padlock_name),
		"VIA PadLock (%s, %s)",
		padlock_use_rng ? "RNG" : "no-RNG",
		padlock_use_ace ? "ACE" : "no-ACE");

	/* Register everything or return with an error.  Cipher and RAND
	   methods are only installed when the corresponding hardware
	   unit was detected and enabled. */
	if (!ENGINE_set_id(e, padlock_id) ||
	    !ENGINE_set_name(e, padlock_name) ||

	    !ENGINE_set_init_function(e, padlock_init) ||
#ifndef OPENSSL_NO_AES
	    (padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) ||
#endif
	    (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) {
		return 0;
	}

	/* Everything looks good */
	return 1;
}

/* Constructor: allocate a fresh ENGINE and bind the PadLock methods
 * to it.  Returns NULL on allocation or bind failure. */
static ENGINE *
ENGINE_padlock(void)
{
	ENGINE *eng = ENGINE_new();

	if (!eng) {
		return NULL;
	}

	if (!padlock_bind_helper(eng)) {
		ENGINE_free(eng);
		return NULL;
	}

	return eng;
}

/* Check availability of the engine: succeeds iff at least one of the
 * PadLock units (RNG or ACE) was detected by padlock_available(). */
static int
padlock_init(ENGINE *e)
{
	return (padlock_use_rng || padlock_use_ace);
}

/* This stuff is needed if this ENGINE is being compiled into a self-contained
 * shared-library.
 */
#ifdef DYNAMIC_ENGINE
/* Dynamic-loading bind callback: reject requests for any id other
 * than "padlock", then perform the normal bind. */
static int
padlock_bind_fn(ENGINE *e, const char *id)
{
	if (id && (strcmp(id, padlock_id) != 0)) {
		return 0;
	}

	if (!padlock_bind_helper(e))  {
		return 0;
	}

	return 1;
}

IMPLEMENT_DYNAMIC_CHECK_FN()
IMPLEMENT_DYNAMIC_BIND_FN (padlock_bind_fn)
#endif /* DYNAMIC_ENGINE */

/* ===== Here comes the "real" engine ===== */

#ifndef OPENSSL_NO_AES
/* Some AES-related constants */
#define AES_BLOCK_SIZE		16
#define AES_KEY_SIZE_128	16
#define AES_KEY_SIZE_192	24
#define AES_KEY_SIZE_256	32

/* Here we store the status information relevant to the
   current context. */
/* BIG FAT WARNING:
 * 	Inline assembler in PADLOCK_XCRYPT_ASM()
 * 	depends on the order of items in this structure.
 * 	Don't blindly modify, reorder, etc!
 */
struct padlock_cipher_data
{
	unsigned char	iv[AES_BLOCK_SIZE];	/* Initialization vector */
	union {	unsigned int pad[4];		/* pad forces 16-byte size */
		struct {
			int rounds:4;		/* AES rounds: 10/12/14 */
			int dgst:1;	/* n/a in C3 */
			int align:1;	/* n/a in C3 */
			int ciphr:1;	/* n/a in C3 */
			unsigned int keygen:1;	/* 1: key schedule supplied */
			int interm:1;
			unsigned int encdec:1;	/* 0: encrypt, 1: decrypt */
			int ksize:2;		/* key size: 0/1/2 -> 128/192/256 */
		} b;
	} cword;		/* Control word */
	AES_KEY ks;		/* Encryption key */
};

/*
 * Essentially this variable belongs in thread local storage.
 * Having this variable global on the other hand can only cause
 * few bogus key reloads [if any at all on single-CPU system],
 * so we accept the penalty...
 */
static volatile struct padlock_cipher_data *padlock_saved_context;
#endif

/*
 * =======================================================
 * Inline assembler section(s).
 * =======================================================
 * Order of arguments is chosen to facilitate Windows port
 * using __fastcall calling convention. If you wish to add
 * more routines, keep in mind that first __fastcall
 * argument is passed in %ecx and second - in %edx.
 * =======================================================
 */
#if defined(__GNUC__) && __GNUC__>=2
/*
 * As for excessive "push %ebx"/"pop %ebx" found all over.
 * When generating position-independent code GCC won't let
 * us use "b" in assembler templates nor even respect "ebx"
 * in "clobber description." Therefore the trouble...
 */

/* Helper function - check if a CPUID instruction
   is available on this CPU */
static int
padlock_insn_cpuid_available(void)
{
	int result = -1;

	/* We're checking if the bit #21 of EFLAGS
	   can be toggled. If yes = CPUID is available. */
	asm volatile (
		"pushf\n"
		"popl %%eax\n"
		"xorl $0x200000, %%eax\n"
		"movl %%eax, %%ecx\n"
		"andl $0x200000, %%ecx\n"
		"pushl %%eax\n"
		"popf\n"
		"pushf\n"
		"popl %%eax\n"
		"andl $0x200000, %%eax\n"
		"xorl %%eax, %%ecx\n"
		"movl %%ecx, %0\n"
		: "=r" (result) : : "eax", "ecx");

	/* result == 0 means the toggled bit survived the
	   push/pop round-trip, i.e. CPUID is supported. */
	return (result == 0);
}

/* Load supported features of the CPU to see if
   the PadLock is available.  Sets the padlock_use_ace and
   padlock_use_rng globals; returns non-zero when at least
   one unit is both present ("exists" flag) and enabled. */
static int
padlock_available(void)
{
	char vendor_string[16];
	unsigned int eax, edx;

	/* First check if the CPUID instruction is available at all... */
	if (! padlock_insn_cpuid_available())
		return 0;

	/* Are we running on the Centaur (VIA) CPU?
	   CPUID leaf 0 returns the vendor string in ebx:edx:ecx. */
	eax = 0x00000000;
	vendor_string[12] = 0;
	asm volatile (
		"pushl %%ebx\n"
		"cpuid\n"
		"movl %%ebx,(%%edi)\n"
		"movl %%edx,4(%%edi)\n"
		"movl %%ecx,8(%%edi)\n"
		"popl %%ebx"
		: "+a"(eax) : "D"(vendor_string) : "ecx", "edx");
	if (strcmp(vendor_string, "CentaurHauls") != 0)
		return 0;

	/* Check for Centaur Extended Feature Flags presence */
	eax = 0xC0000000;
	asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
		: "+a"(eax) : : "ecx", "edx");
	if (eax < 0xC0000001)
		return 0;

	/* Read the Centaur Extended Feature Flags */
	eax = 0xC0000001;
	asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
		: "+a"(eax), "=d"(edx) : : "ecx");

	/* Fill up some flags: each unit has an "exists" bit and an
	   "enabled" bit (ACE: edx bits 6-7, RNG: edx bits 2-3);
	   both must be set for the unit to be usable. */
	padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
	padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));

	return padlock_use_ace + padlock_use_rng;
}

#ifndef OPENSSL_NO_AES
/* Our own htonl()/ntohl(): byte-swap every word of the expanded
   AES key schedule in place. */
static inline void
padlock_bswapl(AES_KEY *ks)
{
	size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
	unsigned int *key = ks->rd_key;

	while (i--) {
		asm volatile ("bswapl %0" : "+r"(*key));
		key++;
	}
}
#endif

/* Force key reload from memory to the CPU
   microcode.
   Loading EFLAGS from the stack clears EFLAGS[30]
   which does the trick. */
static inline void
padlock_reload_key(void)
{
	asm volatile ("pushfl; popfl");
}

#ifndef OPENSSL_NO_AES
/*
 * This is heuristic key context tracing. At first one
 * believes that one should use atomic swap instructions,
 * but it's not actually necessary. Point is that if
 * padlock_saved_context was changed by another thread
 * after we've read it and before we compare it with cdata,
 * our key *shall* be reloaded upon thread context switch
 * and we are therefore set in either case...
 */
static inline void
padlock_verify_context(struct padlock_cipher_data *cdata)
{
	/* If EFLAGS[30] is still set (no reload pending) and the last
	   context used was a different one, pop EFLAGS to force the
	   xcrypt microcode to re-read the key; then record cdata as
	   the current context.  The esp juggling keeps the stack
	   balanced on both paths. */
	asm volatile (
	"pushfl\n"
"	btl	$30,(%%esp)\n"
"	jnc	1f\n"
"	cmpl	%2,%1\n"
"	je	1f\n"
"	popfl\n"
"	subl	$4,%%esp\n"
"1:	addl	$4,%%esp\n"
"	movl	%2,%0"
	:"+m"(padlock_saved_context)
	: "r"(padlock_saved_context), "r"(cdata) : "cc");
}

/* Template for padlock_xcrypt_* modes */
/* BIG FAT WARNING:
 * 	The offsets used with 'leal' instructions
 * 	describe items of the 'padlock_cipher_data'
 * 	structure.
 */
/* Each generated function runs "rep xcrypt<mode>" over cnt 16-byte
 * blocks: %eax = context (IV at offset 0), %edx = control word
 * (offset 16), %ebx = key schedule (offset 32), %esi/%edi = in/out,
 * %ecx = block count.  Returns the post-run %eax, which for the
 * chaining modes points at the final IV. */
#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)	\
static inline void *name(size_t cnt,		\
	struct padlock_cipher_data *cdata,	\
	void *out, const void *inp) 		\
{	void *iv; 				\
	asm volatile ( "pushl %%ebx\n"		\
		"	leal 16(%0),%%edx\n"	\
		"	leal 32(%0),%%ebx\n"	\
			rep_xcrypt "\n"		\
		"	popl %%ebx"		\
		: "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
		: "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
		: "edx", "cc", "memory");	\
	return iv;				\
}

/* Generate all functions with appropriate opcodes.  The instructions
   are emitted as raw bytes so that pre-PadLock assemblers can still
   build this file. */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")	/* rep xcryptecb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")	/* rep xcryptcbc */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")	/* rep xcryptcfb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")	/* rep xcryptofb */
#endif

/* The RNG call itself: xstore writes hardware random bytes to *addr;
   edx_in selects the "quality factor"; the returned %eax carries
   status bits interpreted by padlock_rand_bytes(). */
static inline unsigned int
padlock_xstore(void *addr, unsigned int edx_in)
{
	unsigned int eax_out;

	asm volatile (".byte 0x0f,0xa7,0xc0"	/* xstore */
	    : "=a"(eax_out),"=m"(*(unsigned *)addr)
	    : "D"(addr), "d" (edx_in)
	    );

	return eax_out;
}

/* Why not inline 'rep movsd'? I failed to find information on what
 * value in Direction Flag one can expect and consequently have to
 * apply "better-safe-than-sorry" approach and assume "undefined."
 * I could explicitly clear it and restore the original value upon
 * return from padlock_aes_cipher, but it's presumably too much
 * trouble for too little gain...
 *
 * In case you wonder 'rep xcrypt*' instructions above are *not*
 * affected by the Direction Flag and pointers advance toward
 * larger addresses unconditionally.
 */
/* Word-at-a-time copy for re-aligning buffers.  NOTE(review): assumes
 * n is a non-zero multiple of sizeof(long) — callers only pass multiples
 * of AES_BLOCK_SIZE, which satisfies this on i386. */
static inline unsigned char *
padlock_memcpy(void *dst,const void *src,size_t n)
{
	long       *d=dst;
	const long *s=src;

	n /= sizeof(*d);
	do { *d++ = *s++; } while (--n);

	return dst;
}

#elif defined(_MSC_VER)
/*
 * Unlike GCC these are real functions. In order to minimize impact
 * on performance we adhere to __fastcall calling convention in
 * order to get two first arguments passed through %ecx and %edx.
 * Which kind of suits very well, as instructions in question use
 * both %ecx and %edx as input:-)
 */
#define REP_XCRYPT(code)		\
	_asm _emit 0xf3			\
	_asm _emit 0x0f _asm _emit 0xa7	\
	_asm _emit code

/* BIG FAT WARNING:
 * 	The offsets used with 'lea' instructions
 * 	describe items of the 'padlock_cipher_data'
 * 	structure.
 */
#define PADLOCK_XCRYPT_ASM(name,code)	\
static void * __fastcall 		\
	name (size_t cnt, void *cdata,	\
	      void *outp, const void *inp) \
{	_asm	mov	eax,edx		\
	_asm	lea	edx,[eax+16]	\
	_asm	lea	ebx,[eax+32]	\
	_asm	mov	edi,outp	\
	_asm	mov	esi,inp		\
	REP_XCRYPT(code)		\
}

PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)

/* xstore wrapper, MS flavour; status comes back in eax which is
   also the function's return value. */
static int __fastcall
padlock_xstore(void *outp,unsigned int code)
{	_asm	mov	edi,ecx
	_asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
}

/* Pop EFLAGS to clear EFLAGS[30] and force xcrypt key reload. */
static void __fastcall
padlock_reload_key(void)
{	_asm pushfd _asm popfd
}

/* See the commentary above padlock_verify_context in the GCC
   section; this is the same heuristic context tracing. */
static void __fastcall
padlock_verify_context(void *cdata)
{	_asm	{
		pushfd
		bt	DWORD PTR[esp],30
		jnc	skip
		cmp	ecx,padlock_saved_context
		je	skip
		popfd
		sub	esp,4
	skip:	add	esp,4
		mov	padlock_saved_context,ecx
	}
}

/* CPUID-based detection, MS flavour: verify the "CentaurHauls"
   vendor string, then read the Centaur Extended Feature Flags and
   set padlock_use_ace / padlock_use_rng (exists+enabled bit pairs,
   as in the GCC version). */
static int
padlock_available(void)
{	_asm	{
		pushfd
		pop	eax
		mov	ecx,eax
		xor	eax,1<<21
		push	eax
		popfd
		pushfd
		pop	eax
		xor	eax,ecx
		bt	eax,21
		jnc	noluck
		mov	eax,0
		cpuid
		xor	eax,eax
		cmp	ebx,'tneC'
		jne	noluck
		cmp	edx,'Hrua'
		jne	noluck
		cmp	ecx,'slua'
		jne	noluck
		mov	eax,0xC0000000
		cpuid
		mov	edx,eax
		xor	eax,eax
		cmp	edx,0xC0000001
		jb	noluck
		mov	eax,0xC0000001
		cpuid
		xor	eax,eax
		bt	edx,6
		jnc	skip_a
		bt	edx,7
		jnc	skip_a
		mov	padlock_use_ace,1
		inc	eax
	skip_a:	bt	edx,2
		jnc	skip_r
		bt	edx,3
		jnc	skip_r
		mov	padlock_use_rng,1
		inc	eax
	skip_r:
	noluck:
	}
}

/* Byte-swap the 60-word AES key schedule in place (cf. GCC
   padlock_bswapl). */
static void __fastcall
padlock_bswapl(void *key)
{	_asm	{
		pushfd
		cld
		mov	esi,ecx
		mov	edi,ecx
		mov	ecx,60
	up:	lodsd
		bswap	eax
		stosd
		loop	up
		popfd
	}
}

/* MS actually specifies status of Direction Flag and compiler even
 * manages to compile following as 'rep movsd' all by itself...
 */
#define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
#endif

/* ===== AES encryption/decryption ===== */
#ifndef OPENSSL_NO_AES

/* 0.9.7 spells the stream-mode NIDs without the "128" suffix;
   map the 0.9.8 names onto whatever is available. */
#if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
#define NID_aes_128_cfb	NID_aes_128_cfb128
#endif

#if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
#define NID_aes_128_ofb	NID_aes_128_ofb128
#endif

#if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
#define NID_aes_192_cfb	NID_aes_192_cfb128
#endif

#if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
#define NID_aes_192_ofb	NID_aes_192_ofb128
#endif

#if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
#define NID_aes_256_cfb	NID_aes_256_cfb128
#endif

#if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
#define NID_aes_256_ofb	NID_aes_256_ofb128
#endif

/* List of supported ciphers.
 */
static int padlock_cipher_nids[] = {
	NID_aes_128_ecb,
	NID_aes_128_cbc,
	NID_aes_128_cfb,
	NID_aes_128_ofb,

	NID_aes_192_ecb,
	NID_aes_192_cbc,
	NID_aes_192_cfb,
	NID_aes_192_ofb,

	NID_aes_256_ecb,
	NID_aes_256_cbc,
	NID_aes_256_cfb,
	NID_aes_256_ofb,
};
static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/
				      sizeof(padlock_cipher_nids[0]));

/* Function prototypes ... */
static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
				const unsigned char *iv, int enc);
static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
			      const unsigned char *in, size_t nbytes);

/* Round a pointer up to the next 16-byte boundary (the PadLock unit
   requires 16-byte aligned context data). */
#define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) +		\
	( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F )	)
/* The ctx_size below reserves sizeof(struct padlock_cipher_data)+16
   so that an aligned struct always fits inside cipher_data. */
#define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
	NEAREST_ALIGNED(ctx->cipher_data))

/* EVP block sizes per mode: block ciphers report 16, the
   byte-oriented stream modes (CFB/OFB) report 1. */
#define EVP_CIPHER_block_size_ECB	AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_CBC	AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_OFB	1
#define EVP_CIPHER_block_size_CFB	1

/* Declaring so many ciphers by hand would be a pain.
   Instead introduce a bit of preprocessor magic :-) */
/* Expands to a static EVP_CIPHER named padlock_aes_<ksize>_<lmode>.
 * NOTE: the initializer order matches the 0.9.x EVP_CIPHER layout
 * (nid, block_size, key_len, iv_len, flags, init, do_cipher, cleanup,
 * ctx_size, set_asn1, get_asn1, ctrl, app_data) — do not reorder. */
#define DECLARE_AES_EVP(ksize,lmode,umode)	\
static const EVP_CIPHER padlock_aes_##ksize##_##lmode = {	\
	NID_aes_##ksize##_##lmode,		\
	EVP_CIPHER_block_size_##umode,		\
	AES_KEY_SIZE_##ksize,			\
	AES_BLOCK_SIZE,				\
	0 | EVP_CIPH_##umode##_MODE,		\
	padlock_aes_init_key,			\
	padlock_aes_cipher,			\
	NULL,					\
	sizeof(struct padlock_cipher_data) + 16,	\
	EVP_CIPHER_set_asn1_iv,			\
	EVP_CIPHER_get_asn1_iv,			\
	NULL,					\
	NULL					\
}

DECLARE_AES_EVP(128,ecb,ECB);
DECLARE_AES_EVP(128,cbc,CBC);
DECLARE_AES_EVP(128,cfb,CFB);
DECLARE_AES_EVP(128,ofb,OFB);

DECLARE_AES_EVP(192,ecb,ECB);
DECLARE_AES_EVP(192,cbc,CBC);
DECLARE_AES_EVP(192,cfb,CFB);
DECLARE_AES_EVP(192,ofb,OFB);

DECLARE_AES_EVP(256,ecb,ECB);
DECLARE_AES_EVP(256,cbc,CBC);
DECLARE_AES_EVP(256,cfb,CFB);
DECLARE_AES_EVP(256,ofb,OFB);

/* ENGINE "ciphers" callback: with cipher==NULL report the supported
 * NID list, otherwise hand back the EVP_CIPHER for the given nid
 * (returns 0 and sets *cipher to NULL for unsupported NIDs). */
static int
padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid)
{
	/* No specific cipher => return a list of supported nids ... */
	if (!cipher) {
		*nids = padlock_cipher_nids;
		return padlock_cipher_nids_num;
	}

	/* ... or the requested "cipher" otherwise */
	switch (nid) {
	  case NID_aes_128_ecb:
	    *cipher = &padlock_aes_128_ecb;
	    break;
	  case NID_aes_128_cbc:
	    *cipher = &padlock_aes_128_cbc;
	    break;
	  case NID_aes_128_cfb:
	    *cipher = &padlock_aes_128_cfb;
	    break;
	  case NID_aes_128_ofb:
	    *cipher = &padlock_aes_128_ofb;
	    break;

	  case NID_aes_192_ecb:
	    *cipher = &padlock_aes_192_ecb;
	    break;
	  case NID_aes_192_cbc:
	    *cipher = &padlock_aes_192_cbc;
	    break;
	  case NID_aes_192_cfb:
	    *cipher = &padlock_aes_192_cfb;
	    break;
	  case NID_aes_192_ofb:
	    *cipher = &padlock_aes_192_ofb;
	    break;

	  case NID_aes_256_ecb:
	    *cipher = &padlock_aes_256_ecb;
	    break;
	  case NID_aes_256_cbc:
	    *cipher = &padlock_aes_256_cbc;
	    break;
	  case NID_aes_256_cfb:
	    *cipher = &padlock_aes_256_cfb;
	    break;
	  case NID_aes_256_ofb:
	    *cipher = &padlock_aes_256_ofb;
	    break;

	  default:
	    /* Sorry, we don't support this NID */
	    *cipher = NULL;
	    return 0;
	}

	return 1;
}

/* Prepare the encryption key for PadLock usage: build the control
 * word (rounds, key size, direction, keygen mode) and the key
 * schedule in the aligned padlock_cipher_data inside the context. */
static int
padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
		      const unsigned char *iv, int enc)
{
	struct padlock_cipher_data *cdata;
	int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;

	if (key==NULL) return 0;	/* ERROR */

	cdata = ALIGNED_CIPHER_DATA(ctx);
	memset(cdata, 0, sizeof(struct padlock_cipher_data));

	/* Prepare Control word.  OFB is pure keystream and always runs
	   the engine in "encrypt" direction. */
	if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
		cdata->cword.b.encdec = 0;
	else
		cdata->cword.b.encdec = (ctx->encrypt == 0);
	cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
	cdata->cword.b.ksize = (key_len - 128) / 64;

	switch(key_len) {
		case 128:
			/* PadLock can generate an extended key for
			   AES128 in hardware */
			memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
			cdata->cword.b.keygen = 0;
			break;

		case 192:
		case 256:
			/* Generate an extended AES key in software.
			   Needed for AES192/AES256 */
			/* Well, the above applies to Stepping 8 CPUs
			   and is listed as hardware errata. They most
			   likely will fix it at some point and then
			   a check for stepping would be due here. */
			/* CFB/OFB always feed the engine in encrypt
			   direction, so they need the encrypt schedule
			   regardless of 'enc'. */
			if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
			    EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE ||
			    enc)
				AES_set_encrypt_key(key, key_len, &cdata->ks);
			else
				AES_set_decrypt_key(key, key_len, &cdata->ks);
#ifndef AES_ASM
			/* OpenSSL C functions use byte-swapped extended key. */
			padlock_bswapl(&cdata->ks);
#endif
			cdata->cword.b.keygen = 1;
			break;

		default:
			/* ERROR */
			return 0;
	}

	/*
	 * This is done to cover for cases when user reuses the
	 * context for new key. The catch is that if we don't do
	 * this, padlock_aes_cipher might proceed with old key...
	 */
	padlock_reload_key ();

	return 1;
}

/*
 * Simplified version of padlock_aes_cipher() used when
 * 1) both input and output buffers are at aligned addresses.
 * or when
 * 2) running on a newer CPU that doesn't require aligned buffers.
 */
static int
padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
		const unsigned char *in_arg, size_t nbytes)
{
	struct padlock_cipher_data *cdata;
	void  *iv;

	cdata = ALIGNED_CIPHER_DATA(ctx);
	padlock_verify_context(cdata);

	/* One xcrypt call over the whole buffer; chaining modes copy
	   the IV in from the EVP context and the updated IV back out. */
	switch (EVP_CIPHER_CTX_mode(ctx)) {
	case EVP_CIPH_ECB_MODE:
		padlock_xcrypt_ecb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		break;

	case EVP_CIPH_CBC_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		iv = padlock_xcrypt_cbc(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_CFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		iv = padlock_xcrypt_cfb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_OFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		padlock_xcrypt_ofb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
		break;

	default:
		return 0;
	}

	/* Don't leave IV material lying around in the context. */
	memset(cdata->iv, 0, AES_BLOCK_SIZE);

	return 1;
}

#ifndef PADLOCK_CHUNK
# define PADLOCK_CHUNK	512	/* Must be a power of 2 larger than 16 */
#endif
#if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
# error "insane PADLOCK_CHUNK..."
#endif

/* Re-align the arguments to 16-Bytes boundaries and run the
   encryption function itself. This function is not AES-specific.
   Misaligned buffers are processed in PADLOCK_CHUNK-sized pieces
   through a stack bounce buffer; CFB/OFB tails shorter than a block
   are handled in software via ctx->iv / ctx->num. */
static int
padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
	const unsigned char *in_arg, size_t nbytes)
{
	struct padlock_cipher_data *cdata;
	const  void *inp;
	unsigned char  *out;
	void  *iv;
	int    inp_misaligned, out_misaligned, realign_in_loop;
	size_t chunk, allocated=0;

	/* ctx->num is maintained in byte-oriented modes,
	   such as CFB and OFB...  First consume any partial block
	   left over from the previous call, in software. */
	if ((chunk = ctx->num)) { /* borrow chunk variable */
		unsigned char *ivp=ctx->iv;

		switch (EVP_CIPHER_CTX_mode(ctx)) {
		case EVP_CIPH_CFB_MODE:
			if (chunk >= AES_BLOCK_SIZE)
				return 0; /* bogus value */

			if (ctx->encrypt)
				while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
					ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
					chunk++, nbytes--;
				}
			else	while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
					unsigned char c = *(in_arg++);
					*(out_arg++) = c ^ ivp[chunk];
					ivp[chunk++] = c, nbytes--;
				}

			ctx->num = chunk%AES_BLOCK_SIZE;
			break;
		case EVP_CIPH_OFB_MODE:
			if (chunk >= AES_BLOCK_SIZE)
				return 0; /* bogus value */

			while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
				*(out_arg++) = *(in_arg++) ^ ivp[chunk];
				chunk++, nbytes--;
			}

			ctx->num = chunk%AES_BLOCK_SIZE;
			break;
		}
	}

	if (nbytes == 0)
		return 1;
#if 0
	if (nbytes % AES_BLOCK_SIZE)
		return 0; /* are we expected to do tail processing? */
#else
	/* nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC
	   modes and arbitrary value in byte-oriented modes, such as
	   CFB and OFB... */
#endif

	/* VIA promises CPUs that won't require alignment in the future.
	   For now padlock_aes_align_required is initialized to 1 and
	   the condition is never met... */
	/* C7 core is capable to manage unaligned input in non-ECB[!]
	   mode, but performance penalties appear to be approximately
	   same as for software alignment below or ~3x. They promise to
	   improve it in the future, but for now we can just as well
	   pretend that it can only handle aligned input... */
	if (!padlock_aes_align_required && (nbytes%AES_BLOCK_SIZE)==0)
		return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

	inp_misaligned = (((size_t)in_arg) & 0x0F);
	out_misaligned = (((size_t)out_arg) & 0x0F);

	/* Note that even if output is aligned and input not,
	 * I still prefer to loop instead of copy the whole
	 * input and then encrypt in one stroke. This is done
	 * in order to improve L1 cache utilization... */
	realign_in_loop = out_misaligned|inp_misaligned;

	if (!realign_in_loop && (nbytes%AES_BLOCK_SIZE)==0)
		return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

	/* this takes one "if" out of the loops: the first pass handles
	   the sub-PADLOCK_CHUNK remainder, all later passes are full
	   chunks. */
	chunk  = nbytes;
	chunk %= PADLOCK_CHUNK;
	if (chunk==0) chunk = PADLOCK_CHUNK;

	if (out_misaligned) {
		/* optimize for small input: bounce buffer only as large
		   as needed, at most PADLOCK_CHUNK (+16 for alignment). */
		allocated = (chunk<nbytes?PADLOCK_CHUNK:nbytes);
		out = alloca(0x10 + allocated);
		out = NEAREST_ALIGNED(out);
	}
	else
		out = out_arg;

	cdata = ALIGNED_CIPHER_DATA(ctx);
	padlock_verify_context(cdata);

	switch (EVP_CIPHER_CTX_mode(ctx)) {
	case EVP_CIPH_ECB_MODE:
		do	{
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

			nbytes -= chunk;
			chunk   = PADLOCK_CHUNK;
		} while (nbytes);
		break;

	case EVP_CIPH_CBC_MODE:
		/* The goto enters the loop past the IV-carry step:
		   the first pass takes the IV from ctx->iv, later
		   passes chain it from the previous xcrypt call. */
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		goto cbc_shortcut;
		do	{
			if (iv != cdata->iv)
				memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
			chunk = PADLOCK_CHUNK;
		cbc_shortcut: /* optimize for small input */
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			iv = padlock_xcrypt_cbc(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

		} while (nbytes -= chunk);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_CFB_MODE:
		/* Whole blocks go through the hardware; a sub-block
		   tail falls through to software handling below. */
		memcpy (iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		chunk &= ~(AES_BLOCK_SIZE-1);
		if (chunk)	goto cfb_shortcut;
		else		goto cfb_skiploop;
		do	{
			if (iv != cdata->iv)
				memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
			chunk = PADLOCK_CHUNK;
		cfb_shortcut: /* optimize for small input */
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			iv = padlock_xcrypt_cfb(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

			nbytes -= chunk;
		} while (nbytes >= AES_BLOCK_SIZE);

		cfb_skiploop:
		if (nbytes) {
			unsigned char *ivp = cdata->iv;

			if (iv != ivp) {
				memcpy(ivp, iv, AES_BLOCK_SIZE);
				iv = ivp;
			}
			ctx->num = nbytes;
			/* Generate one keystream block by ECB-encrypting
			   the IV in place; the control word is briefly
			   flipped to "encrypt" when decrypting. */
			if (cdata->cword.b.encdec) {
				cdata->cword.b.encdec=0;
				padlock_reload_key();
				padlock_xcrypt_ecb(1,cdata,ivp,ivp);
				cdata->cword.b.encdec=1;
				padlock_reload_key();
				while(nbytes) {
					unsigned char c = *(in_arg++);
					*(out_arg++) = c ^ *ivp;
					*(ivp++) = c, nbytes--;
				}
			}
			else {	padlock_reload_key();
				padlock_xcrypt_ecb(1,cdata,ivp,ivp);
				padlock_reload_key();
				while (nbytes) {
					*ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
					ivp++, nbytes--;
				}
			}
		}

		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_OFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		chunk &= ~(AES_BLOCK_SIZE-1);
		if (chunk) do	{
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			padlock_xcrypt_ofb(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

			nbytes -= chunk;
			chunk   = PADLOCK_CHUNK;
		} while (nbytes >= AES_BLOCK_SIZE);

		if (nbytes) {
			unsigned char *ivp = cdata->iv;

			ctx->num = nbytes;
			padlock_reload_key();	/* empirically found */
			padlock_xcrypt_ecb(1,cdata,ivp,ivp);
			padlock_reload_key();	/* empirically found */
			while (nbytes) {
				*(out_arg++) = *(in_arg++) ^ *ivp;
				ivp++, nbytes--;
			}
		}

		memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
		break;

	default:
		return 0;
	}

	/* Clean the realign buffer if it was used; volatile so the
	   wipe is not optimized away. */
	if (out_misaligned) {
		volatile unsigned long *p=(void *)out;
		size_t   n = allocated/sizeof(*p);
		while (n--) *p++=0;
	}

	memset(cdata->iv, 0, AES_BLOCK_SIZE);

	return 1;
}

#endif /* OPENSSL_NO_AES */

/* ===== Random Number Generator ===== */
/*
 * This code is not engaged. The reason is that it does not comply
 * with recommendations for VIA RNG usage for secure applications
 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
 * provide meaningful error control...
 */
/* Wrapper that provides an interface between the API and
   the raw PadLock RNG.  Fetches 8 bytes at a time while possible,
   then single bytes; any status-bit anomaly aborts with 0. */
static int
padlock_rand_bytes(unsigned char *output, int count)
{
	unsigned int eax, buf;

	while (count >= 8) {
		eax = padlock_xstore(output, 0);
		if (!(eax&(1<<6)))	return 0; /* RNG disabled */
		/* this ---vv--- covers DC bias, Raw Bits and String Filter */
		if (eax&(0x1F<<10))	return 0;
		if ((eax&0x1F)==0)	continue; /* no data, retry... */
		if ((eax&0x1F)!=8)	return 0; /* fatal failure... */
		output += 8;
		count  -= 8;
	}
	while (count > 0) {
		/* quality factor 3 yields one byte per xstore */
		eax = padlock_xstore(&buf, 3);
		if (!(eax&(1<<6)))	return 0; /* RNG disabled */
		/* this ---vv--- covers DC bias, Raw Bits and String Filter */
		if (eax&(0x1F<<10))	return 0;
		if ((eax&0x1F)==0)	continue; /* no data, retry... */
		if ((eax&0x1F)!=1)	return 0; /* fatal failure... */
		*output++ = (unsigned char)buf;
		count--;
	}
	/* scrub leftover random byte from the stack */
	*(volatile unsigned int *)&buf=0;

	return 1;
}

/* Dummy but necessary function */
static int
padlock_rand_status(void)
{
	return 1;
}

/* Prepare structure for registration */
static RAND_METHOD padlock_rand = {
	NULL,			/* seed */
	padlock_rand_bytes,	/* bytes */
	NULL,			/* cleanup */
	NULL,			/* add */
	padlock_rand_bytes,	/* pseudorand */
	padlock_rand_status,	/* rand status */
};

#else  /* !COMPILE_HW_PADLOCK */
#ifndef OPENSSL_NO_DYNAMIC_ENGINE
/* Stub bind for platforms where PadLock can't be compiled, so the
   shared object still exports the dynamic-engine entry points. */
OPENSSL_EXPORT
int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns);
OPENSSL_EXPORT
int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns) { return 0; }
IMPLEMENT_DYNAMIC_CHECK_FN()
#endif
#endif /* COMPILE_HW_PADLOCK */

#endif /* !OPENSSL_NO_HW_PADLOCK */
#endif /* !OPENSSL_NO_HW */