1238384Sjkim/* 2238384Sjkim * Support for VIA PadLock Advanced Cryptography Engine (ACE) 3238384Sjkim * Written by Michal Ludvig <michal@logix.cz> 4238384Sjkim * http://www.logix.cz/michal 5238384Sjkim * 6238384Sjkim * Big thanks to Andy Polyakov for a help with optimization, 7238384Sjkim * assembler fixes, port to MS Windows and a lot of other 8238384Sjkim * valuable work on this engine! 9238384Sjkim */ 10238384Sjkim 11238384Sjkim/* ==================================================================== 12238384Sjkim * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved. 13238384Sjkim * 14238384Sjkim * Redistribution and use in source and binary forms, with or without 15238384Sjkim * modification, are permitted provided that the following conditions 16238384Sjkim * are met: 17238384Sjkim * 18238384Sjkim * 1. Redistributions of source code must retain the above copyright 19238384Sjkim * notice, this list of conditions and the following disclaimer. 20238384Sjkim * 21238384Sjkim * 2. Redistributions in binary form must reproduce the above copyright 22238384Sjkim * notice, this list of conditions and the following disclaimer in 23238384Sjkim * the documentation and/or other materials provided with the 24238384Sjkim * distribution. 25238384Sjkim * 26238384Sjkim * 3. All advertising materials mentioning features or use of this 27238384Sjkim * software must display the following acknowledgment: 28238384Sjkim * "This product includes software developed by the OpenSSL Project 29238384Sjkim * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" 30238384Sjkim * 31238384Sjkim * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 32238384Sjkim * endorse or promote products derived from this software without 33238384Sjkim * prior written permission. For written permission, please contact 34238384Sjkim * licensing@OpenSSL.org. 35238384Sjkim * 36238384Sjkim * 5. 
Products derived from this software may not be called "OpenSSL" 37238384Sjkim * nor may "OpenSSL" appear in their names without prior written 38238384Sjkim * permission of the OpenSSL Project. 39238384Sjkim * 40238384Sjkim * 6. Redistributions of any form whatsoever must retain the following 41238384Sjkim * acknowledgment: 42238384Sjkim * "This product includes software developed by the OpenSSL Project 43238384Sjkim * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" 44238384Sjkim * 45238384Sjkim * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 46238384Sjkim * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47238384Sjkim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 48238384Sjkim * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 49238384Sjkim * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 50238384Sjkim * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 51238384Sjkim * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 52238384Sjkim * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 53238384Sjkim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 54238384Sjkim * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 55238384Sjkim * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 56238384Sjkim * OF THE POSSIBILITY OF SUCH DAMAGE. 57238384Sjkim * ==================================================================== 58238384Sjkim * 59238384Sjkim * This product includes cryptographic software written by Eric Young 60238384Sjkim * (eay@cryptsoft.com). This product includes software written by Tim 61238384Sjkim * Hudson (tjh@cryptsoft.com). 
62238384Sjkim * 63238384Sjkim */ 64238384Sjkim 65238384Sjkim 66238384Sjkim#include <stdio.h> 67238384Sjkim#include <string.h> 68238384Sjkim 69238384Sjkim#include <openssl/opensslconf.h> 70238384Sjkim#include <openssl/crypto.h> 71238384Sjkim#include <openssl/dso.h> 72238384Sjkim#include <openssl/engine.h> 73238384Sjkim#include <openssl/evp.h> 74238384Sjkim#ifndef OPENSSL_NO_AES 75238384Sjkim#include <openssl/aes.h> 76238384Sjkim#endif 77238384Sjkim#include <openssl/rand.h> 78238384Sjkim#include <openssl/err.h> 79238384Sjkim 80238384Sjkim#ifndef OPENSSL_NO_HW 81238384Sjkim#ifndef OPENSSL_NO_HW_PADLOCK 82238384Sjkim 83238384Sjkim/* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */ 84238384Sjkim#if (OPENSSL_VERSION_NUMBER >= 0x00908000L) 85238384Sjkim# ifndef OPENSSL_NO_DYNAMIC_ENGINE 86238384Sjkim# define DYNAMIC_ENGINE 87238384Sjkim# endif 88238384Sjkim#elif (OPENSSL_VERSION_NUMBER >= 0x00907000L) 89238384Sjkim# ifdef ENGINE_DYNAMIC_SUPPORT 90238384Sjkim# define DYNAMIC_ENGINE 91238384Sjkim# endif 92238384Sjkim#else 93238384Sjkim# error "Only OpenSSL >= 0.9.7 is supported" 94238384Sjkim#endif 95238384Sjkim 96238384Sjkim/* VIA PadLock AES is available *ONLY* on some x86 CPUs. 97238384Sjkim Not only that it doesn't exist elsewhere, but it 98238384Sjkim even can't be compiled on other platforms! 99238384Sjkim 100238384Sjkim In addition, because of the heavy use of inline assembler, 101238384Sjkim compiler choice is limited to GCC and Microsoft C. 
 */
/*
 * COMPILE_HW_PADLOCK is defined only when we can actually emit the
 * inline assembler below: 32-bit x86, GCC or MSC, and inline asm not
 * explicitly disabled at configure time.
 */
#undef COMPILE_HW_PADLOCK
#if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
     (defined(_MSC_VER) && defined(_M_IX86))
#  define COMPILE_HW_PADLOCK
# endif
#endif

#ifdef OPENSSL_NO_DYNAMIC_ENGINE
#ifdef COMPILE_HW_PADLOCK
static ENGINE *ENGINE_padlock (void);
#endif

/*
 * Built-in entry point: construct the padlock ENGINE and hand it to
 * the global engine list.
 */
void ENGINE_load_padlock (void)
{
/* On non-x86 CPUs it just returns. */
#ifdef COMPILE_HW_PADLOCK
	ENGINE *toadd = ENGINE_padlock ();
	if (!toadd) return;
	/* ENGINE_add() takes its own reference, so release ours. */
	ENGINE_add (toadd);
	ENGINE_free (toadd);
	/* Discard any error ENGINE_add may have queued (e.g. already added). */
	ERR_clear_error ();
#endif
}

#endif

#ifdef COMPILE_HW_PADLOCK
/* We do these includes here to avoid header problems on platforms that
   do not have the VIA padlock anyway...
*/ 132238384Sjkim#include <stdlib.h> 133238384Sjkim#ifdef _WIN32 134238384Sjkim# include <malloc.h> 135238384Sjkim# ifndef alloca 136238384Sjkim# define alloca _alloca 137238384Sjkim# endif 138238384Sjkim#elif defined(__GNUC__) 139238384Sjkim# ifndef alloca 140238384Sjkim# define alloca(s) __builtin_alloca(s) 141238384Sjkim# endif 142238384Sjkim#endif 143238384Sjkim 144238384Sjkim/* Function for ENGINE detection and control */ 145238384Sjkimstatic int padlock_available(void); 146238384Sjkimstatic int padlock_init(ENGINE *e); 147238384Sjkim 148238384Sjkim/* RNG Stuff */ 149238384Sjkimstatic RAND_METHOD padlock_rand; 150238384Sjkim 151238384Sjkim/* Cipher Stuff */ 152238384Sjkim#ifndef OPENSSL_NO_AES 153238384Sjkimstatic int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid); 154238384Sjkim#endif 155238384Sjkim 156238384Sjkim/* Engine names */ 157238384Sjkimstatic const char *padlock_id = "padlock"; 158238384Sjkimstatic char padlock_name[100]; 159238384Sjkim 160238384Sjkim/* Available features */ 161238384Sjkimstatic int padlock_use_ace = 0; /* Advanced Cryptography Engine */ 162238384Sjkimstatic int padlock_use_rng = 0; /* Random Number Generator */ 163238384Sjkim#ifndef OPENSSL_NO_AES 164238384Sjkimstatic int padlock_aes_align_required = 1; 165238384Sjkim#endif 166238384Sjkim 167238384Sjkim/* ===== Engine "management" functions ===== */ 168238384Sjkim 169238384Sjkim/* Prepare the ENGINE structure for registration */ 170238384Sjkimstatic int 171238384Sjkimpadlock_bind_helper(ENGINE *e) 172238384Sjkim{ 173238384Sjkim /* Check available features */ 174238384Sjkim padlock_available(); 175238384Sjkim 176238384Sjkim#if 1 /* disable RNG for now, see commentary in vicinity of RNG code */ 177238384Sjkim padlock_use_rng=0; 178238384Sjkim#endif 179238384Sjkim 180238384Sjkim /* Generate a nice engine name with available features */ 181238384Sjkim BIO_snprintf(padlock_name, sizeof(padlock_name), 182238384Sjkim "VIA PadLock (%s, %s)", 183238384Sjkim 
padlock_use_rng ? "RNG" : "no-RNG", 184238384Sjkim padlock_use_ace ? "ACE" : "no-ACE"); 185238384Sjkim 186238384Sjkim /* Register everything or return with an error */ 187238384Sjkim if (!ENGINE_set_id(e, padlock_id) || 188238384Sjkim !ENGINE_set_name(e, padlock_name) || 189238384Sjkim 190238384Sjkim !ENGINE_set_init_function(e, padlock_init) || 191238384Sjkim#ifndef OPENSSL_NO_AES 192238384Sjkim (padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) || 193238384Sjkim#endif 194238384Sjkim (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) { 195238384Sjkim return 0; 196238384Sjkim } 197238384Sjkim 198238384Sjkim /* Everything looks good */ 199238384Sjkim return 1; 200238384Sjkim} 201238384Sjkim 202238384Sjkim#ifdef OPENSSL_NO_DYNAMIC_ENGINE 203238384Sjkim 204238384Sjkim/* Constructor */ 205238384Sjkimstatic ENGINE * 206238384SjkimENGINE_padlock(void) 207238384Sjkim{ 208238384Sjkim ENGINE *eng = ENGINE_new(); 209238384Sjkim 210238384Sjkim if (!eng) { 211238384Sjkim return NULL; 212238384Sjkim } 213238384Sjkim 214238384Sjkim if (!padlock_bind_helper(eng)) { 215238384Sjkim ENGINE_free(eng); 216238384Sjkim return NULL; 217238384Sjkim } 218238384Sjkim 219238384Sjkim return eng; 220238384Sjkim} 221238384Sjkim 222238384Sjkim#endif 223238384Sjkim 224238384Sjkim/* Check availability of the engine */ 225238384Sjkimstatic int 226238384Sjkimpadlock_init(ENGINE *e) 227238384Sjkim{ 228238384Sjkim return (padlock_use_rng || padlock_use_ace); 229238384Sjkim} 230238384Sjkim 231238384Sjkim/* This stuff is needed if this ENGINE is being compiled into a self-contained 232238384Sjkim * shared-library. 
 */
#ifdef DYNAMIC_ENGINE
/*
 * Dynamic-loading bind callback: accept the request only when the
 * requested id is ours (or unspecified), then run the common setup.
 */
static int
padlock_bind_fn(ENGINE *e, const char *id)
{
	if (id && (strcmp(id, padlock_id) != 0)) {
		return 0;
	}

	if (!padlock_bind_helper(e)) {
		return 0;
	}

	return 1;
}

IMPLEMENT_DYNAMIC_CHECK_FN()
IMPLEMENT_DYNAMIC_BIND_FN (padlock_bind_fn)
#endif /* DYNAMIC_ENGINE */

/* ===== Here comes the "real" engine ===== */

#ifndef OPENSSL_NO_AES
/* Some AES-related constants */
#define AES_BLOCK_SIZE		16
#define AES_KEY_SIZE_128	16
#define AES_KEY_SIZE_192	24
#define AES_KEY_SIZE_256	32

/* Here we store the status information relevant to the
   current context. */
/* BIG FAT WARNING:
 * 	Inline assembler in PADLOCK_XCRYPT_ASM()
 * 	depends on the order of items in this structure.
 * 	Don't blindly modify, reorder, etc!
 *
 * 	The 'leal 16/32' offsets in PADLOCK_XCRYPT_ASM assume:
 * 	iv at offset 0, cword at offset 16, ks at offset 32.
 */
struct padlock_cipher_data
{
	unsigned char	iv[AES_BLOCK_SIZE];	/* Initialization vector */
	union {	unsigned int pad[4];		/* pad forces 16-byte size */
		struct {
			int rounds:4;		/* AES round count: 10/12/14 */
			int dgst:1;	/* n/a in C3 */
			int align:1;	/* n/a in C3 */
			int ciphr:1;	/* n/a in C3 */
			unsigned int keygen:1;	/* 1 = software key schedule in ks */
			int interm:1;
			unsigned int encdec:1;	/* 0 = encrypt, 1 = decrypt */
			int ksize:2;		/* key size: 0/1/2 = 128/192/256 */
		} b;
	} cword;		/* Control word */
	AES_KEY ks;		/* Encryption key */
};

/*
 * Essentially this variable belongs in thread local storage.
 * Having this variable global on the other hand can only cause
 * few bogus key reloads [if any at all on single-CPU system],
 * so we accept the penalty...
 */
/* Last cipher context loaded into the CPU; see padlock_verify_context(). */
static volatile struct padlock_cipher_data *padlock_saved_context;
#endif

/*
 * =======================================================
 * Inline assembler section(s).
 * =======================================================
 * Order of arguments is chosen to facilitate Windows port
 * using __fastcall calling convention. If you wish to add
 * more routines, keep in mind that first __fastcall
 * argument is passed in %ecx and second - in %edx.
 * =======================================================
 */
#if defined(__GNUC__) && __GNUC__>=2
/*
 * As for excessive "push %ebx"/"pop %ebx" found all over.
 * When generating position-independent code GCC won't let
 * us use "b" in assembler templates nor even respect "ebx"
 * in "clobber description." Therefore the trouble...
 */

/* Helper function - check if a CPUID instruction
   is available on this CPU.  Returns 1 when it is. */
static int
padlock_insn_cpuid_available(void)
{
	int result = -1;

	/* We're checking if the bit #21 of EFLAGS
	   can be toggled. If yes = CPUID is available.
	   result ends up 0 exactly when the written bit
	   reads back as requested. */
	asm volatile (
		"pushf\n"
		"popl %%eax\n"
		"xorl $0x200000, %%eax\n"
		"movl %%eax, %%ecx\n"
		"andl $0x200000, %%ecx\n"
		"pushl %%eax\n"
		"popf\n"
		"pushf\n"
		"popl %%eax\n"
		"andl $0x200000, %%eax\n"
		"xorl %%eax, %%ecx\n"
		"movl %%ecx, %0\n"
		: "=r" (result) : : "eax", "ecx");

	return (result == 0);
}

/* Load supported features of the CPU to see if
   the PadLock is available.  Sets padlock_use_ace and
   padlock_use_rng; returns non-zero when anything was found. */
static int
padlock_available(void)
{
	char vendor_string[16];
	unsigned int eax, edx;

	/* First check if the CPUID instruction is available at all... */
	if (! padlock_insn_cpuid_available())
		return 0;

	/* Are we running on the Centaur (VIA) CPU? */
	eax = 0x00000000;
	vendor_string[12] = 0;
	asm volatile (
		"pushl	%%ebx\n"
		"cpuid\n"
		"movl	%%ebx,(%%edi)\n"
		"movl	%%edx,4(%%edi)\n"
		"movl	%%ecx,8(%%edi)\n"
		"popl	%%ebx"
		: "+a"(eax) : "D"(vendor_string) : "ecx", "edx");
	if (strcmp(vendor_string, "CentaurHauls") != 0)
		return 0;

	/* Check for Centaur Extended Feature Flags presence */
	eax = 0xC0000000;
	asm volatile ("pushl %%ebx; cpuid; popl	%%ebx"
		: "+a"(eax) : : "ecx", "edx");
	if (eax < 0xC0000001)
		return 0;

	/* Read the Centaur Extended Feature Flags */
	eax = 0xC0000001;
	asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
		: "+a"(eax), "=d"(edx) : : "ecx");

	/* Fill up some flags: each feature needs both the
	   "exists" and the "enabled" bit set (bits 6+7 = ACE,
	   bits 2+3 = RNG). */
	padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
	padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));

	return padlock_use_ace + padlock_use_rng;
}

#ifndef OPENSSL_NO_AES
#ifndef AES_ASM
/* Our own htonl()/ntohl(): byte-swap every word of the expanded key. */
static inline void
padlock_bswapl(AES_KEY *ks)
{
	size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
	unsigned int *key = ks->rd_key;

	while (i--) {
		asm volatile ("bswapl %0" : "+r"(*key));
		key++;
	}
}
#endif
#endif

/* Force key reload from memory to the CPU microcode.
   Loading EFLAGS from the stack clears EFLAGS[30]
   which does the trick.
 */
static inline void
padlock_reload_key(void)
{
	asm volatile ("pushfl; popfl");
}

#ifndef OPENSSL_NO_AES
/*
 * This is heuristic key context tracing. At first one
 * believes that one should use atomic swap instructions,
 * but it's not actually necessary. Point is that if
 * padlock_saved_context was changed by another thread
 * after we've read it and before we compare it with cdata,
 * our key *shall* be reloaded upon thread context switch
 * and we are therefore set in either case...
 *
 * In short: when EFLAGS[30] is still set and the saved
 * context differs from cdata, pop EFLAGS to force the CPU
 * to reload the key; then remember cdata as current.
 */
static inline void
padlock_verify_context(struct padlock_cipher_data *cdata)
{
	asm volatile (
	"pushfl\n"
"	btl	$30,(%%esp)\n"
"	jnc	1f\n"
"	cmpl	%2,%1\n"
"	je	1f\n"
"	popfl\n"
"	subl	$4,%%esp\n"
"1:	addl	$4,%%esp\n"
"	movl	%2,%0"
	:"+m"(padlock_saved_context)
	: "r"(padlock_saved_context), "r"(cdata) : "cc");
}

/* Template for padlock_xcrypt_* modes */
/* BIG FAT WARNING:
 * 	The offsets used with 'leal' instructions
 * 	describe items of the 'padlock_cipher_data'
 * 	structure.
 */
/*
 * Expands to an inline wrapper around one 'rep xcrypt*' opcode.
 * Register contract: %eax = cdata (so 16(%eax) = control word,
 * 32(%eax) = key schedule), %ecx = block count, %edi = out,
 * %esi = in.  The instruction leaves a pointer to the final IV
 * in %eax, which is what the wrapper returns.
 */
#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)	\
static inline void *name(size_t cnt,		\
	struct padlock_cipher_data *cdata,	\
	void *out, const void *inp) 		\
{	void *iv; 				\
	asm volatile ( "pushl	%%ebx\n"	\
		"	leal	16(%0),%%edx\n"	\
		"	leal	32(%0),%%ebx\n"	\
		rep_xcrypt "\n"			\
		"	popl	%%ebx"		\
		: "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
		: "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
		: "edx", "cc", "memory");	\
	return iv;				\
}

/* Generate all functions with appropriate opcodes */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")	/* rep xcryptecb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")	/* rep xcryptcbc */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")	/* rep xcryptcfb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")	/* rep xcryptofb */
#endif

/* The RNG call itself: xstore writes random bytes to addr and
   returns status/availability information in %eax. */
static inline unsigned int
padlock_xstore(void *addr, unsigned int edx_in)
{
	unsigned int eax_out;

	asm volatile (".byte 0x0f,0xa7,0xc0"	/* xstore */
	    : "=a"(eax_out),"=m"(*(unsigned *)addr)
	    : "D"(addr), "d" (edx_in)
	    );

	return eax_out;
}

/* Why not inline 'rep movsd'? I failed to find information on what
 * value in Direction Flag one can expect and consequently have to
 * apply "better-safe-than-sorry" approach and assume "undefined."
 * I could explicitly clear it and restore the original value upon
 * return from padlock_aes_cipher, but it's presumably too much
 * trouble for too little gain...
 *
 * In case you wonder 'rep xcrypt*' instructions above are *not*
 * affected by the Direction Flag and pointers advance toward
 * larger addresses unconditionally.
 */
/*
 * Word-wise copy helper.
 * NOTE(review): assumes n is a positive multiple of sizeof(long);
 * the do/while would wrap around for n == 0.  Callers copy whole
 * AES blocks, so this holds in practice — confirm before reusing.
 */
static inline unsigned char *
padlock_memcpy(void *dst,const void *src,size_t n)
{
	long       *d=dst;
	const long *s=src;

	n /= sizeof(*d);
	do { *d++ = *s++; } while (--n);

	return dst;
}

#elif defined(_MSC_VER)
/*
 * Unlike GCC these are real functions. In order to minimize impact
 * on performance we adhere to __fastcall calling convention in
 * order to get two first arguments passed through %ecx and %edx.
 * Which kind of suits very well, as instructions in question use
 * both %ecx and %edx as input:-)
 */
/* Emit the byte sequence 0xf3 0x0f 0xa7 <code>, i.e. 'rep xcrypt*'. */
#define REP_XCRYPT(code)		\
	_asm _emit 0xf3			\
	_asm _emit 0x0f _asm _emit 0xa7	\
	_asm _emit code

/* BIG FAT WARNING:
 * 	The offsets used with 'lea' instructions
 * 	describe items of the 'padlock_cipher_data'
 * 	structure.
 */
/*
 * MSC counterpart of the GCC template above: cnt arrives in ecx,
 * cdata in edx (__fastcall); edx/ebx are pointed at the control
 * word and key schedule inside cdata before the xcrypt opcode.
 */
#define PADLOCK_XCRYPT_ASM(name,code)	\
static void * __fastcall 		\
	name (size_t cnt, void *cdata,	\
	void *outp, const void *inp)	\
{	_asm	mov	eax,edx		\
	_asm	lea	edx,[eax+16]	\
	_asm	lea	ebx,[eax+32]	\
	_asm	mov	edi,outp	\
	_asm	mov	esi,inp		\
	REP_XCRYPT(code)		\
}

PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)

/* The RNG call itself: 0x0f 0xa7 0xc0 is xstore. */
static int __fastcall
padlock_xstore(void *outp,unsigned int code)
{	_asm	mov	edi,ecx
	_asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
}

/* Force key reload: popping EFLAGS clears EFLAGS[30]. */
static void __fastcall
padlock_reload_key(void)
{	_asm pushfd _asm popfd	}

/* MSC counterpart of padlock_verify_context(); cdata arrives in ecx. */
static void __fastcall
padlock_verify_context(void *cdata)
{	_asm	{
		pushfd
		bt	DWORD PTR[esp],30
		jnc	skip
		cmp	ecx,padlock_saved_context
		je	skip
		popfd
		sub	esp,4
	skip:	add	esp,4
		mov	padlock_saved_context,ecx
		}
}

/*
 * Probe for PadLock: toggle EFLAGS bit 21 to verify CPUID works,
 * match the 'CentaurHauls' vendor string (the three dword literals
 * below), then read the Centaur Extended Feature Flags at leaf
 * 0xC0000001 and set padlock_use_ace / padlock_use_rng.
 */
static int
padlock_available(void)
{	_asm	{
		pushfd
		pop	eax
		mov	ecx,eax
		xor	eax,1<<21
		push	eax
		popfd
		pushfd
		pop	eax
		xor	eax,ecx
		bt	eax,21
		jnc	noluck
		mov	eax,0
		cpuid
		xor	eax,eax
		cmp	ebx,'tneC'
		jne	noluck
		cmp	edx,'Hrua'
		jne	noluck
		cmp	ecx,'slua'
		jne	noluck
		mov	eax,0xC0000000
		cpuid
		mov	edx,eax
		xor	eax,eax
		cmp	edx,0xC0000001
		jb	noluck
		mov	eax,0xC0000001
		cpuid
		xor	eax,eax
		bt	edx,6
		jnc	skip_a
		bt	edx,7
		jnc	skip_a
		mov	padlock_use_ace,1
		inc	eax
	skip_a:	bt	edx,2
		jnc	skip_r
		bt	edx,3
		jnc	skip_r
		mov	padlock_use_rng,1
		inc	eax
	skip_r:
	noluck:
		}
}

/* Byte-swap the expanded key in place (60 dwords — presumably
   sizeof(AES_KEY.rd_key)/4; confirm against AES_MAXNR). */
static void __fastcall
padlock_bswapl(void *key)
{	_asm	{
		pushfd
		cld
		mov	esi,ecx
		mov	edi,ecx
		mov	ecx,60
	up:	lodsd
		bswap	eax
		stosd
		loop	up
		popfd
		}
}

/* MS actually specifies status of Direction Flag and compiler even
 * manages to compile following as 'rep movsd' all by itself...
 */
#define	padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
#endif

/* ===== AES encryption/decryption ===== */
#ifndef OPENSSL_NO_AES

/* Map the *_cfb128/*_ofb128 NID names used by newer OpenSSL
   versions onto the short names this file uses. */
#if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
#define NID_aes_128_cfb	NID_aes_128_cfb128
#endif

#if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
#define NID_aes_128_ofb	NID_aes_128_ofb128
#endif

#if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
#define NID_aes_192_cfb	NID_aes_192_cfb128
#endif

#if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
#define NID_aes_192_ofb	NID_aes_192_ofb128
#endif

#if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
#define NID_aes_256_cfb	NID_aes_256_cfb128
#endif

#if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
#define NID_aes_256_ofb	NID_aes_256_ofb128
#endif

/* List of supported ciphers. */
static int padlock_cipher_nids[] = {
	NID_aes_128_ecb,
	NID_aes_128_cbc,
	NID_aes_128_cfb,
	NID_aes_128_ofb,

	NID_aes_192_ecb,
	NID_aes_192_cbc,
	NID_aes_192_cfb,
	NID_aes_192_ofb,

	NID_aes_256_ecb,
	NID_aes_256_cbc,
	NID_aes_256_cfb,
	NID_aes_256_ofb,
};
static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/
				      sizeof(padlock_cipher_nids[0]));

/* Function prototypes ...
 */
static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
				const unsigned char *iv, int enc);
static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
			      const unsigned char *in, size_t nbytes);

/*
 * The xcrypt instructions want a 16-byte-aligned context; cipher_data
 * is over-allocated by 16 (see DECLARE_AES_EVP below) and rounded up
 * to the next 16-byte boundary here.
 */
#define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) +		\
	( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F )	)
#define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
	NEAREST_ALIGNED(ctx->cipher_data))

/* EVP block sizes per mode: ECB/CBC are true block modes, the
   CFB/OFB stream modes report a block size of 1. */
#define EVP_CIPHER_block_size_ECB	AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_CBC	AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_OFB	1
#define EVP_CIPHER_block_size_CFB	1

/* Declaring so many ciphers by hand would be a pain.
   Instead introduce a bit of preprocessor magic :-) */
#define	DECLARE_AES_EVP(ksize,lmode,umode)	\
static const EVP_CIPHER padlock_aes_##ksize##_##lmode = {	\
	NID_aes_##ksize##_##lmode,		\
	EVP_CIPHER_block_size_##umode,	\
	AES_KEY_SIZE_##ksize,		\
	AES_BLOCK_SIZE,			\
	0 | EVP_CIPH_##umode##_MODE,	\
	padlock_aes_init_key,		\
	padlock_aes_cipher,		\
	NULL,				\
	sizeof(struct padlock_cipher_data) + 16,	\
	EVP_CIPHER_set_asn1_iv,		\
	EVP_CIPHER_get_asn1_iv,		\
	NULL,				\
	NULL				\
}

DECLARE_AES_EVP(128,ecb,ECB);
DECLARE_AES_EVP(128,cbc,CBC);
DECLARE_AES_EVP(128,cfb,CFB);
DECLARE_AES_EVP(128,ofb,OFB);

DECLARE_AES_EVP(192,ecb,ECB);
DECLARE_AES_EVP(192,cbc,CBC);
DECLARE_AES_EVP(192,cfb,CFB);
DECLARE_AES_EVP(192,ofb,OFB);

DECLARE_AES_EVP(256,ecb,ECB);
DECLARE_AES_EVP(256,cbc,CBC);
DECLARE_AES_EVP(256,cfb,CFB);
DECLARE_AES_EVP(256,ofb,OFB);

/*
 * ENGINE ciphers callback: with cipher == NULL report the list of
 * supported NIDs, otherwise return the EVP_CIPHER for 'nid'.
 */
static int
padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid)
{
	/* No specific cipher => return a list of supported nids ... */
	if (!cipher) {
		*nids = padlock_cipher_nids;
		return padlock_cipher_nids_num;
	}

	/* ... or the requested "cipher" otherwise */
	switch (nid) {
	  case NID_aes_128_ecb:
	    *cipher = &padlock_aes_128_ecb;
	    break;
	  case NID_aes_128_cbc:
	    *cipher = &padlock_aes_128_cbc;
	    break;
	  case NID_aes_128_cfb:
	    *cipher = &padlock_aes_128_cfb;
	    break;
	  case NID_aes_128_ofb:
	    *cipher = &padlock_aes_128_ofb;
	    break;

	  case NID_aes_192_ecb:
	    *cipher = &padlock_aes_192_ecb;
	    break;
	  case NID_aes_192_cbc:
	    *cipher = &padlock_aes_192_cbc;
	    break;
	  case NID_aes_192_cfb:
	    *cipher = &padlock_aes_192_cfb;
	    break;
	  case NID_aes_192_ofb:
	    *cipher = &padlock_aes_192_ofb;
	    break;

	  case NID_aes_256_ecb:
	    *cipher = &padlock_aes_256_ecb;
	    break;
	  case NID_aes_256_cbc:
	    *cipher = &padlock_aes_256_cbc;
	    break;
	  case NID_aes_256_cfb:
	    *cipher = &padlock_aes_256_cfb;
	    break;
	  case NID_aes_256_ofb:
	    *cipher = &padlock_aes_256_ofb;
	    break;

	  default:
	    /* Sorry, we don't support this NID */
	    *cipher = NULL;
	    return 0;
	}

	return 1;
}

/* Prepare the encryption key for PadLock usage: fill in the control
   word and install the key (raw for AES-128, software-expanded for
   AES-192/256) in the aligned per-context data. */
static int
padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
		      const unsigned char *iv, int enc)
{
	struct padlock_cipher_data *cdata;
	int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;

	if (key==NULL) return 0;	/* ERROR */

	cdata = ALIGNED_CIPHER_DATA(ctx);
	memset(cdata, 0, sizeof(struct padlock_cipher_data));

	/* Prepare Control word.  OFB always runs the engine in
	   encrypt direction; decryption is done by XOR. */
	if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
		cdata->cword.b.encdec = 0;
	else
		cdata->cword.b.encdec = (ctx->encrypt == 0);
	cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
	cdata->cword.b.ksize = (key_len - 128) / 64;

	switch(key_len) {
		case 128:
			/* PadLock can generate an extended key for
			   AES128 in hardware */
			memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
			cdata->cword.b.keygen = 0;
			break;

		case 192:
		case 256:
			/* Generate an extended AES key in software.
			   Needed for AES192/AES256 */
			/* Well, the above applies to Stepping 8 CPUs
			   and is listed as hardware errata. They most
			   likely will fix it at some point and then
			   a check for stepping would be due here. */
			if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
			    EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE ||
			    enc)
				AES_set_encrypt_key(key, key_len, &cdata->ks);
			else
				AES_set_decrypt_key(key, key_len, &cdata->ks);
#ifndef AES_ASM
			/* OpenSSL C functions use byte-swapped extended key. */
			padlock_bswapl(&cdata->ks);
#endif
			cdata->cword.b.keygen = 1;
			break;

		default:
			/* ERROR */
			return 0;
	}

	/*
	 * This is done to cover for cases when user reuses the
	 * context for new key. The catch is that if we don't do
	 * this, padlock_aes_cipher might proceed with old key...
	 */
	padlock_reload_key ();

	return 1;
}

/*
 * Simplified version of padlock_aes_cipher() used when
 * 1) both input and output buffers are at aligned addresses.
 * or when
 * 2) running on a newer CPU that doesn't require aligned buffers.
862238384Sjkim */ 863238384Sjkimstatic int 864238384Sjkimpadlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, 865238384Sjkim const unsigned char *in_arg, size_t nbytes) 866238384Sjkim{ 867238384Sjkim struct padlock_cipher_data *cdata; 868238384Sjkim void *iv; 869238384Sjkim 870238384Sjkim cdata = ALIGNED_CIPHER_DATA(ctx); 871238384Sjkim padlock_verify_context(cdata); 872238384Sjkim 873238384Sjkim switch (EVP_CIPHER_CTX_mode(ctx)) { 874238384Sjkim case EVP_CIPH_ECB_MODE: 875238384Sjkim padlock_xcrypt_ecb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg); 876238384Sjkim break; 877238384Sjkim 878238384Sjkim case EVP_CIPH_CBC_MODE: 879238384Sjkim memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); 880238384Sjkim iv = padlock_xcrypt_cbc(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg); 881238384Sjkim memcpy(ctx->iv, iv, AES_BLOCK_SIZE); 882238384Sjkim break; 883238384Sjkim 884238384Sjkim case EVP_CIPH_CFB_MODE: 885238384Sjkim memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); 886238384Sjkim iv = padlock_xcrypt_cfb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg); 887238384Sjkim memcpy(ctx->iv, iv, AES_BLOCK_SIZE); 888238384Sjkim break; 889238384Sjkim 890238384Sjkim case EVP_CIPH_OFB_MODE: 891238384Sjkim memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); 892238384Sjkim padlock_xcrypt_ofb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg); 893238384Sjkim memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE); 894238384Sjkim break; 895238384Sjkim 896238384Sjkim default: 897238384Sjkim return 0; 898238384Sjkim } 899238384Sjkim 900238384Sjkim memset(cdata->iv, 0, AES_BLOCK_SIZE); 901238384Sjkim 902238384Sjkim return 1; 903238384Sjkim} 904238384Sjkim 905238384Sjkim#ifndef PADLOCK_CHUNK 906238384Sjkim# define PADLOCK_CHUNK 512 /* Must be a power of 2 larger than 16 */ 907238384Sjkim#endif 908238384Sjkim#if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1) 909238384Sjkim# error "insane PADLOCK_CHUNK..." 
#endif

/* Re-align the arguments to 16-Bytes boundaries and run the
   encryption function itself. This function is not AES-specific.
   Chops the work into PADLOCK_CHUNK-sized pieces, bouncing
   misaligned input/output through an aligned alloca buffer, and
   handles the sub-block tails of the byte-oriented CFB/OFB modes. */
static int
padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
		   const unsigned char *in_arg, size_t nbytes)
{
	struct padlock_cipher_data *cdata;
	const  void *inp;
	unsigned char  *out;
	void  *iv;
	int    inp_misaligned, out_misaligned, realign_in_loop;
	size_t chunk, allocated=0;

	/* ctx->num is maintained in byte-oriented modes,
	   such as CFB and OFB... */
	/* First consume any partially-used keystream block left over
	   from the previous call. */
	if ((chunk = ctx->num)) { /* borrow chunk variable */
		unsigned char *ivp=ctx->iv;

		switch (EVP_CIPHER_CTX_mode(ctx)) {
		case EVP_CIPH_CFB_MODE:
			if (chunk >= AES_BLOCK_SIZE)
				return 0; /* bogus value */

			if (ctx->encrypt)
				while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
					ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
					chunk++, nbytes--;
				}
			else	while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
					unsigned char c = *(in_arg++);
					*(out_arg++) = c ^ ivp[chunk];
					/* CFB decrypt feeds ciphertext back into the IV. */
					ivp[chunk++] = c, nbytes--;
				}

			ctx->num = chunk%AES_BLOCK_SIZE;
			break;
		case EVP_CIPH_OFB_MODE:
			if (chunk >= AES_BLOCK_SIZE)
				return 0; /* bogus value */

			while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
				*(out_arg++) = *(in_arg++) ^ ivp[chunk];
				chunk++, nbytes--;
			}

			ctx->num = chunk%AES_BLOCK_SIZE;
			break;
		}
	}

	if (nbytes == 0)
		return 1;
#if 0
	if (nbytes % AES_BLOCK_SIZE)
		return 0; /* are we expected to do tail processing? */
#else
	/* nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC
	   modes and arbitrary value in byte-oriented modes, such as
	   CFB and OFB... */
#endif

	/* VIA promises CPUs that won't require alignment in the future.
	   For now padlock_aes_align_required is initialized to 1 and
	   the condition is never met... */
	/* C7 core is capable to manage unaligned input in non-ECB[!]
	   mode, but performance penalties appear to be approximately
	   same as for software alignment below or ~3x. They promise to
	   improve it in the future, but for now we can just as well
	   pretend that it can only handle aligned input... */
	if (!padlock_aes_align_required && (nbytes%AES_BLOCK_SIZE)==0)
		return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

	inp_misaligned = (((size_t)in_arg) & 0x0F);
	out_misaligned = (((size_t)out_arg) & 0x0F);

	/* Note that even if output is aligned and input not,
	 * I still prefer to loop instead of copy the whole
	 * input and then encrypt in one stroke. This is done
	 * in order to improve L1 cache utilization...
	 */
	realign_in_loop = out_misaligned|inp_misaligned;

	if (!realign_in_loop && (nbytes%AES_BLOCK_SIZE)==0)
		return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

	/* this takes one "if" out of the loops */
	/* First iteration handles the remainder; all later iterations
	   process full PADLOCK_CHUNK pieces. */
	chunk  = nbytes;
	chunk %= PADLOCK_CHUNK;
	if (chunk==0) chunk = PADLOCK_CHUNK;

	if (out_misaligned) {
		/* optimize for small input */
		allocated = (chunk<nbytes?PADLOCK_CHUNK:nbytes);
		/* 0x10 slack so NEAREST_ALIGNED can round up to 16. */
		out = alloca(0x10 + allocated);
		out = NEAREST_ALIGNED(out);
	}
	else
		out = out_arg;

	cdata = ALIGNED_CIPHER_DATA(ctx);
	padlock_verify_context(cdata);

	switch (EVP_CIPHER_CTX_mode(ctx)) {
	case EVP_CIPH_ECB_MODE:
		do	{
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

			nbytes -= chunk;
			chunk   = PADLOCK_CHUNK;
		} while (nbytes);
		break;

	case EVP_CIPH_CBC_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		/* Jump into the loop body: the first pass skips the
		   IV-carry step and uses the remainder-sized chunk. */
		goto cbc_shortcut;
		do	{
			if (iv != cdata->iv)
				memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
			chunk = PADLOCK_CHUNK;
		cbc_shortcut: /* optimize for small input */
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			iv = padlock_xcrypt_cbc(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

		} while (nbytes -= chunk);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_CFB_MODE:
		memcpy (iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		/* Full blocks go through the hardware; the sub-block
		   tail is handled below in cfb_skiploop. */
		chunk &= ~(AES_BLOCK_SIZE-1);
		if (chunk)	goto cfb_shortcut;
		else		goto cfb_skiploop;
		do	{
			if (iv != cdata->iv)
				memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
			chunk = PADLOCK_CHUNK;
		cfb_shortcut: /* optimize for small input */
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			iv = padlock_xcrypt_cfb(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

			nbytes -= chunk;
		} while (nbytes >= AES_BLOCK_SIZE);

		cfb_skiploop:
		/* Tail: encrypt the IV once via ECB (forcing the
		   forward direction) and XOR the remaining bytes,
		   recording the partial offset in ctx->num. */
		if (nbytes) {
			unsigned char *ivp = cdata->iv;

			if (iv != ivp) {
				memcpy(ivp, iv, AES_BLOCK_SIZE);
				iv = ivp;
			}
			ctx->num = nbytes;
			if (cdata->cword.b.encdec) {
				/* Temporarily flip to encryption for the
				   keystream block, then restore. */
				cdata->cword.b.encdec=0;
				padlock_reload_key();
				padlock_xcrypt_ecb(1,cdata,ivp,ivp);
				cdata->cword.b.encdec=1;
				padlock_reload_key();
				while(nbytes) {
					unsigned char c = *(in_arg++);
					*(out_arg++) = c ^ *ivp;
					*(ivp++) = c, nbytes--;
				}
			}
			else {	padlock_reload_key();
				padlock_xcrypt_ecb(1,cdata,ivp,ivp);
				padlock_reload_key();
				while (nbytes) {
					*ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
					ivp++, nbytes--;
				}
			}
		}

		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_OFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		chunk &= ~(AES_BLOCK_SIZE-1);
		if (chunk) do	{
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			padlock_xcrypt_ofb(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

			nbytes -= chunk;
			chunk   = PADLOCK_CHUNK;
		} while (nbytes >= AES_BLOCK_SIZE);

		/* Sub-block tail: generate one keystream block and XOR. */
		if (nbytes) {
			unsigned char *ivp = cdata->iv;

			ctx->num = nbytes;
			padlock_reload_key();	/* empirically found */
			padlock_xcrypt_ecb(1,cdata,ivp,ivp);
			padlock_reload_key();	/* empirically found */
			while (nbytes) {
				*(out_arg++) = *(in_arg++) ^ *ivp;
				ivp++, nbytes--;
			}
		}

		memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
		break;

	default:
		return 0;
	}

	/* Clean the realign buffer if it was used */
	/* Plain memset could be optimized away; the volatile stores
	   make sure plaintext/ciphertext residue is really wiped. */
	if (out_misaligned) {
		volatile unsigned long *p=(void *)out;
		size_t   n = allocated/sizeof(*p);
		while (n--) *p++=0;
	}

	memset(cdata->iv, 0, AES_BLOCK_SIZE);

	return 1;
}

#endif /* OPENSSL_NO_AES */

/* ===== Random Number Generator ===== */
/*
 * This code is not engaged. The reason is that it does not comply
 * with recommendations for VIA RNG usage for secure applications
 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
 * provide meaningful error control...
 */
/* Wrapper that provides an interface between the API and
   the raw PadLock RNG */
static int
padlock_rand_bytes(unsigned char *output, int count)
{
	unsigned int eax, buf;

	/* Pull 8 bytes per XSTORE while at least 8 are wanted,
	   checking the status bits returned in EAX each time. */
	while (count >= 8) {
		eax = padlock_xstore(output, 0);
		if (!(eax&(1<<6)))	return 0; /* RNG disabled */
		/* this ---vv--- covers DC bias, Raw Bits and String Filter */
		if (eax&(0x1F<<10))	return 0;
		if ((eax&0x1F)==0)	continue; /* no data, retry... */
		if ((eax&0x1F)!=8)	return 0; /* fatal failure...
 */
		output += 8;
		count -= 8;
	}
	/* Trailing 1-7 bytes: fetch one byte at a time into a local
	   buffer and copy out the low byte. */
	while (count > 0) {
		eax = padlock_xstore(&buf, 3);
		if (!(eax&(1<<6)))	return 0; /* RNG disabled */
		/* this ---vv--- covers DC bias, Raw Bits and String Filter */
		if (eax&(0x1F<<10))	return 0;
		if ((eax&0x1F)==0)	continue; /* no data, retry... */
		if ((eax&0x1F)!=1)	return 0; /* fatal failure... */
		*output++ = (unsigned char)buf;
		count--;
	}
	/* Volatile store so wiping the random residue isn't optimized away. */
	*(volatile unsigned int *)&buf=0;

	return 1;
}

/* Dummy but necessary function */
/* RAND_METHOD status callback: always reports "seeded". */
static int
padlock_rand_status(void)
{
	return 1;
}

/* Prepare structure for registration */
static RAND_METHOD padlock_rand = {
	NULL,			/* seed */
	padlock_rand_bytes,	/* bytes */
	NULL,			/* cleanup */
	NULL,			/* add */
	padlock_rand_bytes,	/* pseudorand */
	padlock_rand_status,	/* rand status */
};

#else  /* !COMPILE_HW_PADLOCK */
#ifndef OPENSSL_NO_DYNAMIC_ENGINE
/* Stub so the dynamic-engine shared object still links on builds
   where PadLock support is compiled out; always fails to bind. */
OPENSSL_EXPORT
int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns);
OPENSSL_EXPORT
int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns) { return 0; }
IMPLEMENT_DYNAMIC_CHECK_FN()
#endif
#endif /* COMPILE_HW_PADLOCK */

#endif /* !OPENSSL_NO_HW_PADLOCK */
#endif /* !OPENSSL_NO_HW */