1280304Sjkim/*- 2238384Sjkim * Support for VIA PadLock Advanced Cryptography Engine (ACE) 3238384Sjkim * Written by Michal Ludvig <michal@logix.cz> 4238384Sjkim * http://www.logix.cz/michal 5238384Sjkim * 6280304Sjkim * Big thanks to Andy Polyakov for a help with optimization, 7280304Sjkim * assembler fixes, port to MS Windows and a lot of other 8238384Sjkim * valuable work on this engine! 9238384Sjkim */ 10238384Sjkim 11238384Sjkim/* ==================================================================== 12238384Sjkim * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved. 13238384Sjkim * 14238384Sjkim * Redistribution and use in source and binary forms, with or without 15238384Sjkim * modification, are permitted provided that the following conditions 16238384Sjkim * are met: 17238384Sjkim * 18238384Sjkim * 1. Redistributions of source code must retain the above copyright 19238384Sjkim * notice, this list of conditions and the following disclaimer. 20238384Sjkim * 21238384Sjkim * 2. Redistributions in binary form must reproduce the above copyright 22238384Sjkim * notice, this list of conditions and the following disclaimer in 23238384Sjkim * the documentation and/or other materials provided with the 24238384Sjkim * distribution. 25238384Sjkim * 26238384Sjkim * 3. All advertising materials mentioning features or use of this 27238384Sjkim * software must display the following acknowledgment: 28238384Sjkim * "This product includes software developed by the OpenSSL Project 29238384Sjkim * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" 30238384Sjkim * 31238384Sjkim * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 32238384Sjkim * endorse or promote products derived from this software without 33238384Sjkim * prior written permission. For written permission, please contact 34238384Sjkim * licensing@OpenSSL.org. 35238384Sjkim * 36238384Sjkim * 5. 
Products derived from this software may not be called "OpenSSL" 37238384Sjkim * nor may "OpenSSL" appear in their names without prior written 38238384Sjkim * permission of the OpenSSL Project. 39238384Sjkim * 40238384Sjkim * 6. Redistributions of any form whatsoever must retain the following 41238384Sjkim * acknowledgment: 42238384Sjkim * "This product includes software developed by the OpenSSL Project 43238384Sjkim * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" 44238384Sjkim * 45238384Sjkim * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 46238384Sjkim * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47238384Sjkim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 48238384Sjkim * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 49238384Sjkim * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 50238384Sjkim * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 51238384Sjkim * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 52238384Sjkim * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 53238384Sjkim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 54238384Sjkim * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 55238384Sjkim * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 56238384Sjkim * OF THE POSSIBILITY OF SUCH DAMAGE. 57238384Sjkim * ==================================================================== 58238384Sjkim * 59238384Sjkim * This product includes cryptographic software written by Eric Young 60238384Sjkim * (eay@cryptsoft.com). This product includes software written by Tim 61238384Sjkim * Hudson (tjh@cryptsoft.com). 
62238384Sjkim * 63238384Sjkim */ 64238384Sjkim 65238384Sjkim#include <stdio.h> 66238384Sjkim#include <string.h> 67238384Sjkim 68238384Sjkim#include <openssl/opensslconf.h> 69238384Sjkim#include <openssl/crypto.h> 70238384Sjkim#include <openssl/dso.h> 71238384Sjkim#include <openssl/engine.h> 72238384Sjkim#include <openssl/evp.h> 73238384Sjkim#ifndef OPENSSL_NO_AES 74280304Sjkim# include <openssl/aes.h> 75238384Sjkim#endif 76238384Sjkim#include <openssl/rand.h> 77238384Sjkim#include <openssl/err.h> 78238384Sjkim 79238384Sjkim#ifndef OPENSSL_NO_HW 80280304Sjkim# ifndef OPENSSL_NO_HW_PADLOCK 81238384Sjkim 82238384Sjkim/* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */ 83280304Sjkim# if (OPENSSL_VERSION_NUMBER >= 0x00908000L) 84280304Sjkim# ifndef OPENSSL_NO_DYNAMIC_ENGINE 85238384Sjkim# define DYNAMIC_ENGINE 86280304Sjkim# endif 87280304Sjkim# elif (OPENSSL_VERSION_NUMBER >= 0x00907000L) 88280304Sjkim# ifdef ENGINE_DYNAMIC_SUPPORT 89238384Sjkim# define DYNAMIC_ENGINE 90280304Sjkim# endif 91280304Sjkim# else 92280304Sjkim# error "Only OpenSSL >= 0.9.7 is supported" 93238384Sjkim# endif 94238384Sjkim 95280304Sjkim/* 96280304Sjkim * VIA PadLock AES is available *ONLY* on some x86 CPUs. Not only that it 97280304Sjkim * doesn't exist elsewhere, but it even can't be compiled on other platforms! 98280304Sjkim * 99280304Sjkim * In addition, because of the heavy use of inline assembler, compiler choice 100280304Sjkim * is limited to GCC and Microsoft C. 
 */
# undef COMPILE_HW_PADLOCK
# if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
#  if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
     (defined(_MSC_VER) && defined(_M_IX86))
#   define COMPILE_HW_PADLOCK
#  endif
# endif

# ifdef OPENSSL_NO_DYNAMIC_ENGINE
#  ifdef COMPILE_HW_PADLOCK
static ENGINE *ENGINE_padlock(void);
#  endif

/*
 * Entry point for statically linked builds: construct the padlock ENGINE
 * and register it with the global engine list.
 */
void ENGINE_load_padlock(void)
{
/* On non-x86 CPUs it just returns. */
#  ifdef COMPILE_HW_PADLOCK
    ENGINE *toadd = ENGINE_padlock();
    if (!toadd)
        return;
    /* ENGINE_add() takes its own reference; release ours afterwards. */
    ENGINE_add(toadd);
    ENGINE_free(toadd);
    /* ENGINE_add() may fail benignly (e.g. already added); discard errors. */
    ERR_clear_error();
#  endif
}

# endif

# ifdef COMPILE_HW_PADLOCK
/*
 * We do these includes here to avoid header problems on platforms that do
 * not have the VIA padlock anyway...
134280304Sjkim */ 135280304Sjkim# include <stdlib.h> 136280304Sjkim# ifdef _WIN32 137280304Sjkim# include <malloc.h> 138280304Sjkim# ifndef alloca 139280304Sjkim# define alloca _alloca 140280304Sjkim# endif 141280304Sjkim# elif defined(__GNUC__) 142280304Sjkim# ifndef alloca 143280304Sjkim# define alloca(s) __builtin_alloca(s) 144280304Sjkim# endif 145280304Sjkim# endif 146238384Sjkim 147238384Sjkim/* Function for ENGINE detection and control */ 148238384Sjkimstatic int padlock_available(void); 149238384Sjkimstatic int padlock_init(ENGINE *e); 150238384Sjkim 151238384Sjkim/* RNG Stuff */ 152238384Sjkimstatic RAND_METHOD padlock_rand; 153238384Sjkim 154238384Sjkim/* Cipher Stuff */ 155280304Sjkim# ifndef OPENSSL_NO_AES 156280304Sjkimstatic int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, 157280304Sjkim const int **nids, int nid); 158280304Sjkim# endif 159238384Sjkim 160238384Sjkim/* Engine names */ 161238384Sjkimstatic const char *padlock_id = "padlock"; 162238384Sjkimstatic char padlock_name[100]; 163238384Sjkim 164238384Sjkim/* Available features */ 165280304Sjkimstatic int padlock_use_ace = 0; /* Advanced Cryptography Engine */ 166280304Sjkimstatic int padlock_use_rng = 0; /* Random Number Generator */ 167280304Sjkim# ifndef OPENSSL_NO_AES 168238384Sjkimstatic int padlock_aes_align_required = 1; 169280304Sjkim# endif 170238384Sjkim 171238384Sjkim/* ===== Engine "management" functions ===== */ 172238384Sjkim 173238384Sjkim/* Prepare the ENGINE structure for registration */ 174280304Sjkimstatic int padlock_bind_helper(ENGINE *e) 175238384Sjkim{ 176280304Sjkim /* Check available features */ 177280304Sjkim padlock_available(); 178238384Sjkim 179280304Sjkim# if 1 /* disable RNG for now, see commentary in 180280304Sjkim * vicinity of RNG code */ 181280304Sjkim padlock_use_rng = 0; 182280304Sjkim# endif 183238384Sjkim 184280304Sjkim /* Generate a nice engine name with available features */ 185280304Sjkim BIO_snprintf(padlock_name, sizeof(padlock_name), 
186280304Sjkim "VIA PadLock (%s, %s)", 187280304Sjkim padlock_use_rng ? "RNG" : "no-RNG", 188280304Sjkim padlock_use_ace ? "ACE" : "no-ACE"); 189238384Sjkim 190280304Sjkim /* Register everything or return with an error */ 191280304Sjkim if (!ENGINE_set_id(e, padlock_id) || 192280304Sjkim !ENGINE_set_name(e, padlock_name) || 193280304Sjkim !ENGINE_set_init_function(e, padlock_init) || 194280304Sjkim# ifndef OPENSSL_NO_AES 195280304Sjkim (padlock_use_ace && !ENGINE_set_ciphers(e, padlock_ciphers)) || 196280304Sjkim# endif 197280304Sjkim (padlock_use_rng && !ENGINE_set_RAND(e, &padlock_rand))) { 198280304Sjkim return 0; 199280304Sjkim } 200238384Sjkim 201280304Sjkim /* Everything looks good */ 202280304Sjkim return 1; 203238384Sjkim} 204238384Sjkim 205280304Sjkim# ifdef OPENSSL_NO_DYNAMIC_ENGINE 206238384Sjkim 207238384Sjkim/* Constructor */ 208280304Sjkimstatic ENGINE *ENGINE_padlock(void) 209238384Sjkim{ 210280304Sjkim ENGINE *eng = ENGINE_new(); 211238384Sjkim 212280304Sjkim if (!eng) { 213280304Sjkim return NULL; 214280304Sjkim } 215238384Sjkim 216280304Sjkim if (!padlock_bind_helper(eng)) { 217280304Sjkim ENGINE_free(eng); 218280304Sjkim return NULL; 219280304Sjkim } 220238384Sjkim 221280304Sjkim return eng; 222238384Sjkim} 223238384Sjkim 224280304Sjkim# endif 225238384Sjkim 226238384Sjkim/* Check availability of the engine */ 227280304Sjkimstatic int padlock_init(ENGINE *e) 228238384Sjkim{ 229280304Sjkim return (padlock_use_rng || padlock_use_ace); 230238384Sjkim} 231238384Sjkim 232280304Sjkim/* 233280304Sjkim * This stuff is needed if this ENGINE is being compiled into a 234280304Sjkim * self-contained shared-library. 
 */
#  ifdef DYNAMIC_ENGINE
/*
 * bind() callback for dynamic loading: accept only our own engine id
 * (or a NULL id) and then perform the common registration.
 */
static int padlock_bind_fn(ENGINE *e, const char *id)
{
    if (id && (strcmp(id, padlock_id) != 0)) {
        return 0;
    }

    if (!padlock_bind_helper(e)) {
        return 0;
    }

    return 1;
}

IMPLEMENT_DYNAMIC_CHECK_FN()
    IMPLEMENT_DYNAMIC_BIND_FN(padlock_bind_fn)
#  endif                        /* DYNAMIC_ENGINE */
/* ===== Here comes the "real" engine ===== */
#  ifndef OPENSSL_NO_AES
/* Some AES-related constants */
#   define AES_BLOCK_SIZE          16
#   define AES_KEY_SIZE_128        16
#   define AES_KEY_SIZE_192        24
#   define AES_KEY_SIZE_256        32
/*
 * Here we store the status information relevant to the current context.
 */
/*
 * BIG FAT WARNING: Inline assembler in PADLOCK_XCRYPT_ASM() depends on
 * the order of items in this structure. Don't blindly modify, reorder,
 * etc!
 */
struct padlock_cipher_data {
    unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */
    union {
        unsigned int pad[4];
        struct {                /* bit-field view of the ACE control word */
            int rounds:4;
            int dgst:1;         /* n/a in C3 */
            int align:1;        /* n/a in C3 */
            int ciphr:1;        /* n/a in C3 */
            unsigned int keygen:1;
            int interm:1;
            unsigned int encdec:1;
            int ksize:2;
        } b;
    } cword;                    /* Control word */
    AES_KEY ks;                 /* Encryption key */
};

/*
 * Essentially this variable belongs in thread local storage.
 * Having this variable global on the other hand can only cause
 * few bogus key reloads [if any at all on single-CPU system],
 * so we accept the penalty...
 */
static volatile struct padlock_cipher_data *padlock_saved_context;
#  endif

/*-
 * =======================================================
 * Inline assembler section(s).
 * =======================================================
 * Order of arguments is chosen to facilitate Windows port
 * using __fastcall calling convention. If you wish to add
 * more routines, keep in mind that first __fastcall
 * argument is passed in %ecx and second - in %edx.
 * =======================================================
 */
#  if defined(__GNUC__) && __GNUC__>=2
/*
 * As for excessive "push %ebx"/"pop %ebx" found all over.
 * When generating position-independent code GCC won't let
 * us use "b" in assembler templates nor even respect "ebx"
 * in "clobber description." Therefore the trouble...
 */

/*
 * Helper function - check if a CPUID instruction is available on this CPU
 */
static int padlock_insn_cpuid_available(void)
{
    int result = -1;

    /*
     * We're checking if the bit #21 of EFLAGS can be toggled. If yes =
     * CPUID is available.
     */
    asm volatile ("pushf\n"
                  "popl %%eax\n"
                  "xorl $0x200000, %%eax\n"
                  "movl %%eax, %%ecx\n"
                  "andl $0x200000, %%ecx\n"
                  "pushl %%eax\n"
                  "popf\n"
                  "pushf\n"
                  "popl %%eax\n"
                  "andl $0x200000, %%eax\n"
                  "xorl %%eax, %%ecx\n"
                  "movl %%ecx, %0\n":"=r" (result)::"eax", "ecx");

    /* result == 0 means the bit toggled, i.e. CPUID is supported. */
    return (result == 0);
}

/*
 * Load supported features of the CPU to see if the PadLock is available.
 * Sets padlock_use_ace / padlock_use_rng as a side effect and returns
 * non-zero when at least one feature is present and enabled.
 */
static int padlock_available(void)
{
    char vendor_string[16];
    unsigned int eax, edx;

    /* First check if the CPUID instruction is available at all... */
    if (!padlock_insn_cpuid_available())
        return 0;

    /* Are we running on the Centaur (VIA) CPU? */
    eax = 0x00000000;
    vendor_string[12] = 0;
    asm volatile ("pushl %%ebx\n"
                  "cpuid\n"
                  "movl %%ebx,(%%edi)\n"
                  "movl %%edx,4(%%edi)\n"
                  "movl %%ecx,8(%%edi)\n"
                  "popl %%ebx":"+a" (eax):"D"(vendor_string):"ecx", "edx");
    if (strcmp(vendor_string, "CentaurHauls") != 0)
        return 0;

    /* Check for Centaur Extended Feature Flags presence */
    eax = 0xC0000000;
    asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax)::"ecx", "edx");
    if (eax < 0xC0000001)
        return 0;

    /* Read the Centaur Extended Feature Flags */
    eax = 0xC0000001;
    asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax),
                  "=d"(edx)::"ecx");

    /*
     * Fill up some flags.  Each feature needs two bits set: "present"
     * and "enabled" (ACE: bits 6-7, RNG: bits 2-3).
     */
    padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6));
    padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2));

    return padlock_use_ace + padlock_use_rng;
}

#  ifndef OPENSSL_NO_AES
#   ifndef AES_ASM
/* Our own htonl()/ntohl() — byte-swap every word of the AES key schedule. */
static inline void padlock_bswapl(AES_KEY *ks)
{
    size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]);
    unsigned int *key = ks->rd_key;

    while (i--) {
        asm volatile ("bswapl %0":"+r" (*key));
        key++;
    }
}
#   endif
#  endif

/*
 * Force key reload from memory to the CPU microcode. Loading EFLAGS from the
 * stack clears EFLAGS[30] which does the trick.
 */
static inline void padlock_reload_key(void)
{
    asm volatile ("pushfl; popfl");
}

#  ifndef OPENSSL_NO_AES
/*
 * This is heuristic key context tracing. At first one
 * believes that one should use atomic swap instructions,
 * but it's not actually necessary. Point is that if
 * padlock_saved_context was changed by another thread
 * after we've read it and before we compare it with cdata,
 * our key *shall* be reloaded upon thread context switch
 * and we are therefore set in either case...
 */
static inline void padlock_verify_context(struct padlock_cipher_data *cdata)
{
    /*
     * If EFLAGS[30] is still set (no context switch since the last reload)
     * and cdata differs from the last-seen context, force a key reload by
     * popping EFLAGS; then record cdata as the current context.
     */
    asm volatile ("pushfl\n"
                  " btl $30,(%%esp)\n"
                  " jnc 1f\n"
                  " cmpl %2,%1\n"
                  " je 1f\n"
                  " popfl\n"
                  " subl $4,%%esp\n"
                  "1: addl $4,%%esp\n"
                  " movl %2,%0":"+m" (padlock_saved_context)
                  :"r"(padlock_saved_context), "r"(cdata):"cc");
}

/* Template for padlock_xcrypt_* modes */
/*
 * BIG FAT WARNING: The offsets used with 'leal' instructions describe items
 * of the 'padlock_cipher_data' structure.
 */
/*
 * Expands to an inline wrapper around one 'rep xcrypt*' instruction:
 * cnt = number of AES blocks, cdata = aligned cipher context (offset 16 =
 * control word, offset 32 = key schedule), out/inp = data buffers.
 * Returns the pointer left in %eax by the instruction (used as the next IV
 * in chaining modes).
 */
#   define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \
static inline void *name(size_t cnt, \
                         struct padlock_cipher_data *cdata, \
                         void *out, const void *inp) \
{ void *iv; \
    asm volatile ( "pushl %%ebx\n" \
                   " leal 16(%0),%%edx\n" \
                   " leal 32(%0),%%ebx\n" \
                   rep_xcrypt "\n" \
                   " popl %%ebx" \
                   : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
                   : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \
                   : "edx", "cc", "memory"); \
    return iv; \
}

/* Generate all functions with appropriate opcodes */
/* rep xcryptecb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")
/* rep xcryptcbc */
    PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")
/* rep xcryptcfb */
    PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")
/* rep xcryptofb */
    PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")
#  endif
/* The RNG call itself */
static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in)
{
    unsigned int eax_out;

    /* XSTORE: write hardware random bytes to *addr; eax reports status. */
    asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
                  :"=a" (eax_out), "=m"(*(unsigned *)addr)
                  :"D"(addr), "d"(edx_in)
        );

    return eax_out;
}

/*
 * Why not inline 'rep movsd'? I failed to find information on what value in
 * Direction Flag one can expect and consequently have to apply
 * "better-safe-than-sorry" approach and assume "undefined."
I could
 * explicitly clear it and restore the original value upon return from
 * padlock_aes_cipher, but it's presumably too much trouble for too little
 * gain... In case you wonder 'rep xcrypt*' instructions above are *not*
 * affected by the Direction Flag and pointers advance toward larger
 * addresses unconditionally.
 */
/*
 * Copy n bytes from src to dst one machine word at a time; returns dst.
 * NOTE(review): the do/while assumes n is a nonzero multiple of
 * sizeof(long) — n == 0 would underflow the counter.  Callers here pass
 * whole AES chunks; confirm before reusing elsewhere.
 */
static inline unsigned char *padlock_memcpy(void *dst, const void *src,
                                            size_t n)
{
    long *d = dst;
    const long *s = src;

    n /= sizeof(*d);
    do {
        *d++ = *s++;
    } while (--n);

    return dst;
}

#  elif defined(_MSC_VER)
/*
 * Unlike GCC these are real functions. In order to minimize impact
 * on performance we adhere to __fastcall calling convention in
 * order to get two first arguments passed through %ecx and %edx.
 * Which kind of suits very well, as instructions in question use
 * both %ecx and %edx as input:-)
 */
/* Emit the raw 'rep xcrypt*' opcode bytes: 0xf3 0x0f 0xa7 <code>. */
#   define REP_XCRYPT(code) \
    _asm _emit 0xf3 \
    _asm _emit 0x0f _asm _emit 0xa7 \
    _asm _emit code

/*
 * BIG FAT WARNING: The offsets used with 'lea' instructions describe items
 * of the 'padlock_cipher_data' structure.
 */
/*
 * MSC counterpart of PADLOCK_XCRYPT_ASM above: cnt arrives in ecx, cdata in
 * edx (__fastcall); edx/ebx are pointed at the control word (+16) and key
 * schedule (+32) before the instruction executes.
 */
#   define PADLOCK_XCRYPT_ASM(name,code) \
static void * __fastcall \
    name (size_t cnt, void *cdata, \
          void *outp, const void *inp) \
{ _asm mov eax,edx \
    _asm lea edx,[eax+16] \
    _asm lea ebx,[eax+32] \
    _asm mov edi,outp \
    _asm mov esi,inp \
    REP_XCRYPT(code) \
}

PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)

/* The RNG call itself (XSTORE); returns status from eax. */
static int __fastcall padlock_xstore(void *outp, unsigned int code)
{
    _asm mov edi,ecx
    _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
}

/* Force key reload: popping EFLAGS clears EFLAGS[30]. */
static void __fastcall padlock_reload_key(void)
{
    _asm pushfd
    _asm popfd
}

/* MSC counterpart of the heuristic key-context tracing (see GCC version). */
static void __fastcall padlock_verify_context(void *cdata)
{
    _asm {
        pushfd
        bt DWORD PTR[esp],30
        jnc skip
        cmp ecx,padlock_saved_context
        je skip
        popfd
        sub esp,4
 skip:  add esp,4
        mov padlock_saved_context,ecx
    }
}

/*
 * MSC CPU probe: toggle EFLAGS[21] to verify CPUID works, check for the
 * "CentaurHauls" vendor and the Centaur extended feature leaves, then set
 * padlock_use_ace / padlock_use_rng.  Result is returned in eax.
 */
static int
padlock_available(void)
{
    _asm {
        pushfd
        pop eax
        mov ecx,eax
        xor eax,1<<21
        push eax
        popfd
        pushfd
        pop eax
        xor eax,ecx
        bt eax,21
        jnc noluck
        mov eax,0
        cpuid
        xor eax,eax
        cmp ebx,'tneC'
        jne noluck
        cmp edx,'Hrua'
        jne noluck
        cmp ecx,'slua'
        jne noluck
        mov eax,0xC0000000
        cpuid
        mov edx,eax
        xor eax,eax
        cmp edx,0xC0000001
        jb noluck
        mov eax,0xC0000001
        cpuid
        xor eax,eax
        bt edx,6
        jnc skip_a
        bt edx,7
        jnc skip_a
        mov padlock_use_ace,1
        inc eax
 skip_a: bt edx,2
        jnc skip_r
        bt edx,3
        jnc skip_r
        mov padlock_use_rng,1
        inc eax
 skip_r:
 noluck:
    }
}

/* Byte-swap the 60 words of an AES key schedule in place. */
static void __fastcall padlock_bswapl(void *key)
{
    _asm {
        pushfd
        cld
        mov esi,ecx
        mov edi,ecx
        mov ecx,60
 up:    lodsd
        bswap eax
        stosd
        loop up
        popfd
    }
}

/*
 * MS actually specifies status of Direction Flag and compiler even manages
 * to compile following as 'rep movsd' all by itself...
 */
#   define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
#  endif
/* ===== AES encryption/decryption ===== */
#  ifndef OPENSSL_NO_AES
#   if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
#    define NID_aes_128_cfb NID_aes_128_cfb128
#   endif
#   if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
#    define NID_aes_128_ofb NID_aes_128_ofb128
#   endif
#   if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
#    define NID_aes_192_cfb NID_aes_192_cfb128
#   endif
#   if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
#    define NID_aes_192_ofb NID_aes_192_ofb128
#   endif
#   if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
#    define NID_aes_256_cfb NID_aes_256_cfb128
#   endif
#   if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
#    define NID_aes_256_ofb NID_aes_256_ofb128
#   endif
/*
 * List of supported ciphers.
 */
static int padlock_cipher_nids[] = {
    NID_aes_128_ecb,
    NID_aes_128_cbc,
    NID_aes_128_cfb,
    NID_aes_128_ofb,

    NID_aes_192_ecb,
    NID_aes_192_cbc,
    NID_aes_192_cfb,
    NID_aes_192_ofb,

    NID_aes_256_ecb,
    NID_aes_256_cbc,
    NID_aes_256_cfb,
    NID_aes_256_ofb,
};

static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids) /
                                      sizeof(padlock_cipher_nids[0]));

/* Function prototypes ...
 */
static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                                const unsigned char *iv, int enc);
static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                              const unsigned char *in, size_t nbytes);

/* Round a pointer up to the next 16-byte boundary (xcrypt requirement). */
#   define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) + \
        ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) )
/* The per-context data lives 16-byte aligned inside ctx->cipher_data. */
#   define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
        NEAREST_ALIGNED(ctx->cipher_data))

#   define EVP_CIPHER_block_size_ECB       AES_BLOCK_SIZE
#   define EVP_CIPHER_block_size_CBC       AES_BLOCK_SIZE
#   define EVP_CIPHER_block_size_OFB       1
#   define EVP_CIPHER_block_size_CFB       1

/*
 * Declaring so many ciphers by hand would be a pain. Instead introduce a bit
 * of preprocessor magic :-)
 * (The +16 on cipher_data size leaves room for the alignment fix-up above.)
 */
#   define DECLARE_AES_EVP(ksize,lmode,umode) \
static const EVP_CIPHER padlock_aes_##ksize##_##lmode = { \
    NID_aes_##ksize##_##lmode, \
    EVP_CIPHER_block_size_##umode, \
    AES_KEY_SIZE_##ksize, \
    AES_BLOCK_SIZE, \
    0 | EVP_CIPH_##umode##_MODE, \
    padlock_aes_init_key, \
    padlock_aes_cipher, \
    NULL, \
    sizeof(struct padlock_cipher_data) + 16, \
    EVP_CIPHER_set_asn1_iv, \
    EVP_CIPHER_get_asn1_iv, \
    NULL, \
    NULL \
}

DECLARE_AES_EVP(128, ecb, ECB);
DECLARE_AES_EVP(128, cbc, CBC);
DECLARE_AES_EVP(128, cfb, CFB);
DECLARE_AES_EVP(128, ofb, OFB);

DECLARE_AES_EVP(192, ecb, ECB);
DECLARE_AES_EVP(192, cbc, CBC);
DECLARE_AES_EVP(192, cfb, CFB);
DECLARE_AES_EVP(192, ofb, OFB);

DECLARE_AES_EVP(256, ecb, ECB);
DECLARE_AES_EVP(256, cbc, CBC);
DECLARE_AES_EVP(256, cfb, CFB);
DECLARE_AES_EVP(256, ofb, OFB);

/*
 * ENGINE ciphers() callback: enumerate the supported nids or hand back the
 * EVP_CIPHER for a specific nid.
 */
static int
padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids,
                int nid)
{
    /* No specific cipher => return a list of supported nids ... */
    if (!cipher) {
        *nids = padlock_cipher_nids;
        return padlock_cipher_nids_num;
    }

    /* ... or the requested "cipher" otherwise */
    switch (nid) {
    case NID_aes_128_ecb:
        *cipher = &padlock_aes_128_ecb;
        break;
    case NID_aes_128_cbc:
        *cipher = &padlock_aes_128_cbc;
        break;
    case NID_aes_128_cfb:
        *cipher = &padlock_aes_128_cfb;
        break;
    case NID_aes_128_ofb:
        *cipher = &padlock_aes_128_ofb;
        break;

    case NID_aes_192_ecb:
        *cipher = &padlock_aes_192_ecb;
        break;
    case NID_aes_192_cbc:
        *cipher = &padlock_aes_192_cbc;
        break;
    case NID_aes_192_cfb:
        *cipher = &padlock_aes_192_cfb;
        break;
    case NID_aes_192_ofb:
        *cipher = &padlock_aes_192_ofb;
        break;

    case NID_aes_256_ecb:
        *cipher = &padlock_aes_256_ecb;
        break;
    case NID_aes_256_cbc:
        *cipher = &padlock_aes_256_cbc;
        break;
    case NID_aes_256_cfb:
        *cipher = &padlock_aes_256_cfb;
        break;
    case NID_aes_256_ofb:
        *cipher = &padlock_aes_256_ofb;
        break;

    default:
        /* Sorry, we don't support this NID */
        *cipher = NULL;
        return 0;
    }

    return 1;
}

/*
 * Prepare the encryption key for PadLock usage: fill in the aligned
 * padlock_cipher_data (control word + key schedule) inside ctx.
 * Returns 1 on success, 0 on error (NULL key or unsupported key length).
 */
static int
padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                     const unsigned char *iv, int enc)
{
    struct padlock_cipher_data *cdata;
    int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8; /* length in bits */

    if (key == NULL)
        return 0;               /* ERROR */

    cdata = ALIGNED_CIPHER_DATA(ctx);
    memset(cdata, 0, sizeof(struct padlock_cipher_data));

    /* Prepare Control word. */
    if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
        cdata->cword.b.encdec = 0; /* OFB always runs the engine forward */
    else
        cdata->cword.b.encdec = (ctx->encrypt == 0);
    cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
    cdata->cword.b.ksize = (key_len - 128) / 64;

    switch (key_len) {
    case 128:
        /*
         * PadLock can generate an extended key for AES128 in hardware
         */
        memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
        cdata->cword.b.keygen = 0;
        break;

    case 192:
    case 256:
        /*
         * Generate an extended AES key in software. Needed for AES192/AES256
         */
        /*
         * Well, the above applies to Stepping 8 CPUs and is listed as
         * hardware errata. They most likely will fix it at some point and
         * then a check for stepping would be due here.
         */
        if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
            EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE || enc)
            AES_set_encrypt_key(key, key_len, &cdata->ks);
        else
            AES_set_decrypt_key(key, key_len, &cdata->ks);
#   ifndef AES_ASM
        /*
         * OpenSSL C functions use byte-swapped extended key.
         */
        padlock_bswapl(&cdata->ks);
#   endif
        cdata->cword.b.keygen = 1;
        break;

    default:
        /* ERROR */
        return 0;
    }

    /*
     * This is done to cover for cases when user reuses the
     * context for new key. The catch is that if we don't do
     * this, padlock_aes_cipher might proceed with old key...
     */
    padlock_reload_key();

    return 1;
}

/*-
 * Simplified version of padlock_aes_cipher() used when
 * 1) both input and output buffers are at aligned addresses.
 * or when
 * 2) running on a newer CPU that doesn't require aligned buffers.
861238384Sjkim */ 862238384Sjkimstatic int 863238384Sjkimpadlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, 864280304Sjkim const unsigned char *in_arg, size_t nbytes) 865238384Sjkim{ 866280304Sjkim struct padlock_cipher_data *cdata; 867280304Sjkim void *iv; 868238384Sjkim 869280304Sjkim cdata = ALIGNED_CIPHER_DATA(ctx); 870280304Sjkim padlock_verify_context(cdata); 871238384Sjkim 872280304Sjkim switch (EVP_CIPHER_CTX_mode(ctx)) { 873280304Sjkim case EVP_CIPH_ECB_MODE: 874280304Sjkim padlock_xcrypt_ecb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg); 875280304Sjkim break; 876238384Sjkim 877280304Sjkim case EVP_CIPH_CBC_MODE: 878280304Sjkim memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); 879280304Sjkim iv = padlock_xcrypt_cbc(nbytes / AES_BLOCK_SIZE, cdata, out_arg, 880280304Sjkim in_arg); 881280304Sjkim memcpy(ctx->iv, iv, AES_BLOCK_SIZE); 882280304Sjkim break; 883238384Sjkim 884280304Sjkim case EVP_CIPH_CFB_MODE: 885280304Sjkim memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); 886280304Sjkim iv = padlock_xcrypt_cfb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, 887280304Sjkim in_arg); 888280304Sjkim memcpy(ctx->iv, iv, AES_BLOCK_SIZE); 889280304Sjkim break; 890238384Sjkim 891280304Sjkim case EVP_CIPH_OFB_MODE: 892280304Sjkim memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); 893280304Sjkim padlock_xcrypt_ofb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg); 894280304Sjkim memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE); 895280304Sjkim break; 896238384Sjkim 897280304Sjkim default: 898280304Sjkim return 0; 899280304Sjkim } 900238384Sjkim 901280304Sjkim memset(cdata->iv, 0, AES_BLOCK_SIZE); 902238384Sjkim 903280304Sjkim return 1; 904238384Sjkim} 905238384Sjkim 906280304Sjkim# ifndef PADLOCK_CHUNK 907280304Sjkim# define PADLOCK_CHUNK 512 /* Must be a power of 2 larger than 16 */ 908280304Sjkim# endif 909280304Sjkim# if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1) 910280304Sjkim# error "insane PADLOCK_CHUNK..." 
911280304Sjkim# endif 912238384Sjkim 913280304Sjkim/* 914280304Sjkim * Re-align the arguments to 16-Bytes boundaries and run the encryption 915280304Sjkim * function itself. This function is not AES-specific. 916280304Sjkim */ 917238384Sjkimstatic int 918238384Sjkimpadlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, 919280304Sjkim const unsigned char *in_arg, size_t nbytes) 920238384Sjkim{ 921280304Sjkim struct padlock_cipher_data *cdata; 922280304Sjkim const void *inp; 923280304Sjkim unsigned char *out; 924280304Sjkim void *iv; 925280304Sjkim int inp_misaligned, out_misaligned, realign_in_loop; 926280304Sjkim size_t chunk, allocated = 0; 927238384Sjkim 928280304Sjkim /* 929280304Sjkim * ctx->num is maintained in byte-oriented modes, such as CFB and OFB... 930280304Sjkim */ 931280304Sjkim if ((chunk = ctx->num)) { /* borrow chunk variable */ 932280304Sjkim unsigned char *ivp = ctx->iv; 933238384Sjkim 934280304Sjkim switch (EVP_CIPHER_CTX_mode(ctx)) { 935280304Sjkim case EVP_CIPH_CFB_MODE: 936280304Sjkim if (chunk >= AES_BLOCK_SIZE) 937280304Sjkim return 0; /* bogus value */ 938238384Sjkim 939280304Sjkim if (ctx->encrypt) 940280304Sjkim while (chunk < AES_BLOCK_SIZE && nbytes != 0) { 941280304Sjkim ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk]; 942280304Sjkim chunk++, nbytes--; 943280304Sjkim } else 944280304Sjkim while (chunk < AES_BLOCK_SIZE && nbytes != 0) { 945280304Sjkim unsigned char c = *(in_arg++); 946280304Sjkim *(out_arg++) = c ^ ivp[chunk]; 947280304Sjkim ivp[chunk++] = c, nbytes--; 948280304Sjkim } 949238384Sjkim 950280304Sjkim ctx->num = chunk % AES_BLOCK_SIZE; 951280304Sjkim break; 952280304Sjkim case EVP_CIPH_OFB_MODE: 953280304Sjkim if (chunk >= AES_BLOCK_SIZE) 954280304Sjkim return 0; /* bogus value */ 955238384Sjkim 956280304Sjkim while (chunk < AES_BLOCK_SIZE && nbytes != 0) { 957280304Sjkim *(out_arg++) = *(in_arg++) ^ ivp[chunk]; 958280304Sjkim chunk++, nbytes--; 959280304Sjkim } 960238384Sjkim 961280304Sjkim ctx->num = chunk 
% AES_BLOCK_SIZE; 962280304Sjkim break; 963280304Sjkim } 964280304Sjkim } 965238384Sjkim 966280304Sjkim if (nbytes == 0) 967280304Sjkim return 1; 968280304Sjkim# if 0 969280304Sjkim if (nbytes % AES_BLOCK_SIZE) 970280304Sjkim return 0; /* are we expected to do tail processing? */ 971280304Sjkim# else 972280304Sjkim /* 973280304Sjkim * nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC modes and 974280304Sjkim * arbitrary value in byte-oriented modes, such as CFB and OFB... 975280304Sjkim */ 976280304Sjkim# endif 977238384Sjkim 978280304Sjkim /* 979280304Sjkim * VIA promises CPUs that won't require alignment in the future. For now 980280304Sjkim * padlock_aes_align_required is initialized to 1 and the condition is 981280304Sjkim * never met... 982280304Sjkim */ 983280304Sjkim /* 984280304Sjkim * C7 core is capable to manage unaligned input in non-ECB[!] mode, but 985280304Sjkim * performance penalties appear to be approximately same as for software 986280304Sjkim * alignment below or ~3x. They promise to improve it in the future, but 987280304Sjkim * for now we can just as well pretend that it can only handle aligned 988280304Sjkim * input... 989280304Sjkim */ 990280304Sjkim if (!padlock_aes_align_required && (nbytes % AES_BLOCK_SIZE) == 0) 991280304Sjkim return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes); 992238384Sjkim 993280304Sjkim inp_misaligned = (((size_t)in_arg) & 0x0F); 994280304Sjkim out_misaligned = (((size_t)out_arg) & 0x0F); 995238384Sjkim 996280304Sjkim /* 997280304Sjkim * Note that even if output is aligned and input not, I still prefer to 998280304Sjkim * loop instead of copy the whole input and then encrypt in one stroke. 999280304Sjkim * This is done in order to improve L1 cache utilization... 
1000280304Sjkim */ 1001280304Sjkim realign_in_loop = out_misaligned | inp_misaligned; 1002238384Sjkim 1003280304Sjkim if (!realign_in_loop && (nbytes % AES_BLOCK_SIZE) == 0) 1004280304Sjkim return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes); 1005238384Sjkim 1006280304Sjkim /* this takes one "if" out of the loops */ 1007280304Sjkim chunk = nbytes; 1008280304Sjkim chunk %= PADLOCK_CHUNK; 1009280304Sjkim if (chunk == 0) 1010280304Sjkim chunk = PADLOCK_CHUNK; 1011238384Sjkim 1012280304Sjkim if (out_misaligned) { 1013280304Sjkim /* optmize for small input */ 1014280304Sjkim allocated = (chunk < nbytes ? PADLOCK_CHUNK : nbytes); 1015280304Sjkim out = alloca(0x10 + allocated); 1016280304Sjkim out = NEAREST_ALIGNED(out); 1017280304Sjkim } else 1018280304Sjkim out = out_arg; 1019238384Sjkim 1020280304Sjkim cdata = ALIGNED_CIPHER_DATA(ctx); 1021280304Sjkim padlock_verify_context(cdata); 1022238384Sjkim 1023280304Sjkim switch (EVP_CIPHER_CTX_mode(ctx)) { 1024280304Sjkim case EVP_CIPH_ECB_MODE: 1025280304Sjkim do { 1026280304Sjkim if (inp_misaligned) 1027280304Sjkim inp = padlock_memcpy(out, in_arg, chunk); 1028280304Sjkim else 1029280304Sjkim inp = in_arg; 1030280304Sjkim in_arg += chunk; 1031238384Sjkim 1032280304Sjkim padlock_xcrypt_ecb(chunk / AES_BLOCK_SIZE, cdata, out, inp); 1033238384Sjkim 1034280304Sjkim if (out_misaligned) 1035280304Sjkim out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; 1036280304Sjkim else 1037280304Sjkim out = out_arg += chunk; 1038238384Sjkim 1039280304Sjkim nbytes -= chunk; 1040280304Sjkim chunk = PADLOCK_CHUNK; 1041280304Sjkim } while (nbytes); 1042280304Sjkim break; 1043238384Sjkim 1044280304Sjkim case EVP_CIPH_CBC_MODE: 1045280304Sjkim memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); 1046280304Sjkim goto cbc_shortcut; 1047280304Sjkim do { 1048280304Sjkim if (iv != cdata->iv) 1049280304Sjkim memcpy(cdata->iv, iv, AES_BLOCK_SIZE); 1050280304Sjkim chunk = PADLOCK_CHUNK; 1051280304Sjkim cbc_shortcut: /* optimize for small input */ 
1052280304Sjkim if (inp_misaligned) 1053280304Sjkim inp = padlock_memcpy(out, in_arg, chunk); 1054280304Sjkim else 1055280304Sjkim inp = in_arg; 1056280304Sjkim in_arg += chunk; 1057238384Sjkim 1058280304Sjkim iv = padlock_xcrypt_cbc(chunk / AES_BLOCK_SIZE, cdata, out, inp); 1059238384Sjkim 1060280304Sjkim if (out_misaligned) 1061280304Sjkim out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; 1062280304Sjkim else 1063280304Sjkim out = out_arg += chunk; 1064238384Sjkim 1065280304Sjkim } while (nbytes -= chunk); 1066280304Sjkim memcpy(ctx->iv, iv, AES_BLOCK_SIZE); 1067280304Sjkim break; 1068238384Sjkim 1069280304Sjkim case EVP_CIPH_CFB_MODE: 1070280304Sjkim memcpy(iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE); 1071280304Sjkim chunk &= ~(AES_BLOCK_SIZE - 1); 1072280304Sjkim if (chunk) 1073280304Sjkim goto cfb_shortcut; 1074280304Sjkim else 1075280304Sjkim goto cfb_skiploop; 1076280304Sjkim do { 1077280304Sjkim if (iv != cdata->iv) 1078280304Sjkim memcpy(cdata->iv, iv, AES_BLOCK_SIZE); 1079280304Sjkim chunk = PADLOCK_CHUNK; 1080280304Sjkim cfb_shortcut: /* optimize for small input */ 1081280304Sjkim if (inp_misaligned) 1082280304Sjkim inp = padlock_memcpy(out, in_arg, chunk); 1083280304Sjkim else 1084280304Sjkim inp = in_arg; 1085280304Sjkim in_arg += chunk; 1086238384Sjkim 1087280304Sjkim iv = padlock_xcrypt_cfb(chunk / AES_BLOCK_SIZE, cdata, out, inp); 1088238384Sjkim 1089280304Sjkim if (out_misaligned) 1090280304Sjkim out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; 1091280304Sjkim else 1092280304Sjkim out = out_arg += chunk; 1093238384Sjkim 1094280304Sjkim nbytes -= chunk; 1095280304Sjkim } while (nbytes >= AES_BLOCK_SIZE); 1096238384Sjkim 1097280304Sjkim cfb_skiploop: 1098280304Sjkim if (nbytes) { 1099280304Sjkim unsigned char *ivp = cdata->iv; 1100238384Sjkim 1101280304Sjkim if (iv != ivp) { 1102280304Sjkim memcpy(ivp, iv, AES_BLOCK_SIZE); 1103280304Sjkim iv = ivp; 1104280304Sjkim } 1105280304Sjkim ctx->num = nbytes; 1106280304Sjkim if (cdata->cword.b.encdec) 
{ 1107280304Sjkim cdata->cword.b.encdec = 0; 1108280304Sjkim padlock_reload_key(); 1109280304Sjkim padlock_xcrypt_ecb(1, cdata, ivp, ivp); 1110280304Sjkim cdata->cword.b.encdec = 1; 1111280304Sjkim padlock_reload_key(); 1112280304Sjkim while (nbytes) { 1113280304Sjkim unsigned char c = *(in_arg++); 1114280304Sjkim *(out_arg++) = c ^ *ivp; 1115280304Sjkim *(ivp++) = c, nbytes--; 1116280304Sjkim } 1117280304Sjkim } else { 1118280304Sjkim padlock_reload_key(); 1119280304Sjkim padlock_xcrypt_ecb(1, cdata, ivp, ivp); 1120280304Sjkim padlock_reload_key(); 1121280304Sjkim while (nbytes) { 1122280304Sjkim *ivp = *(out_arg++) = *(in_arg++) ^ *ivp; 1123280304Sjkim ivp++, nbytes--; 1124280304Sjkim } 1125280304Sjkim } 1126280304Sjkim } 1127238384Sjkim 1128280304Sjkim memcpy(ctx->iv, iv, AES_BLOCK_SIZE); 1129280304Sjkim break; 1130238384Sjkim 1131280304Sjkim case EVP_CIPH_OFB_MODE: 1132280304Sjkim memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); 1133280304Sjkim chunk &= ~(AES_BLOCK_SIZE - 1); 1134280304Sjkim if (chunk) 1135280304Sjkim do { 1136280304Sjkim if (inp_misaligned) 1137280304Sjkim inp = padlock_memcpy(out, in_arg, chunk); 1138280304Sjkim else 1139280304Sjkim inp = in_arg; 1140280304Sjkim in_arg += chunk; 1141238384Sjkim 1142280304Sjkim padlock_xcrypt_ofb(chunk / AES_BLOCK_SIZE, cdata, out, inp); 1143238384Sjkim 1144280304Sjkim if (out_misaligned) 1145280304Sjkim out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; 1146280304Sjkim else 1147280304Sjkim out = out_arg += chunk; 1148238384Sjkim 1149280304Sjkim nbytes -= chunk; 1150280304Sjkim chunk = PADLOCK_CHUNK; 1151280304Sjkim } while (nbytes >= AES_BLOCK_SIZE); 1152238384Sjkim 1153280304Sjkim if (nbytes) { 1154280304Sjkim unsigned char *ivp = cdata->iv; 1155238384Sjkim 1156280304Sjkim ctx->num = nbytes; 1157280304Sjkim padlock_reload_key(); /* empirically found */ 1158280304Sjkim padlock_xcrypt_ecb(1, cdata, ivp, ivp); 1159280304Sjkim padlock_reload_key(); /* empirically found */ 1160280304Sjkim while (nbytes) { 
1161280304Sjkim *(out_arg++) = *(in_arg++) ^ *ivp; 1162280304Sjkim ivp++, nbytes--; 1163280304Sjkim } 1164280304Sjkim } 1165238384Sjkim 1166280304Sjkim memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE); 1167280304Sjkim break; 1168238384Sjkim 1169280304Sjkim default: 1170280304Sjkim return 0; 1171280304Sjkim } 1172238384Sjkim 1173280304Sjkim /* Clean the realign buffer if it was used */ 1174280304Sjkim if (out_misaligned) { 1175280304Sjkim volatile unsigned long *p = (void *)out; 1176280304Sjkim size_t n = allocated / sizeof(*p); 1177280304Sjkim while (n--) 1178280304Sjkim *p++ = 0; 1179280304Sjkim } 1180238384Sjkim 1181280304Sjkim memset(cdata->iv, 0, AES_BLOCK_SIZE); 1182238384Sjkim 1183280304Sjkim return 1; 1184238384Sjkim} 1185238384Sjkim 1186280304Sjkim# endif /* OPENSSL_NO_AES */ 1187238384Sjkim 1188238384Sjkim/* ===== Random Number Generator ===== */ 1189238384Sjkim/* 1190238384Sjkim * This code is not engaged. The reason is that it does not comply 1191238384Sjkim * with recommendations for VIA RNG usage for secure applications 1192238384Sjkim * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it 1193238384Sjkim * provide meaningful error control... 1194238384Sjkim */ 1195280304Sjkim/* 1196280304Sjkim * Wrapper that provides an interface between the API and the raw PadLock 1197280304Sjkim * RNG 1198280304Sjkim */ 1199280304Sjkimstatic int padlock_rand_bytes(unsigned char *output, int count) 1200238384Sjkim{ 1201280304Sjkim unsigned int eax, buf; 1202238384Sjkim 1203280304Sjkim while (count >= 8) { 1204280304Sjkim eax = padlock_xstore(output, 0); 1205280304Sjkim if (!(eax & (1 << 6))) 1206280304Sjkim return 0; /* RNG disabled */ 1207280304Sjkim /* this ---vv--- covers DC bias, Raw Bits and String Filter */ 1208280304Sjkim if (eax & (0x1F << 10)) 1209280304Sjkim return 0; 1210280304Sjkim if ((eax & 0x1F) == 0) 1211280304Sjkim continue; /* no data, retry... */ 1212280304Sjkim if ((eax & 0x1F) != 8) 1213280304Sjkim return 0; /* fatal failure... 
*/ 1214280304Sjkim output += 8; 1215280304Sjkim count -= 8; 1216280304Sjkim } 1217280304Sjkim while (count > 0) { 1218280304Sjkim eax = padlock_xstore(&buf, 3); 1219280304Sjkim if (!(eax & (1 << 6))) 1220280304Sjkim return 0; /* RNG disabled */ 1221280304Sjkim /* this ---vv--- covers DC bias, Raw Bits and String Filter */ 1222280304Sjkim if (eax & (0x1F << 10)) 1223280304Sjkim return 0; 1224280304Sjkim if ((eax & 0x1F) == 0) 1225280304Sjkim continue; /* no data, retry... */ 1226280304Sjkim if ((eax & 0x1F) != 1) 1227280304Sjkim return 0; /* fatal failure... */ 1228280304Sjkim *output++ = (unsigned char)buf; 1229280304Sjkim count--; 1230280304Sjkim } 1231280304Sjkim *(volatile unsigned int *)&buf = 0; 1232238384Sjkim 1233280304Sjkim return 1; 1234238384Sjkim} 1235238384Sjkim 1236238384Sjkim/* Dummy but necessary function */ 1237280304Sjkimstatic int padlock_rand_status(void) 1238238384Sjkim{ 1239280304Sjkim return 1; 1240238384Sjkim} 1241238384Sjkim 1242238384Sjkim/* Prepare structure for registration */ 1243238384Sjkimstatic RAND_METHOD padlock_rand = { 1244280304Sjkim NULL, /* seed */ 1245280304Sjkim padlock_rand_bytes, /* bytes */ 1246280304Sjkim NULL, /* cleanup */ 1247280304Sjkim NULL, /* add */ 1248280304Sjkim padlock_rand_bytes, /* pseudorand */ 1249280304Sjkim padlock_rand_status, /* rand status */ 1250238384Sjkim}; 1251238384Sjkim 1252280304Sjkim# else /* !COMPILE_HW_PADLOCK */ 1253280304Sjkim# ifndef OPENSSL_NO_DYNAMIC_ENGINE 1254238384SjkimOPENSSL_EXPORT 1255280304Sjkim int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns); 1256238384SjkimOPENSSL_EXPORT 1257280304Sjkim int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns) 1258280304Sjkim{ 1259280304Sjkim return 0; 1260280304Sjkim} 1261280304Sjkim 1262238384SjkimIMPLEMENT_DYNAMIC_CHECK_FN() 1263280304Sjkim# endif 1264280304Sjkim# endif /* COMPILE_HW_PADLOCK */ 1265280304Sjkim# endif /* !OPENSSL_NO_HW_PADLOCK */ 1266280304Sjkim#endif /* !OPENSSL_NO_HW */ 1267