1296341Sdelphij/*- 2238384Sjkim * Support for VIA PadLock Advanced Cryptography Engine (ACE) 3238384Sjkim * Written by Michal Ludvig <michal@logix.cz> 4238384Sjkim * http://www.logix.cz/michal 5238384Sjkim * 6296341Sdelphij * Big thanks to Andy Polyakov for a help with optimization, 7296341Sdelphij * assembler fixes, port to MS Windows and a lot of other 8238384Sjkim * valuable work on this engine! 9238384Sjkim */ 10238384Sjkim 11238384Sjkim/* ==================================================================== 12238384Sjkim * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved. 13238384Sjkim * 14238384Sjkim * Redistribution and use in source and binary forms, with or without 15238384Sjkim * modification, are permitted provided that the following conditions 16238384Sjkim * are met: 17238384Sjkim * 18238384Sjkim * 1. Redistributions of source code must retain the above copyright 19238384Sjkim * notice, this list of conditions and the following disclaimer. 20238384Sjkim * 21238384Sjkim * 2. Redistributions in binary form must reproduce the above copyright 22238384Sjkim * notice, this list of conditions and the following disclaimer in 23238384Sjkim * the documentation and/or other materials provided with the 24238384Sjkim * distribution. 25238384Sjkim * 26238384Sjkim * 3. All advertising materials mentioning features or use of this 27238384Sjkim * software must display the following acknowledgment: 28238384Sjkim * "This product includes software developed by the OpenSSL Project 29238384Sjkim * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" 30238384Sjkim * 31238384Sjkim * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 32238384Sjkim * endorse or promote products derived from this software without 33238384Sjkim * prior written permission. For written permission, please contact 34238384Sjkim * licensing@OpenSSL.org. 35238384Sjkim * 36238384Sjkim * 5. 
Products derived from this software may not be called "OpenSSL" 37238384Sjkim * nor may "OpenSSL" appear in their names without prior written 38238384Sjkim * permission of the OpenSSL Project. 39238384Sjkim * 40238384Sjkim * 6. Redistributions of any form whatsoever must retain the following 41238384Sjkim * acknowledgment: 42238384Sjkim * "This product includes software developed by the OpenSSL Project 43238384Sjkim * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" 44238384Sjkim * 45238384Sjkim * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 46238384Sjkim * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47238384Sjkim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 48238384Sjkim * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 49238384Sjkim * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 50238384Sjkim * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 51238384Sjkim * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 52238384Sjkim * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 53238384Sjkim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 54238384Sjkim * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 55238384Sjkim * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 56238384Sjkim * OF THE POSSIBILITY OF SUCH DAMAGE. 57238384Sjkim * ==================================================================== 58238384Sjkim * 59238384Sjkim * This product includes cryptographic software written by Eric Young 60238384Sjkim * (eay@cryptsoft.com). This product includes software written by Tim 61238384Sjkim * Hudson (tjh@cryptsoft.com). 
62238384Sjkim * 63238384Sjkim */ 64238384Sjkim 65238384Sjkim#include <stdio.h> 66238384Sjkim#include <string.h> 67238384Sjkim 68238384Sjkim#include <openssl/opensslconf.h> 69238384Sjkim#include <openssl/crypto.h> 70238384Sjkim#include <openssl/dso.h> 71238384Sjkim#include <openssl/engine.h> 72238384Sjkim#include <openssl/evp.h> 73238384Sjkim#ifndef OPENSSL_NO_AES 74296341Sdelphij# include <openssl/aes.h> 75238384Sjkim#endif 76238384Sjkim#include <openssl/rand.h> 77238384Sjkim#include <openssl/err.h> 78238384Sjkim 79238384Sjkim#ifndef OPENSSL_NO_HW 80296341Sdelphij# ifndef OPENSSL_NO_HW_PADLOCK 81238384Sjkim 82238384Sjkim/* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */ 83296341Sdelphij# if (OPENSSL_VERSION_NUMBER >= 0x00908000L) 84296341Sdelphij# ifndef OPENSSL_NO_DYNAMIC_ENGINE 85238384Sjkim# define DYNAMIC_ENGINE 86296341Sdelphij# endif 87296341Sdelphij# elif (OPENSSL_VERSION_NUMBER >= 0x00907000L) 88296341Sdelphij# ifdef ENGINE_DYNAMIC_SUPPORT 89238384Sjkim# define DYNAMIC_ENGINE 90296341Sdelphij# endif 91296341Sdelphij# else 92296341Sdelphij# error "Only OpenSSL >= 0.9.7 is supported" 93238384Sjkim# endif 94238384Sjkim 95296341Sdelphij/* 96296341Sdelphij * VIA PadLock AES is available *ONLY* on some x86 CPUs. Not only that it 97296341Sdelphij * doesn't exist elsewhere, but it even can't be compiled on other platforms! 98296341Sdelphij * 99296341Sdelphij * In addition, because of the heavy use of inline assembler, compiler choice 100296341Sdelphij * is limited to GCC and Microsoft C. 
 */
# undef COMPILE_HW_PADLOCK
/*
 * Decide whether the PadLock code can be compiled at all: we need an x86
 * target and an inline-assembler-capable compiler (GCC or MSVC).
 */
# if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
#  if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
     (defined(_MSC_VER) && defined(_M_IX86))
#   define COMPILE_HW_PADLOCK
#  endif
# endif

# ifdef OPENSSL_NO_DYNAMIC_ENGINE
#  ifdef COMPILE_HW_PADLOCK
static ENGINE *ENGINE_padlock(void);
#  endif

/*
 * Public entry point used when the engine is statically linked into
 * libcrypto: construct the padlock ENGINE and register it with the
 * global engine list.  ENGINE_add() takes its own structural reference,
 * so the local reference is released immediately afterwards.
 */
void ENGINE_load_padlock(void)
{
/* On non-x86 CPUs it just returns. */
#  ifdef COMPILE_HW_PADLOCK
    ENGINE *toadd = ENGINE_padlock();
    if (!toadd)
        return;
    ENGINE_add(toadd);
    ENGINE_free(toadd);
    /* ENGINE_add() may leave a harmless "already added" error queued. */
    ERR_clear_error();
#  endif
}

# endif

# ifdef COMPILE_HW_PADLOCK
/*
 * We do these includes here to avoid header problems on platforms that do
 * not have the VIA padlock anyway...
 */
# include <stdlib.h>
# ifdef _WIN32
#  include <malloc.h>
#  ifndef alloca
#   define alloca _alloca
#  endif
# elif defined(__GNUC__)
#  ifndef alloca
#   define alloca(s) __builtin_alloca(s)
#  endif
# endif

/* Function for ENGINE detection and control */
static int padlock_available(void);
static int padlock_init(ENGINE *e);

/* RNG Stuff */
static RAND_METHOD padlock_rand;

/* Cipher Stuff */
# ifndef OPENSSL_NO_AES
static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher,
                           const int **nids, int nid);
# endif

/* Engine names */
static const char *padlock_id = "padlock";
static char padlock_name[100];

/* Available features, filled in by padlock_available() at bind time */
static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
static int padlock_use_rng = 0; /* Random Number Generator */
# ifndef OPENSSL_NO_AES
static int padlock_aes_align_required = 1;
# endif

/* ===== Engine "management" functions ===== */

/*
 * Prepare the ENGINE structure for registration: probe the CPU for
 * PadLock features, build a descriptive engine name, and install the
 * id/name/init/cipher/RAND callbacks.  Returns 1 on success, 0 if any
 * ENGINE_set_* call fails.
 */
static int padlock_bind_helper(ENGINE *e)
{
    /* Check available features */
    padlock_available();

# if 1                          /* disable RNG for now, see commentary in
                                 * vicinity of RNG code */
    padlock_use_rng = 0;
# endif

    /* Generate a nice engine name with available features */
    BIO_snprintf(padlock_name, sizeof(padlock_name),
                 "VIA PadLock (%s, %s)",
                 padlock_use_rng ? "RNG" : "no-RNG",
                 padlock_use_ace ? "ACE" : "no-ACE");

    /*
     * Register everything or return with an error.  The cipher and RAND
     * methods are only installed when the corresponding hardware unit
     * was detected above.
     */
    if (!ENGINE_set_id(e, padlock_id) ||
        !ENGINE_set_name(e, padlock_name) ||
        !ENGINE_set_init_function(e, padlock_init) ||
# ifndef OPENSSL_NO_AES
        (padlock_use_ace && !ENGINE_set_ciphers(e, padlock_ciphers)) ||
# endif
        (padlock_use_rng && !ENGINE_set_RAND(e, &padlock_rand))) {
        return 0;
    }

    /* Everything looks good */
    return 1;
}

# ifdef OPENSSL_NO_DYNAMIC_ENGINE

/*
 * Constructor: allocate a fresh ENGINE and populate it via
 * padlock_bind_helper().  Returns NULL on allocation or bind failure.
 */
static ENGINE *ENGINE_padlock(void)
{
    ENGINE *eng = ENGINE_new();

    if (!eng) {
        return NULL;
    }

    if (!padlock_bind_helper(eng)) {
        ENGINE_free(eng);
        return NULL;
    }

    return eng;
}

# endif

/*
 * Check availability of the engine: succeeds iff at least one PadLock
 * unit (ACE or RNG) was detected.
 */
static int padlock_init(ENGINE *e)
{
    return (padlock_use_rng || padlock_use_ace);
}

/*
 * This stuff is needed if this ENGINE is being compiled into a
 * self-contained shared-library.
 */
# ifdef DYNAMIC_ENGINE
/*
 * Dynamic-loading bind callback: invoked by the ENGINE framework when
 * this shared object is loaded.  Rejects any requested id other than
 * "padlock", then delegates to padlock_bind_helper().
 */
static int padlock_bind_fn(ENGINE *e, const char *id)
{
    if (id && (strcmp(id, padlock_id) != 0)) {
        return 0;
    }

    if (!padlock_bind_helper(e)) {
        return 0;
    }

    return 1;
}

IMPLEMENT_DYNAMIC_CHECK_FN()
IMPLEMENT_DYNAMIC_BIND_FN(padlock_bind_fn)
# endif                         /* DYNAMIC_ENGINE */

/* ===== Here comes the "real" engine ===== */
# ifndef OPENSSL_NO_AES
/* Some AES-related constants */
#  define AES_BLOCK_SIZE        16
#  define AES_KEY_SIZE_128      16
#  define AES_KEY_SIZE_192      24
#  define AES_KEY_SIZE_256      32
/*
 * Here we store the status information relevant to the current context.
 */
/*
 * BIG FAT WARNING: Inline assembler in PADLOCK_XCRYPT_ASM() depends on
 * the order of items in this structure. Don't blindly modify, reorder,
 * etc!
 */
struct padlock_cipher_data {
    unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */
    union {
        unsigned int pad[4];    /* Pads the bitfields to 16 bytes */
        struct {
            int rounds:4;       /* Number of AES rounds (10/12/14) */
            int dgst:1;         /* n/a in C3 */
            int align:1;        /* n/a in C3 */
            int ciphr:1;        /* n/a in C3 */
            unsigned int keygen:1; /* 1 = software-expanded key supplied */
            int interm:1;
            unsigned int encdec:1; /* 0 = encrypt, 1 = decrypt */
            int ksize:2;        /* Key size: (bits - 128) / 64 */
        } b;
    } cword;                    /* Control word */
    AES_KEY ks;                 /* Encryption key */
};

/*
 * Essentially this variable belongs in thread local storage.
 * Having this variable global on the other hand can only cause
 * few bogus key reloads [if any at all on single-CPU system],
 * so we accept the penalty...
 */
static volatile struct padlock_cipher_data *padlock_saved_context;
# endif

/*-
 * =======================================================
 * Inline assembler section(s).
 * =======================================================
 * Order of arguments is chosen to facilitate Windows port
 * using __fastcall calling convention. If you wish to add
 * more routines, keep in mind that first __fastcall
 * argument is passed in %ecx and second - in %edx.
 * =======================================================
 */
# if defined(__GNUC__) && __GNUC__>=2
/*
 * As for excessive "push %ebx"/"pop %ebx" found all over.
 * When generating position-independent code GCC won't let
 * us use "b" in assembler templates nor even respect "ebx"
 * in "clobber description." Therefore the trouble...
 */

/*
 * Helper function - check if a CPUID instruction is available on this CPU
 */
static int padlock_insn_cpuid_available(void)
{
    int result = -1;

    /*
     * We're checking if the bit #21 of EFLAGS can be toggled. If yes =
     * CPUID is available.
     */
    asm volatile ("pushf\n"
                  "popl %%eax\n"
                  "xorl $0x200000, %%eax\n"
                  "movl %%eax, %%ecx\n"
                  "andl $0x200000, %%ecx\n"
                  "pushl %%eax\n"
                  "popf\n"
                  "pushf\n"
                  "popl %%eax\n"
                  "andl $0x200000, %%eax\n"
                  "xorl %%eax, %%ecx\n"
                  "movl %%ecx, %0\n":"=r" (result)::"eax", "ecx");

    /* result == 0 means the bit toggled, i.e. CPUID is supported */
    return (result == 0);
}

/*
 * Load supported features of the CPU to see if the PadLock is available.
 * Sets padlock_use_ace / padlock_use_rng as a side effect and returns
 * non-zero iff at least one unit is both present and enabled.
 */
static int padlock_available(void)
{
    char vendor_string[16];
    unsigned int eax, edx;

    /* First check if the CPUID instruction is available at all... */
    if (!padlock_insn_cpuid_available())
        return 0;

    /* Are we running on the Centaur (VIA) CPU? */
    eax = 0x00000000;
    vendor_string[12] = 0;
    asm volatile ("pushl %%ebx\n"
                  "cpuid\n"
                  "movl %%ebx,(%%edi)\n"
                  "movl %%edx,4(%%edi)\n"
                  "movl %%ecx,8(%%edi)\n"
                  "popl %%ebx":"+a" (eax):"D"(vendor_string):"ecx", "edx");
    if (strcmp(vendor_string, "CentaurHauls") != 0)
        return 0;

    /* Check for Centaur Extended Feature Flags presence */
    eax = 0xC0000000;
    asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax)::"ecx", "edx");
    if (eax < 0xC0000001)
        return 0;

    /* Read the Centaur Extended Feature Flags */
    eax = 0xC0000001;
    asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax),
                  "=d"(edx)::"ecx");

    /*
     * Fill up some flags.  Each unit uses a pair of bits:
     * "present" and "enabled" — both must be set.
     */
    padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6));
    padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2));

    return padlock_use_ace + padlock_use_rng;
}

# ifndef OPENSSL_NO_AES
#  ifndef AES_ASM
/* Our own htonl()/ntohl(): byte-swap every word of the AES key schedule */
static inline void padlock_bswapl(AES_KEY *ks)
{
    size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]);
    unsigned int *key = ks->rd_key;

    while (i--) {
        asm volatile ("bswapl %0":"+r" (*key));
        key++;
    }
}
#  endif
# endif

/*
 * Force key reload from memory to the CPU microcode. Loading EFLAGS from the
Loading EFLAGS from the 400296341Sdelphij * stack clears EFLAGS[30] which does the trick. 401296341Sdelphij */ 402296341Sdelphijstatic inline void padlock_reload_key(void) 403238384Sjkim{ 404296341Sdelphij asm volatile ("pushfl; popfl"); 405238384Sjkim} 406238384Sjkim 407296341Sdelphij# ifndef OPENSSL_NO_AES 408238384Sjkim/* 409238384Sjkim * This is heuristic key context tracing. At first one 410238384Sjkim * believes that one should use atomic swap instructions, 411238384Sjkim * but it's not actually necessary. Point is that if 412238384Sjkim * padlock_saved_context was changed by another thread 413238384Sjkim * after we've read it and before we compare it with cdata, 414238384Sjkim * our key *shall* be reloaded upon thread context switch 415238384Sjkim * and we are therefore set in either case... 416238384Sjkim */ 417296341Sdelphijstatic inline void padlock_verify_context(struct padlock_cipher_data *cdata) 418238384Sjkim{ 419296341Sdelphij asm volatile ("pushfl\n" 420296341Sdelphij " btl $30,(%%esp)\n" 421296341Sdelphij " jnc 1f\n" 422296341Sdelphij " cmpl %2,%1\n" 423296341Sdelphij " je 1f\n" 424296341Sdelphij " popfl\n" 425296341Sdelphij " subl $4,%%esp\n" 426296341Sdelphij "1: addl $4,%%esp\n" 427296341Sdelphij " movl %2,%0":"+m" (padlock_saved_context) 428296341Sdelphij :"r"(padlock_saved_context), "r"(cdata):"cc"); 429238384Sjkim} 430238384Sjkim 431238384Sjkim/* Template for padlock_xcrypt_* modes */ 432296341Sdelphij/* 433296341Sdelphij * BIG FAT WARNING: The offsets used with 'leal' instructions describe items 434296341Sdelphij * of the 'padlock_cipher_data' structure. 
 */
# define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)     \
static inline void *name(size_t cnt,             \
                         struct padlock_cipher_data *cdata, \
                         void *out, const void *inp) \
{       void *iv;                                \
        asm volatile ( "pushl %%ebx\n"           \
                "       leal 16(%0),%%edx\n"     \
                "       leal 32(%0),%%ebx\n"     \
                rep_xcrypt "\n"                  \
                "       popl %%ebx"              \
                : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
                : "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
                : "edx", "cc", "memory");        \
        return iv;                               \
}

/*
 * Generate all functions with appropriate opcodes.  The 'rep xcrypt*'
 * instructions are emitted as raw byte sequences because older
 * assemblers do not know their mnemonics.
 */
/* rep xcryptecb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")
/* rep xcryptcbc */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")
/* rep xcryptcfb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")
/* rep xcryptofb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")
# endif
/* The RNG call itself: 'xstore' writes random bytes to addr, returns %eax */
static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in)
{
    unsigned int eax_out;

    asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
                  :"=a" (eax_out), "=m"(*(unsigned *)addr)
                  :"D"(addr), "d"(edx_in)
        );

    return eax_out;
}

/*
 * Why not inline 'rep movsd'?
 * I failed to find information on what value in
 * Direction Flag one can expect and consequently have to apply
 * "better-safe-than-sorry" approach and assume "undefined." I could
 * explicitly clear it and restore the original value upon return from
 * padlock_aes_cipher, but it's presumably too much trouble for too little
 * gain... In case you wonder 'rep xcrypt*' instructions above are *not*
 * affected by the Direction Flag and pointers advance toward larger
 * addresses unconditionally.
 */
/*
 * Word-granular copy; n is assumed to be a positive multiple of
 * sizeof(long) — the do/while would over-run otherwise.
 */
static inline unsigned char *padlock_memcpy(void *dst, const void *src,
                                            size_t n)
{
    long *d = dst;
    const long *s = src;

    n /= sizeof(*d);
    do {
        *d++ = *s++;
    } while (--n);

    return dst;
}

# elif defined(_MSC_VER)
/*
 * Unlike GCC these are real functions. In order to minimize impact
 * on performance we adhere to __fastcall calling convention in
 * order to get two first arguments passed through %ecx and %edx.
 * Which kind of suits very well, as instructions in question use
 * both %ecx and %edx as input:-)
 */
/* Emit the raw 'rep xcrypt*' opcode bytes (MASM has no mnemonic) */
#  define REP_XCRYPT(code)              \
    _asm _emit 0xf3                     \
    _asm _emit 0x0f _asm _emit 0xa7     \
    _asm _emit code

/*
 * BIG FAT WARNING: The offsets used with 'lea' instructions describe items
 * of the 'padlock_cipher_data' structure.
 */
#  define PADLOCK_XCRYPT_ASM(name,code)  \
static void * __fastcall                 \
        name (size_t cnt, void *cdata,   \
        void *outp, const void *inp)     \
{       _asm    mov     eax,edx          \
        _asm    lea     edx,[eax+16]     \
        _asm    lea     ebx,[eax+32]     \
        _asm    mov     edi,outp         \
        _asm    mov     esi,inp          \
        REP_XCRYPT(code)                 \
}

/* Generate all xcrypt mode functions with the appropriate opcodes */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)

/* The RNG call itself: 'xstore' with destination in %edi */
static int __fastcall padlock_xstore(void *outp, unsigned int code)
{
    _asm mov edi,ecx
    _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
}

/* Force key reload: loading EFLAGS from the stack clears EFLAGS[30] */
static void __fastcall padlock_reload_key(void)
{
    _asm pushfd
    _asm popfd
}

/*
 * Heuristic key context tracing — MSC counterpart of the GCC version
 * above; see the commentary there.
 */
static void __fastcall padlock_verify_context(void *cdata)
{
    _asm {
        pushfd
        bt  DWORD PTR[esp],30
        jnc skip
        cmp ecx,padlock_saved_context
        je  skip
        popfd
        sub esp,4
 skip:  add esp,4
        mov padlock_saved_context,ecx
    }
}

/*
 * CPUID-based feature probe — MSC counterpart of the GCC version above.
 * Compares the vendor string against "CentaurHauls" word by word and
 * sets padlock_use_ace / padlock_use_rng from the extended feature
 * flags; the return value accumulates in %eax.
 */
static int
padlock_available(void)
{
    _asm {
        pushfd
        pop eax
        mov ecx,eax
        xor eax,1<<21
        push    eax
        popfd
        pushfd
        pop eax
        xor eax,ecx
        bt  eax,21
        jnc noluck
        mov eax,0
        cpuid
        xor eax,eax
        cmp ebx,'tneC'
        jne noluck
        cmp edx,'Hrua'
        jne noluck
        cmp ecx,'slua'
        jne noluck
        mov eax,0xC0000000
        cpuid
        mov edx,eax
        xor eax,eax
        cmp edx,0xC0000001
        jb  noluck
        mov eax,0xC0000001
        cpuid
        xor eax,eax
        bt  edx,6
        jnc skip_a
        bt  edx,7
        jnc skip_a
        mov padlock_use_ace,1
        inc eax
 skip_a:bt  edx,2
        jnc skip_r
        bt  edx,3
        jnc skip_r
        mov padlock_use_rng,1
        inc eax
 skip_r:
 noluck:
    }
}

/* Byte-swap the 60-word AES key schedule in place */
static void __fastcall padlock_bswapl(void *key)
{
    _asm {
        pushfd
        cld
        mov esi,ecx
        mov edi,ecx
        mov ecx,60
 up:    lodsd
        bswap   eax
        stosd
        loop    up
        popfd
    }
}

/*
 * MS actually specifies status of Direction Flag and compiler even manages
 * to compile following as 'rep movsd' all by itself...
 */
#  define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
# endif

/* ===== AES encryption/decryption ===== */
# ifndef OPENSSL_NO_AES
/*
 * Map the *_cfb128/*_ofb128 NID names (newer OpenSSL) onto the short
 * names used below, so one source serves both 0.9.7 and 0.9.8.
 */
#  if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
#   define NID_aes_128_cfb NID_aes_128_cfb128
#  endif
#  if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
#   define NID_aes_128_ofb NID_aes_128_ofb128
#  endif
#  if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
#   define NID_aes_192_cfb NID_aes_192_cfb128
#  endif
#  if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
#   define NID_aes_192_ofb NID_aes_192_ofb128
#  endif
#  if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
#   define NID_aes_256_cfb NID_aes_256_cfb128
#  endif
#  if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
#   define NID_aes_256_ofb NID_aes_256_ofb128
#  endif
/*
 * List of supported ciphers.
 */
static int padlock_cipher_nids[] = {
    NID_aes_128_ecb,
    NID_aes_128_cbc,
    NID_aes_128_cfb,
    NID_aes_128_ofb,

    NID_aes_192_ecb,
    NID_aes_192_cbc,
    NID_aes_192_cfb,
    NID_aes_192_ofb,

    NID_aes_256_ecb,
    NID_aes_256_cbc,
    NID_aes_256_cfb,
    NID_aes_256_ofb,
};

static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids) /
                                      sizeof(padlock_cipher_nids[0]));

/* Function prototypes ...
*/ 675238384Sjkimstatic int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, 676296341Sdelphij const unsigned char *iv, int enc); 677238384Sjkimstatic int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, 678296341Sdelphij const unsigned char *in, size_t nbytes); 679238384Sjkim 680296341Sdelphij# define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) + \ 681296341Sdelphij ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) ) 682296341Sdelphij# define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\ 683296341Sdelphij NEAREST_ALIGNED(ctx->cipher_data)) 684238384Sjkim 685296341Sdelphij# define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE 686296341Sdelphij# define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE 687296341Sdelphij# define EVP_CIPHER_block_size_OFB 1 688296341Sdelphij# define EVP_CIPHER_block_size_CFB 1 689238384Sjkim 690296341Sdelphij/* 691296341Sdelphij * Declaring so many ciphers by hand would be a pain. Instead introduce a bit 692296341Sdelphij * of preprocessor magic :-) 693296341Sdelphij */ 694296341Sdelphij# define DECLARE_AES_EVP(ksize,lmode,umode) \ 695296341Sdelphijstatic const EVP_CIPHER padlock_aes_##ksize##_##lmode = { \ 696296341Sdelphij NID_aes_##ksize##_##lmode, \ 697296341Sdelphij EVP_CIPHER_block_size_##umode, \ 698296341Sdelphij AES_KEY_SIZE_##ksize, \ 699296341Sdelphij AES_BLOCK_SIZE, \ 700296341Sdelphij 0 | EVP_CIPH_##umode##_MODE, \ 701296341Sdelphij padlock_aes_init_key, \ 702296341Sdelphij padlock_aes_cipher, \ 703296341Sdelphij NULL, \ 704296341Sdelphij sizeof(struct padlock_cipher_data) + 16, \ 705296341Sdelphij EVP_CIPHER_set_asn1_iv, \ 706296341Sdelphij EVP_CIPHER_get_asn1_iv, \ 707296341Sdelphij NULL, \ 708296341Sdelphij NULL \ 709238384Sjkim} 710238384Sjkim 711296341SdelphijDECLARE_AES_EVP(128, ecb, ECB); 712296341SdelphijDECLARE_AES_EVP(128, cbc, CBC); 713296341SdelphijDECLARE_AES_EVP(128, cfb, CFB); 714296341SdelphijDECLARE_AES_EVP(128, ofb, OFB); 715238384Sjkim 716296341SdelphijDECLARE_AES_EVP(192, ecb, 
ECB); 717296341SdelphijDECLARE_AES_EVP(192, cbc, CBC); 718296341SdelphijDECLARE_AES_EVP(192, cfb, CFB); 719296341SdelphijDECLARE_AES_EVP(192, ofb, OFB); 720238384Sjkim 721296341SdelphijDECLARE_AES_EVP(256, ecb, ECB); 722296341SdelphijDECLARE_AES_EVP(256, cbc, CBC); 723296341SdelphijDECLARE_AES_EVP(256, cfb, CFB); 724296341SdelphijDECLARE_AES_EVP(256, ofb, OFB); 725238384Sjkim 726238384Sjkimstatic int 727296341Sdelphijpadlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, 728296341Sdelphij int nid) 729238384Sjkim{ 730296341Sdelphij /* No specific cipher => return a list of supported nids ... */ 731296341Sdelphij if (!cipher) { 732296341Sdelphij *nids = padlock_cipher_nids; 733296341Sdelphij return padlock_cipher_nids_num; 734296341Sdelphij } 735238384Sjkim 736296341Sdelphij /* ... or the requested "cipher" otherwise */ 737296341Sdelphij switch (nid) { 738296341Sdelphij case NID_aes_128_ecb: 739296341Sdelphij *cipher = &padlock_aes_128_ecb; 740296341Sdelphij break; 741296341Sdelphij case NID_aes_128_cbc: 742296341Sdelphij *cipher = &padlock_aes_128_cbc; 743296341Sdelphij break; 744296341Sdelphij case NID_aes_128_cfb: 745296341Sdelphij *cipher = &padlock_aes_128_cfb; 746296341Sdelphij break; 747296341Sdelphij case NID_aes_128_ofb: 748296341Sdelphij *cipher = &padlock_aes_128_ofb; 749296341Sdelphij break; 750238384Sjkim 751296341Sdelphij case NID_aes_192_ecb: 752296341Sdelphij *cipher = &padlock_aes_192_ecb; 753296341Sdelphij break; 754296341Sdelphij case NID_aes_192_cbc: 755296341Sdelphij *cipher = &padlock_aes_192_cbc; 756296341Sdelphij break; 757296341Sdelphij case NID_aes_192_cfb: 758296341Sdelphij *cipher = &padlock_aes_192_cfb; 759296341Sdelphij break; 760296341Sdelphij case NID_aes_192_ofb: 761296341Sdelphij *cipher = &padlock_aes_192_ofb; 762296341Sdelphij break; 763238384Sjkim 764296341Sdelphij case NID_aes_256_ecb: 765296341Sdelphij *cipher = &padlock_aes_256_ecb; 766296341Sdelphij break; 767296341Sdelphij case NID_aes_256_cbc: 
/* Prepare the encryption key for PadLock usage */
/*
 * EVP init callback.  Builds the PadLock control word and key schedule in
 * the 16-byte-aligned padlock_cipher_data area of "ctx".
 *
 * Note: "iv" is not referenced here; the cipher routines read the IV from
 * ctx->iv at encryption time.  Returns 1 on success, 0 on error (NULL key
 * or unsupported key length).
 */
static int
padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                     const unsigned char *iv, int enc)
{
    struct padlock_cipher_data *cdata;
    /* EVP reports key length in bytes; PadLock thinks in bits. */
    int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;

    if (key == NULL)
        return 0;               /* ERROR */

    cdata = ALIGNED_CIPHER_DATA(ctx);
    memset(cdata, 0, sizeof(struct padlock_cipher_data));

    /* Prepare Control word. */
    /*
     * OFB only ever uses the block cipher in its forward (encrypt)
     * direction; for other modes encdec = 1 selects decryption.
     */
    if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
        cdata->cword.b.encdec = 0;
    else
        cdata->cword.b.encdec = (ctx->encrypt == 0);
    /* 128/192/256-bit keys -> 10/12/14 rounds, ksize code 0/1/2. */
    cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
    cdata->cword.b.ksize = (key_len - 128) / 64;

    switch (key_len) {
    case 128:
        /*
         * PadLock can generate an extended key for AES128 in hardware
         */
        memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
        cdata->cword.b.keygen = 0;
        break;

    case 192:
    case 256:
        /*
         * Generate an extended AES key in software. Needed for AES192/AES256
         */
        /*
         * Well, the above applies to Stepping 8 CPUs and is listed as
         * hardware errata. They most likely will fix it at some point and
         * then a check for stepping would be due here.
         */
        /* CFB/OFB always use the forward key schedule, as does encryption. */
        if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
            EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE || enc)
            AES_set_encrypt_key(key, key_len, &cdata->ks);
        else
            AES_set_decrypt_key(key, key_len, &cdata->ks);
# ifndef AES_ASM
        /*
         * OpenSSL C functions use byte-swapped extended key.
         */
        padlock_bswapl(&cdata->ks);
# endif
        cdata->cword.b.keygen = 1;
        break;

    default:
        /* ERROR */
        return 0;
    }

    /*
     * This is done to cover for cases when user reuses the
     * context for new key. The catch is that if we don't do
     * this, padlock_aes_cipher might proceed with old key...
     */
    padlock_reload_key();

    return 1;
}
The catch is that if we don't do 849296341Sdelphij * this, padlock_eas_cipher might proceed with old key... 850296341Sdelphij */ 851296341Sdelphij padlock_reload_key(); 852238384Sjkim 853296341Sdelphij return 1; 854238384Sjkim} 855238384Sjkim 856296341Sdelphij/*- 857238384Sjkim * Simplified version of padlock_aes_cipher() used when 858238384Sjkim * 1) both input and output buffers are at aligned addresses. 859238384Sjkim * or when 860238384Sjkim * 2) running on a newer CPU that doesn't require aligned buffers. 861238384Sjkim */ 862238384Sjkimstatic int 863238384Sjkimpadlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, 864296341Sdelphij const unsigned char *in_arg, size_t nbytes) 865238384Sjkim{ 866296341Sdelphij struct padlock_cipher_data *cdata; 867296341Sdelphij void *iv; 868238384Sjkim 869296341Sdelphij cdata = ALIGNED_CIPHER_DATA(ctx); 870296341Sdelphij padlock_verify_context(cdata); 871238384Sjkim 872296341Sdelphij switch (EVP_CIPHER_CTX_mode(ctx)) { 873296341Sdelphij case EVP_CIPH_ECB_MODE: 874296341Sdelphij padlock_xcrypt_ecb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg); 875296341Sdelphij break; 876238384Sjkim 877296341Sdelphij case EVP_CIPH_CBC_MODE: 878296341Sdelphij memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); 879296341Sdelphij iv = padlock_xcrypt_cbc(nbytes / AES_BLOCK_SIZE, cdata, out_arg, 880296341Sdelphij in_arg); 881296341Sdelphij memcpy(ctx->iv, iv, AES_BLOCK_SIZE); 882296341Sdelphij break; 883238384Sjkim 884296341Sdelphij case EVP_CIPH_CFB_MODE: 885296341Sdelphij memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); 886296341Sdelphij iv = padlock_xcrypt_cfb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, 887296341Sdelphij in_arg); 888296341Sdelphij memcpy(ctx->iv, iv, AES_BLOCK_SIZE); 889296341Sdelphij break; 890238384Sjkim 891296341Sdelphij case EVP_CIPH_OFB_MODE: 892296341Sdelphij memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); 893296341Sdelphij padlock_xcrypt_ofb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg); 894296341Sdelphij 
memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE); 895296341Sdelphij break; 896238384Sjkim 897296341Sdelphij default: 898296341Sdelphij return 0; 899296341Sdelphij } 900238384Sjkim 901296341Sdelphij memset(cdata->iv, 0, AES_BLOCK_SIZE); 902238384Sjkim 903296341Sdelphij return 1; 904238384Sjkim} 905238384Sjkim 906296341Sdelphij# ifndef PADLOCK_CHUNK 907296341Sdelphij# define PADLOCK_CHUNK 512 /* Must be a power of 2 larger than 16 */ 908296341Sdelphij# endif 909296341Sdelphij# if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1) 910296341Sdelphij# error "insane PADLOCK_CHUNK..." 911296341Sdelphij# endif 912238384Sjkim 913296341Sdelphij/* 914296341Sdelphij * Re-align the arguments to 16-Bytes boundaries and run the encryption 915296341Sdelphij * function itself. This function is not AES-specific. 916296341Sdelphij */ 917238384Sjkimstatic int 918238384Sjkimpadlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, 919296341Sdelphij const unsigned char *in_arg, size_t nbytes) 920238384Sjkim{ 921296341Sdelphij struct padlock_cipher_data *cdata; 922296341Sdelphij const void *inp; 923296341Sdelphij unsigned char *out; 924296341Sdelphij void *iv; 925296341Sdelphij int inp_misaligned, out_misaligned, realign_in_loop; 926296341Sdelphij size_t chunk, allocated = 0; 927238384Sjkim 928296341Sdelphij /* 929296341Sdelphij * ctx->num is maintained in byte-oriented modes, such as CFB and OFB... 
/*
 * Re-align the arguments to 16-Bytes boundaries and run the encryption
 * function itself. This function is not AES-specific.
 *
 * Strategy: finish any partially consumed keystream block bytewise first
 * (CFB/OFB), then hand whole blocks to the hardware in PADLOCK_CHUNK-sized
 * pieces, bouncing through an aligned alloca() buffer when either argument
 * is misaligned.  Returns 1 on success, 0 on bad state/mode.
 */
static int
padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
                   const unsigned char *in_arg, size_t nbytes)
{
    struct padlock_cipher_data *cdata;
    const void *inp;
    unsigned char *out;
    void *iv;
    int inp_misaligned, out_misaligned, realign_in_loop;
    size_t chunk, allocated = 0;

    /*
     * ctx->num is maintained in byte-oriented modes, such as CFB and OFB...
     * Non-zero means a previous call left 16-ctx->num unused keystream
     * bytes in ctx->iv; consume them before touching the hardware.
     */
    if ((chunk = ctx->num)) {   /* borrow chunk variable */
        unsigned char *ivp = ctx->iv;

        switch (EVP_CIPHER_CTX_mode(ctx)) {
        case EVP_CIPH_CFB_MODE:
            if (chunk >= AES_BLOCK_SIZE)
                return 0;       /* bogus value */

            if (ctx->encrypt)
                /* CFB encrypt: produced ciphertext feeds back into the IV. */
                while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
                    ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
                    chunk++, nbytes--;
            } else
                /* CFB decrypt: incoming ciphertext feeds back instead. */
                while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
                    unsigned char c = *(in_arg++);
                    *(out_arg++) = c ^ ivp[chunk];
                    ivp[chunk++] = c, nbytes--;
                }

            ctx->num = chunk % AES_BLOCK_SIZE;
            break;
        case EVP_CIPH_OFB_MODE:
            if (chunk >= AES_BLOCK_SIZE)
                return 0;       /* bogus value */

            /* OFB: keystream does not depend on the data bytes. */
            while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
                *(out_arg++) = *(in_arg++) ^ ivp[chunk];
                chunk++, nbytes--;
            }

            ctx->num = chunk % AES_BLOCK_SIZE;
            break;
        }
    }

    if (nbytes == 0)
        return 1;
# if 0
    if (nbytes % AES_BLOCK_SIZE)
        return 0;               /* are we expected to do tail processing? */
# else
    /*
     * nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC modes and
     * arbitrary value in byte-oriented modes, such as CFB and OFB...
     */
# endif

    /*
     * VIA promises CPUs that won't require alignment in the future. For now
     * padlock_aes_align_required is initialized to 1 and the condition is
     * never met...
     */
    /*
     * C7 core is capable to manage unaligned input in non-ECB[!] mode, but
     * performance penalties appear to be approximately same as for software
     * alignment below or ~3x. They promise to improve it in the future, but
     * for now we can just as well pretend that it can only handle aligned
     * input...
     */
    if (!padlock_aes_align_required && (nbytes % AES_BLOCK_SIZE) == 0)
        return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

    inp_misaligned = (((size_t)in_arg) & 0x0F);
    out_misaligned = (((size_t)out_arg) & 0x0F);

    /*
     * Note that even if output is aligned and input not, I still prefer to
     * loop instead of copy the whole input and then encrypt in one stroke.
     * This is done in order to improve L1 cache utilization...
     */
    realign_in_loop = out_misaligned | inp_misaligned;

    if (!realign_in_loop && (nbytes % AES_BLOCK_SIZE) == 0)
        return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

    /* this takes one "if" out of the loops */
    /* First pass handles the remainder; all later passes are full chunks. */
    chunk = nbytes;
    chunk %= PADLOCK_CHUNK;
    if (chunk == 0)
        chunk = PADLOCK_CHUNK;

    if (out_misaligned) {
        /* optimize for small input: don't allocate more than needed */
        allocated = (chunk < nbytes ? PADLOCK_CHUNK : nbytes);
        out = alloca(0x10 + allocated);
        out = NEAREST_ALIGNED(out);
    } else
        out = out_arg;

    cdata = ALIGNED_CIPHER_DATA(ctx);
    padlock_verify_context(cdata);

    switch (EVP_CIPHER_CTX_mode(ctx)) {
    case EVP_CIPH_ECB_MODE:
        do {
            if (inp_misaligned)
                inp = padlock_memcpy(out, in_arg, chunk);
            else
                inp = in_arg;
            in_arg += chunk;

            padlock_xcrypt_ecb(chunk / AES_BLOCK_SIZE, cdata, out, inp);

            if (out_misaligned)
                out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
            else
                out = out_arg += chunk;

            nbytes -= chunk;
            chunk = PADLOCK_CHUNK;
        } while (nbytes);
        break;

    case EVP_CIPH_CBC_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        /* Jump into the loop body: first pass skips the IV re-load. */
        goto cbc_shortcut;
        do {
            if (iv != cdata->iv)
                memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
            chunk = PADLOCK_CHUNK;
 cbc_shortcut:                 /* optimize for small input */
            if (inp_misaligned)
                inp = padlock_memcpy(out, in_arg, chunk);
            else
                inp = in_arg;
            in_arg += chunk;

            iv = padlock_xcrypt_cbc(chunk / AES_BLOCK_SIZE, cdata, out, inp);

            if (out_misaligned)
                out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
            else
                out = out_arg += chunk;

        } while (nbytes -= chunk);
        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_CFB_MODE:
        memcpy(iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        /* Full blocks go to the hardware; any tail is handled bytewise. */
        chunk &= ~(AES_BLOCK_SIZE - 1);
        if (chunk)
            goto cfb_shortcut;
        else
            goto cfb_skiploop;
        do {
            if (iv != cdata->iv)
                memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
            chunk = PADLOCK_CHUNK;
 cfb_shortcut:                 /* optimize for small input */
            if (inp_misaligned)
                inp = padlock_memcpy(out, in_arg, chunk);
            else
                inp = in_arg;
            in_arg += chunk;

            iv = padlock_xcrypt_cfb(chunk / AES_BLOCK_SIZE, cdata, out, inp);

            if (out_misaligned)
                out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
            else
                out = out_arg += chunk;

            nbytes -= chunk;
        } while (nbytes >= AES_BLOCK_SIZE);

 cfb_skiploop:
        if (nbytes) {
            unsigned char *ivp = cdata->iv;

            if (iv != ivp) {
                memcpy(ivp, iv, AES_BLOCK_SIZE);
                iv = ivp;
            }
            ctx->num = nbytes;
            if (cdata->cword.b.encdec) {
                /*
                 * Tail keystream needs a forward encryption of the IV;
                 * temporarily flip the control word to encrypt mode.
                 */
                cdata->cword.b.encdec = 0;
                padlock_reload_key();
                padlock_xcrypt_ecb(1, cdata, ivp, ivp);
                cdata->cword.b.encdec = 1;
                padlock_reload_key();
                while (nbytes) {
                    unsigned char c = *(in_arg++);
                    *(out_arg++) = c ^ *ivp;
                    *(ivp++) = c, nbytes--;
                }
            } else {
                padlock_reload_key();
                padlock_xcrypt_ecb(1, cdata, ivp, ivp);
                padlock_reload_key();
                while (nbytes) {
                    *ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
                    ivp++, nbytes--;
                }
            }
        }

        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_OFB_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        chunk &= ~(AES_BLOCK_SIZE - 1);
        if (chunk)
            do {
                if (inp_misaligned)
                    inp = padlock_memcpy(out, in_arg, chunk);
                else
                    inp = in_arg;
                in_arg += chunk;

                padlock_xcrypt_ofb(chunk / AES_BLOCK_SIZE, cdata, out, inp);

                if (out_misaligned)
                    out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
                else
                    out = out_arg += chunk;

                nbytes -= chunk;
                chunk = PADLOCK_CHUNK;
            } while (nbytes >= AES_BLOCK_SIZE);

        if (nbytes) {
            /* Bytewise tail: make one fresh keystream block from the IV. */
            unsigned char *ivp = cdata->iv;

            ctx->num = nbytes;
            padlock_reload_key();   /* empirically found */
            padlock_xcrypt_ecb(1, cdata, ivp, ivp);
            padlock_reload_key();   /* empirically found */
            while (nbytes) {
                *(out_arg++) = *(in_arg++) ^ *ivp;
                ivp++, nbytes--;
            }
        }

        memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
        break;

    default:
        return 0;
    }

    /* Clean the realign buffer if it was used */
    /* volatile stores keep the wipe from being optimized away. */
    if (out_misaligned) {
        volatile unsigned long *p = (void *)out;
        size_t n = allocated / sizeof(*p);
        while (n--)
            *p++ = 0;
    }

    memset(cdata->iv, 0, AES_BLOCK_SIZE);

    return 1;
}
# endif                         /* OPENSSL_NO_AES */

/* ===== Random Number Generator ===== */
/*
 * This code is not engaged. The reason is that it does not comply
 * with recommendations for VIA RNG usage for secure applications
 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
 * provide meaningful error control...
 */

/*
 * Wrapper that provides an interface between the API and the raw PadLock
 * RNG
 */
/*
 * Fills "output" with "count" bytes from the hardware RNG via xstore.
 * The status word (eax) is checked after every store: bit 6 = RNG enabled,
 * bits 10-14 = self-test failure flags, low 5 bits = number of valid bytes
 * actually delivered (interpretation per the code's own checks below).
 * Returns 1 on success, 0 on any RNG failure.
 */
static int padlock_rand_bytes(unsigned char *output, int count)
{
    unsigned int eax, buf;

    /* Fast path: pull 8 bytes at a time straight into the caller's buffer. */
    while (count >= 8) {
        eax = padlock_xstore(output, 0);
        if (!(eax & (1 << 6)))
            return 0;           /* RNG disabled */
        /* this ---vv--- covers DC bias, Raw Bits and String Filter */
        if (eax & (0x1F << 10))
            return 0;
        if ((eax & 0x1F) == 0)
            continue;           /* no data, retry... */
        if ((eax & 0x1F) != 8)
            return 0;           /* fatal failure... */
        output += 8;
        count -= 8;
    }
    /* Tail: fetch one byte at a time into a local word, then copy out. */
    while (count > 0) {
        eax = padlock_xstore(&buf, 3);
        if (!(eax & (1 << 6)))
            return 0;           /* RNG disabled */
        /* this ---vv--- covers DC bias, Raw Bits and String Filter */
        if (eax & (0x1F << 10))
            return 0;
        if ((eax & 0x1F) == 0)
            continue;           /* no data, retry... */
        if ((eax & 0x1F) != 1)
            return 0;           /* fatal failure... */
        *output++ = (unsigned char)buf;
        count--;
    }
    /* Scrub the stack copy; volatile store defeats dead-store elimination. */
    *(volatile unsigned int *)&buf = 0;

    return 1;
}

/* Dummy but necessary function */
static int padlock_rand_status(void)
{
    return 1;
}

/* Prepare structure for registration */
static RAND_METHOD padlock_rand = {
    NULL,                       /* seed */
    padlock_rand_bytes,         /* bytes */
    NULL,                       /* cleanup */
    NULL,                       /* add */
    padlock_rand_bytes,         /* pseudorand */
    padlock_rand_status,        /* rand status */
};

# else                          /* !COMPILE_HW_PADLOCK */
#  ifndef OPENSSL_NO_DYNAMIC_ENGINE
/*
 * Stub bind_engine for builds where PadLock support is not compiled in:
 * returning 0 makes dynamic loading of this engine fail gracefully.
 */
OPENSSL_EXPORT
    int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns);
OPENSSL_EXPORT
    int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns)
{
    return 0;
}

IMPLEMENT_DYNAMIC_CHECK_FN()
#  endif
# endif                         /* COMPILE_HW_PADLOCK */
1265296341Sdelphij# endif /* !OPENSSL_NO_HW_PADLOCK */ 1266296341Sdelphij#endif /* !OPENSSL_NO_HW */ 1267