1296341Sdelphij/*-
2238384Sjkim * Support for VIA PadLock Advanced Cryptography Engine (ACE)
3238384Sjkim * Written by Michal Ludvig <michal@logix.cz>
4238384Sjkim *            http://www.logix.cz/michal
5238384Sjkim *
6296341Sdelphij * Big thanks to Andy Polyakov for a help with optimization,
7296341Sdelphij * assembler fixes, port to MS Windows and a lot of other
8238384Sjkim * valuable work on this engine!
9238384Sjkim */
10238384Sjkim
11238384Sjkim/* ====================================================================
12238384Sjkim * Copyright (c) 1999-2001 The OpenSSL Project.  All rights reserved.
13238384Sjkim *
14238384Sjkim * Redistribution and use in source and binary forms, with or without
15238384Sjkim * modification, are permitted provided that the following conditions
16238384Sjkim * are met:
17238384Sjkim *
18238384Sjkim * 1. Redistributions of source code must retain the above copyright
19238384Sjkim *    notice, this list of conditions and the following disclaimer.
20238384Sjkim *
21238384Sjkim * 2. Redistributions in binary form must reproduce the above copyright
22238384Sjkim *    notice, this list of conditions and the following disclaimer in
23238384Sjkim *    the documentation and/or other materials provided with the
24238384Sjkim *    distribution.
25238384Sjkim *
26238384Sjkim * 3. All advertising materials mentioning features or use of this
27238384Sjkim *    software must display the following acknowledgment:
28238384Sjkim *    "This product includes software developed by the OpenSSL Project
29238384Sjkim *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
30238384Sjkim *
31238384Sjkim * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
32238384Sjkim *    endorse or promote products derived from this software without
33238384Sjkim *    prior written permission. For written permission, please contact
34238384Sjkim *    licensing@OpenSSL.org.
35238384Sjkim *
36238384Sjkim * 5. Products derived from this software may not be called "OpenSSL"
37238384Sjkim *    nor may "OpenSSL" appear in their names without prior written
38238384Sjkim *    permission of the OpenSSL Project.
39238384Sjkim *
40238384Sjkim * 6. Redistributions of any form whatsoever must retain the following
41238384Sjkim *    acknowledgment:
42238384Sjkim *    "This product includes software developed by the OpenSSL Project
43238384Sjkim *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
44238384Sjkim *
45238384Sjkim * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
46238384Sjkim * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47238384Sjkim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48238384Sjkim * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
49238384Sjkim * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
50238384Sjkim * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
51238384Sjkim * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
52238384Sjkim * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53238384Sjkim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
54238384Sjkim * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
55238384Sjkim * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
56238384Sjkim * OF THE POSSIBILITY OF SUCH DAMAGE.
57238384Sjkim * ====================================================================
58238384Sjkim *
59238384Sjkim * This product includes cryptographic software written by Eric Young
60238384Sjkim * (eay@cryptsoft.com).  This product includes software written by Tim
61238384Sjkim * Hudson (tjh@cryptsoft.com).
62238384Sjkim *
63238384Sjkim */
64238384Sjkim
65238384Sjkim#include <stdio.h>
66238384Sjkim#include <string.h>
67238384Sjkim
68238384Sjkim#include <openssl/opensslconf.h>
69238384Sjkim#include <openssl/crypto.h>
70238384Sjkim#include <openssl/dso.h>
71238384Sjkim#include <openssl/engine.h>
72238384Sjkim#include <openssl/evp.h>
73238384Sjkim#ifndef OPENSSL_NO_AES
74296341Sdelphij# include <openssl/aes.h>
75238384Sjkim#endif
76238384Sjkim#include <openssl/rand.h>
77238384Sjkim#include <openssl/err.h>
78238384Sjkim
79238384Sjkim#ifndef OPENSSL_NO_HW
80296341Sdelphij# ifndef OPENSSL_NO_HW_PADLOCK
81238384Sjkim
82238384Sjkim/* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
83296341Sdelphij#  if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
84296341Sdelphij#   ifndef OPENSSL_NO_DYNAMIC_ENGINE
85238384Sjkim#    define DYNAMIC_ENGINE
86296341Sdelphij#   endif
87296341Sdelphij#  elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
88296341Sdelphij#   ifdef ENGINE_DYNAMIC_SUPPORT
89238384Sjkim#    define DYNAMIC_ENGINE
90296341Sdelphij#   endif
91296341Sdelphij#  else
92296341Sdelphij#   error "Only OpenSSL >= 0.9.7 is supported"
93238384Sjkim#  endif
94238384Sjkim
95296341Sdelphij/*
96296341Sdelphij * VIA PadLock AES is available *ONLY* on some x86 CPUs. Not only that it
97296341Sdelphij * doesn't exist elsewhere, but it even can't be compiled on other platforms!
98296341Sdelphij *
99296341Sdelphij * In addition, because of the heavy use of inline assembler, compiler choice
100296341Sdelphij * is limited to GCC and Microsoft C.
101296341Sdelphij */
102296341Sdelphij#  undef COMPILE_HW_PADLOCK
103296341Sdelphij#  if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
104296341Sdelphij#   if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
105238384Sjkim     (defined(_MSC_VER) && defined(_M_IX86))
106296341Sdelphij#    define COMPILE_HW_PADLOCK
107296341Sdelphij#   endif
108296341Sdelphij#  endif
109238384Sjkim
110296341Sdelphij#  ifdef OPENSSL_NO_DYNAMIC_ENGINE
111296341Sdelphij#   ifdef COMPILE_HW_PADLOCK
112296341Sdelphijstatic ENGINE *ENGINE_padlock(void);
113296341Sdelphij#   endif
114238384Sjkim
/*
 * Register the PadLock engine on OpenSSL's internal engine list.  On
 * builds where PadLock support is not compiled in (non-x86 CPUs, other
 * compilers) this is a no-op.
 */
void ENGINE_load_padlock(void)
{
#   ifdef COMPILE_HW_PADLOCK
    ENGINE *eng = ENGINE_padlock();

    if (eng == NULL)
        return;
    /* ENGINE_add() takes its own reference, so ours is dropped here. */
    ENGINE_add(eng);
    ENGINE_free(eng);
    /* Discard any error ENGINE_add() may have queued (e.g. duplicate id). */
    ERR_clear_error();
#   endif
}
127238384Sjkim
128296341Sdelphij#  endif
129238384Sjkim
130296341Sdelphij#  ifdef COMPILE_HW_PADLOCK
131296341Sdelphij/*
132296341Sdelphij * We do these includes here to avoid header problems on platforms that do
133296341Sdelphij * not have the VIA padlock anyway...
134296341Sdelphij */
135296341Sdelphij#   include <stdlib.h>
136296341Sdelphij#   ifdef _WIN32
137296341Sdelphij#    include <malloc.h>
138296341Sdelphij#    ifndef alloca
139296341Sdelphij#     define alloca _alloca
140296341Sdelphij#    endif
141296341Sdelphij#   elif defined(__GNUC__)
142296341Sdelphij#    ifndef alloca
143296341Sdelphij#     define alloca(s) __builtin_alloca(s)
144296341Sdelphij#    endif
145296341Sdelphij#   endif
146238384Sjkim
/* Function for ENGINE detection and control */
static int padlock_available(void);
static int padlock_init(ENGINE *e);

/* RNG Stuff */
static RAND_METHOD padlock_rand;

/* Cipher Stuff */
#   ifndef OPENSSL_NO_AES
static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher,
                           const int **nids, int nid);
#   endif

/* Engine names */
static const char *padlock_id = "padlock";
/* Human-readable name; composed by padlock_bind_helper() after probing. */
static char padlock_name[100];

/* Available features, both set as a side effect of padlock_available() */
static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
static int padlock_use_rng = 0; /* Random Number Generator */
#   ifndef OPENSSL_NO_AES
/*
 * NOTE(review): consulted by the AES cipher path (outside this view);
 * presumably gates copying data through the 16-byte-aligned scratch
 * area before handing it to xcrypt - confirm against the cipher code.
 */
static int padlock_aes_align_required = 1;
#   endif
170238384Sjkim
171238384Sjkim/* ===== Engine "management" functions ===== */
172238384Sjkim
173238384Sjkim/* Prepare the ENGINE structure for registration */
/*
 * Probe the CPU, then populate 'e' with this engine's id, name, init
 * function and - when the corresponding unit was detected - the cipher
 * and RAND method tables.  Returns 1 on success, 0 if any ENGINE_set_*
 * call fails.
 */
static int padlock_bind_helper(ENGINE *e)
{
    /* Check available features */
    padlock_available();

#   if 1                        /* disable RNG for now, see commentary in
                                 * vicinity of RNG code */
    padlock_use_rng = 0;
#   endif

    /* Generate a nice engine name with available features */
    BIO_snprintf(padlock_name, sizeof(padlock_name),
                 "VIA PadLock (%s, %s)",
                 padlock_use_rng ? "RNG" : "no-RNG",
                 padlock_use_ace ? "ACE" : "no-ACE");

    /*
     * Register everything or return with an error.  The cipher and RAND
     * methods are only installed when the matching hardware unit exists,
     * so a partially-equipped CPU still yields a working engine.
     */
    if (!ENGINE_set_id(e, padlock_id) ||
        !ENGINE_set_name(e, padlock_name) ||
        !ENGINE_set_init_function(e, padlock_init) ||
#   ifndef OPENSSL_NO_AES
        (padlock_use_ace && !ENGINE_set_ciphers(e, padlock_ciphers)) ||
#   endif
        (padlock_use_rng && !ENGINE_set_RAND(e, &padlock_rand))) {
        return 0;
    }

    /* Everything looks good */
    return 1;
}
204238384Sjkim
205296341Sdelphij#   ifdef OPENSSL_NO_DYNAMIC_ENGINE
206238384Sjkim
207238384Sjkim/* Constructor */
208296341Sdelphijstatic ENGINE *ENGINE_padlock(void)
209238384Sjkim{
210296341Sdelphij    ENGINE *eng = ENGINE_new();
211238384Sjkim
212296341Sdelphij    if (!eng) {
213296341Sdelphij        return NULL;
214296341Sdelphij    }
215238384Sjkim
216296341Sdelphij    if (!padlock_bind_helper(eng)) {
217296341Sdelphij        ENGINE_free(eng);
218296341Sdelphij        return NULL;
219296341Sdelphij    }
220238384Sjkim
221296341Sdelphij    return eng;
222238384Sjkim}
223238384Sjkim
224296341Sdelphij#   endif
225238384Sjkim
226238384Sjkim/* Check availability of the engine */
227296341Sdelphijstatic int padlock_init(ENGINE *e)
228238384Sjkim{
229296341Sdelphij    return (padlock_use_rng || padlock_use_ace);
230238384Sjkim}
231238384Sjkim
232296341Sdelphij/*
233296341Sdelphij * This stuff is needed if this ENGINE is being compiled into a
234296341Sdelphij * self-contained shared-library.
235238384Sjkim */
236296341Sdelphij#   ifdef DYNAMIC_ENGINE
237296341Sdelphijstatic int padlock_bind_fn(ENGINE *e, const char *id)
238238384Sjkim{
239296341Sdelphij    if (id && (strcmp(id, padlock_id) != 0)) {
240296341Sdelphij        return 0;
241296341Sdelphij    }
242238384Sjkim
243296341Sdelphij    if (!padlock_bind_helper(e)) {
244296341Sdelphij        return 0;
245296341Sdelphij    }
246238384Sjkim
247296341Sdelphij    return 1;
248238384Sjkim}
249238384Sjkim
/*
 * Standard OpenSSL glue: exports the version-check and bind entry points
 * used when this engine is loaded as a dynamic shared object.
 */
IMPLEMENT_DYNAMIC_CHECK_FN()
    IMPLEMENT_DYNAMIC_BIND_FN(padlock_bind_fn)
#   endif                       /* DYNAMIC_ENGINE */
253238384Sjkim/* ===== Here comes the "real" engine ===== */
254296341Sdelphij#   ifndef OPENSSL_NO_AES
/* Some AES-related constants */
#    define AES_BLOCK_SIZE          16
#    define AES_KEY_SIZE_128        16
#    define AES_KEY_SIZE_192        24
#    define AES_KEY_SIZE_256        32
    /*
     * Here we store the status information relevant to the current context.
     */
    /*
     * BIG FAT WARNING: Inline assembler in PADLOCK_XCRYPT_ASM() depends on
     * the order of items in this structure.  Don't blindly modify, reorder,
     * etc!  (The asm addresses the control word at offset 16 and the key
     * at offset 32 - exactly where 'cword' and 'ks' land below.)
     */
struct padlock_cipher_data {
    unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */
    union {
        unsigned int pad[4];    /* pads the control word to 16 bytes */
        struct {
            /*
             * Bit-field semantics follow VIA's ACE control word; see the
             * PadLock programming guide for the authoritative meanings.
             */
            int rounds:4;
            int dgst:1;         /* n/a in C3 */
            int align:1;        /* n/a in C3 */
            int ciphr:1;        /* n/a in C3 */
            unsigned int keygen:1;
            int interm:1;
            unsigned int encdec:1;
            int ksize:2;
        } b;
    } cword;                    /* Control word */
    AES_KEY ks;                 /* Encryption key */
};
285238384Sjkim
/*
 * Essentially this variable belongs in thread local storage.
 * Having this variable global on the other hand can only cause
 * few bogus key reloads [if any at all on single-CPU system],
 * so we accept the penalty...
 */
static volatile struct padlock_cipher_data *padlock_saved_context;
293296341Sdelphij#   endif
294238384Sjkim
295296341Sdelphij/*-
296238384Sjkim * =======================================================
297238384Sjkim * Inline assembler section(s).
298238384Sjkim * =======================================================
299238384Sjkim * Order of arguments is chosen to facilitate Windows port
300238384Sjkim * using __fastcall calling convention. If you wish to add
301238384Sjkim * more routines, keep in mind that first __fastcall
302238384Sjkim * argument is passed in %ecx and second - in %edx.
303238384Sjkim * =======================================================
304238384Sjkim */
305296341Sdelphij#   if defined(__GNUC__) && __GNUC__>=2
306238384Sjkim/*
307238384Sjkim * As for excessive "push %ebx"/"pop %ebx" found all over.
308238384Sjkim * When generating position-independent code GCC won't let
309238384Sjkim * us use "b" in assembler templates nor even respect "ebx"
310238384Sjkim * in "clobber description." Therefore the trouble...
311238384Sjkim */
312238384Sjkim
/*
 * Helper function - check if a CPUID instruction is available on this CPU
 */
static int padlock_insn_cpuid_available(void)
{
    int result = -1;

    /*
     * We're checking if the bit #21 of EFLAGS can be toggled. If yes =
     * CPUID is available.  The sequence writes EFLAGS with bit 21
     * flipped, reads it back, and XORs the attempted value against the
     * observed one: 'result' ends up 0 exactly when the bit took the
     * value we wrote, i.e. the toggle succeeded.
     */
    asm volatile ("pushf\n"
                  "popl %%eax\n"
                  "xorl $0x200000, %%eax\n"
                  "movl %%eax, %%ecx\n"
                  "andl $0x200000, %%ecx\n"
                  "pushl %%eax\n"
                  "popf\n"
                  "pushf\n"
                  "popl %%eax\n"
                  "andl $0x200000, %%eax\n"
                  "xorl %%eax, %%ecx\n"
                  "movl %%ecx, %0\n":"=r" (result)::"eax", "ecx");

    return (result == 0);
}
339238384Sjkim
/*
 * Load supported features of the CPU to see if the PadLock is available.
 * Sets padlock_use_ace / padlock_use_rng as a side effect and returns
 * non-zero iff at least one unit is usable.
 */
static int padlock_available(void)
{
    char vendor_string[16];
    unsigned int eax, edx;

    /* First check if the CPUID instruction is available at all... */
    if (!padlock_insn_cpuid_available())
        return 0;

    /*
     * Are we running on the Centaur (VIA) CPU?  CPUID leaf 0 returns the
     * 12-byte vendor id in EBX:EDX:ECX; %ebx is saved/restored by hand
     * because PIC builds reserve it (see the note above this section).
     */
    eax = 0x00000000;
    vendor_string[12] = 0;
    asm volatile ("pushl  %%ebx\n"
                  "cpuid\n"
                  "movl   %%ebx,(%%edi)\n"
                  "movl   %%edx,4(%%edi)\n"
                  "movl   %%ecx,8(%%edi)\n"
                  "popl   %%ebx":"+a" (eax):"D"(vendor_string):"ecx", "edx");
    if (strcmp(vendor_string, "CentaurHauls") != 0)
        return 0;

    /* Check for Centaur Extended Feature Flags presence */
    eax = 0xC0000000;
    asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax)::"ecx", "edx");
    if (eax < 0xC0000001)
        return 0;

    /* Read the Centaur Extended Feature Flags */
    eax = 0xC0000001;
    asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax),
                  "=d"(edx)::"ecx");

    /*
     * Fill up some flags.  Each unit uses a pair of bits (ACE: 7:6,
     * RNG: 3:2) and both bits of a pair must be set for the unit to be
     * considered usable - hence the equality compare against the mask.
     */
    padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6));
    padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2));

    return padlock_use_ace + padlock_use_rng;
}
381238384Sjkim
382296341Sdelphij#    ifndef OPENSSL_NO_AES
383296341Sdelphij#     ifndef AES_ASM
/* Our own htonl()/ntohl() */
static inline void padlock_bswapl(AES_KEY *ks)
{
    /* Byte-swap every 32-bit word of the expanded key schedule in place. */
    size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]);
    unsigned int *key = ks->rd_key;

    while (i--) {
        asm volatile ("bswapl %0":"+r" (*key));
        key++;
    }
}
395296341Sdelphij#     endif
396296341Sdelphij#    endif
397238384Sjkim
/*
 * Force key reload from memory to the CPU microcode. Loading EFLAGS from the
 * stack clears EFLAGS[30] which does the trick.
 */
static inline void padlock_reload_key(void)
{
    /* A no-op as far as the visible machine state is concerned. */
    asm volatile ("pushfl; popfl");
}
406238384Sjkim
407296341Sdelphij#    ifndef OPENSSL_NO_AES
/*
 * This is heuristic key context tracing. At first one
 * believes that one should use atomic swap instructions,
 * but it's not actually necessary. Point is that if
 * padlock_saved_context was changed by another thread
 * after we've read it and before we compare it with cdata,
 * our key *shall* be reloaded upon thread context switch
 * and we are therefore set in either case...
 */
static inline void padlock_verify_context(struct padlock_cipher_data *cdata)
{
    /*
     * The 'popfl' path executes only when EFLAGS[30] is set AND the
     * saved context differs from 'cdata'; rewriting EFLAGS forces the
     * key reload (cf. padlock_reload_key()).  Either way the saved
     * context pointer ends up set to 'cdata'.
     */
    asm volatile ("pushfl\n"
                  "       btl     $30,(%%esp)\n"
                  "       jnc     1f\n"
                  "       cmpl    %2,%1\n"
                  "       je      1f\n"
                  "       popfl\n"
                  "       subl    $4,%%esp\n"
                  "1:     addl    $4,%%esp\n"
                  "       movl    %2,%0":"+m" (padlock_saved_context)
                  :"r"(padlock_saved_context), "r"(cdata):"cc");
}
430238384Sjkim
/* Template for padlock_xcrypt_* modes */
/*
 * BIG FAT WARNING: The offsets used with 'leal' instructions describe items
 * of the 'padlock_cipher_data' structure.
 */
/*
 * The generated function executes one 'rep xcrypt*' with %ecx = block
 * count, %edx = &cdata->cword (cdata+16), %ebx = &cdata->ks (cdata+32)
 * and %esi/%edi = input/output.  %ebx is saved by hand because PIC
 * builds reserve it.  Whatever the instruction leaves in %eax is
 * returned (bound to 'iv').
 */
#     define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)     \
static inline void *name(size_t cnt,            \
        struct padlock_cipher_data *cdata,      \
        void *out, const void *inp)             \
{       void *iv;                               \
        asm volatile ( "pushl   %%ebx\n"        \
                "       leal    16(%0),%%edx\n" \
                "       leal    32(%0),%%ebx\n" \
                        rep_xcrypt "\n"         \
                "       popl    %%ebx"          \
                : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
                : "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
                : "edx", "cc", "memory");       \
        return iv;                              \
}
451238384Sjkim
/* Generate all functions with appropriate opcodes */
/*
 * The instructions are emitted as raw '.byte' sequences, presumably so
 * assemblers that lack the xcrypt* mnemonics can still build this file.
 * (The uneven indentation below is an artifact of an automated re-indent;
 * these are ordinary top-level macro invocations.)
 */
/* rep xcryptecb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")
/* rep xcryptcbc */
    PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")
/* rep xcryptcfb */
    PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")
/* rep xcryptofb */
    PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")
#    endif
/* The RNG call itself */
static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in)
{
    unsigned int eax_out;

    /*
     * 'xstore' is hand-encoded (0x0f,0xa7,0xc0): %edi is the destination
     * buffer, %edx the mode/quality selector (per VIA's spec - confirm
     * against the PadLock programming guide), status returns in %eax.
     */
    asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
                  :"=a" (eax_out), "=m"(*(unsigned *)addr)
                  :"D"(addr), "d"(edx_in)
        );

    return eax_out;
}
474238384Sjkim
/*
 * Why not inline 'rep movsd'? I failed to find information on what value in
 * Direction Flag one can expect and consequently have to apply
 * "better-safe-than-sorry" approach and assume "undefined." I could
 * explicitly clear it and restore the original value upon return from
 * padlock_aes_cipher, but it's presumably too much trouble for too little
 * gain... In case you wonder 'rep xcrypt*' instructions above are *not*
 * affected by the Direction Flag and pointers advance toward larger
 * addresses unconditionally.
 */
/*
 * Word-wise copy used to move AES material in and out of the aligned
 * scratch buffers.  'n' is expected to be a multiple of sizeof(long)
 * and both pointers long-aligned; callers in this file satisfy that.
 * Returns 'dst' (like memcpy).
 */
static inline unsigned char *padlock_memcpy(void *dst, const void *src,
                                            size_t n)
{
    long *d = dst;
    const long *s = src;

    /*
     * A 'while' guard (instead of the historical 'do/while') prevents
     * the unsigned count from wrapping around if a caller ever passes
     * n < sizeof(long): such a call now copies nothing instead of
     * looping (nearly) forever over memory it does not own.
     */
    n /= sizeof(*d);
    while (n--)
        *d++ = *s++;

    return dst;
}
498238384Sjkim
499296341Sdelphij#   elif defined(_MSC_VER)
/*
 * Unlike GCC these are real functions. In order to minimize impact
 * on performance we adhere to __fastcall calling convention in
 * order to get two first arguments passed through %ecx and %edx.
 * Which kind of suits very well, as instructions in question use
 * both %ecx and %edx as input:-)
 */
/* Emit a REP-prefixed xcrypt opcode (0xf3 0x0f 0xa7 <code>) by hand. */
#    define REP_XCRYPT(code)                \
        _asm _emit 0xf3                 \
        _asm _emit 0x0f _asm _emit 0xa7 \
        _asm _emit code

/*
 * BIG FAT WARNING: The offsets used with 'lea' instructions describe items
 * of the 'padlock_cipher_data' structure.
 */
/*
 * 'cnt' arrives in ecx (where 'rep' expects it) and 'cdata' in edx;
 * edx/ebx are then re-pointed at cdata+16 (control word) and cdata+32
 * (key).  No explicit 'return': MSVC returns whatever the instruction
 * leaves in eax.
 */
#    define PADLOCK_XCRYPT_ASM(name,code)   \
static void * __fastcall                \
        name (size_t cnt, void *cdata,  \
        void *outp, const void *inp)    \
{       _asm    mov     eax,edx         \
        _asm    lea     edx,[eax+16]    \
        _asm    lea     ebx,[eax+32]    \
        _asm    mov     edi,outp        \
        _asm    mov     esi,inp         \
        REP_XCRYPT(code)                \
}

/* One function per supported xcrypt mode (ecb/cbc/cfb/ofb opcode bytes). */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)
532238384Sjkim
/*
 * MSVC variant of padlock_xstore(): ecx (first __fastcall argument, the
 * destination) is moved to edi, edx already holds 'code'; the status the
 * hand-emitted 'xstore' leaves in eax doubles as the C return value, so
 * there is no explicit 'return'.
 */
static int __fastcall padlock_xstore(void *outp, unsigned int code)
{
    _asm    mov edi,ecx
    _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
}
538238384Sjkim
/*
 * Force a key reload: rewriting EFLAGS via pushfd/popfd clears EFLAGS[30]
 * (see the comment on the GCC version above).
 */
static void __fastcall padlock_reload_key(void)
{
    _asm pushfd
    _asm popfd
}
544238384Sjkim
/*
 * MSVC twin of the GCC padlock_verify_context(): forces a key reload (by
 * rewriting EFLAGS) only when EFLAGS[30] is set and 'cdata' (in ecx)
 * differs from the last context seen; always records ecx as the current
 * context.
 */
static void __fastcall padlock_verify_context(void *cdata)
{
    _asm    {
        pushfd
        bt  DWORD PTR[esp],30
        jnc skip
        cmp ecx,padlock_saved_context
        je  skip
        popfd
        sub esp,4
    skip:   add esp,4
        mov padlock_saved_context,ecx
    }
}
559238384Sjkim
/*
 * MSVC variant of padlock_available(): the same EFLAGS[21]/CPUID dance as
 * the GCC version above.  The multi-character constants 'tneC'/'Hrua'/
 * 'slua' are the little-endian dwords of the "CentaurHauls" vendor id.
 * The unit count (0-2) accumulated in eax is the implicit return value;
 * padlock_use_ace / padlock_use_rng are set as side effects.
 */
static int
padlock_available(void)
{
    _asm    {
        pushfd
        pop eax
        mov ecx,eax
        xor eax,1<<21
        push    eax
        popfd
        pushfd
        pop eax
        xor eax,ecx
        bt  eax,21
        jnc noluck
        mov eax,0
        cpuid
        xor eax,eax
        cmp ebx,'tneC'
        jne noluck
        cmp edx,'Hrua'
        jne noluck
        cmp ecx,'slua'
        jne noluck
        mov eax,0xC0000000
        cpuid
        mov edx,eax
        xor eax,eax
        cmp edx,0xC0000001
        jb  noluck
        mov eax,0xC0000001
        cpuid
        xor eax,eax
        bt  edx,6
        jnc skip_a
        bt  edx,7
        jnc skip_a
        mov padlock_use_ace,1
        inc eax
    skip_a: bt  edx,2
        jnc skip_r
        bt  edx,3
        jnc skip_r
        mov padlock_use_rng,1
        inc eax
    skip_r:
    noluck:
    }
}
609238384Sjkim
/*
 * MSVC variant of padlock_bswapl(): byte-swap 60 dwords at 'key' in
 * place.  EFLAGS is saved around 'cld' to leave the Direction Flag
 * untouched for the caller.  NOTE(review): 60 should equal the word
 * count of AES_KEY.rd_key (4*(AES_MAXNR+1)) - confirm against
 * <openssl/aes.h>.
 */
static void __fastcall padlock_bswapl(void *key)
{
    _asm    {
        pushfd
        cld
        mov esi,ecx
        mov edi,ecx
        mov ecx,60
    up: lodsd
        bswap   eax
        stosd
        loop    up
        popfd
    }
}
625238384Sjkim
/*
 * MS actually specifies status of Direction Flag and compiler even manages
 * to compile following as 'rep movsd' all by itself...
 */
/*
 * The '&~3U' rounds the byte count down to a multiple of 4, mirroring
 * the word-granular GCC implementation above.
 */
#    define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
#   endif
632238384Sjkim/* ===== AES encryption/decryption ===== */
#   ifndef OPENSSL_NO_AES
/*
 * Map the plain "cfb"/"ofb" NID spellings onto the 128-bit-feedback
 * variants on OpenSSL versions that only define the latter.
 */
#    if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
#     define NID_aes_128_cfb NID_aes_128_cfb128
#    endif
#    if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
#     define NID_aes_128_ofb NID_aes_128_ofb128
#    endif
#    if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
#     define NID_aes_192_cfb NID_aes_192_cfb128
#    endif
#    if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
#     define NID_aes_192_ofb NID_aes_192_ofb128
#    endif
#    if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
#     define NID_aes_256_cfb NID_aes_256_cfb128
#    endif
#    if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
#     define NID_aes_256_ofb NID_aes_256_ofb128
#    endif
/*
 * List of supported ciphers.
 */
static int padlock_cipher_nids[] = {
    NID_aes_128_ecb,
    NID_aes_128_cbc,
    NID_aes_128_cfb,
    NID_aes_128_ofb,

    NID_aes_192_ecb,
    NID_aes_192_cbc,
    NID_aes_192_cfb,
    NID_aes_192_ofb,

    NID_aes_256_ecb,
    NID_aes_256_cbc,
    NID_aes_256_cfb,
    NID_aes_256_ofb,
};

/* Number of entries in the table above. */
static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids) /
                                      sizeof(padlock_cipher_nids[0]));
673238384Sjkim
/* Function prototypes ... */
static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                                const unsigned char *iv, int enc);
static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                              const unsigned char *in, size_t nbytes);

/*
 * Round 'ptr' up to the next 16-byte boundary (identity when already
 * aligned).  NOTE: evaluates 'ptr' twice - use only with side-effect-free
 * arguments.
 */
#    define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) +         \
        ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F )      )
/*
 * The per-context padlock_cipher_data lives 16-byte-aligned inside
 * ctx->cipher_data; DECLARE_AES_EVP reserves 16 spare bytes to make
 * this realignment always possible.
 */
#    define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
        NEAREST_ALIGNED(ctx->cipher_data))

/* Per-mode EVP block sizes: ECB/CBC are block modes, CFB/OFB stream-like. */
#    define EVP_CIPHER_block_size_ECB       AES_BLOCK_SIZE
#    define EVP_CIPHER_block_size_CBC       AES_BLOCK_SIZE
#    define EVP_CIPHER_block_size_OFB       1
#    define EVP_CIPHER_block_size_CFB       1
689238384Sjkim
/*
 * Declaring so many ciphers by hand would be a pain. Instead introduce a bit
 * of preprocessor magic :-)
 */
/*
 * Positional EVP_CIPHER initializer (0.9.x struct layout - confirm
 * against <openssl/evp.h>): nid, block size, key length, IV length,
 * flags, init, do_cipher, cleanup, ctx_size, ASN1 IV set/get, ctrl,
 * app_data.  ctx_size adds 16 bytes so ALIGNED_CIPHER_DATA() can place
 * the cipher data on a 16-byte boundary.
 */
#    define DECLARE_AES_EVP(ksize,lmode,umode)      \
static const EVP_CIPHER padlock_aes_##ksize##_##lmode = {       \
        NID_aes_##ksize##_##lmode,              \
        EVP_CIPHER_block_size_##umode,  \
        AES_KEY_SIZE_##ksize,           \
        AES_BLOCK_SIZE,                 \
        0 | EVP_CIPH_##umode##_MODE,    \
        padlock_aes_init_key,           \
        padlock_aes_cipher,             \
        NULL,                           \
        sizeof(struct padlock_cipher_data) + 16,        \
        EVP_CIPHER_set_asn1_iv,         \
        EVP_CIPHER_get_asn1_iv,         \
        NULL,                           \
        NULL                            \
}
710238384Sjkim
711296341SdelphijDECLARE_AES_EVP(128, ecb, ECB);
712296341SdelphijDECLARE_AES_EVP(128, cbc, CBC);
713296341SdelphijDECLARE_AES_EVP(128, cfb, CFB);
714296341SdelphijDECLARE_AES_EVP(128, ofb, OFB);
715238384Sjkim
716296341SdelphijDECLARE_AES_EVP(192, ecb, ECB);
717296341SdelphijDECLARE_AES_EVP(192, cbc, CBC);
718296341SdelphijDECLARE_AES_EVP(192, cfb, CFB);
719296341SdelphijDECLARE_AES_EVP(192, ofb, OFB);
720238384Sjkim
721296341SdelphijDECLARE_AES_EVP(256, ecb, ECB);
722296341SdelphijDECLARE_AES_EVP(256, cbc, CBC);
723296341SdelphijDECLARE_AES_EVP(256, cfb, CFB);
724296341SdelphijDECLARE_AES_EVP(256, ofb, OFB);
725238384Sjkim
726238384Sjkimstatic int
727296341Sdelphijpadlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids,
728296341Sdelphij                int nid)
729238384Sjkim{
730296341Sdelphij    /* No specific cipher => return a list of supported nids ... */
731296341Sdelphij    if (!cipher) {
732296341Sdelphij        *nids = padlock_cipher_nids;
733296341Sdelphij        return padlock_cipher_nids_num;
734296341Sdelphij    }
735238384Sjkim
736296341Sdelphij    /* ... or the requested "cipher" otherwise */
737296341Sdelphij    switch (nid) {
738296341Sdelphij    case NID_aes_128_ecb:
739296341Sdelphij        *cipher = &padlock_aes_128_ecb;
740296341Sdelphij        break;
741296341Sdelphij    case NID_aes_128_cbc:
742296341Sdelphij        *cipher = &padlock_aes_128_cbc;
743296341Sdelphij        break;
744296341Sdelphij    case NID_aes_128_cfb:
745296341Sdelphij        *cipher = &padlock_aes_128_cfb;
746296341Sdelphij        break;
747296341Sdelphij    case NID_aes_128_ofb:
748296341Sdelphij        *cipher = &padlock_aes_128_ofb;
749296341Sdelphij        break;
750238384Sjkim
751296341Sdelphij    case NID_aes_192_ecb:
752296341Sdelphij        *cipher = &padlock_aes_192_ecb;
753296341Sdelphij        break;
754296341Sdelphij    case NID_aes_192_cbc:
755296341Sdelphij        *cipher = &padlock_aes_192_cbc;
756296341Sdelphij        break;
757296341Sdelphij    case NID_aes_192_cfb:
758296341Sdelphij        *cipher = &padlock_aes_192_cfb;
759296341Sdelphij        break;
760296341Sdelphij    case NID_aes_192_ofb:
761296341Sdelphij        *cipher = &padlock_aes_192_ofb;
762296341Sdelphij        break;
763238384Sjkim
764296341Sdelphij    case NID_aes_256_ecb:
765296341Sdelphij        *cipher = &padlock_aes_256_ecb;
766296341Sdelphij        break;
767296341Sdelphij    case NID_aes_256_cbc:
768296341Sdelphij        *cipher = &padlock_aes_256_cbc;
769296341Sdelphij        break;
770296341Sdelphij    case NID_aes_256_cfb:
771296341Sdelphij        *cipher = &padlock_aes_256_cfb;
772296341Sdelphij        break;
773296341Sdelphij    case NID_aes_256_ofb:
774296341Sdelphij        *cipher = &padlock_aes_256_ofb;
775296341Sdelphij        break;
776238384Sjkim
777296341Sdelphij    default:
778296341Sdelphij        /* Sorry, we don't support this NID */
779296341Sdelphij        *cipher = NULL;
780296341Sdelphij        return 0;
781296341Sdelphij    }
782238384Sjkim
783296341Sdelphij    return 1;
784238384Sjkim}
785238384Sjkim
/*
 * Prepare the encryption key for PadLock usage: fill in the hardware
 * control word and the (possibly software-expanded) key schedule inside
 * the 16-byte-aligned padlock_cipher_data area of "ctx".
 * Returns 1 on success, 0 on error (NULL key or unsupported key length).
 */
static int
padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                     const unsigned char *iv, int enc)
{
    struct padlock_cipher_data *cdata;
    int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;   /* bits */

    if (key == NULL)
        return 0;               /* ERROR */

    cdata = ALIGNED_CIPHER_DATA(ctx);
    memset(cdata, 0, sizeof(struct padlock_cipher_data));

    /* Prepare Control word. */
    if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
        /* OFB uses the block-encrypt primitive even when decrypting */
        cdata->cword.b.encdec = 0;
    else
        cdata->cword.b.encdec = (ctx->encrypt == 0);
    /* 10/12/14 rounds for 128/192/256-bit keys */
    cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
    cdata->cword.b.ksize = (key_len - 128) / 64;

    switch (key_len) {
    case 128:
        /*
         * PadLock can generate an extended key for AES128 in hardware
         */
        memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
        cdata->cword.b.keygen = 0;
        break;

    case 192:
    case 256:
        /*
         * Generate an extended AES key in software. Needed for AES192/AES256
         */
        /*
         * Well, the above applies to Stepping 8 CPUs and is listed as
         * hardware errata. They most likely will fix it at some point and
         * then a check for stepping would be due here.
         */
        /* CFB/OFB only ever run the encryption direction (encdec above) */
        if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
            EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE || enc)
            AES_set_encrypt_key(key, key_len, &cdata->ks);
        else
            AES_set_decrypt_key(key, key_len, &cdata->ks);
#    ifndef AES_ASM
        /*
         * OpenSSL C functions use byte-swapped extended key.
         */
        padlock_bswapl(&cdata->ks);
#    endif
        cdata->cword.b.keygen = 1;
        break;

    default:
        /* ERROR: unsupported key length */
        return 0;
    }

    /*
     * This is done to cover for cases when user reuses the
     * context for new key. The catch is that if we don't do
     * this, padlock_aes_cipher might proceed with old key...
     */
    padlock_reload_key();

    return 1;
}
855238384Sjkim
856296341Sdelphij/*-
857238384Sjkim * Simplified version of padlock_aes_cipher() used when
858238384Sjkim * 1) both input and output buffers are at aligned addresses.
859238384Sjkim * or when
860238384Sjkim * 2) running on a newer CPU that doesn't require aligned buffers.
861238384Sjkim */
862238384Sjkimstatic int
863238384Sjkimpadlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
864296341Sdelphij                              const unsigned char *in_arg, size_t nbytes)
865238384Sjkim{
866296341Sdelphij    struct padlock_cipher_data *cdata;
867296341Sdelphij    void *iv;
868238384Sjkim
869296341Sdelphij    cdata = ALIGNED_CIPHER_DATA(ctx);
870296341Sdelphij    padlock_verify_context(cdata);
871238384Sjkim
872296341Sdelphij    switch (EVP_CIPHER_CTX_mode(ctx)) {
873296341Sdelphij    case EVP_CIPH_ECB_MODE:
874296341Sdelphij        padlock_xcrypt_ecb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
875296341Sdelphij        break;
876238384Sjkim
877296341Sdelphij    case EVP_CIPH_CBC_MODE:
878296341Sdelphij        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
879296341Sdelphij        iv = padlock_xcrypt_cbc(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
880296341Sdelphij                                in_arg);
881296341Sdelphij        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
882296341Sdelphij        break;
883238384Sjkim
884296341Sdelphij    case EVP_CIPH_CFB_MODE:
885296341Sdelphij        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
886296341Sdelphij        iv = padlock_xcrypt_cfb(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
887296341Sdelphij                                in_arg);
888296341Sdelphij        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
889296341Sdelphij        break;
890238384Sjkim
891296341Sdelphij    case EVP_CIPH_OFB_MODE:
892296341Sdelphij        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
893296341Sdelphij        padlock_xcrypt_ofb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
894296341Sdelphij        memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
895296341Sdelphij        break;
896238384Sjkim
897296341Sdelphij    default:
898296341Sdelphij        return 0;
899296341Sdelphij    }
900238384Sjkim
901296341Sdelphij    memset(cdata->iv, 0, AES_BLOCK_SIZE);
902238384Sjkim
903296341Sdelphij    return 1;
904238384Sjkim}
905238384Sjkim
/*
 * Size of the bounce buffer used when re-aligning misaligned caller
 * buffers; larger chunks mean fewer xcrypt calls but more stack use.
 */
#    ifndef  PADLOCK_CHUNK
#     define PADLOCK_CHUNK  512 /* Must be a power of 2, at least 16 */
#    endif
#    if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
#     error "insane PADLOCK_CHUNK..."
#    endif
912238384Sjkim
/*
 * Re-align the arguments to 16-Bytes boundaries and run the encryption
 * function itself. This function is not AES-specific.
 *
 * Misaligned input/output is bounced through a PADLOCK_CHUNK-sized
 * aligned stack buffer.  Byte-oriented modes (CFB/OFB) additionally
 * handle partial-block state via ctx->num, both on entry (leftover
 * keystream from a previous call) and on exit (trailing bytes).
 */
static int
padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
                   const unsigned char *in_arg, size_t nbytes)
{
    struct padlock_cipher_data *cdata;
    const void *inp;
    unsigned char *out;
    void *iv;
    int inp_misaligned, out_misaligned, realign_in_loop;
    size_t chunk, allocated = 0;

    /*
     * ctx->num is maintained in byte-oriented modes, such as CFB and OFB...
     * It records how far into the current keystream block we are; finish
     * that block a byte at a time before any bulk processing.
     */
    if ((chunk = ctx->num)) {   /* borrow chunk variable */
        unsigned char *ivp = ctx->iv;

        switch (EVP_CIPHER_CTX_mode(ctx)) {
        case EVP_CIPH_CFB_MODE:
            if (chunk >= AES_BLOCK_SIZE)
                return 0;       /* bogus value */

            if (ctx->encrypt)
                /* ciphertext feedback: produced byte goes into the IV */
                while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
                    ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
                    chunk++, nbytes--;
            } else
                /* decrypting: the incoming ciphertext byte feeds back */
                while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
                    unsigned char c = *(in_arg++);
                    *(out_arg++) = c ^ ivp[chunk];
                    ivp[chunk++] = c, nbytes--;
                }

            ctx->num = chunk % AES_BLOCK_SIZE;
            break;
        case EVP_CIPH_OFB_MODE:
            if (chunk >= AES_BLOCK_SIZE)
                return 0;       /* bogus value */

            while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
                *(out_arg++) = *(in_arg++) ^ ivp[chunk];
                chunk++, nbytes--;
            }

            ctx->num = chunk % AES_BLOCK_SIZE;
            break;
            /* ECB/CBC never set ctx->num, so no cases are needed for them */
        }
    }

    if (nbytes == 0)
        return 1;
#    if 0
    if (nbytes % AES_BLOCK_SIZE)
        return 0;               /* are we expected to do tail processing? */
#    else
    /*
     * nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC modes and
     * arbitrary value in byte-oriented modes, such as CFB and OFB...
     */
#    endif

    /*
     * VIA promises CPUs that won't require alignment in the future. For now
     * padlock_aes_align_required is initialized to 1 and the condition is
     * never met...
     */
    /*
     * C7 core is capable to manage unaligned input in non-ECB[!] mode, but
     * performance penalties appear to be approximately same as for software
     * alignment below or ~3x. They promise to improve it in the future, but
     * for now we can just as well pretend that it can only handle aligned
     * input...
     */
    if (!padlock_aes_align_required && (nbytes % AES_BLOCK_SIZE) == 0)
        return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

    inp_misaligned = (((size_t)in_arg) & 0x0F);
    out_misaligned = (((size_t)out_arg) & 0x0F);

    /*
     * Note that even if output is aligned and input not, I still prefer to
     * loop instead of copy the whole input and then encrypt in one stroke.
     * This is done in order to improve L1 cache utilization...
     */
    realign_in_loop = out_misaligned | inp_misaligned;

    if (!realign_in_loop && (nbytes % AES_BLOCK_SIZE) == 0)
        return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

    /* this takes one "if" out of the loops */
    chunk = nbytes;
    chunk %= PADLOCK_CHUNK;
    if (chunk == 0)
        chunk = PADLOCK_CHUNK;

    if (out_misaligned) {
        /* optimize for small input */
        allocated = (chunk < nbytes ? PADLOCK_CHUNK : nbytes);
        /* NOTE(review): alloca use is bounded by PADLOCK_CHUNK + 16 bytes */
        out = alloca(0x10 + allocated);
        out = NEAREST_ALIGNED(out);
    } else
        out = out_arg;

    cdata = ALIGNED_CIPHER_DATA(ctx);
    padlock_verify_context(cdata);

    switch (EVP_CIPHER_CTX_mode(ctx)) {
    case EVP_CIPH_ECB_MODE:
        do {
            if (inp_misaligned)
                inp = padlock_memcpy(out, in_arg, chunk);
            else
                inp = in_arg;
            in_arg += chunk;

            padlock_xcrypt_ecb(chunk / AES_BLOCK_SIZE, cdata, out, inp);

            if (out_misaligned)
                out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
            else
                out = out_arg += chunk;

            nbytes -= chunk;
            chunk = PADLOCK_CHUNK;
        } while (nbytes);
        break;

    case EVP_CIPH_CBC_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        goto cbc_shortcut;      /* first pass: "chunk" is already sized */
        do {
            if (iv != cdata->iv)
                memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
            chunk = PADLOCK_CHUNK;
 cbc_shortcut:                 /* optimize for small input */
            if (inp_misaligned)
                inp = padlock_memcpy(out, in_arg, chunk);
            else
                inp = in_arg;
            in_arg += chunk;

            iv = padlock_xcrypt_cbc(chunk / AES_BLOCK_SIZE, cdata, out, inp);

            if (out_misaligned)
                out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
            else
                out = out_arg += chunk;

        } while (nbytes -= chunk);
        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_CFB_MODE:
        memcpy(iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        /* full blocks go through the hardware; the tail is done below */
        chunk &= ~(AES_BLOCK_SIZE - 1);
        if (chunk)
            goto cfb_shortcut;
        else
            goto cfb_skiploop;
        do {
            if (iv != cdata->iv)
                memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
            chunk = PADLOCK_CHUNK;
 cfb_shortcut:                 /* optimize for small input */
            if (inp_misaligned)
                inp = padlock_memcpy(out, in_arg, chunk);
            else
                inp = in_arg;
            in_arg += chunk;

            iv = padlock_xcrypt_cfb(chunk / AES_BLOCK_SIZE, cdata, out, inp);

            if (out_misaligned)
                out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
            else
                out = out_arg += chunk;

            nbytes -= chunk;
        } while (nbytes >= AES_BLOCK_SIZE);

 cfb_skiploop:
        /* Trailing partial block: generate one keystream block via ECB
         * (temporarily forcing the encrypt direction) and XOR by hand,
         * recording progress in ctx->num for the next call. */
        if (nbytes) {
            unsigned char *ivp = cdata->iv;

            if (iv != ivp) {
                memcpy(ivp, iv, AES_BLOCK_SIZE);
                iv = ivp;
            }
            ctx->num = nbytes;
            if (cdata->cword.b.encdec) {
                cdata->cword.b.encdec = 0;
                padlock_reload_key();
                padlock_xcrypt_ecb(1, cdata, ivp, ivp);
                cdata->cword.b.encdec = 1;
                padlock_reload_key();
                while (nbytes) {
                    unsigned char c = *(in_arg++);
                    *(out_arg++) = c ^ *ivp;
                    *(ivp++) = c, nbytes--;
                }
            } else {
                padlock_reload_key();
                padlock_xcrypt_ecb(1, cdata, ivp, ivp);
                padlock_reload_key();
                while (nbytes) {
                    *ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
                    ivp++, nbytes--;
                }
            }
        }

        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_OFB_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        chunk &= ~(AES_BLOCK_SIZE - 1);
        if (chunk)
            do {
                if (inp_misaligned)
                    inp = padlock_memcpy(out, in_arg, chunk);
                else
                    inp = in_arg;
                in_arg += chunk;

                padlock_xcrypt_ofb(chunk / AES_BLOCK_SIZE, cdata, out, inp);

                if (out_misaligned)
                    out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
                else
                    out = out_arg += chunk;

                nbytes -= chunk;
                chunk = PADLOCK_CHUNK;
            } while (nbytes >= AES_BLOCK_SIZE);

        /* Trailing partial block, handled like the CFB tail above */
        if (nbytes) {
            unsigned char *ivp = cdata->iv;

            ctx->num = nbytes;
            padlock_reload_key(); /* empirically found */
            padlock_xcrypt_ecb(1, cdata, ivp, ivp);
            padlock_reload_key(); /* empirically found */
            while (nbytes) {
                *(out_arg++) = *(in_arg++) ^ *ivp;
                ivp++, nbytes--;
            }
        }

        memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
        break;

    default:
        return 0;
    }

    /* Clean the realign buffer if it was used */
    if (out_misaligned) {
        volatile unsigned long *p = (void *)out;
        size_t n = allocated / sizeof(*p);
        while (n--)
            *p++ = 0;
    }

    memset(cdata->iv, 0, AES_BLOCK_SIZE);

    return 1;
}
1185238384Sjkim
1186296341Sdelphij#   endif                       /* OPENSSL_NO_AES */
1187238384Sjkim
1188238384Sjkim/* ===== Random Number Generator ===== */
1189238384Sjkim/*
1190238384Sjkim * This code is not engaged. The reason is that it does not comply
1191238384Sjkim * with recommendations for VIA RNG usage for secure applications
1192238384Sjkim * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
1193238384Sjkim * provide meaningful error control...
1194238384Sjkim */
1195296341Sdelphij/*
1196296341Sdelphij * Wrapper that provides an interface between the API and the raw PadLock
1197296341Sdelphij * RNG
1198296341Sdelphij */
/*
 * Wrapper that provides an interface between the API and the raw PadLock
 * RNG.  Fills "output" with "count" random bytes: 8 bytes at a time via
 * xstore while possible, then one byte at a time for the tail.
 * Returns 1 on success, 0 if the RNG is disabled or reports a failure.
 */
static int padlock_rand_bytes(unsigned char *output, int count)
{
    unsigned int status, scratch;

    /* Bulk phase: xstore(., 0) delivers 8 bytes straight into "output". */
    while (count >= 8) {
        status = padlock_xstore(output, 0);
        if (!(status & (1 << 6)))
            return 0;           /* RNG disabled */
        /* bits 10..14 cover DC bias, Raw Bits and String Filter */
        if (status & (0x1F << 10))
            return 0;
        if ((status & 0x1F) == 0)
            continue;           /* no data yet, retry... */
        if ((status & 0x1F) != 8)
            return 0;           /* fatal failure... */
        output += 8;
        count -= 8;
    }
    /* Tail phase: xstore(., 3) yields one byte per call into "scratch". */
    while (count > 0) {
        status = padlock_xstore(&scratch, 3);
        if (!(status & (1 << 6)))
            return 0;           /* RNG disabled */
        /* bits 10..14 cover DC bias, Raw Bits and String Filter */
        if (status & (0x1F << 10))
            return 0;
        if ((status & 0x1F) == 0)
            continue;           /* no data yet, retry... */
        if ((status & 0x1F) != 1)
            return 0;           /* fatal failure... */
        *output++ = (unsigned char)scratch;
        count--;
    }
    /* Scrub the last random byte off the stack; volatile defeats DSE. */
    *(volatile unsigned int *)&scratch = 0;

    return 1;
}
1235238384Sjkim
/*
 * Dummy but necessary function: RAND_METHOD requires a status callback,
 * and the hardware RNG is always considered seeded.
 */
static int padlock_rand_status(void)
{
    const int always_seeded = 1;

    return always_seeded;
}
1241238384Sjkim
/*
 * Prepare structure for registration.  The same routine backs both the
 * "bytes" and "pseudorand" slots: the hardware RNG has no separate
 * pseudo-random path, and seeding/adding entropy is meaningless for it.
 */
static RAND_METHOD padlock_rand = {
    NULL,                       /* seed */
    padlock_rand_bytes,         /* bytes */
    NULL,                       /* cleanup */
    NULL,                       /* add */
    padlock_rand_bytes,         /* pseudorand */
    padlock_rand_status,        /* rand status */
};
1251238384Sjkim
1252296341Sdelphij#  else                         /* !COMPILE_HW_PADLOCK */
1253296341Sdelphij#   ifndef OPENSSL_NO_DYNAMIC_ENGINE
/*
 * Dynamic-engine stub for builds where PadLock support is not compiled
 * in: bind_engine() reports failure so the dynamic loader skips this
 * engine instead of registering a non-functional one.
 */
OPENSSL_EXPORT
    int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns);
OPENSSL_EXPORT
    int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns)
{
    return 0;
}
1261296341Sdelphij
1262238384SjkimIMPLEMENT_DYNAMIC_CHECK_FN()
1263296341Sdelphij#   endif
1264296341Sdelphij#  endif                        /* COMPILE_HW_PADLOCK */
1265296341Sdelphij# endif                         /* !OPENSSL_NO_HW_PADLOCK */
1266296341Sdelphij#endif                          /* !OPENSSL_NO_HW */
1267