/*-
 * Support for VIA PadLock Advanced Cryptography Engine (ACE)
 * Written by Michal Ludvig <michal@logix.cz>
 *            http://www.logix.cz/michal
 *
 * Big thanks to Andy Polyakov for his help with optimization,
 * assembler fixes, the port to MS Windows and a lot of other
 * valuable work on this engine!
 */

/* ====================================================================
 * Copyright (c) 1999-2001 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    licensing@OpenSSL.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 *
 * This product includes cryptographic software written by Eric Young
 * (eay@cryptsoft.com).  This product includes software written by Tim
 * Hudson (tjh@cryptsoft.com).
 *
 */

#include <stdio.h>
#include <string.h>

#include <openssl/opensslconf.h>
#include <openssl/crypto.h>
#include <openssl/dso.h>
#include <openssl/engine.h>
#include <openssl/evp.h>
#ifndef OPENSSL_NO_AES
# include <openssl/aes.h>
#endif
#include <openssl/rand.h>
#include <openssl/err.h>

#ifndef OPENSSL_NO_HW
# ifndef OPENSSL_NO_HW_PADLOCK

/* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
#  if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
#   ifndef OPENSSL_NO_DYNAMIC_ENGINE
#    define DYNAMIC_ENGINE
#   endif
#  elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
#   ifdef ENGINE_DYNAMIC_SUPPORT
#    define DYNAMIC_ENGINE
#   endif
#  else
#   error "Only OpenSSL >= 0.9.7 is supported"
#  endif

/*
 * VIA PadLock AES is available *ONLY* on some x86 CPUs.  Not only does it
 * not exist elsewhere, it cannot even be compiled on other platforms!
 *
 * In addition, because of the heavy use of inline assembler, compiler choice
 * is limited to GCC and Microsoft C.
 */
#  undef COMPILE_HW_PADLOCK
#  if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
#   if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
     (defined(_MSC_VER) && defined(_M_IX86))
#    define COMPILE_HW_PADLOCK
#   endif
#  endif

#  ifdef OPENSSL_NO_DYNAMIC_ENGINE
#   ifdef COMPILE_HW_PADLOCK
static ENGINE *ENGINE_padlock(void);
#   endif

void ENGINE_load_padlock(void)
{
/* On non-x86 CPUs it just returns. */
#   ifdef COMPILE_HW_PADLOCK
    ENGINE *toadd = ENGINE_padlock();
    if (!toadd)
        return;
    ENGINE_add(toadd);
    ENGINE_free(toadd);
    ERR_clear_error();
#   endif
}

#  endif
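
/*
 * Illustrative sketch only (not part of this engine): with a static build,
 * an application would typically activate PadLock through the standard
 * ENGINE API, roughly as follows.  Error handling is minimal and the
 * function name is hypothetical.
 *
 *     static void use_padlock_sketch(void)
 *     {
 *         ENGINE *e;
 *
 *         ENGINE_load_padlock();
 *         if ((e = ENGINE_by_id("padlock")) == NULL)
 *             return;
 *         if (ENGINE_init(e)) {
 *             ENGINE_set_default(e, ENGINE_METHOD_ALL);
 *             ENGINE_finish(e);
 *         }
 *         ENGINE_free(e);
 *     }
 */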

#  ifdef COMPILE_HW_PADLOCK
/*
 * We do these includes here to avoid header problems on platforms that do
 * not have the VIA padlock anyway...
 */
#   include <stdlib.h>
#   ifdef _WIN32
#    include <malloc.h>
#    ifndef alloca
#     define alloca _alloca
#    endif
#   elif defined(__GNUC__)
#    ifndef alloca
#     define alloca(s) __builtin_alloca(s)
#    endif
#   endif

/* Function for ENGINE detection and control */
static int padlock_available(void);
static int padlock_init(ENGINE *e);

/* RNG Stuff */
static RAND_METHOD padlock_rand;

/* Cipher Stuff */
#   ifndef OPENSSL_NO_AES
static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher,
                           const int **nids, int nid);
#   endif

/* Engine names */
static const char *padlock_id = "padlock";
static char padlock_name[100];

/* Available features */
static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
static int padlock_use_rng = 0; /* Random Number Generator */
#   ifndef OPENSSL_NO_AES
static int padlock_aes_align_required = 1;
#   endif

/* ===== Engine "management" functions ===== */

/* Prepare the ENGINE structure for registration */
static int padlock_bind_helper(ENGINE *e)
{
    /* Check available features */
    padlock_available();

#   if 1                        /* disable RNG for now, see commentary in
                                 * vicinity of RNG code */
    padlock_use_rng = 0;
#   endif

    /* Generate a nice engine name with available features */
    BIO_snprintf(padlock_name, sizeof(padlock_name),
                 "VIA PadLock (%s, %s)",
                 padlock_use_rng ? "RNG" : "no-RNG",
                 padlock_use_ace ? "ACE" : "no-ACE");

    /* Register everything or return with an error */
    if (!ENGINE_set_id(e, padlock_id) ||
        !ENGINE_set_name(e, padlock_name) ||
        !ENGINE_set_init_function(e, padlock_init) ||
#   ifndef OPENSSL_NO_AES
        (padlock_use_ace && !ENGINE_set_ciphers(e, padlock_ciphers)) ||
#   endif
        (padlock_use_rng && !ENGINE_set_RAND(e, &padlock_rand))) {
        return 0;
    }

    /* Everything looks good */
    return 1;
}

#   ifdef OPENSSL_NO_DYNAMIC_ENGINE

/* Constructor */
static ENGINE *ENGINE_padlock(void)
{
    ENGINE *eng = ENGINE_new();

    if (!eng) {
        return NULL;
    }

    if (!padlock_bind_helper(eng)) {
        ENGINE_free(eng);
        return NULL;
    }

    return eng;
}

#   endif

/* Check availability of the engine */
static int padlock_init(ENGINE *e)
{
    return (padlock_use_rng || padlock_use_ace);
}

/*
 * This stuff is needed if this ENGINE is being compiled into a
 * self-contained shared-library.
 */
#   ifdef DYNAMIC_ENGINE
static int padlock_bind_fn(ENGINE *e, const char *id)
{
    if (id && (strcmp(id, padlock_id) != 0)) {
        return 0;
    }

    if (!padlock_bind_helper(e)) {
        return 0;
    }

    return 1;
}

IMPLEMENT_DYNAMIC_CHECK_FN()
IMPLEMENT_DYNAMIC_BIND_FN(padlock_bind_fn)
#   endif                       /* DYNAMIC_ENGINE */
/* ===== Here comes the "real" engine ===== */
#   ifndef OPENSSL_NO_AES
/* Some AES-related constants */
#    define AES_BLOCK_SIZE          16
#    define AES_KEY_SIZE_128        16
#    define AES_KEY_SIZE_192        24
#    define AES_KEY_SIZE_256        32
/*
 * Here we store the status information relevant to the current context.
 */
/*
 * BIG FAT WARNING: Inline assembler in PADLOCK_XCRYPT_ASM() depends on
 * the order of items in this structure.  Don't blindly modify, reorder,
 * etc!
 */
struct padlock_cipher_data {
    unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */
    union {
        unsigned int pad[4];
        struct {
            int rounds:4;
            int dgst:1;         /* n/a in C3 */
            int align:1;        /* n/a in C3 */
            int ciphr:1;        /* n/a in C3 */
            unsigned int keygen:1;
            int interm:1;
            unsigned int encdec:1;
            int ksize:2;
        } b;
    } cword;                    /* Control word */
    AES_KEY ks;                 /* Encryption key */
};
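
/*
 * Resulting layout, relied upon by the inline assembler below: the IV sits
 * at offset 0, the 16-byte control word at offset 16 and the expanded key
 * schedule at offset 32, which is exactly what the "leal 16(%0)" /
 * "leal 32(%0)" in PADLOCK_XCRYPT_ASM() expect.
 */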

/*
 * Essentially this variable belongs in thread local storage.
 * Having this variable global on the other hand can only cause
 * a few bogus key reloads [if any at all on a single-CPU system],
 * so we accept the penalty...
 */
static volatile struct padlock_cipher_data *padlock_saved_context;
#   endif

/*-
 * =======================================================
 * Inline assembler section(s).
 * =======================================================
 * Order of arguments is chosen to facilitate Windows port
 * using __fastcall calling convention. If you wish to add
 * more routines, keep in mind that the first __fastcall
 * argument is passed in %ecx and the second in %edx.
 * =======================================================
 */
#   if defined(__GNUC__) && __GNUC__>=2
/*
 * As for the excessive "push %ebx"/"pop %ebx" found all over:
 * when generating position-independent code GCC won't let
 * us use "b" in assembler templates, nor even respect "ebx"
 * in the "clobber description."  Hence the trouble...
 */

/*
 * Helper function - check if a CPUID instruction is available on this CPU
 */
static int padlock_insn_cpuid_available(void)
{
    int result = -1;

    /*
     * We're checking if bit #21 of EFLAGS can be toggled.  If yes, CPUID
     * is available.
     */
    asm volatile ("pushf\n"
                  "popl %%eax\n"
                  "xorl $0x200000, %%eax\n"
                  "movl %%eax, %%ecx\n"
                  "andl $0x200000, %%ecx\n"
                  "pushl %%eax\n"
                  "popf\n"
                  "pushf\n"
                  "popl %%eax\n"
                  "andl $0x200000, %%eax\n"
                  "xorl %%eax, %%ecx\n"
                  "movl %%ecx, %0\n":"=r" (result)::"eax", "ecx");

    return (result == 0);
}

/*
 * Load supported features of the CPU to see if the PadLock is available.
 */
static int padlock_available(void)
{
    char vendor_string[16];
    unsigned int eax, edx;

    /* First check if the CPUID instruction is available at all... */
    if (!padlock_insn_cpuid_available())
        return 0;

    /* Are we running on the Centaur (VIA) CPU? */
    eax = 0x00000000;
    vendor_string[12] = 0;
    asm volatile ("pushl  %%ebx\n"
                  "cpuid\n"
                  "movl   %%ebx,(%%edi)\n"
                  "movl   %%edx,4(%%edi)\n"
                  "movl   %%ecx,8(%%edi)\n"
                  "popl   %%ebx":"+a" (eax):"D"(vendor_string):"ecx", "edx");
    if (strcmp(vendor_string, "CentaurHauls") != 0)
        return 0;

    /* Check for Centaur Extended Feature Flags presence */
    eax = 0xC0000000;
    asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax)::"ecx", "edx");
    if (eax < 0xC0000001)
        return 0;

    /* Read the Centaur Extended Feature Flags */
    eax = 0xC0000001;
    asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax),
                  "=d"(edx)::"ecx");

    /* Fill up some flags */
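    /*
     * Per VIA's CPUID documentation, bit 6 of %edx reports ACE present and
     * bit 7 ACE enabled, while bit 2 reports RNG present and bit 3 RNG
     * enabled; both bits of a pair must be set before we use the unit.
     */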
    padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6));
    padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2));

    return padlock_use_ace + padlock_use_rng;
}

#    ifndef OPENSSL_NO_AES
#     ifndef AES_ASM
/* Our own htonl()/ntohl() */
static inline void padlock_bswapl(AES_KEY *ks)
{
    size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]);
    unsigned int *key = ks->rd_key;

    while (i--) {
        asm volatile ("bswapl %0":"+r" (*key));
        key++;
    }
}
#     endif
#    endif

/*
 * Force key reload from memory to the CPU microcode. Loading EFLAGS from the
 * stack clears EFLAGS[30] which does the trick.
 */
static inline void padlock_reload_key(void)
{
    asm volatile ("pushfl; popfl");
}

#    ifndef OPENSSL_NO_AES
/*
 * This is heuristic key context tracing. At first one
 * believes that one should use atomic swap instructions,
 * but it's not actually necessary. The point is that if
 * padlock_saved_context was changed by another thread
 * after we've read it and before we compare it with cdata,
 * our key *shall* be reloaded upon thread context switch
 * and we are therefore set in either case...
 */
static inline void padlock_verify_context(struct padlock_cipher_data *cdata)
{
    asm volatile ("pushfl\n"
                  "       btl     $30,(%%esp)\n"
                  "       jnc     1f\n"
                  "       cmpl    %2,%1\n"
                  "       je      1f\n"
                  "       popfl\n"
                  "       subl    $4,%%esp\n"
                  "1:     addl    $4,%%esp\n"
                  "       movl    %2,%0":"+m" (padlock_saved_context)
                  :"r"(padlock_saved_context), "r"(cdata):"cc");
}

/* Template for padlock_xcrypt_* modes */
/*
 * BIG FAT WARNING: The offsets used with 'leal' instructions describe items
 * of the 'padlock_cipher_data' structure.
 */
#     define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)     \
static inline void *name(size_t cnt,            \
        struct padlock_cipher_data *cdata,      \
        void *out, const void *inp)             \
{       void *iv;                               \
        asm volatile ( "pushl   %%ebx\n"        \
                "       leal    16(%0),%%edx\n" \
                "       leal    32(%0),%%ebx\n" \
                        rep_xcrypt "\n"         \
                "       popl    %%ebx"          \
                : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
                : "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
                : "edx", "cc", "memory");       \
        return iv;                              \
}

/* Generate all functions with appropriate opcodes */
/* rep xcryptecb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")
/* rep xcryptcbc */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")
/* rep xcryptcfb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")
/* rep xcryptofb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")
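
/*
 * Each routine generated above takes (cnt, cdata, out, inp) and runs the
 * corresponding "rep xcrypt*" with %ecx = cnt, %edi = out, %esi = inp and
 * %eax = cdata (which doubles as the IV pointer, since the IV sits at
 * offset 0), while %edx = cdata + 16 points at the control word and
 * %ebx = cdata + 32 at the round keys.  Whatever the instruction leaves in
 * %eax is returned; the CBC/CFB callers below use it as the chaining value.
 */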
#    endif
/* The RNG call itself */
static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in)
{
    unsigned int eax_out;

    asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
                  :"=a" (eax_out), "=m"(*(unsigned *)addr)
                  :"D"(addr), "d"(edx_in)
        );

    return eax_out;
}

/*
 * Why not inline 'rep movsd'?  I failed to find information on what value
 * in the Direction Flag one can expect and consequently have to apply the
 * "better-safe-than-sorry" approach and assume "undefined."  I could
 * explicitly clear it and restore the original value upon return from
 * padlock_aes_cipher, but it's presumably too much trouble for too little
 * gain...  In case you wonder, the 'rep xcrypt*' instructions above are
 * *not* affected by the Direction Flag and advance their pointers toward
 * larger addresses unconditionally.
 */
static inline unsigned char *padlock_memcpy(void *dst, const void *src,
                                            size_t n)
{
    long *d = dst;
    const long *s = src;

    n /= sizeof(*d);
    do {
        *d++ = *s++;
    } while (--n);

    return dst;
}

#   elif defined(_MSC_VER)
/*
 * Unlike GCC these are real functions.  In order to minimize the impact
 * on performance we adhere to the __fastcall calling convention so that
 * the first two arguments are passed through %ecx and %edx.  Which suits
 * very well, as the instructions in question use both %ecx and %edx as
 * input:-)
 */
#    define REP_XCRYPT(code)                \
        _asm _emit 0xf3                 \
        _asm _emit 0x0f _asm _emit 0xa7 \
        _asm _emit code

/*
 * BIG FAT WARNING: The offsets used with 'lea' instructions describe items
 * of the 'padlock_cipher_data' structure.
 */
#    define PADLOCK_XCRYPT_ASM(name,code)   \
static void * __fastcall                \
        name (size_t cnt, void *cdata,  \
        void *outp, const void *inp)    \
{       _asm    mov     eax,edx         \
        _asm    lea     edx,[eax+16]    \
        _asm    lea     ebx,[eax+32]    \
        _asm    mov     edi,outp        \
        _asm    mov     esi,inp         \
        REP_XCRYPT(code)                \
}

PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)

static int __fastcall padlock_xstore(void *outp, unsigned int code)
{
    _asm    mov edi,ecx
    _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
}

static void __fastcall padlock_reload_key(void)
{
    _asm pushfd
    _asm popfd
}

static void __fastcall padlock_verify_context(void *cdata)
{
    _asm    {
        pushfd
        bt  DWORD PTR[esp],30
        jnc skip
        cmp ecx,padlock_saved_context
        je  skip
        popfd
        sub esp,4
    skip:   add esp,4
        mov padlock_saved_context,ecx
    }
}

static int
padlock_available(void)
{
    _asm    {
        pushfd
        pop eax
        mov ecx,eax
        xor eax,1<<21
        push    eax
        popfd
        pushfd
        pop eax
        xor eax,ecx
        bt  eax,21
        jnc noluck
        mov eax,0
        cpuid
        xor eax,eax
        cmp ebx,'tneC'
        jne noluck
        cmp edx,'Hrua'
        jne noluck
        cmp ecx,'slua'
        jne noluck
        mov eax,0xC0000000
        cpuid
        mov edx,eax
        xor eax,eax
        cmp edx,0xC0000001
        jb  noluck
        mov eax,0xC0000001
        cpuid
        xor eax,eax
        bt  edx,6
        jnc skip_a
        bt  edx,7
        jnc skip_a
        mov padlock_use_ace,1
        inc eax
    skip_a: bt  edx,2
        jnc skip_r
        bt  edx,3
        jnc skip_r
        mov padlock_use_rng,1
        inc eax
    skip_r:
    noluck:
    }
}

static void __fastcall padlock_bswapl(void *key)
{
    _asm    {
        pushfd
        cld
        mov esi,ecx
        mov edi,ecx
        mov ecx,60
    up: lodsd
        bswap   eax
        stosd
        loop    up
        popfd
    }
}

/*
 * MS actually specifies the status of the Direction Flag, and the compiler
 * even manages to compile the following as 'rep movsd' all by itself...
 */
#    define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
#   endif
/* ===== AES encryption/decryption ===== */
#   ifndef OPENSSL_NO_AES
#    if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
#     define NID_aes_128_cfb NID_aes_128_cfb128
#    endif
#    if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
#     define NID_aes_128_ofb NID_aes_128_ofb128
#    endif
#    if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
#     define NID_aes_192_cfb NID_aes_192_cfb128
#    endif
#    if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
#     define NID_aes_192_ofb NID_aes_192_ofb128
#    endif
#    if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
#     define NID_aes_256_cfb NID_aes_256_cfb128
#    endif
#    if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
#     define NID_aes_256_ofb NID_aes_256_ofb128
#    endif
/*
 * List of supported ciphers.
 */
static int padlock_cipher_nids[] = {
    NID_aes_128_ecb,
    NID_aes_128_cbc,
    NID_aes_128_cfb,
    NID_aes_128_ofb,

    NID_aes_192_ecb,
    NID_aes_192_cbc,
    NID_aes_192_cfb,
    NID_aes_192_ofb,

    NID_aes_256_ecb,
    NID_aes_256_cbc,
    NID_aes_256_cfb,
    NID_aes_256_ofb,
};

static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids) /
                                      sizeof(padlock_cipher_nids[0]));

/* Function prototypes ... */
static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                                const unsigned char *iv, int enc);
static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                              const unsigned char *in, size_t nbytes);

#    define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) +         \
        ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F )      )
#    define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
        NEAREST_ALIGNED(ctx->cipher_data))
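
/*
 * The xcrypt instructions want the padlock_cipher_data block 16-byte
 * aligned.  EVP does not guarantee that for ctx->cipher_data, so the
 * EVP_CIPHER declarations below over-allocate by 16 bytes and
 * ALIGNED_CIPHER_DATA() rounds the pointer up to the next 16-byte boundary.
 */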

#    define EVP_CIPHER_block_size_ECB       AES_BLOCK_SIZE
#    define EVP_CIPHER_block_size_CBC       AES_BLOCK_SIZE
#    define EVP_CIPHER_block_size_OFB       1
#    define EVP_CIPHER_block_size_CFB       1

/*
 * Declaring so many ciphers by hand would be a pain. Instead introduce a bit
 * of preprocessor magic :-)
 */
#    define DECLARE_AES_EVP(ksize,lmode,umode)      \
static const EVP_CIPHER padlock_aes_##ksize##_##lmode = {       \
        NID_aes_##ksize##_##lmode,              \
        EVP_CIPHER_block_size_##umode,  \
        AES_KEY_SIZE_##ksize,           \
        AES_BLOCK_SIZE,                 \
        0 | EVP_CIPH_##umode##_MODE,    \
        padlock_aes_init_key,           \
        padlock_aes_cipher,             \
        NULL,                           \
        sizeof(struct padlock_cipher_data) + 16,        \
        EVP_CIPHER_set_asn1_iv,         \
        EVP_CIPHER_get_asn1_iv,         \
        NULL,                           \
        NULL                            \
}

DECLARE_AES_EVP(128, ecb, ECB);
DECLARE_AES_EVP(128, cbc, CBC);
DECLARE_AES_EVP(128, cfb, CFB);
DECLARE_AES_EVP(128, ofb, OFB);

DECLARE_AES_EVP(192, ecb, ECB);
DECLARE_AES_EVP(192, cbc, CBC);
DECLARE_AES_EVP(192, cfb, CFB);
DECLARE_AES_EVP(192, ofb, OFB);

DECLARE_AES_EVP(256, ecb, ECB);
DECLARE_AES_EVP(256, cbc, CBC);
DECLARE_AES_EVP(256, cfb, CFB);
DECLARE_AES_EVP(256, ofb, OFB);

static int
padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids,
                int nid)
{
    /* No specific cipher => return a list of supported nids ... */
    if (!cipher) {
        *nids = padlock_cipher_nids;
        return padlock_cipher_nids_num;
    }

    /* ... or the requested "cipher" otherwise */
    switch (nid) {
    case NID_aes_128_ecb:
        *cipher = &padlock_aes_128_ecb;
        break;
    case NID_aes_128_cbc:
        *cipher = &padlock_aes_128_cbc;
        break;
    case NID_aes_128_cfb:
        *cipher = &padlock_aes_128_cfb;
        break;
    case NID_aes_128_ofb:
        *cipher = &padlock_aes_128_ofb;
        break;

    case NID_aes_192_ecb:
        *cipher = &padlock_aes_192_ecb;
        break;
    case NID_aes_192_cbc:
        *cipher = &padlock_aes_192_cbc;
        break;
    case NID_aes_192_cfb:
        *cipher = &padlock_aes_192_cfb;
        break;
    case NID_aes_192_ofb:
        *cipher = &padlock_aes_192_ofb;
        break;

    case NID_aes_256_ecb:
        *cipher = &padlock_aes_256_ecb;
        break;
    case NID_aes_256_cbc:
        *cipher = &padlock_aes_256_cbc;
        break;
    case NID_aes_256_cfb:
        *cipher = &padlock_aes_256_cfb;
        break;
    case NID_aes_256_ofb:
        *cipher = &padlock_aes_256_ofb;
        break;

    default:
        /* Sorry, we don't support this NID */
        *cipher = NULL;
        return 0;
    }

    return 1;
}

/* Prepare the encryption key for PadLock usage */
static int
padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                     const unsigned char *iv, int enc)
{
    struct padlock_cipher_data *cdata;
    int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;

    if (key == NULL)
        return 0;               /* ERROR */

    cdata = ALIGNED_CIPHER_DATA(ctx);
    memset(cdata, 0, sizeof(struct padlock_cipher_data));

    /* Prepare Control word. */
    if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
        cdata->cword.b.encdec = 0;
    else
        cdata->cword.b.encdec = (ctx->encrypt == 0);
    cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
    cdata->cword.b.ksize = (key_len - 128) / 64;

    switch (key_len) {
    case 128:
        /*
         * PadLock can generate an extended key for AES128 in hardware
         */
        memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
        cdata->cword.b.keygen = 0;
        break;

    case 192:
    case 256:
        /*
         * Generate an extended AES key in software. Needed for AES192/AES256
         */
        /*
         * Well, the above applies to Stepping 8 CPUs and is listed as
         * hardware errata. They most likely will fix it at some point and
         * then a check for stepping would be due here.
         */
        if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
            EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE || enc)
            AES_set_encrypt_key(key, key_len, &cdata->ks);
        else
            AES_set_decrypt_key(key, key_len, &cdata->ks);
#    ifndef AES_ASM
        /*
         * OpenSSL C functions use byte-swapped extended key.
         */
        padlock_bswapl(&cdata->ks);
#    endif
        cdata->cword.b.keygen = 1;
        break;

    default:
        /* ERROR */
        return 0;
    }

    /*
     * This is done to cover cases where the user reuses the context for a
     * new key. The catch is that if we don't do this, padlock_aes_cipher
     * might proceed with the old key...
     */
    padlock_reload_key();

    return 1;
}

/*-
 * Simplified version of padlock_aes_cipher() used when
 * 1) both input and output buffers are at aligned addresses.
 * or when
 * 2) running on a newer CPU that doesn't require aligned buffers.
 */
static int
padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
                              const unsigned char *in_arg, size_t nbytes)
{
    struct padlock_cipher_data *cdata;
    void *iv;

    cdata = ALIGNED_CIPHER_DATA(ctx);
    padlock_verify_context(cdata);

    switch (EVP_CIPHER_CTX_mode(ctx)) {
    case EVP_CIPH_ECB_MODE:
        padlock_xcrypt_ecb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
        break;

    case EVP_CIPH_CBC_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        iv = padlock_xcrypt_cbc(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
                                in_arg);
        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_CFB_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        iv = padlock_xcrypt_cfb(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
                                in_arg);
        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_OFB_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        padlock_xcrypt_ofb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
        memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
        break;

    default:
        return 0;
    }

    memset(cdata->iv, 0, AES_BLOCK_SIZE);

    return 1;
}

#    ifndef  PADLOCK_CHUNK
#     define PADLOCK_CHUNK  512 /* Must be a power of 2 no smaller than 16 */
#    endif
#    if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
#     error "insane PADLOCK_CHUNK..."
#    endif

/*
 * Re-align the arguments to 16-byte boundaries and run the encryption
 * function itself.  This function is not AES-specific.
 */
static int
padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
                   const unsigned char *in_arg, size_t nbytes)
{
    struct padlock_cipher_data *cdata;
    const void *inp;
    unsigned char *out;
    void *iv;
    int inp_misaligned, out_misaligned, realign_in_loop;
    size_t chunk, allocated = 0;

    /*
     * ctx->num is maintained in byte-oriented modes, such as CFB and OFB...
     */
    if ((chunk = ctx->num)) {   /* borrow chunk variable */
        unsigned char *ivp = ctx->iv;

        switch (EVP_CIPHER_CTX_mode(ctx)) {
        case EVP_CIPH_CFB_MODE:
            if (chunk >= AES_BLOCK_SIZE)
                return 0;       /* bogus value */

            if (ctx->encrypt)
                while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
                    ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
                    chunk++, nbytes--;
            } else
                while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
                    unsigned char c = *(in_arg++);
                    *(out_arg++) = c ^ ivp[chunk];
                    ivp[chunk++] = c, nbytes--;
                }

            ctx->num = chunk % AES_BLOCK_SIZE;
            break;
        case EVP_CIPH_OFB_MODE:
            if (chunk >= AES_BLOCK_SIZE)
                return 0;       /* bogus value */

            while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
                *(out_arg++) = *(in_arg++) ^ ivp[chunk];
                chunk++, nbytes--;
            }

            ctx->num = chunk % AES_BLOCK_SIZE;
            break;
        }
    }

    if (nbytes == 0)
        return 1;
#    if 0
    if (nbytes % AES_BLOCK_SIZE)
        return 0;               /* are we expected to do tail processing? */
#    else
    /*
     * nbytes is always a multiple of AES_BLOCK_SIZE in ECB and CBC modes
     * and an arbitrary value in byte-oriented modes, such as CFB and OFB...
     */
#    endif

    /*
     * VIA promises CPUs that won't require alignment in the future. For now
     * padlock_aes_align_required is initialized to 1 and the condition is
     * never met...
     */
    /*
     * The C7 core is capable of managing unaligned input in non-ECB[!]
     * mode, but the performance penalty appears to be approximately the
     * same as for the software alignment below, i.e. ~3x.  They promise to
     * improve it in the future, but for now we can just as well pretend
     * that it can only handle aligned input...
     */
    if (!padlock_aes_align_required && (nbytes % AES_BLOCK_SIZE) == 0)
        return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

    inp_misaligned = (((size_t)in_arg) & 0x0F);
    out_misaligned = (((size_t)out_arg) & 0x0F);

    /*
     * Note that even if the output is aligned and the input is not, I still
     * prefer to loop instead of copying the whole input and then encrypting
     * it in one stroke.  This is done in order to improve L1 cache
     * utilization...
     */
    realign_in_loop = out_misaligned | inp_misaligned;

    if (!realign_in_loop && (nbytes % AES_BLOCK_SIZE) == 0)
        return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

    /* this takes one "if" out of the loops */
    chunk = nbytes;
    chunk %= PADLOCK_CHUNK;
    if (chunk == 0)
        chunk = PADLOCK_CHUNK;
    if (out_misaligned) {
        /* optimize for small input */
        allocated = (chunk < nbytes ? PADLOCK_CHUNK : nbytes);
        out = alloca(0x10 + allocated);
        out = NEAREST_ALIGNED(out);
    } else
        out = out_arg;

    cdata = ALIGNED_CIPHER_DATA(ctx);
    padlock_verify_context(cdata);

    switch (EVP_CIPHER_CTX_mode(ctx)) {
    case EVP_CIPH_ECB_MODE:
        do {
            if (inp_misaligned)
                inp = padlock_memcpy(out, in_arg, chunk);
            else
                inp = in_arg;
            in_arg += chunk;

            padlock_xcrypt_ecb(chunk / AES_BLOCK_SIZE, cdata, out, inp);

            if (out_misaligned)
                out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
            else
                out = out_arg += chunk;

            nbytes -= chunk;
            chunk = PADLOCK_CHUNK;
        } while (nbytes);
        break;

    case EVP_CIPH_CBC_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        goto cbc_shortcut;
        do {
            if (iv != cdata->iv)
                memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
            chunk = PADLOCK_CHUNK;
 cbc_shortcut:                 /* optimize for small input */
            if (inp_misaligned)
                inp = padlock_memcpy(out, in_arg, chunk);
            else
                inp = in_arg;
            in_arg += chunk;

            iv = padlock_xcrypt_cbc(chunk / AES_BLOCK_SIZE, cdata, out, inp);

            if (out_misaligned)
                out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
            else
                out = out_arg += chunk;

        } while (nbytes -= chunk);
        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_CFB_MODE:
        memcpy(iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        chunk &= ~(AES_BLOCK_SIZE - 1);
        if (chunk)
            goto cfb_shortcut;
        else
            goto cfb_skiploop;
        do {
            if (iv != cdata->iv)
                memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
            chunk = PADLOCK_CHUNK;
 cfb_shortcut:                 /* optimize for small input */
            if (inp_misaligned)
                inp = padlock_memcpy(out, in_arg, chunk);
            else
                inp = in_arg;
            in_arg += chunk;

            iv = padlock_xcrypt_cfb(chunk / AES_BLOCK_SIZE, cdata, out, inp);

            if (out_misaligned)
                out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
            else
                out = out_arg += chunk;

            nbytes -= chunk;
        } while (nbytes >= AES_BLOCK_SIZE);

 cfb_skiploop:
        if (nbytes) {
            unsigned char *ivp = cdata->iv;

            if (iv != ivp) {
                memcpy(ivp, iv, AES_BLOCK_SIZE);
                iv = ivp;
            }
            ctx->num = nbytes;
            if (cdata->cword.b.encdec) {
                cdata->cword.b.encdec = 0;
                padlock_reload_key();
                padlock_xcrypt_ecb(1, cdata, ivp, ivp);
                cdata->cword.b.encdec = 1;
                padlock_reload_key();
                while (nbytes) {
                    unsigned char c = *(in_arg++);
                    *(out_arg++) = c ^ *ivp;
                    *(ivp++) = c, nbytes--;
                }
            } else {
                padlock_reload_key();
                padlock_xcrypt_ecb(1, cdata, ivp, ivp);
                padlock_reload_key();
                while (nbytes) {
                    *ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
                    ivp++, nbytes--;
                }
            }
        }

        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_OFB_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        chunk &= ~(AES_BLOCK_SIZE - 1);
        if (chunk)
            do {
                if (inp_misaligned)
                    inp = padlock_memcpy(out, in_arg, chunk);
                else
                    inp = in_arg;
                in_arg += chunk;

                padlock_xcrypt_ofb(chunk / AES_BLOCK_SIZE, cdata, out, inp);

                if (out_misaligned)
                    out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
                else
                    out = out_arg += chunk;

                nbytes -= chunk;
                chunk = PADLOCK_CHUNK;
            } while (nbytes >= AES_BLOCK_SIZE);

        if (nbytes) {
            unsigned char *ivp = cdata->iv;

            ctx->num = nbytes;
            padlock_reload_key(); /* empirically found */
            padlock_xcrypt_ecb(1, cdata, ivp, ivp);
            padlock_reload_key(); /* empirically found */
            while (nbytes) {
                *(out_arg++) = *(in_arg++) ^ *ivp;
                ivp++, nbytes--;
            }
        }

        memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
        break;

    default:
        return 0;
    }

    /* Clean the realign buffer if it was used */
    if (out_misaligned) {
        volatile unsigned long *p = (void *)out;
        size_t n = allocated / sizeof(*p);
        while (n--)
            *p++ = 0;
    }

    memset(cdata->iv, 0, AES_BLOCK_SIZE);

    return 1;
}

#   endif                       /* OPENSSL_NO_AES */

/* ===== Random Number Generator ===== */
/*
 * This code is not engaged. The reason is that it does not comply
 * with recommendations for VIA RNG usage for secure applications
 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
 * provide meaningful error control...
 */
/*
 * Wrapper that provides an interface between the API and the raw PadLock
 * RNG
 */
static int padlock_rand_bytes(unsigned char *output, int count)
{
    unsigned int eax, buf;

    while (count >= 8) {
        eax = padlock_xstore(output, 0);
        if (!(eax & (1 << 6)))
            return 0;           /* RNG disabled */
        /* this ---vv--- covers DC bias, Raw Bits and String Filter */
        if (eax & (0x1F << 10))
            return 0;
        if ((eax & 0x1F) == 0)
            continue;           /* no data, retry... */
        if ((eax & 0x1F) != 8)
            return 0;           /* fatal failure...  */
        output += 8;
        count -= 8;
    }
    while (count > 0) {
        eax = padlock_xstore(&buf, 3);
        if (!(eax & (1 << 6)))
            return 0;           /* RNG disabled */
        /* this ---vv--- covers DC bias, Raw Bits and String Filter */
        if (eax & (0x1F << 10))
            return 0;
        if ((eax & 0x1F) == 0)
            continue;           /* no data, retry... */
        if ((eax & 0x1F) != 1)
            return 0;           /* fatal failure...  */
        *output++ = (unsigned char)buf;
        count--;
    }
    *(volatile unsigned int *)&buf = 0;

    return 1;
}
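
/*
 * Note on the xstore calls above: the value passed in %edx selects the
 * "quality factor" (per VIA's RNG programming notes); divisor 0 yields up
 * to 8 bytes per call and divisor 3 at most 1 byte.  The low five bits of
 * the returned %eax report how many bytes were actually stored, which is
 * what the checks against 8 and 1 above rely on.
 */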

/* Dummy but necessary function */
static int padlock_rand_status(void)
{
    return 1;
}

/* Prepare structure for registration */
static RAND_METHOD padlock_rand = {
    NULL,                       /* seed */
    padlock_rand_bytes,         /* bytes */
    NULL,                       /* cleanup */
    NULL,                       /* add */
    padlock_rand_bytes,         /* pseudorand */
    padlock_rand_status,        /* rand status */
};

#  else                         /* !COMPILE_HW_PADLOCK */
#   ifndef OPENSSL_NO_DYNAMIC_ENGINE
OPENSSL_EXPORT
    int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns);
OPENSSL_EXPORT
    int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns)
{
    return 0;
}

IMPLEMENT_DYNAMIC_CHECK_FN()
#   endif
#  endif                        /* COMPILE_HW_PADLOCK */
# endif                         /* !OPENSSL_NO_HW_PADLOCK */
#endif                          /* !OPENSSL_NO_HW */