e_padlock.c (r276864 -> r280304)
/*-
 * Support for VIA PadLock Advanced Cryptography Engine (ACE)
 * Written by Michal Ludvig <michal@logix.cz>
 *            http://www.logix.cz/michal
 *
 * Big thanks to Andy Polyakov for a help with optimization,
 * assembler fixes, port to MS Windows and a lot of other
 * valuable work on this engine!
 */

/* ====================================================================
 * Copyright (c) 1999-2001 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions

--- 41 unchanged lines hidden ---

 * ====================================================================
 *
 * This product includes cryptographic software written by Eric Young
 * (eay@cryptsoft.com).  This product includes software written by Tim
 * Hudson (tjh@cryptsoft.com).
 *
 */
#include <stdio.h>
#include <string.h>

#include <openssl/opensslconf.h>
#include <openssl/crypto.h>
#include <openssl/dso.h>
#include <openssl/engine.h>
#include <openssl/evp.h>
#ifndef OPENSSL_NO_AES
# include <openssl/aes.h>
#endif
#include <openssl/rand.h>
#include <openssl/err.h>

#ifndef OPENSSL_NO_HW
# ifndef OPENSSL_NO_HW_PADLOCK

/* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
#  if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
#   ifndef OPENSSL_NO_DYNAMIC_ENGINE
#    define DYNAMIC_ENGINE
#   endif
#  elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
#   ifdef ENGINE_DYNAMIC_SUPPORT
#    define DYNAMIC_ENGINE
#   endif
#  else
#   error "Only OpenSSL >= 0.9.7 is supported"
#  endif

/*
 * VIA PadLock AES is available *ONLY* on some x86 CPUs. Not only that it
 * doesn't exist elsewhere, but it even can't be compiled on other platforms!
 *
 * In addition, because of the heavy use of inline assembler, compiler choice
 * is limited to GCC and Microsoft C.
 */
#  undef COMPILE_HW_PADLOCK
#  if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
#   if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
       (defined(_MSC_VER) && defined(_M_IX86))
#    define COMPILE_HW_PADLOCK
#   endif
#  endif

#  ifdef OPENSSL_NO_DYNAMIC_ENGINE
#   ifdef COMPILE_HW_PADLOCK
static ENGINE *ENGINE_padlock(void);
#   endif

void ENGINE_load_padlock(void)
{
/* On non-x86 CPUs it just returns. */
#   ifdef COMPILE_HW_PADLOCK
    ENGINE *toadd = ENGINE_padlock();
    if (!toadd)
        return;
    ENGINE_add(toadd);
    ENGINE_free(toadd);
    ERR_clear_error();
#   endif
}

#  endif

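/*
 * Editorial sketch, not part of the original file: how an application
 * would typically activate this engine through the generic ENGINE API.
 * Only standard OpenSSL 0.9.8/1.0.x calls are used; error handling is
 * kept to a bare minimum.
 */
#if 0
static int use_padlock_if_present(void)
{
    ENGINE *e;

    ENGINE_load_padlock();      /* statically linked build */
    e = ENGINE_by_id("padlock");
    if (e == NULL)
        return 0;               /* engine not compiled in or CPU lacks it */
    /* Route all algorithms the engine implements through PadLock */
    if (!ENGINE_set_default(e, ENGINE_METHOD_ALL)) {
        ENGINE_free(e);
        return 0;
    }
    ENGINE_free(e);
    return 1;
}
#endif
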
#  ifdef COMPILE_HW_PADLOCK
/*
 * We do these includes here to avoid header problems on platforms that do
 * not have the VIA padlock anyway...
 */
#   include <stdlib.h>
#   ifdef _WIN32
#    include <malloc.h>
#    ifndef alloca
#     define alloca _alloca
#    endif
#   elif defined(__GNUC__)
#    ifndef alloca
#     define alloca(s) __builtin_alloca(s)
#    endif
#   endif

/* Function for ENGINE detection and control */
static int padlock_available(void);
static int padlock_init(ENGINE *e);

/* RNG Stuff */
static RAND_METHOD padlock_rand;

/* Cipher Stuff */
#   ifndef OPENSSL_NO_AES
static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher,
                           const int **nids, int nid);
#   endif

/* Engine names */
static const char *padlock_id = "padlock";
static char padlock_name[100];

/* Available features */
static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
static int padlock_use_rng = 0; /* Random Number Generator */
#   ifndef OPENSSL_NO_AES
static int padlock_aes_align_required = 1;
#   endif

/* ===== Engine "management" functions ===== */

/* Prepare the ENGINE structure for registration */
static int padlock_bind_helper(ENGINE *e)
{
    /* Check available features */
    padlock_available();

#   if 1                       /* disable RNG for now, see commentary in
                                * vicinity of RNG code */
    padlock_use_rng = 0;
#   endif

    /* Generate a nice engine name with available features */
    BIO_snprintf(padlock_name, sizeof(padlock_name),
                 "VIA PadLock (%s, %s)",
                 padlock_use_rng ? "RNG" : "no-RNG",
                 padlock_use_ace ? "ACE" : "no-ACE");

    /* Register everything or return with an error */
    if (!ENGINE_set_id(e, padlock_id) ||
        !ENGINE_set_name(e, padlock_name) ||
        !ENGINE_set_init_function(e, padlock_init) ||
#   ifndef OPENSSL_NO_AES
        (padlock_use_ace && !ENGINE_set_ciphers(e, padlock_ciphers)) ||
#   endif
        (padlock_use_rng && !ENGINE_set_RAND(e, &padlock_rand))) {
        return 0;
    }

    /* Everything looks good */
    return 1;
}

#   ifdef OPENSSL_NO_DYNAMIC_ENGINE

/* Constructor */
static ENGINE *ENGINE_padlock(void)
{
    ENGINE *eng = ENGINE_new();

    if (!eng) {
        return NULL;
    }

    if (!padlock_bind_helper(eng)) {
        ENGINE_free(eng);
        return NULL;
    }

    return eng;
}

#   endif

/* Check availability of the engine */
static int padlock_init(ENGINE *e)
{
    return (padlock_use_rng || padlock_use_ace);
}

/*
 * This stuff is needed if this ENGINE is being compiled into a
 * self-contained shared-library.
 */
#   ifdef DYNAMIC_ENGINE
static int padlock_bind_fn(ENGINE *e, const char *id)
{
    if (id && (strcmp(id, padlock_id) != 0)) {
        return 0;
    }

    if (!padlock_bind_helper(e)) {
        return 0;
    }

    return 1;
}

IMPLEMENT_DYNAMIC_CHECK_FN()
IMPLEMENT_DYNAMIC_BIND_FN(padlock_bind_fn)
#   endif                      /* DYNAMIC_ENGINE */

/* ===== Here comes the "real" engine ===== */

#   ifndef OPENSSL_NO_AES
/* Some AES-related constants */
#    define AES_BLOCK_SIZE          16
#    define AES_KEY_SIZE_128        16
#    define AES_KEY_SIZE_192        24
#    define AES_KEY_SIZE_256        32
/*
 * Here we store the status information relevant to the current context.
 */
/*
 * BIG FAT WARNING: Inline assembler in PADLOCK_XCRYPT_ASM() depends on
 * the order of items in this structure. Don't blindly modify, reorder,
 * etc!
 */
struct padlock_cipher_data {
    unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */
    union {
        unsigned int pad[4];
        struct {
            int rounds:4;
            int dgst:1;         /* n/a in C3 */
            int align:1;        /* n/a in C3 */
            int ciphr:1;        /* n/a in C3 */
            unsigned int keygen:1;
            int interm:1;
            unsigned int encdec:1;
            int ksize:2;
        } b;
    } cword;                    /* Control word */
    AES_KEY ks;                 /* Encryption key */
};

/*
 * Essentially this variable belongs in thread local storage.
 * Having this variable global on the other hand can only cause
 * few bogus key reloads [if any at all on single-CPU system],
 * so we accept the penalty...
 */
static volatile struct padlock_cipher_data *padlock_saved_context;
#   endif

/*-
 * =======================================================
 * Inline assembler section(s).
 * =======================================================
 * Order of arguments is chosen to facilitate Windows port
 * using __fastcall calling convention. If you wish to add
 * more routines, keep in mind that first __fastcall
 * argument is passed in %ecx and second - in %edx.
 * =======================================================
 */
#   if defined(__GNUC__) && __GNUC__>=2
/*
 * As for excessive "push %ebx"/"pop %ebx" found all over.
 * When generating position-independent code GCC won't let
 * us use "b" in assembler templates nor even respect "ebx"
 * in "clobber description." Therefore the trouble...
 */

/*
 * Helper function - check if a CPUID instruction is available on this CPU
 */
static int padlock_insn_cpuid_available(void)
{
    int result = -1;

    /*
     * We're checking if the bit #21 of EFLAGS can be toggled. If yes =
     * CPUID is available.
     */
    asm volatile ("pushf\n"
                  "popl %%eax\n"
                  "xorl $0x200000, %%eax\n"
                  "movl %%eax, %%ecx\n"
                  "andl $0x200000, %%ecx\n"
                  "pushl %%eax\n"
                  "popf\n"
                  "pushf\n"
                  "popl %%eax\n"
                  "andl $0x200000, %%eax\n"
                  "xorl %%eax, %%ecx\n"
                  "movl %%ecx, %0\n":"=r" (result)::"eax", "ecx");

    return (result == 0);
}

/*
 * Load supported features of the CPU to see if the PadLock is available.
 */
static int padlock_available(void)
{
    char vendor_string[16];
    unsigned int eax, edx;

    /* First check if the CPUID instruction is available at all... */
    if (!padlock_insn_cpuid_available())
        return 0;

    /* Are we running on the Centaur (VIA) CPU? */
    eax = 0x00000000;
    vendor_string[12] = 0;
    asm volatile ("pushl %%ebx\n"
                  "cpuid\n"
                  "movl %%ebx,(%%edi)\n"
                  "movl %%edx,4(%%edi)\n"
                  "movl %%ecx,8(%%edi)\n"
                  "popl %%ebx":"+a" (eax):"D"(vendor_string):"ecx", "edx");
    if (strcmp(vendor_string, "CentaurHauls") != 0)
        return 0;

    /* Check for Centaur Extended Feature Flags presence */
    eax = 0xC0000000;
    asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax)::"ecx", "edx");
    if (eax < 0xC0000001)
        return 0;

    /* Read the Centaur Extended Feature Flags */
    eax = 0xC0000001;
    asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax),
                  "=d"(edx)::"ecx");

    /* Fill up some flags */
    padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6));
    padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2));

    return padlock_use_ace + padlock_use_rng;
}

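/*
 * Editorial note, not part of the original file: leaf 0xC0000001 reports
 * the Centaur Extended Feature Flags in EDX as "exists"/"enabled" bit
 * pairs, which is why both masks above test two adjacent bits (and why the
 * MASM variant below checks bits 6 and 7, then 2 and 3, separately):
 *
 *     bit 2: RNG exists    bit 3: RNG enabled
 *     bit 6: ACE exists    bit 7: ACE enabled
 *
 * A minimal decoder under that assumption:
 */
#if 0
static int padlock_unit_usable(unsigned int edx, int exists_bit)
{
    /* usable only when both the "exists" and "enabled" bits are set */
    return (edx & (0x3 << exists_bit)) == (0x3 << exists_bit);
}
/* padlock_unit_usable(edx, 6) => ACE, padlock_unit_usable(edx, 2) => RNG */
#endif
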
#    ifndef OPENSSL_NO_AES
#     ifndef AES_ASM
/* Our own htonl()/ntohl() */
static inline void padlock_bswapl(AES_KEY *ks)
{
    size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]);
    unsigned int *key = ks->rd_key;

    while (i--) {
        asm volatile ("bswapl %0":"+r" (*key));
        key++;
    }
}
#     endif
#    endif

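/*
 * Editorial note, not part of the original file: AES_set_*_key() produces
 * the round keys as host-endian 32-bit words, while the xcrypt unit reads
 * the schedule as a byte stream, hence the "htonl()/ntohl()" above:
 * bswapl rewrites each word in place, e.g. 0x01020304 becomes 0x04030201.
 */
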
/*
 * Force key reload from memory to the CPU microcode. Loading EFLAGS from the
 * stack clears EFLAGS[30] which does the trick.
 */
static inline void padlock_reload_key(void)
{
    asm volatile ("pushfl; popfl");
}

#    ifndef OPENSSL_NO_AES
/*
 * This is heuristic key context tracing. At first one
 * believes that one should use atomic swap instructions,
 * but it's not actually necessary. Point is that if
 * padlock_saved_context was changed by another thread
 * after we've read it and before we compare it with cdata,
 * our key *shall* be reloaded upon thread context switch
 * and we are therefore set in either case...
 */
static inline void padlock_verify_context(struct padlock_cipher_data *cdata)
{
    asm volatile ("pushfl\n"
                  "       btl     $30,(%%esp)\n"
                  "       jnc     1f\n"
                  "       cmpl    %2,%1\n"
                  "       je      1f\n"
                  "       popfl\n"
                  "       subl    $4,%%esp\n"
                  "1:     addl    $4,%%esp\n"
                  "       movl    %2,%0":"+m" (padlock_saved_context)
                  :"r"(padlock_saved_context), "r"(cdata):"cc");
}

/* Template for padlock_xcrypt_* modes */
/*
 * BIG FAT WARNING: The offsets used with 'leal' instructions describe items
 * of the 'padlock_cipher_data' structure.
 */
#    define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)     \
static inline void *name(size_t cnt,                \
        struct padlock_cipher_data *cdata,          \
        void *out, const void *inp)                 \
{       void *iv;                                   \
        asm volatile ( "pushl %%ebx\n"              \
                "       leal 16(%0),%%edx\n"        \
                "       leal 32(%0),%%ebx\n"        \
                rep_xcrypt "\n"                     \
                "       popl %%ebx"                 \
                : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
                : "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
                : "edx", "cc", "memory");           \
        return iv;                                  \
}

/* Generate all functions with appropriate opcodes */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptecb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcbc */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptcfb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") /* rep xcryptofb */
#    endif
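/*
 * Editorial note, not part of the original file: the register protocol set
 * up by the PADLOCK_XCRYPT_ASM template above matches the layout of struct
 * padlock_cipher_data (see the "BIG FAT WARNING"):
 *
 *     %eax = cdata      (offset  0: IV)
 *     %edx = cdata + 16 (offset 16: control word)
 *     %ebx = cdata + 32 (offset 32: key schedule)
 *     %ecx = block count, %esi = input, %edi = output
 *
 * which is what the 'rep xcrypt*' instructions consume; on return %eax
 * points at the updated IV, hence the 'return iv'.
 */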
/* The RNG call itself */
static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in)
{
    unsigned int eax_out;

    asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
                  :"=a" (eax_out), "=m"(*(unsigned *)addr)
                  :"D"(addr), "d"(edx_in));

    return eax_out;
}

483 * value in Direction Flag one can expect and consequently have to
484 * apply "better-safe-than-sorry" approach and assume "undefined."
485 * I could explicitly clear it and restore the original value upon
486 * return from padlock_aes_cipher, but it's presumably too much
487 * trouble for too little gain...
488 *
489 * In case you wonder 'rep xcrypt*' instructions above are *not*
490 * affected by the Direction Flag and pointers advance toward
491 * larger addresses unconditionally.
492 */
493static inline unsigned char *
494padlock_memcpy(void *dst,const void *src,size_t n)
475/*
476 * Why not inline 'rep movsd'? I failed to find information on what value in
477 * Direction Flag one can expect and consequently have to apply
478 * "better-safe-than-sorry" approach and assume "undefined." I could
479 * explicitly clear it and restore the original value upon return from
480 * padlock_aes_cipher, but it's presumably too much trouble for too little
481 * gain... In case you wonder 'rep xcrypt*' instructions above are *not*
482 * affected by the Direction Flag and pointers advance toward larger
483 * addresses unconditionally.
484 */
485static inline unsigned char *padlock_memcpy(void *dst, const void *src,
486 size_t n)
495{
487{
496 long *d=dst;
497 const long *s=src;
488 long *d = dst;
489 const long *s = src;
498
490
499 n /= sizeof(*d);
500 do { *d++ = *s++; } while (--n);
491 n /= sizeof(*d);
492 do {
493 *d++ = *s++;
494 } while (--n);
501
495
502 return dst;
496 return dst;
503}
504
497}
498
505#elif defined(_MSC_VER)
#   elif defined(_MSC_VER)
/*
 * Unlike GCC these are real functions. In order to minimize impact
 * on performance we adhere to __fastcall calling convention in
 * order to get two first arguments passed through %ecx and %edx.
 * Which kind of suits very well, as instructions in question use
 * both %ecx and %edx as input:-)
 */
#    define REP_XCRYPT(code)                \
        _asm _emit 0xf3                     \
        _asm _emit 0x0f _asm _emit 0xa7     \
        _asm _emit code

/*
 * BIG FAT WARNING: The offsets used with 'lea' instructions describe items
 * of the 'padlock_cipher_data' structure.
 */
#    define PADLOCK_XCRYPT_ASM(name,code)   \
static void * __fastcall                    \
        name (size_t cnt, void *cdata,      \
              void *outp, const void *inp)  \
{       _asm    mov     eax,edx             \
        _asm    lea     edx,[eax+16]        \
        _asm    lea     ebx,[eax+32]        \
        _asm    mov     edi,outp            \
        _asm    mov     esi,inp             \
        REP_XCRYPT(code)                    \
}

PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)

static int __fastcall padlock_xstore(void *outp, unsigned int code)
{
    _asm    mov     edi,ecx
    _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
}

static void __fastcall padlock_reload_key(void)
{
    _asm pushfd
    _asm popfd
}

static void __fastcall padlock_verify_context(void *cdata)
{
    _asm {
        pushfd
        bt      DWORD PTR[esp],30
        jnc     skip
        cmp     ecx,padlock_saved_context
        je      skip
        popfd
        sub     esp,4
skip:   add     esp,4
        mov     padlock_saved_context,ecx
    }
}

static int
padlock_available(void)
{
    _asm {
        pushfd
        pop     eax
        mov     ecx,eax
        xor     eax,1<<21
        push    eax
        popfd
        pushfd
        pop     eax
        xor     eax,ecx
        bt      eax,21
        jnc     noluck
        mov     eax,0
        cpuid
        xor     eax,eax
        cmp     ebx,'tneC'
        jne     noluck
        cmp     edx,'Hrua'
        jne     noluck
        cmp     ecx,'slua'
        jne     noluck
        mov     eax,0xC0000000
        cpuid
        mov     edx,eax
        xor     eax,eax
        cmp     edx,0xC0000001
        jb      noluck
        mov     eax,0xC0000001
        cpuid
        xor     eax,eax
        bt      edx,6
        jnc     skip_a
        bt      edx,7
        jnc     skip_a
        mov     padlock_use_ace,1
        inc     eax
skip_a: bt      edx,2
        jnc     skip_r
        bt      edx,3
        jnc     skip_r
        mov     padlock_use_rng,1
        inc     eax
skip_r:
noluck:
    }
}

static void __fastcall padlock_bswapl(void *key)
{
    _asm {
        pushfd
        cld
        mov     esi,ecx
        mov     edi,ecx
        mov     ecx,60
up:     lodsd
        bswap   eax
        stosd
        loop    up
        popfd
    }
}

/*
 * MS actually specifies status of Direction Flag and compiler even manages
 * to compile following as 'rep movsd' all by itself...
 */
#    define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
#   endif

/* ===== AES encryption/decryption ===== */
#   ifndef OPENSSL_NO_AES

#    if defined(NID_aes_128_cfb128) && !defined(NID_aes_128_cfb)
#     define NID_aes_128_cfb NID_aes_128_cfb128
#    endif
#    if defined(NID_aes_128_ofb128) && !defined(NID_aes_128_ofb)
#     define NID_aes_128_ofb NID_aes_128_ofb128
#    endif
#    if defined(NID_aes_192_cfb128) && !defined(NID_aes_192_cfb)
#     define NID_aes_192_cfb NID_aes_192_cfb128
#    endif
#    if defined(NID_aes_192_ofb128) && !defined(NID_aes_192_ofb)
#     define NID_aes_192_ofb NID_aes_192_ofb128
#    endif
#    if defined(NID_aes_256_cfb128) && !defined(NID_aes_256_cfb)
#     define NID_aes_256_cfb NID_aes_256_cfb128
#    endif
#    if defined(NID_aes_256_ofb128) && !defined(NID_aes_256_ofb)
#     define NID_aes_256_ofb NID_aes_256_ofb128
#    endif

/*
 * List of supported ciphers.
 */
static int padlock_cipher_nids[] = {
    NID_aes_128_ecb,
    NID_aes_128_cbc,
    NID_aes_128_cfb,
    NID_aes_128_ofb,

    NID_aes_192_ecb,
    NID_aes_192_cbc,
    NID_aes_192_cfb,
    NID_aes_192_ofb,

    NID_aes_256_ecb,
    NID_aes_256_cbc,
    NID_aes_256_cfb,
    NID_aes_256_ofb,
};

static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids) /
                                      sizeof(padlock_cipher_nids[0]));

/* Function prototypes ... */
static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                                const unsigned char *iv, int enc);
static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                              const unsigned char *in, size_t nbytes);

#    define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) +     \
        ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) )
#    define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
        NEAREST_ALIGNED(ctx->cipher_data))

#    define EVP_CIPHER_block_size_ECB       AES_BLOCK_SIZE
#    define EVP_CIPHER_block_size_CBC       AES_BLOCK_SIZE
#    define EVP_CIPHER_block_size_OFB       1
#    define EVP_CIPHER_block_size_CFB       1

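/*
 * Editorial worked example, not part of the original file: EVP makes no
 * alignment promise for ctx->cipher_data, so the EVP_CIPHER declarations
 * below request sizeof(struct padlock_cipher_data) + 16 bytes and
 * NEAREST_ALIGNED rounds the pointer up to the next 16-byte boundary:
 *
 *     ptr = 0x1238 -> 0x1238 + ((0x10 - 0x8) & 0xF) = 0x1240
 *     ptr = 0x1240 -> 0x1240 + ((0x10 - 0x0) & 0xF) = 0x1240 (unchanged)
 */
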
/*
 * Declaring so many ciphers by hand would be a pain. Instead introduce a bit
 * of preprocessor magic :-)
 */
#    define DECLARE_AES_EVP(ksize,lmode,umode)      \
static const EVP_CIPHER padlock_aes_##ksize##_##lmode = {       \
        NID_aes_##ksize##_##lmode,              \
        EVP_CIPHER_block_size_##umode,          \
        AES_KEY_SIZE_##ksize,                   \
        AES_BLOCK_SIZE,                         \
        0 | EVP_CIPH_##umode##_MODE,            \
        padlock_aes_init_key,                   \
        padlock_aes_cipher,                     \
        NULL,                                   \
        sizeof(struct padlock_cipher_data) + 16,        \
        EVP_CIPHER_set_asn1_iv,                 \
        EVP_CIPHER_get_asn1_iv,                 \
        NULL,                                   \
        NULL                                    \
}

DECLARE_AES_EVP(128, ecb, ECB);
DECLARE_AES_EVP(128, cbc, CBC);
DECLARE_AES_EVP(128, cfb, CFB);
DECLARE_AES_EVP(128, ofb, OFB);

DECLARE_AES_EVP(192, ecb, ECB);
DECLARE_AES_EVP(192, cbc, CBC);
DECLARE_AES_EVP(192, cfb, CFB);
DECLARE_AES_EVP(192, ofb, OFB);

DECLARE_AES_EVP(256, ecb, ECB);
DECLARE_AES_EVP(256, cbc, CBC);
DECLARE_AES_EVP(256, cfb, CFB);
DECLARE_AES_EVP(256, ofb, OFB);

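/*
 * Editorial sketch, not part of the original file: one expansion of the
 * macro above, for reference. DECLARE_AES_EVP(128, cbc, CBC) defines
 * approximately:
 */
#if 0
static const EVP_CIPHER padlock_aes_128_cbc = {
    NID_aes_128_cbc,
    AES_BLOCK_SIZE,             /* EVP_CIPHER_block_size_CBC */
    AES_KEY_SIZE_128,           /* 16-byte key */
    AES_BLOCK_SIZE,             /* 16-byte IV */
    0 | EVP_CIPH_CBC_MODE,
    padlock_aes_init_key,
    padlock_aes_cipher,
    NULL,                       /* no cleanup needed */
    sizeof(struct padlock_cipher_data) + 16, /* +16 for realignment */
    EVP_CIPHER_set_asn1_iv,
    EVP_CIPHER_get_asn1_iv,
    NULL,
    NULL
};
#endif
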
static int
padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids,
                int nid)
{
    /* No specific cipher => return a list of supported nids ... */
    if (!cipher) {
        *nids = padlock_cipher_nids;
        return padlock_cipher_nids_num;
    }

    /* ... or the requested "cipher" otherwise */
    switch (nid) {
    case NID_aes_128_ecb:
        *cipher = &padlock_aes_128_ecb;
        break;
    case NID_aes_128_cbc:
        *cipher = &padlock_aes_128_cbc;
        break;
    case NID_aes_128_cfb:
        *cipher = &padlock_aes_128_cfb;
        break;
    case NID_aes_128_ofb:
        *cipher = &padlock_aes_128_ofb;
        break;

    case NID_aes_192_ecb:
        *cipher = &padlock_aes_192_ecb;
        break;
    case NID_aes_192_cbc:
        *cipher = &padlock_aes_192_cbc;
        break;
    case NID_aes_192_cfb:
        *cipher = &padlock_aes_192_cfb;
        break;
    case NID_aes_192_ofb:
        *cipher = &padlock_aes_192_ofb;
        break;

    case NID_aes_256_ecb:
        *cipher = &padlock_aes_256_ecb;
        break;
    case NID_aes_256_cbc:
        *cipher = &padlock_aes_256_cbc;
        break;
    case NID_aes_256_cfb:
        *cipher = &padlock_aes_256_cfb;
        break;
    case NID_aes_256_ofb:
        *cipher = &padlock_aes_256_ofb;
        break;

    default:
        /* Sorry, we don't support this NID */
        *cipher = NULL;
        return 0;
    }

    return 1;
}

/* Prepare the encryption key for PadLock usage */
static int
padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                     const unsigned char *iv, int enc)
{
    struct padlock_cipher_data *cdata;
    int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;

    if (key == NULL)
        return 0;               /* ERROR */

    cdata = ALIGNED_CIPHER_DATA(ctx);
    memset(cdata, 0, sizeof(struct padlock_cipher_data));

    /* Prepare Control word. */
    if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
        cdata->cword.b.encdec = 0;
    else
        cdata->cword.b.encdec = (ctx->encrypt == 0);
    cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
    cdata->cword.b.ksize = (key_len - 128) / 64;

    switch (key_len) {
    case 128:
        /*
         * PadLock can generate an extended key for AES128 in hardware
         */
        memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
        cdata->cword.b.keygen = 0;
        break;

    case 192:
    case 256:
        /*
         * Generate an extended AES key in software. Needed for AES192/AES256
         */
        /*
         * Well, the above applies to Stepping 8 CPUs and is listed as
         * hardware errata. They most likely will fix it at some point and
         * then a check for stepping would be due here.
         */
        if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
            EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE || enc)
            AES_set_encrypt_key(key, key_len, &cdata->ks);
        else
            AES_set_decrypt_key(key, key_len, &cdata->ks);
#    ifndef AES_ASM
        /*
         * OpenSSL C functions use byte-swapped extended key.
         */
        padlock_bswapl(&cdata->ks);
#    endif
        cdata->cword.b.keygen = 1;
        break;

    default:
        /* ERROR */
        return 0;
    }

    /*
     * This is done to cover for cases when user reuses the
     * context for new key. The catch is that if we don't do
     * this, padlock_aes_cipher might proceed with old key...
     */
    padlock_reload_key();

    return 1;
}

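/*
 * Editorial worked example, not part of the original file: for a 256-bit
 * key the control word fields computed above come out as
 *
 *     rounds = 10 + (256 - 128) / 32 = 14  (AES-256 uses 14 rounds)
 *     ksize  =      (256 - 128) / 64 = 2   (0 = 128, 1 = 192, 2 = 256 bit)
 *     keygen = 1                           (schedule expanded in software)
 *
 * while for a 128-bit key rounds = 10, ksize = 0 and keygen = 0, i.e. the
 * hardware expands the key itself.
 */
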
/*-
 * Simplified version of padlock_aes_cipher() used when
 * 1) both input and output buffers are at aligned addresses.
 * or when
 * 2) running on a newer CPU that doesn't require aligned buffers.
 */
static int
padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
                              const unsigned char *in_arg, size_t nbytes)
{
    struct padlock_cipher_data *cdata;
    void *iv;

    cdata = ALIGNED_CIPHER_DATA(ctx);
    padlock_verify_context(cdata);

    switch (EVP_CIPHER_CTX_mode(ctx)) {
    case EVP_CIPH_ECB_MODE:
        padlock_xcrypt_ecb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
        break;

    case EVP_CIPH_CBC_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        iv = padlock_xcrypt_cbc(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
                                in_arg);
        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_CFB_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        iv = padlock_xcrypt_cfb(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
                                in_arg);
        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_OFB_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        padlock_xcrypt_ofb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
        memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
        break;

    default:
        return 0;
    }

    memset(cdata->iv, 0, AES_BLOCK_SIZE);

    return 1;
}

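/*
 * Editorial sketch, not part of the original file: what a caller sees.
 * Once the engine is registered, the standard EVP calls below end up in
 * padlock_aes_init_key()/padlock_aes_cipher(); `e` is the ENGINE obtained
 * e.g. via ENGINE_by_id("padlock"). OpenSSL 0.9.8/1.0.x-style API.
 */
#if 0
static int padlock_evp_demo(ENGINE *e)
{
    unsigned char key[32] = { 0 }, iv[16] = { 0 };
    unsigned char in[64] = { 0 }, out[64 + 16];
    int outl = 0, ok;
    EVP_CIPHER_CTX ctx;

    EVP_CIPHER_CTX_init(&ctx);
    ok = EVP_EncryptInit_ex(&ctx, EVP_aes_256_cbc(), e, key, iv) &&
         EVP_EncryptUpdate(&ctx, out, &outl, in, (int)sizeof(in));
    EVP_CIPHER_CTX_cleanup(&ctx);
    return ok;                  /* out holds the hardware-encrypted data */
}
#endif
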
#    ifndef PADLOCK_CHUNK
#     define PADLOCK_CHUNK  512 /* Must be a power of 2 larger than 16 */
#    endif
#    if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
#     error "insane PADLOCK_CHUNK..."
#    endif

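/*
 * Editorial note, not part of the original file: the x & (x-1) test above
 * clears the lowest set bit, so it is zero exactly for powers of two, e.g.
 * 512 & 511 == 0 (accepted) but 384 & 383 == 256 (rejected).
 */
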
/*
 * Re-align the arguments to 16-Bytes boundaries and run the encryption
 * function itself. This function is not AES-specific.
 */
static int
padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
                   const unsigned char *in_arg, size_t nbytes)
{
    struct padlock_cipher_data *cdata;
    const void *inp;
    unsigned char *out;
    void *iv;
    int inp_misaligned, out_misaligned, realign_in_loop;
    size_t chunk, allocated = 0;

    /*
     * ctx->num is maintained in byte-oriented modes, such as CFB and OFB...
     */
    if ((chunk = ctx->num)) {   /* borrow chunk variable */
        unsigned char *ivp = ctx->iv;

        switch (EVP_CIPHER_CTX_mode(ctx)) {
        case EVP_CIPH_CFB_MODE:
            if (chunk >= AES_BLOCK_SIZE)
                return 0;       /* bogus value */

            if (ctx->encrypt)
                while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
                    ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
                    chunk++, nbytes--;
                }
            else
                while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
                    unsigned char c = *(in_arg++);
                    *(out_arg++) = c ^ ivp[chunk];
                    ivp[chunk++] = c, nbytes--;
                }

            ctx->num = chunk % AES_BLOCK_SIZE;
            break;
        case EVP_CIPH_OFB_MODE:
            if (chunk >= AES_BLOCK_SIZE)
                return 0;       /* bogus value */

            while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
                *(out_arg++) = *(in_arg++) ^ ivp[chunk];
                chunk++, nbytes--;
            }

            ctx->num = chunk % AES_BLOCK_SIZE;
            break;
        }
    }
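
    /*
     * Editorial note, not part of the original file: ctx->num is the byte
     * offset into the current 16-byte keystream block. E.g. after
     * processing 20 bytes in CFB mode ctx->num == 4, so the next call
     * first consumes the remaining 12 keystream bytes in the loops above
     * before invoking the hardware again.
     */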

    if (nbytes == 0)
        return 1;
#    if 0
    if (nbytes % AES_BLOCK_SIZE)
        return 0;               /* are we expected to do tail processing? */
#    else
    /*
     * nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC modes and
     * arbitrary value in byte-oriented modes, such as CFB and OFB...
     */
#    endif

    /*
     * VIA promises CPUs that won't require alignment in the future. For now
     * padlock_aes_align_required is initialized to 1 and the condition is
     * never met...
     */
    /*
     * C7 core is capable to manage unaligned input in non-ECB[!] mode, but
     * performance penalties appear to be approximately same as for software
     * alignment below or ~3x. They promise to improve it in the future, but
     * for now we can just as well pretend that it can only handle aligned
     * input...
     */
    if (!padlock_aes_align_required && (nbytes % AES_BLOCK_SIZE) == 0)
        return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

    inp_misaligned = (((size_t)in_arg) & 0x0F);
    out_misaligned = (((size_t)out_arg) & 0x0F);

    /*
     * Note that even if output is aligned and input not, I still prefer to
     * loop instead of copy the whole input and then encrypt in one stroke.
     * This is done in order to improve L1 cache utilization...
     */
    realign_in_loop = out_misaligned | inp_misaligned;

    if (!realign_in_loop && (nbytes % AES_BLOCK_SIZE) == 0)
        return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

    /* this takes one "if" out of the loops */
    chunk = nbytes;
    chunk %= PADLOCK_CHUNK;
    if (chunk == 0)
        chunk = PADLOCK_CHUNK;

    if (out_misaligned) {
        /* optimize for small input */
        allocated = (chunk < nbytes ? PADLOCK_CHUNK : nbytes);
        out = alloca(0x10 + allocated);
        out = NEAREST_ALIGNED(out);
    } else
        out = out_arg;

    cdata = ALIGNED_CIPHER_DATA(ctx);
    padlock_verify_context(cdata);

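    /*
     * Editorial worked example, not part of the original file: the chunk
     * setup above makes the *first* pass of each loop below handle the odd
     * tail, so the loop bodies need no extra size check. For nbytes = 1296
     * and PADLOCK_CHUNK = 512:
     *
     *     pass 1: chunk = 1296 % 512 = 272 bytes
     *     pass 2: chunk = 512 bytes
     *     pass 3: chunk = 512 bytes      (272 + 512 + 512 = 1296)
     */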
1013 switch (EVP_CIPHER_CTX_mode(ctx)) {
1014 case EVP_CIPH_ECB_MODE:
1015 do {
1016 if (inp_misaligned)
1017 inp = padlock_memcpy(out, in_arg, chunk);
1018 else
1019 inp = in_arg;
1020 in_arg += chunk;
1023 switch (EVP_CIPHER_CTX_mode(ctx)) {
1024 case EVP_CIPH_ECB_MODE:
1025 do {
1026 if (inp_misaligned)
1027 inp = padlock_memcpy(out, in_arg, chunk);
1028 else
1029 inp = in_arg;
1030 in_arg += chunk;
1021
1031
1022 padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
1032 padlock_xcrypt_ecb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
1023
1033
1024 if (out_misaligned)
1025 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1026 else
1027 out = out_arg+=chunk;
1034 if (out_misaligned)
1035 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1036 else
1037 out = out_arg += chunk;
1028
1038
1029 nbytes -= chunk;
1030 chunk = PADLOCK_CHUNK;
1031 } while (nbytes);
1032 break;
1039 nbytes -= chunk;
1040 chunk = PADLOCK_CHUNK;
1041 } while (nbytes);
1042 break;
1033
1043

    case EVP_CIPH_CBC_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        goto cbc_shortcut;
        do {
            if (iv != cdata->iv)
                memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
            chunk = PADLOCK_CHUNK;
 cbc_shortcut:                 /* optimize for small input */
            if (inp_misaligned)
                inp = padlock_memcpy(out, in_arg, chunk);
            else
                inp = in_arg;
            in_arg += chunk;

            iv = padlock_xcrypt_cbc(chunk / AES_BLOCK_SIZE, cdata, out, inp);

            if (out_misaligned)
                out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
            else
                out = out_arg += chunk;

        } while (nbytes -= chunk);
        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;
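
        /*
         * Editorial note, not in the original source: the goto above
         * jumps into the middle of the do/while so that the first,
         * possibly short, chunk shares the loop body with the full
         * PADLOCK_CHUNK iterations that follow; CFB below uses the
         * same pattern.
         */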

    case EVP_CIPH_CFB_MODE:
        memcpy(iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        chunk &= ~(AES_BLOCK_SIZE - 1);
        if (chunk)
            goto cfb_shortcut;
        else
            goto cfb_skiploop;
        do {
            if (iv != cdata->iv)
                memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
            chunk = PADLOCK_CHUNK;
 cfb_shortcut:                 /* optimize for small input */
            if (inp_misaligned)
                inp = padlock_memcpy(out, in_arg, chunk);
            else
                inp = in_arg;
            in_arg += chunk;

            iv = padlock_xcrypt_cfb(chunk / AES_BLOCK_SIZE, cdata, out, inp);

            if (out_misaligned)
                out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
            else
                out = out_arg += chunk;

            nbytes -= chunk;
        } while (nbytes >= AES_BLOCK_SIZE);
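
        /*
         * Editorial note, not in the original source: the sub-block tail
         * below makes one block of keystream by ECB-encrypting the IV in
         * place. CFB uses the AES *encryption* primitive in both
         * directions, so when the context is set up for decryption the
         * code temporarily clears cword.b.encdec around the xcrypt call;
         * padlock_reload_key() makes the CPU re-read the modified
         * control word.
         */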
 cfb_skiploop:
        if (nbytes) {
            unsigned char *ivp = cdata->iv;

            if (iv != ivp) {
                memcpy(ivp, iv, AES_BLOCK_SIZE);
                iv = ivp;
            }
            ctx->num = nbytes;
            if (cdata->cword.b.encdec) {
                cdata->cword.b.encdec = 0;
                padlock_reload_key();
                padlock_xcrypt_ecb(1, cdata, ivp, ivp);
                cdata->cword.b.encdec = 1;
                padlock_reload_key();
                while (nbytes) {
                    unsigned char c = *(in_arg++);
                    *(out_arg++) = c ^ *ivp;
                    *(ivp++) = c, nbytes--;
                }
            } else {
                padlock_reload_key();
                padlock_xcrypt_ecb(1, cdata, ivp, ivp);
                padlock_reload_key();
                while (nbytes) {
                    *ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
                    ivp++, nbytes--;
                }
            }
        }

        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_OFB_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        chunk &= ~(AES_BLOCK_SIZE - 1);
        if (chunk)
            do {
                if (inp_misaligned)
                    inp = padlock_memcpy(out, in_arg, chunk);
                else
                    inp = in_arg;
                in_arg += chunk;

                padlock_xcrypt_ofb(chunk / AES_BLOCK_SIZE, cdata, out, inp);

                if (out_misaligned)
                    out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
                else
                    out = out_arg += chunk;

                nbytes -= chunk;
                chunk = PADLOCK_CHUNK;
            } while (nbytes >= AES_BLOCK_SIZE);
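
        /*
         * Editorial note, not in the original source: unlike the CFB
         * tail, no cword.b.encdec toggle is needed for the leftover
         * bytes -- OFB encryption and decryption are the same
         * XOR-with-keystream operation, and the key setup earlier in
         * this file is assumed to leave the control word in the
         * encrypt direction for OFB.
         */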
        if (nbytes) {
            unsigned char *ivp = cdata->iv;

            ctx->num = nbytes;
            padlock_reload_key();   /* empirically found */
            padlock_xcrypt_ecb(1, cdata, ivp, ivp);
            padlock_reload_key();   /* empirically found */
            while (nbytes) {
                *(out_arg++) = *(in_arg++) ^ *ivp;
                ivp++, nbytes--;
            }
        }

        memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
        break;

    default:
        return 0;
    }

    /* Clean the realign buffer if it was used */
    if (out_misaligned) {
        volatile unsigned long *p = (void *)out;
        size_t n = allocated / sizeof(*p);
        while (n--)
            *p++ = 0;
    }
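
    /*
     * Editorial note, not in the original source: the volatile qualifier
     * keeps the compiler from treating the wipe above as a dead store
     * and optimizing it away -- the alloca()ed buffer held plaintext or
     * ciphertext and should not linger on the stack.
     */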

    memset(cdata->iv, 0, AES_BLOCK_SIZE);

    return 1;
}

# endif                         /* OPENSSL_NO_AES */

/* ===== Random Number Generator ===== */
/*
 * This code is not engaged. The reason is that it does not comply
 * with recommendations for VIA RNG usage for secure applications
 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
 * provide meaningful error control...
 */
/*
 * Wrapper that provides an interface between the API and the raw PadLock
 * RNG
 */
static int padlock_rand_bytes(unsigned char *output, int count)
{
    unsigned int eax, buf;
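
    /*
     * Editorial note, not in the original source: as the checks below
     * suggest, XSTORE reports status in EAX -- bits 4..0 carry the
     * number of bytes actually stored, bit 6 is clear when the RNG is
     * disabled, and bits 14..10 flag quality failures (DC bias, raw
     * bits, string filter).
     */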
    while (count >= 8) {
        eax = padlock_xstore(output, 0);
        if (!(eax & (1 << 6)))
            return 0;           /* RNG disabled */
        /* this ---vv--- covers DC bias, Raw Bits and String Filter */
        if (eax & (0x1F << 10))
            return 0;
        if ((eax & 0x1F) == 0)
            continue;           /* no data, retry... */
        if ((eax & 0x1F) != 8)
            return 0;           /* fatal failure... */
        output += 8;
        count -= 8;
    }
    while (count > 0) {
        eax = padlock_xstore(&buf, 3);
        if (!(eax & (1 << 6)))
            return 0;           /* RNG disabled */
        /* this ---vv--- covers DC bias, Raw Bits and String Filter */
        if (eax & (0x1F << 10))
            return 0;
        if ((eax & 0x1F) == 0)
            continue;           /* no data, retry... */
        if ((eax & 0x1F) != 1)
            return 0;           /* fatal failure... */
        *output++ = (unsigned char)buf;
        count--;
    }
    *(volatile unsigned int *)&buf = 0;

    return 1;
}

/* Dummy but necessary function */
static int padlock_rand_status(void)
{
    return 1;
}

/* Prepare structure for registration */
static RAND_METHOD padlock_rand = {
    NULL,                       /* seed */
    padlock_rand_bytes,         /* bytes */
    NULL,                       /* cleanup */
    NULL,                       /* add */
    padlock_rand_bytes,         /* pseudorand */
    padlock_rand_status,        /* rand status */
};
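
/*
 * Editorial sketch, not in the original file: if this RNG were engaged
 * (see the caveat above), an application would reach it through the
 * stock OpenSSL ENGINE/RAND interfaces, along these lines:
 *
 *     ENGINE_load_builtin_engines();
 *     ENGINE *e = ENGINE_by_id("padlock");
 *     if (e != NULL && ENGINE_init(e)
 *         && ENGINE_set_default(e, ENGINE_METHOD_RAND)) {
 *         unsigned char buf[32];
 *         if (RAND_bytes(buf, sizeof(buf)) == 1)
 *             ;                /* buf now holds hardware-derived bytes */
 *     }
 *
 * Only the wiring shown is illustrative; every call above is a standard
 * OpenSSL API.
 */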

# else                          /* !COMPILE_HW_PADLOCK */
# ifndef OPENSSL_NO_DYNAMIC_ENGINE
OPENSSL_EXPORT
    int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns);
OPENSSL_EXPORT
    int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns)
{
    return 0;
}

IMPLEMENT_DYNAMIC_CHECK_FN()
# endif
# endif                         /* COMPILE_HW_PADLOCK */
# endif                         /* !OPENSSL_NO_HW_PADLOCK */
#endif                          /* !OPENSSL_NO_HW */
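
/*
 * Editorial sketch, not in the original file: when DYNAMIC_ENGINE is in
 * effect the engine is built as a shared object and is usually pulled
 * in through the "dynamic" meta-engine; the path below is illustrative:
 *
 *     ENGINE *e = ENGINE_by_id("dynamic");
 *     ENGINE_ctrl_cmd_string(e, "SO_PATH", "/path/to/libpadlock.so", 0);
 *     ENGINE_ctrl_cmd_string(e, "LOAD", NULL, 0);
 *
 * In the fallback branch above, bind_engine() simply returns 0, so such
 * a load is cleanly refused on platforms without PadLock support.
 */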