1/*
2 * Support for VIA PadLock Advanced Cryptography Engine (ACE)
3 * Written by Michal Ludvig <michal@logix.cz>
4 *            http://www.logix.cz/michal
5 *
6 * Big thanks to Andy Polyakov for a help with optimization,
7 * assembler fixes, port to MS Windows and a lot of other
8 * valuable work on this engine!
9 */
10
11/* ====================================================================
12 * Copyright (c) 1999-2001 The OpenSSL Project.  All rights reserved.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 *
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 *
21 * 2. Redistributions in binary form must reproduce the above copyright
22 *    notice, this list of conditions and the following disclaimer in
23 *    the documentation and/or other materials provided with the
24 *    distribution.
25 *
26 * 3. All advertising materials mentioning features or use of this
27 *    software must display the following acknowledgment:
28 *    "This product includes software developed by the OpenSSL Project
29 *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
30 *
31 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
32 *    endorse or promote products derived from this software without
33 *    prior written permission. For written permission, please contact
34 *    licensing@OpenSSL.org.
35 *
36 * 5. Products derived from this software may not be called "OpenSSL"
37 *    nor may "OpenSSL" appear in their names without prior written
38 *    permission of the OpenSSL Project.
39 *
40 * 6. Redistributions of any form whatsoever must retain the following
41 *    acknowledgment:
42 *    "This product includes software developed by the OpenSSL Project
43 *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
46 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
49 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
50 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
51 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
52 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
54 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
55 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
56 * OF THE POSSIBILITY OF SUCH DAMAGE.
57 * ====================================================================
58 *
59 * This product includes cryptographic software written by Eric Young
60 * (eay@cryptsoft.com).  This product includes software written by Tim
61 * Hudson (tjh@cryptsoft.com).
62 *
63 */
64
65
66#include <stdio.h>
67#include <string.h>
68
69#include <openssl/opensslconf.h>
70#include <openssl/crypto.h>
71#include <openssl/dso.h>
72#include <openssl/engine.h>
73#include <openssl/evp.h>
74#ifndef OPENSSL_NO_AES
75#include <openssl/aes.h>
76#endif
77#include <openssl/rand.h>
78#include <openssl/err.h>
79
80#ifndef OPENSSL_NO_HW
81#ifndef OPENSSL_NO_HW_PADLOCK
82
83/* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
84#if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
85#  ifndef OPENSSL_NO_DYNAMIC_ENGINE
86#    define DYNAMIC_ENGINE
87#  endif
88#elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
89#  ifdef ENGINE_DYNAMIC_SUPPORT
90#    define DYNAMIC_ENGINE
91#  endif
92#else
93#  error "Only OpenSSL >= 0.9.7 is supported"
94#endif
95
96/* VIA PadLock AES is available *ONLY* on some x86 CPUs.
   Not only does it not exist elsewhere, it cannot
   even be compiled on other platforms!
99
100   In addition, because of the heavy use of inline assembler,
101   compiler choice is limited to GCC and Microsoft C. */
102#undef COMPILE_HW_PADLOCK
103#if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
104# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
105     (defined(_MSC_VER) && defined(_M_IX86))
106#  define COMPILE_HW_PADLOCK
107static ENGINE *ENGINE_padlock (void);
108# endif
109#endif
110
void ENGINE_load_padlock (void)
{
/* On non-x86 CPUs it just returns. */
#ifdef COMPILE_HW_PADLOCK
	/* Build the engine, hand it to the global ENGINE list, then
	   drop our own reference - the list keeps the engine alive. */
	ENGINE *toadd = ENGINE_padlock ();
	if (!toadd) return;
	ENGINE_add (toadd);
	ENGINE_free (toadd);
	/* ENGINE_add() may queue a harmless error (e.g. duplicate id);
	   don't let it leak to the caller. */
	ERR_clear_error ();
#endif
}
122
123#ifdef COMPILE_HW_PADLOCK
124/* We do these includes here to avoid header problems on platforms that
125   do not have the VIA padlock anyway... */
126#ifdef _MSC_VER
127# include <malloc.h>
128# define alloca _alloca
129#elif defined(NETWARE_CLIB) && defined(__GNUC__)
130  void *alloca(size_t);
131# define alloca(s) __builtin_alloca(s)
132#else
133# include <stdlib.h>
134#endif
135
136/* Function for ENGINE detection and control */
137static int padlock_available(void);
138static int padlock_init(ENGINE *e);
139
140/* RNG Stuff */
141static RAND_METHOD padlock_rand;
142
143/* Cipher Stuff */
144#ifndef OPENSSL_NO_AES
145static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid);
146#endif
147
148/* Engine names */
149static const char *padlock_id = "padlock";
150static char padlock_name[100];
151
152/* Available features */
153static int padlock_use_ace = 0;	/* Advanced Cryptography Engine */
154static int padlock_use_rng = 0;	/* Random Number Generator */
155#ifndef OPENSSL_NO_AES
156static int padlock_aes_align_required = 1;
157#endif
158
159/* ===== Engine "management" functions ===== */
160
161/* Prepare the ENGINE structure for registration */
/* Probe the CPU, build the engine name and register all callbacks on
   the given ENGINE. Returns 1 on success, 0 if any registration fails. */
static int
padlock_bind_helper(ENGINE *e)
{
	/* Check available features */
	padlock_available();

#if 1	/* disable RNG for now, see commentary in vicinity of RNG code */
	padlock_use_rng=0;
#endif

	/* Generate a nice engine name with available features */
	BIO_snprintf(padlock_name, sizeof(padlock_name),
		"VIA PadLock (%s, %s)",
		 padlock_use_rng ? "RNG" : "no-RNG",
		 padlock_use_ace ? "ACE" : "no-ACE");

	/* Register everything or return with an error */
	if (!ENGINE_set_id(e, padlock_id) ||
	    !ENGINE_set_name(e, padlock_name) ||

	    !ENGINE_set_init_function(e, padlock_init) ||
#ifndef OPENSSL_NO_AES
	    /* Ciphers/RNG are only registered when the unit exists. */
	    (padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) ||
#endif
	    (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) {
		return 0;
	}

	/* Everything looks good */
	return 1;
}
193
194/* Constructor */
195static ENGINE *
196ENGINE_padlock(void)
197{
198	ENGINE *eng = ENGINE_new();
199
200	if (!eng) {
201		return NULL;
202	}
203
204	if (!padlock_bind_helper(eng)) {
205		ENGINE_free(eng);
206		return NULL;
207	}
208
209	return eng;
210}
211
212/* Check availability of the engine */
213static int
214padlock_init(ENGINE *e)
215{
216	return (padlock_use_rng || padlock_use_ace);
217}
218
219/* This stuff is needed if this ENGINE is being compiled into a self-contained
220 * shared-library.
221 */
222#ifdef DYNAMIC_ENGINE
223static int
224padlock_bind_fn(ENGINE *e, const char *id)
225{
226	if (id && (strcmp(id, padlock_id) != 0)) {
227		return 0;
228	}
229
230	if (!padlock_bind_helper(e))  {
231		return 0;
232	}
233
234	return 1;
235}
236
237IMPLEMENT_DYNAMIC_CHECK_FN ()
238IMPLEMENT_DYNAMIC_BIND_FN (padlock_bind_fn)
239#endif /* DYNAMIC_ENGINE */
240
241/* ===== Here comes the "real" engine ===== */
242
243#ifndef OPENSSL_NO_AES
244/* Some AES-related constants */
245#define AES_BLOCK_SIZE		16
246#define AES_KEY_SIZE_128	16
247#define AES_KEY_SIZE_192	24
248#define AES_KEY_SIZE_256	32
249
250/* Here we store the status information relevant to the
251   current context. */
252/* BIG FAT WARNING:
253 * 	Inline assembler in PADLOCK_XCRYPT_ASM()
254 * 	depends on the order of items in this structure.
255 * 	Don't blindly modify, reorder, etc!
256 */
struct padlock_cipher_data
{
	unsigned char iv[AES_BLOCK_SIZE];	/* Initialization vector */
	union {	unsigned int pad[4];
		struct {
			int rounds:4;	/* AES rounds: 10/12/14, set from key size */
			int dgst:1;	/* n/a in C3 */
			int align:1;	/* n/a in C3 */
			int ciphr:1;	/* n/a in C3 */
			unsigned int keygen:1;	/* 1: software key schedule in ks;
						   0: CPU expands the 128-bit key */
			int interm:1;
			unsigned int encdec:1;	/* 1: decrypt, 0: encrypt */
			int ksize:2;	/* (key bits - 128) / 64 */
		} b;
	} cword;		/* Control word */
	AES_KEY ks;		/* Encryption key */
};
274
275/*
276 * Essentially this variable belongs in thread local storage.
277 * Having this variable global on the other hand can only cause
278 * few bogus key reloads [if any at all on single-CPU system],
 * so we accept the penalty...
280 */
281static volatile struct padlock_cipher_data *padlock_saved_context;
282#endif
283
284/*
285 * =======================================================
286 * Inline assembler section(s).
287 * =======================================================
288 * Order of arguments is chosen to facilitate Windows port
289 * using __fastcall calling convention. If you wish to add
290 * more routines, keep in mind that first __fastcall
291 * argument is passed in %ecx and second - in %edx.
292 * =======================================================
293 */
294#if defined(__GNUC__) && __GNUC__>=2
295/*
296 * As for excessive "push %ebx"/"pop %ebx" found all over.
297 * When generating position-independent code GCC won't let
298 * us use "b" in assembler templates nor even respect "ebx"
299 * in "clobber description." Therefore the trouble...
300 */
301
302/* Helper function - check if a CPUID instruction
303   is available on this CPU */
/* Returns 1 when the CPUID instruction exists on this CPU, 0 otherwise. */
static int
padlock_insn_cpuid_available(void)
{
	int result = -1;

	/* We're checking if the bit #21 of EFLAGS
	   can be toggled. If yes = CPUID is available. */
	asm volatile (
		"pushf\n"
		"popl %%eax\n"
		"xorl $0x200000, %%eax\n"
		"movl %%eax, %%ecx\n"
		"andl $0x200000, %%ecx\n"
		"pushl %%eax\n"
		"popf\n"
		"pushf\n"
		"popl %%eax\n"
		"andl $0x200000, %%eax\n"
		"xorl %%eax, %%ecx\n"
		"movl %%ecx, %0\n"
		: "=r" (result) : : "eax", "ecx");

	/* result==0 means the toggled value stuck => CPUID present. */
	return (result == 0);
}
328
329/* Load supported features of the CPU to see if
330   the PadLock is available. */
331static int
332padlock_available(void)
333{
334	char vendor_string[16];
335	unsigned int eax, edx;
336
337	/* First check if the CPUID instruction is available at all... */
338	if (! padlock_insn_cpuid_available())
339		return 0;
340
341	/* Are we running on the Centaur (VIA) CPU? */
342	eax = 0x00000000;
343	vendor_string[12] = 0;
344	asm volatile (
345		"pushl	%%ebx\n"
346		"cpuid\n"
347		"movl	%%ebx,(%%edi)\n"
348		"movl	%%edx,4(%%edi)\n"
349		"movl	%%ecx,8(%%edi)\n"
350		"popl	%%ebx"
351		: "+a"(eax) : "D"(vendor_string) : "ecx", "edx");
352	if (strcmp(vendor_string, "CentaurHauls") != 0)
353		return 0;
354
355	/* Check for Centaur Extended Feature Flags presence */
356	eax = 0xC0000000;
357	asm volatile ("pushl %%ebx; cpuid; popl	%%ebx"
358		: "+a"(eax) : : "ecx", "edx");
359	if (eax < 0xC0000001)
360		return 0;
361
362	/* Read the Centaur Extended Feature Flags */
363	eax = 0xC0000001;
364	asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
365		: "+a"(eax), "=d"(edx) : : "ecx");
366
367	/* Fill up some flags */
368	padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
369	padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
370
371	return padlock_use_ace + padlock_use_rng;
372}
373
374#ifndef OPENSSL_NO_AES
375/* Our own htonl()/ntohl() */
/* Byte-swap every 32-bit word of the expanded key schedule in place.
   Needed because OpenSSL's C key schedule is stored byte-swapped
   relative to what the hardware expects (see AES_ASM note at use site). */
static inline void
padlock_bswapl(AES_KEY *ks)
{
	size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
	unsigned int *key = ks->rd_key;

	while (i--) {
		asm volatile ("bswapl %0" : "+r"(*key));
		key++;
	}
}
387#endif
388
389/* Force key reload from memory to the CPU microcode.
390   Loading EFLAGS from the stack clears EFLAGS[30]
391   which does the trick. */
static inline void
padlock_reload_key(void)
{
	/* Rewriting EFLAGS via popfl clears EFLAGS[30], which marks the
	   CPU's cached key state invalid (see comment above). */
	asm volatile ("pushfl; popfl");
}
397
398#ifndef OPENSSL_NO_AES
399/*
400 * This is heuristic key context tracing. At first one
401 * believes that one should use atomic swap instructions,
402 * but it's not actually necessary. Point is that if
403 * padlock_saved_context was changed by another thread
404 * after we've read it and before we compare it with cdata,
405 * our key *shall* be reloaded upon thread context switch
406 * and we are therefore set in either case...
407 */
/* Ensure the CPU will (re)load the key for `cdata' before the next
   xcrypt: if EFLAGS[30] is already clear a reload happens anyway; if
   the saved context is already cdata nothing needs doing; otherwise
   pop the saved EFLAGS (clearing bit 30) to force a reload. In all
   cases cdata is recorded as the current context. */
static inline void
padlock_verify_context(struct padlock_cipher_data *cdata)
{
	asm volatile (
	"pushfl\n"
"	btl	$30,(%%esp)\n"
"	jnc	1f\n"
"	cmpl	%2,%1\n"
"	je	1f\n"
"	popfl\n"
"	subl	$4,%%esp\n"
"1:	addl	$4,%%esp\n"
"	movl	%2,%0"
	:"+m"(padlock_saved_context)
	: "r"(padlock_saved_context), "r"(cdata) : "cc");
}
424
425/* Template for padlock_xcrypt_* modes */
426/* BIG FAT WARNING:
427 * 	The offsets used with 'leal' instructions
428 * 	describe items of the 'padlock_cipher_data'
429 * 	structure.
430 */
/* Emits: static inline void *name(cnt, cdata, out, inp).
 * Register setup for 'rep xcrypt*': %eax = cdata (so 16(%0) is the
 * control word and 32(%0) the key schedule, matching the struct layout
 * above), %ecx = block count, %edi = out, %esi = inp. The instruction
 * leaves the IV pointer in %eax, which is returned. %ebx is pushed and
 * popped by hand because PIC builds reserve it. */
#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)	\
static inline void *name(size_t cnt,		\
	struct padlock_cipher_data *cdata,	\
	void *out, const void *inp) 		\
{	void *iv; 				\
	asm volatile ( "pushl	%%ebx\n"	\
		"	leal	16(%0),%%edx\n"	\
		"	leal	32(%0),%%ebx\n"	\
			rep_xcrypt "\n"		\
		"	popl	%%ebx"		\
		: "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
		: "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
		: "edx", "cc", "memory");	\
	return iv;				\
}

/* Generate all functions with appropriate opcodes */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")	/* rep xcryptecb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")	/* rep xcryptcbc */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")	/* rep xcryptcfb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")	/* rep xcryptofb */
452#endif
453
454/* The RNG call itself */
/* Execute one 'xstore' (hardware RNG read) into *addr.
 * edx_in selects the xstore operating mode (quality-factor code per
 * VIA documentation - TODO confirm exact semantics). The returned
 * status word (%eax) carries the byte count and error flags that
 * padlock_rand_bytes() inspects. */
static inline unsigned int
padlock_xstore(void *addr, unsigned int edx_in)
{
	unsigned int eax_out;

	asm volatile (".byte 0x0f,0xa7,0xc0"	/* xstore */
	    : "=a"(eax_out),"=m"(*(unsigned *)addr)
	    : "D"(addr), "d" (edx_in)
	    );

	return eax_out;
}
467
468/* Why not inline 'rep movsd'? I failed to find information on what
469 * value in Direction Flag one can expect and consequently have to
470 * apply "better-safe-than-sorry" approach and assume "undefined."
471 * I could explicitly clear it and restore the original value upon
472 * return from padlock_aes_cipher, but it's presumably too much
473 * trouble for too little gain...
474 *
475 * In case you wonder 'rep xcrypt*' instructions above are *not*
476 * affected by the Direction Flag and pointers advance toward
477 * larger addresses unconditionally.
478 */
/* Word-at-a-time copy used to stage data through the aligned bounce
 * buffer. Callers pass sizes that are multiples of AES_BLOCK_SIZE, so
 * whole machine words cover the entire range; any sub-word remainder
 * is intentionally dropped (the MSC variant masks with ~3U to the same
 * effect). Returns dst. */
static inline unsigned char *
padlock_memcpy(void *dst,const void *src,size_t n)
{
	long       *d=dst;
	const long *s=src;

	n /= sizeof(*d);
	/* A while-loop (not the original do/while) avoids an
	 * unsigned-counter underflow if n ever computes to zero words. */
	while (n--)
		*d++ = *s++;

	return dst;
}
490
491#elif defined(_MSC_VER)
492/*
493 * Unlike GCC these are real functions. In order to minimize impact
494 * on performance we adhere to __fastcall calling convention in
495 * order to get two first arguments passed through %ecx and %edx.
496 * Which kind of suits very well, as instructions in question use
497 * both %ecx and %edx as input:-)
498 */
/* Emit the raw 'rep xcrypt*' opcode bytes; 'code' selects the mode
   (0xc8 ecb, 0xd0 cbc, 0xe0 cfb, 0xe8 ofb - see instantiations below). */
#define REP_XCRYPT(code)		\
	_asm _emit 0xf3			\
	_asm _emit 0x0f _asm _emit 0xa7	\
	_asm _emit code

/* BIG FAT WARNING:
 * 	The offsets used with 'lea' instructions
 * 	describe items of the 'padlock_cipher_data'
 * 	structure.
 */
/* __fastcall puts cnt in ecx and cdata in edx; the body moves cdata to
   eax and derives control-word (eax+16) and key (eax+32) pointers,
   then loads edi/esi from the stack arguments before the xcrypt. */
#define PADLOCK_XCRYPT_ASM(name,code)	\
static void * __fastcall 		\
	name (size_t cnt, void *cdata,	\
	void *outp, const void *inp)	\
{	_asm	mov	eax,edx		\
	_asm	lea	edx,[eax+16]	\
	_asm	lea	ebx,[eax+32]	\
	_asm	mov	edi,outp	\
	_asm	mov	esi,inp		\
	REP_XCRYPT(code)		\
}

PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)
525
/* xstore via MSVC inline asm: __fastcall delivers outp in ecx (moved
   to edi) and the mode code in edx, as the instruction requires. The
   status word is left in eax, which MSVC treats as the return value
   (hence no explicit 'return'). */
static int __fastcall
padlock_xstore(void *outp,unsigned int code)
{	_asm	mov	edi,ecx
	_asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
}
531
/* Force a key reload: rewriting EFLAGS clears EFLAGS[30]
   (same trick as the GCC variant above). */
static void __fastcall
padlock_reload_key(void)
{	_asm pushfd _asm popfd		}
535
/* MSVC twin of the GCC padlock_verify_context above: if EFLAGS[30] is
   set and the saved context differs from cdata (in ecx, __fastcall),
   pop the saved EFLAGS to clear bit 30 and force a key reload; then
   record cdata as current. */
static void __fastcall
padlock_verify_context(void *cdata)
{	_asm	{
		pushfd
		bt	DWORD PTR[esp],30
		jnc	skip
		cmp	ecx,padlock_saved_context
		je	skip
		popfd
		sub	esp,4
	skip:	add	esp,4
		mov	padlock_saved_context,ecx
		}
}
550
/* MSVC twin of the GCC padlock_available: toggle EFLAGS bit 21 to
   detect CPUID, match the 'CentaurHauls' vendor string (compared as
   three little-endian dwords), then test the Centaur Extended Feature
   Flags - ACE needs edx bits 6 and 7, RNG bits 2 and 3. Returns the
   number of units found in eax (0 on any 'noluck' exit). */
static int
padlock_available(void)
{	_asm	{
		pushfd
		pop	eax
		mov	ecx,eax
		xor	eax,1<<21
		push	eax
		popfd
		pushfd
		pop	eax
		xor	eax,ecx
		bt	eax,21
		jnc	noluck
		mov	eax,0
		cpuid
		xor	eax,eax
		cmp	ebx,'tneC'
		jne	noluck
		cmp	edx,'Hrua'
		jne	noluck
		cmp	ecx,'slua'
		jne	noluck
		mov	eax,0xC0000000
		cpuid
		mov	edx,eax
		xor	eax,eax
		cmp	edx,0xC0000001
		jb	noluck
		mov	eax,0xC0000001
		cpuid
		xor	eax,eax
		bt	edx,6
		jnc	skip_a
		bt	edx,7
		jnc	skip_a
		mov	padlock_use_ace,1
		inc	eax
	skip_a:	bt	edx,2
		jnc	skip_r
		bt	edx,3
		jnc	skip_r
		mov	padlock_use_rng,1
		inc	eax
	skip_r:
	noluck:
		}
}
599
/* Byte-swap 60 32-bit words of the key schedule in place (key in ecx
   via __fastcall); the direction flag is saved/restored around 'cld'. */
static void __fastcall
padlock_bswapl(void *key)
{	_asm	{
		pushfd
		cld
		mov	esi,ecx
		mov	edi,ecx
		mov	ecx,60
	up:	lodsd
		bswap	eax
		stosd
		loop	up
		popfd
		}
}

/* MS actually specifies status of Direction Flag and compiler even
 * manages to compile following as 'rep movsd' all by itself...
 */
#define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
620#endif
621
622/* ===== AES encryption/decryption ===== */
623#ifndef OPENSSL_NO_AES
624
625#if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
626#define NID_aes_128_cfb	NID_aes_128_cfb128
627#endif
628
629#if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
630#define NID_aes_128_ofb	NID_aes_128_ofb128
631#endif
632
633#if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
634#define NID_aes_192_cfb	NID_aes_192_cfb128
635#endif
636
637#if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
638#define NID_aes_192_ofb	NID_aes_192_ofb128
639#endif
640
641#if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
642#define NID_aes_256_cfb	NID_aes_256_cfb128
643#endif
644
645#if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
646#define NID_aes_256_ofb	NID_aes_256_ofb128
647#endif
648
649/* List of supported ciphers. */
static int padlock_cipher_nids[] = {
	NID_aes_128_ecb,
	NID_aes_128_cbc,
	NID_aes_128_cfb,
	NID_aes_128_ofb,

	NID_aes_192_ecb,
	NID_aes_192_cbc,
	NID_aes_192_cfb,
	NID_aes_192_ofb,

	NID_aes_256_ecb,
	NID_aes_256_cbc,
	NID_aes_256_cfb,
	NID_aes_256_ofb,
};
/* Number of entries in padlock_cipher_nids[] above. */
static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/
				      sizeof(padlock_cipher_nids[0]));
668
669/* Function prototypes ... */
670static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
671				const unsigned char *iv, int enc);
672static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
673			      const unsigned char *in, size_t nbytes);
674
/* Round a pointer up to the next 16-byte boundary (identity when
   already aligned) - the xcrypt unit needs 16-byte aligned data. */
#define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) +		\
	( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F )	)
/* The ctx_size below reserves 16 spare bytes so this realignment
   always fits inside cipher_data. */
#define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
	NEAREST_ALIGNED(ctx->cipher_data))

/* EVP block sizes per mode: ECB/CBC are block ciphers, CFB/OFB are
   presented to EVP as byte-oriented (block size 1). */
#define EVP_CIPHER_block_size_ECB	AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_CBC	AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_OFB	1
#define EVP_CIPHER_block_size_CFB	1

/* Declaring so many ciphers by hand would be a pain.
   Instead introduce a bit of preprocessor magic :-) */
/* Positional EVP_CIPHER initializer (0.9.x layout): nid, block size,
   key length, iv length, flags, init, do_cipher, cleanup, ctx_size,
   set_asn1, get_asn1, ctrl, app_data. */
#define	DECLARE_AES_EVP(ksize,lmode,umode)	\
static const EVP_CIPHER padlock_aes_##ksize##_##lmode = {	\
	NID_aes_##ksize##_##lmode,		\
	EVP_CIPHER_block_size_##umode,	\
	AES_KEY_SIZE_##ksize,		\
	AES_BLOCK_SIZE,			\
	0 | EVP_CIPH_##umode##_MODE,	\
	padlock_aes_init_key,		\
	padlock_aes_cipher,		\
	NULL,				\
	sizeof(struct padlock_cipher_data) + 16,	\
	EVP_CIPHER_set_asn1_iv,		\
	EVP_CIPHER_get_asn1_iv,		\
	NULL,				\
	NULL				\
}

DECLARE_AES_EVP(128,ecb,ECB);
DECLARE_AES_EVP(128,cbc,CBC);
DECLARE_AES_EVP(128,cfb,CFB);
DECLARE_AES_EVP(128,ofb,OFB);

DECLARE_AES_EVP(192,ecb,ECB);
DECLARE_AES_EVP(192,cbc,CBC);
DECLARE_AES_EVP(192,cfb,CFB);
DECLARE_AES_EVP(192,ofb,OFB);

DECLARE_AES_EVP(256,ecb,ECB);
DECLARE_AES_EVP(256,cbc,CBC);
DECLARE_AES_EVP(256,cfb,CFB);
DECLARE_AES_EVP(256,ofb,OFB);
718
719static int
720padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid)
721{
722	/* No specific cipher => return a list of supported nids ... */
723	if (!cipher) {
724		*nids = padlock_cipher_nids;
725		return padlock_cipher_nids_num;
726	}
727
728	/* ... or the requested "cipher" otherwise */
729	switch (nid) {
730	  case NID_aes_128_ecb:
731	    *cipher = &padlock_aes_128_ecb;
732	    break;
733	  case NID_aes_128_cbc:
734	    *cipher = &padlock_aes_128_cbc;
735	    break;
736	  case NID_aes_128_cfb:
737	    *cipher = &padlock_aes_128_cfb;
738	    break;
739	  case NID_aes_128_ofb:
740	    *cipher = &padlock_aes_128_ofb;
741	    break;
742
743	  case NID_aes_192_ecb:
744	    *cipher = &padlock_aes_192_ecb;
745	    break;
746	  case NID_aes_192_cbc:
747	    *cipher = &padlock_aes_192_cbc;
748	    break;
749	  case NID_aes_192_cfb:
750	    *cipher = &padlock_aes_192_cfb;
751	    break;
752	  case NID_aes_192_ofb:
753	    *cipher = &padlock_aes_192_ofb;
754	    break;
755
756	  case NID_aes_256_ecb:
757	    *cipher = &padlock_aes_256_ecb;
758	    break;
759	  case NID_aes_256_cbc:
760	    *cipher = &padlock_aes_256_cbc;
761	    break;
762	  case NID_aes_256_cfb:
763	    *cipher = &padlock_aes_256_cfb;
764	    break;
765	  case NID_aes_256_ofb:
766	    *cipher = &padlock_aes_256_ofb;
767	    break;
768
769	  default:
770	    /* Sorry, we don't support this NID */
771	    *cipher = NULL;
772	    return 0;
773	}
774
775	return 1;
776}
777
778/* Prepare the encryption key for PadLock usage */
/* Prepare the aligned padlock_cipher_data for this context: build the
 * control word from the key length and direction, and install either
 * the raw 128-bit key (hardware keygen) or a software-expanded
 * schedule for 192/256-bit keys. Returns 1 on success, 0 on error. */
static int
padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
		      const unsigned char *iv, int enc)
{
	struct padlock_cipher_data *cdata;
	/* key length in bits */
	int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;

	if (key==NULL) return 0;	/* ERROR */

	cdata = ALIGNED_CIPHER_DATA(ctx);
	memset(cdata, 0, sizeof(struct padlock_cipher_data));

	/* Prepare Control word. */
	/* OFB always runs the block cipher forward, regardless of the
	   EVP direction; otherwise encdec=1 selects decryption. */
	if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
		cdata->cword.b.encdec = 0;
	else
		cdata->cword.b.encdec = (ctx->encrypt == 0);
	cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
	cdata->cword.b.ksize = (key_len - 128) / 64;

	switch(key_len) {
		case 128:
			/* PadLock can generate an extended key for
			   AES128 in hardware */
			memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
			cdata->cword.b.keygen = 0;
			break;

		case 192:
		case 256:
			/* Generate an extended AES key in software.
			   Needed for AES192/AES256 */
			/* Well, the above applies to Stepping 8 CPUs
			   and is listed as hardware errata. They most
			   likely will fix it at some point and then
			   a check for stepping would be due here. */
			/* CFB/OFB only ever encrypt, so they need the
			   encryption schedule even when decrypting. */
			if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
			    EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE ||
			    enc)
				AES_set_encrypt_key(key, key_len, &cdata->ks);
			else
				AES_set_decrypt_key(key, key_len, &cdata->ks);
#ifndef AES_ASM
			/* OpenSSL C functions use byte-swapped extended key. */
			padlock_bswapl(&cdata->ks);
#endif
			cdata->cword.b.keygen = 1;
			break;

		default:
			/* ERROR */
			return 0;
	}

	/*
	 * This is done to cover for cases when user reuses the
	 * context for new key. The catch is that if we don't do
	 * this, padlock_aes_cipher might proceed with old key...
	 */
	padlock_reload_key ();

	return 1;
}
842
843/*
844 * Simplified version of padlock_aes_cipher() used when
845 * 1) both input and output buffers are at aligned addresses.
846 * or when
847 * 2) running on a newer CPU that doesn't require aligned buffers.
848 */
static int
padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
		const unsigned char *in_arg, size_t nbytes)
{
	struct padlock_cipher_data *cdata;
	void  *iv;

	cdata = ALIGNED_CIPHER_DATA(ctx);
	padlock_verify_context(cdata);

	/* One hardware call per mode; nbytes/AES_BLOCK_SIZE truncates,
	   so callers must pass a multiple of the block size here. */
	switch (EVP_CIPHER_CTX_mode(ctx)) {
	case EVP_CIPH_ECB_MODE:
		padlock_xcrypt_ecb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		break;

	case EVP_CIPH_CBC_MODE:
		/* CBC/CFB return the next IV via the xcrypt result. */
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		iv = padlock_xcrypt_cbc(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_CFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		iv = padlock_xcrypt_cfb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_OFB_MODE:
		/* OFB leaves the next IV in cdata->iv itself. */
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		padlock_xcrypt_ofb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
		break;

	default:
		return 0;
	}

	/* Don't leave IV material lying around in the context. */
	memset(cdata->iv, 0, AES_BLOCK_SIZE);

	return 1;
}
890
891#ifndef  PADLOCK_CHUNK
892# define PADLOCK_CHUNK	512	/* Must be a power of 2 larger than 16 */
893#endif
894#if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
895# error "insane PADLOCK_CHUNK..."
896#endif
897
898/* Re-align the arguments to 16-Bytes boundaries and run the
899   encryption function itself. This function is not AES-specific. */
static int
padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
		   const unsigned char *in_arg, size_t nbytes)
{
	struct padlock_cipher_data *cdata;
	const  void *inp;
	unsigned char  *out;
	void  *iv;
	int    inp_misaligned, out_misaligned, realign_in_loop;
	size_t chunk, allocated=0;

	/* ctx->num is maintained in byte-oriented modes,
	   such as CFB and OFB... */
	/* First drain any partially consumed keystream/IV block left by
	   the previous call, one byte at a time, before going back to
	   the hardware. */
	if ((chunk = ctx->num)) { /* borrow chunk variable */
		unsigned char *ivp=ctx->iv;

		switch (EVP_CIPHER_CTX_mode(ctx)) {
		case EVP_CIPH_CFB_MODE:
			if (chunk >= AES_BLOCK_SIZE)
				return 0; /* bogus value */

			if (ctx->encrypt)
				while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
					ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
					chunk++, nbytes--;
				}
			else	while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
					unsigned char c = *(in_arg++);
					*(out_arg++) = c ^ ivp[chunk];
					ivp[chunk++] = c, nbytes--;
				}

			ctx->num = chunk%AES_BLOCK_SIZE;
			break;
		case EVP_CIPH_OFB_MODE:
			if (chunk >= AES_BLOCK_SIZE)
				return 0; /* bogus value */

			while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
				*(out_arg++) = *(in_arg++) ^ ivp[chunk];
				chunk++, nbytes--;
			}

			ctx->num = chunk%AES_BLOCK_SIZE;
			break;
		}
	}

	if (nbytes == 0)
		return 1;
#if 0
	if (nbytes % AES_BLOCK_SIZE)
		return 0; /* are we expected to do tail processing? */
#else
	/* nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC
	   modes and arbitrary value in byte-oriented modes, such as
	   CFB and OFB... */
#endif

	/* VIA promises CPUs that won't require alignment in the future.
	   For now padlock_aes_align_required is initialized to 1 and
	   the condition is never met... */
	/* C7 core is capable to manage unaligned input in non-ECB[!]
	   mode, but performance penalties appear to be approximately
	   same as for software alignment below or ~3x. They promise to
	   improve it in the future, but for now we can just as well
	   pretend that it can only handle aligned input... */
	if (!padlock_aes_align_required && (nbytes%AES_BLOCK_SIZE)==0)
		return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

	inp_misaligned = (((size_t)in_arg) & 0x0F);
	out_misaligned = (((size_t)out_arg) & 0x0F);

	/* Note that even if output is aligned and input not,
	 * I still prefer to loop instead of copy the whole
	 * input and then encrypt in one stroke. This is done
	 * in order to improve L1 cache utilization... */
	realign_in_loop = out_misaligned|inp_misaligned;

	if (!realign_in_loop && (nbytes%AES_BLOCK_SIZE)==0)
		return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

	/* this takes one "if" out of the loops */
	/* First chunk is the odd remainder; every later iteration uses
	   a full PADLOCK_CHUNK. */
	chunk  = nbytes;
	chunk %= PADLOCK_CHUNK;
	if (chunk==0) chunk = PADLOCK_CHUNK;

	if (out_misaligned) {
		/* optimize for small input */
		allocated = (chunk<nbytes?PADLOCK_CHUNK:nbytes);
		out = alloca(0x10 + allocated);
		out = NEAREST_ALIGNED(out);
	}
	else
		out = out_arg;

	cdata = ALIGNED_CIPHER_DATA(ctx);
	padlock_verify_context(cdata);

	/* Per-mode chunk loops. The CBC/CFB loops are entered through a
	   goto past the IV-propagation step, which must not run on the
	   first iteration - mind the unusual control flow. */
	switch (EVP_CIPHER_CTX_mode(ctx)) {
	case EVP_CIPH_ECB_MODE:
		do	{
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

			nbytes -= chunk;
			chunk   = PADLOCK_CHUNK;
		} while (nbytes);
		break;

	case EVP_CIPH_CBC_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		goto cbc_shortcut;
		do	{
			/* Carry the IV returned by the previous chunk
			   into cdata before the next one. */
			if (iv != cdata->iv)
				memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
			chunk = PADLOCK_CHUNK;
		cbc_shortcut: /* optimize for small input */
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			iv = padlock_xcrypt_cbc(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

		} while (nbytes -= chunk);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_CFB_MODE:
		memcpy (iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		/* Whole blocks go through the hardware; the sub-block
		   tail is handled after the loop. */
		chunk &= ~(AES_BLOCK_SIZE-1);
		if (chunk)	goto cfb_shortcut;
		else		goto cfb_skiploop;
		do	{
			if (iv != cdata->iv)
				memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
			chunk = PADLOCK_CHUNK;
		cfb_shortcut: /* optimize for small input */
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			iv = padlock_xcrypt_cfb(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

			nbytes -= chunk;
		} while (nbytes >= AES_BLOCK_SIZE);

		cfb_skiploop:
		if (nbytes) {
			unsigned char *ivp = cdata->iv;

			if (iv != ivp) {
				memcpy(ivp, iv, AES_BLOCK_SIZE);
				iv = ivp;
			}
			ctx->num = nbytes;
			/* Generate one keystream block by ECB-encrypting
			   the IV (temporarily flipping encdec when the
			   context was set up for decryption), then xor
			   the tail bytes in software. */
			if (cdata->cword.b.encdec) {
				cdata->cword.b.encdec=0;
				padlock_reload_key();
				padlock_xcrypt_ecb(1,cdata,ivp,ivp);
				cdata->cword.b.encdec=1;
				padlock_reload_key();
				while(nbytes) {
					unsigned char c = *(in_arg++);
					*(out_arg++) = c ^ *ivp;
					*(ivp++) = c, nbytes--;
				}
			}
			else {	padlock_reload_key();
				padlock_xcrypt_ecb(1,cdata,ivp,ivp);
				padlock_reload_key();
				while (nbytes) {
					*ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
					ivp++, nbytes--;
				}
			}
		}

		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_OFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		chunk &= ~(AES_BLOCK_SIZE-1);
		if (chunk) do	{
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			padlock_xcrypt_ofb(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out     = out_arg+=chunk;

			nbytes -= chunk;
			chunk   = PADLOCK_CHUNK;
		} while (nbytes >= AES_BLOCK_SIZE);

		if (nbytes) {
			unsigned char *ivp = cdata->iv;

			ctx->num = nbytes;
			padlock_reload_key();	/* empirically found */
			padlock_xcrypt_ecb(1,cdata,ivp,ivp);
			padlock_reload_key();	/* empirically found */
			while (nbytes) {
				*(out_arg++) = *(in_arg++) ^ *ivp;
				ivp++, nbytes--;
			}
		}

		memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
		break;

	default:
		return 0;
	}

	/* Clean the realign buffer if it was used */
	/* volatile stores so the wipe of plaintext can't be optimized out */
	if (out_misaligned) {
		volatile unsigned long *p=(void *)out;
		size_t   n = allocated/sizeof(*p);
		while (n--) *p++=0;
	}

	memset(cdata->iv, 0, AES_BLOCK_SIZE);

	return 1;
}
1157
1158#endif /* OPENSSL_NO_AES */
1159
1160/* ===== Random Number Generator ===== */
1161/*
1162 * This code is not engaged. The reason is that it does not comply
1163 * with recommendations for VIA RNG usage for secure applications
1164 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
1165 * provide meaningful error control...
1166 */
1167/* Wrapper that provides an interface between the API and
1168   the raw PadLock RNG */
/* Fill `output' with `count' hardware-random bytes. The xstore status
 * word (eax) is checked after every call: bit 6 = RNG enabled, bits
 * 10..14 = quality-test failures, low 5 bits = number of bytes
 * delivered. Returns 1 on success, 0 on any hardware error. */
static int
padlock_rand_bytes(unsigned char *output, int count)
{
	unsigned int eax, buf;

	/* Fast path: 8 bytes at a time straight into the caller's buffer. */
	while (count >= 8) {
		eax = padlock_xstore(output, 0);
		if (!(eax&(1<<6)))	return 0; /* RNG disabled */
		/* this ---vv--- covers DC bias, Raw Bits and String Filter */
		if (eax&(0x1F<<10))	return 0;
		if ((eax&0x1F)==0)	continue; /* no data, retry... */
		if ((eax&0x1F)!=8)	return 0; /* fatal failure...  */
		output += 8;
		count  -= 8;
	}
	/* Tail: fetch one byte at a time via a local word. */
	while (count > 0) {
		eax = padlock_xstore(&buf, 3);
		if (!(eax&(1<<6)))	return 0; /* RNG disabled */
		/* this ---vv--- covers DC bias, Raw Bits and String Filter */
		if (eax&(0x1F<<10))	return 0;
		if ((eax&0x1F)==0)	continue; /* no data, retry... */
		if ((eax&0x1F)!=1)	return 0; /* fatal failure...  */
		*output++ = (unsigned char)buf;
		count--;
	}
	/* volatile store: wipe leftover random data despite optimization */
	*(volatile unsigned int *)&buf=0;

	return 1;
}
1198
1199/* Dummy but necessary function */
static int
padlock_rand_status(void)
{
	/* Unconditionally report "ready"; no status query is performed. */
	return 1;
}
1205
1206/* Prepare structure for registration */
/* RAND_METHOD table: padlock_rand_bytes serves both the "bytes" and
   "pseudorand" slots; seeding/cleanup/add are not needed. */
static RAND_METHOD padlock_rand = {
	NULL,			/* seed */
	padlock_rand_bytes,	/* bytes */
	NULL,			/* cleanup */
	NULL,			/* add */
	padlock_rand_bytes,	/* pseudorand */
	padlock_rand_status,	/* rand status */
};
1215
1216#endif /* COMPILE_HW_PADLOCK */
1217
1218#endif /* !OPENSSL_NO_HW_PADLOCK */
1219#endif /* !OPENSSL_NO_HW */
1220