1/*-
2 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD$");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/module.h>
34#include <sys/malloc.h>
35#include <sys/libkern.h>
36#include <sys/endian.h>
37#include <sys/pcpu.h>
38#if defined(__amd64__) || defined(__i386__)
39#include <machine/cpufunc.h>
40#include <machine/cputypes.h>
41#include <machine/md_var.h>
42#include <machine/specialreg.h>
43#endif
44#include <machine/pcb.h>
45
46#include <opencrypto/cryptodev.h>
47#include <opencrypto/xform.h>
48
49#include <crypto/via/padlock.h>
50
/*
 * Implementation notes.
 *
 * Some VIA CPUs provide SHA1 and SHA256 acceleration.
 * We implement all HMAC algorithms provided by the crypto(9) framework, but
 * we do the crypto work in software unless this is HMAC/SHA1 or HMAC/SHA256
 * and our CPU can accelerate it.
 *
 * The additional CPU instructions which perform SHA1 and SHA256 are one-shot
 * functions - we have only one chance to give the data; the CPU itself will
 * add the padding and calculate the hash automatically.
 * This means it is not possible to implement the common init(), update(),
 * final() methods directly.
 * The way I've chosen is to keep adding data to the buffer on update()
 * (reallocating the buffer if necessary) and call the XSHA{1,256}
 * instruction on final().
 */
68
/*
 * Accumulation context for the PadLock SHA transforms.  Data handed to
 * update() is appended to psc_buf; final() hashes the whole buffer in one
 * shot and releases it.
 */
struct padlock_sha_ctx {
	uint8_t	*psc_buf;	/* M_PADLOCK buffer with the data, or NULL */
	int	 psc_offset;	/* number of bytes of psc_buf in use */
	int	 psc_size;	/* allocated size of psc_buf, in bytes */
};
/* This context is stored in space sized for a union authctx. */
CTASSERT(sizeof(struct padlock_sha_ctx) <= sizeof(union authctx));
75
/*
 * Forward declarations of the auth_hash callbacks used by the PadLock
 * transform descriptors that follow; the definitions are further down.
 */
static void padlock_sha_init(void *vctx);
static int padlock_sha_update(void *vctx, const void *buf, u_int bufsize);
static void padlock_sha1_final(uint8_t *hash, void *vctx);
static void padlock_sha256_final(uint8_t *hash, void *vctx);
80
81static struct auth_hash padlock_hmac_sha1 = {
82	.type = CRYPTO_SHA1_HMAC,
83	.name = "HMAC-SHA1",
84	.keysize = SHA1_BLOCK_LEN,
85	.hashsize = SHA1_HASH_LEN,
86	.ctxsize = sizeof(struct padlock_sha_ctx),
87	.blocksize = SHA1_BLOCK_LEN,
88        .Init = padlock_sha_init,
89	.Update = padlock_sha_update,
90	.Final = padlock_sha1_final,
91};
92
93static struct auth_hash padlock_hmac_sha256 = {
94	.type = CRYPTO_SHA2_256_HMAC,
95	.name = "HMAC-SHA2-256",
96	.keysize = SHA2_256_BLOCK_LEN,
97	.hashsize = SHA2_256_HASH_LEN,
98	.ctxsize = sizeof(struct padlock_sha_ctx),
99	.blocksize = SHA2_256_BLOCK_LEN,
100        .Init = padlock_sha_init,
101	.Update = padlock_sha_update,
102	.Final = padlock_sha256_final,
103};
104
/*
 * malloc(9) type used for every allocation made in this file.
 * NOTE(review): presumably defined with MALLOC_DEFINE() in another PadLock
 * source file - confirm.
 */
MALLOC_DECLARE(M_PADLOCK);
106
/*
 * Copy `count' 32-bit words from `src' to `dst', reversing the byte order
 * of each word on the way.
 * NOTE(review): presumably this turns the word order produced by the
 * PadLock unit into the conventional digest byte order - confirm.
 */
static __inline void
padlock_output_block(uint32_t *src, uint32_t *dst, size_t count)
{
	size_t i;

	for (i = 0; i < count; i++)
		dst[i] = bswap32(src[i]);
}
114
/*
 * Hash `count' bytes starting at `in' with the PadLock "rep xsha1"
 * instruction and write the SHA1_HASH_LEN-byte digest to `out'.
 *
 * The instruction operates on an aligned scratch area that is pre-loaded
 * with the SHA-1 initial state; afterwards the state words are byte-swapped
 * into `out' by padlock_output_block().
 *
 * When __GNUCLIKE_ASM is not defined no instruction is emitted and `out'
 * simply receives the byte-swapped initial state.
 */
static void
padlock_do_sha1(const u_char *in, u_char *out, int count)
{
	u_char buf[128+16];	/* PadLock needs at least 128 bytes buffer. */
	/* The extra 16 bytes leave room for PADLOCK_ALIGN() to adjust. */
	u_char *result = PADLOCK_ALIGN(buf);

	/* SHA-1 initial hash values H0..H4. */
	((uint32_t *)result)[0] = 0x67452301;
	((uint32_t *)result)[1] = 0xEFCDAB89;
	((uint32_t *)result)[2] = 0x98BADCFE;
	((uint32_t *)result)[3] = 0x10325476;
	((uint32_t *)result)[4] = 0xC3D2E1F0;

#ifdef __GNUCLIKE_ASM
	/*
	 * Operands: "S" = input pointer, "D" = state/result pointer,
	 * "c" = byte count, "a" = 0.
	 * NOTE(review): no "memory" clobber is declared although the
	 * instruction reads *in and updates *result - confirm the compiler
	 * cannot reorder or elide the surrounding loads and stores.
	 */
	__asm __volatile(
		".byte  0xf3, 0x0f, 0xa6, 0xc8" /* rep xsha1 */
			: "+S"(in), "+D"(result)
			: "c"(count), "a"(0)
		);
#endif

	/* Export the five state words as the digest. */
	padlock_output_block((uint32_t *)result, (uint32_t *)out,
	    SHA1_HASH_LEN / sizeof(uint32_t));
}
138
/*
 * Hash `count' bytes starting at `in' with the PadLock "rep xsha256"
 * instruction and write the SHA2_256_HASH_LEN-byte digest to `out'.
 *
 * The instruction operates on an aligned scratch area that is pre-loaded
 * with the SHA-256 initial state; afterwards the state words are
 * byte-swapped into `out' by padlock_output_block().
 *
 * When __GNUCLIKE_ASM is not defined no instruction is emitted and `out'
 * simply receives the byte-swapped initial state.
 */
static void
padlock_do_sha256(const char *in, char *out, int count)
{
	char buf[128+16];	/* PadLock needs at least 128 bytes buffer. */
	/* The extra 16 bytes leave room for PADLOCK_ALIGN() to adjust. */
	char *result = PADLOCK_ALIGN(buf);

	/* SHA-256 initial hash values H0..H7. */
	((uint32_t *)result)[0] = 0x6A09E667;
	((uint32_t *)result)[1] = 0xBB67AE85;
	((uint32_t *)result)[2] = 0x3C6EF372;
	((uint32_t *)result)[3] = 0xA54FF53A;
	((uint32_t *)result)[4] = 0x510E527F;
	((uint32_t *)result)[5] = 0x9B05688C;
	((uint32_t *)result)[6] = 0x1F83D9AB;
	((uint32_t *)result)[7] = 0x5BE0CD19;

#ifdef __GNUCLIKE_ASM
	/*
	 * Operands: "S" = input pointer, "D" = state/result pointer,
	 * "c" = byte count, "a" = 0.
	 * NOTE(review): no "memory" clobber is declared although the
	 * instruction reads *in and updates *result - confirm the compiler
	 * cannot reorder or elide the surrounding loads and stores.
	 */
	__asm __volatile(
		".byte  0xf3, 0x0f, 0xa6, 0xd0" /* rep xsha256 */
			: "+S"(in), "+D"(result)
			: "c"(count), "a"(0)
		);
#endif

	/* Export the eight state words as the digest. */
	padlock_output_block((uint32_t *)result, (uint32_t *)out,
	    SHA2_256_HASH_LEN / sizeof(uint32_t));
}
165
166static void
167padlock_sha_init(void *vctx)
168{
169	struct padlock_sha_ctx *ctx;
170
171	ctx = vctx;
172	ctx->psc_buf = NULL;
173	ctx->psc_offset = 0;
174	ctx->psc_size = 0;
175}
176
177static int
178padlock_sha_update(void *vctx, const void *buf, u_int bufsize)
179{
180	struct padlock_sha_ctx *ctx;
181
182	ctx = vctx;
183	if (ctx->psc_size - ctx->psc_offset < bufsize) {
184		ctx->psc_size = MAX(ctx->psc_size * 2, ctx->psc_size + bufsize);
185		ctx->psc_buf = realloc(ctx->psc_buf, ctx->psc_size, M_PADLOCK,
186		    M_NOWAIT);
187		if(ctx->psc_buf == NULL)
188			return (ENOMEM);
189	}
190	bcopy(buf, ctx->psc_buf + ctx->psc_offset, bufsize);
191	ctx->psc_offset += bufsize;
192	return (0);
193}
194
195static void
196padlock_sha_free(void *vctx)
197{
198	struct padlock_sha_ctx *ctx;
199
200	ctx = vctx;
201	if (ctx->psc_buf != NULL) {
202		zfree(ctx->psc_buf, M_PADLOCK);
203		ctx->psc_buf = NULL;
204		ctx->psc_offset = 0;
205		ctx->psc_size = 0;
206	}
207}
208
209static void
210padlock_sha1_final(uint8_t *hash, void *vctx)
211{
212	struct padlock_sha_ctx *ctx;
213
214	ctx = vctx;
215	padlock_do_sha1(ctx->psc_buf, hash, ctx->psc_offset);
216	padlock_sha_free(ctx);
217}
218
219static void
220padlock_sha256_final(uint8_t *hash, void *vctx)
221{
222	struct padlock_sha_ctx *ctx;
223
224	ctx = vctx;
225	padlock_do_sha256(ctx->psc_buf, hash, ctx->psc_offset);
226	padlock_sha_free(ctx);
227}
228
229static void
230padlock_copy_ctx(struct auth_hash *axf, void *sctx, void *dctx)
231{
232
233	if ((via_feature_xcrypt & VIA_HAS_SHA) != 0 &&
234	    (axf->type == CRYPTO_SHA1_HMAC ||
235	     axf->type == CRYPTO_SHA2_256_HMAC)) {
236		struct padlock_sha_ctx *spctx = sctx, *dpctx = dctx;
237
238		dpctx->psc_offset = spctx->psc_offset;
239		dpctx->psc_size = spctx->psc_size;
240		dpctx->psc_buf = malloc(dpctx->psc_size, M_PADLOCK, M_WAITOK);
241		bcopy(spctx->psc_buf, dpctx->psc_buf, dpctx->psc_size);
242	} else {
243		bcopy(sctx, dctx, axf->ctxsize);
244	}
245}
246
247static void
248padlock_free_ctx(struct auth_hash *axf, void *ctx)
249{
250
251	if ((via_feature_xcrypt & VIA_HAS_SHA) != 0 &&
252	    (axf->type == CRYPTO_SHA1_HMAC ||
253	     axf->type == CRYPTO_SHA2_256_HMAC)) {
254		padlock_sha_free(ctx);
255	}
256}
257
258static void
259padlock_hash_key_setup(struct padlock_session *ses, const uint8_t *key,
260    int klen)
261{
262	struct auth_hash *axf;
263
264	axf = ses->ses_axf;
265
266	/*
267	 * Try to free contexts before using them, because
268	 * padlock_hash_key_setup() can be called twice - once from
269	 * padlock_newsession() and again from padlock_process().
270	 */
271	padlock_free_ctx(axf, ses->ses_ictx);
272	padlock_free_ctx(axf, ses->ses_octx);
273
274	hmac_init_ipad(axf, key, klen, ses->ses_ictx);
275	hmac_init_opad(axf, key, klen, ses->ses_octx);
276}
277
278/*
279 * Compute keyed-hash authenticator.
280 */
281static int
282padlock_authcompute(struct padlock_session *ses, struct cryptop *crp)
283{
284	u_char hash[HASH_MAX_LEN], hash2[HASH_MAX_LEN];
285	struct auth_hash *axf;
286	union authctx ctx;
287	int error;
288
289	axf = ses->ses_axf;
290
291	padlock_copy_ctx(axf, ses->ses_ictx, &ctx);
292	error = crypto_apply(crp, crp->crp_aad_start, crp->crp_aad_length,
293	    axf->Update, &ctx);
294	if (error != 0) {
295		padlock_free_ctx(axf, &ctx);
296		return (error);
297	}
298	error = crypto_apply(crp, crp->crp_payload_start,
299	    crp->crp_payload_length, axf->Update, &ctx);
300	if (error != 0) {
301		padlock_free_ctx(axf, &ctx);
302		return (error);
303	}
304	axf->Final(hash, &ctx);
305
306	padlock_copy_ctx(axf, ses->ses_octx, &ctx);
307	axf->Update(&ctx, hash, axf->hashsize);
308	axf->Final(hash, &ctx);
309
310	if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) {
311		crypto_copydata(crp, crp->crp_digest_start, ses->ses_mlen,
312		    hash2);
313		if (timingsafe_bcmp(hash, hash2, ses->ses_mlen) != 0)
314			return (EBADMSG);
315	} else
316		crypto_copyback(crp, crp->crp_digest_start, ses->ses_mlen,
317		    hash);
318	return (0);
319}
320
321/* Find software structure which describes HMAC algorithm. */
322static struct auth_hash *
323padlock_hash_lookup(int alg)
324{
325	struct auth_hash *axf;
326
327	switch (alg) {
328	case CRYPTO_NULL_HMAC:
329		axf = &auth_hash_null;
330		break;
331	case CRYPTO_SHA1_HMAC:
332		if ((via_feature_xcrypt & VIA_HAS_SHA) != 0)
333			axf = &padlock_hmac_sha1;
334		else
335			axf = &auth_hash_hmac_sha1;
336		break;
337	case CRYPTO_RIPEMD160_HMAC:
338		axf = &auth_hash_hmac_ripemd_160;
339		break;
340	case CRYPTO_SHA2_256_HMAC:
341		if ((via_feature_xcrypt & VIA_HAS_SHA) != 0)
342			axf = &padlock_hmac_sha256;
343		else
344			axf = &auth_hash_hmac_sha2_256;
345		break;
346	case CRYPTO_SHA2_384_HMAC:
347		axf = &auth_hash_hmac_sha2_384;
348		break;
349	case CRYPTO_SHA2_512_HMAC:
350		axf = &auth_hash_hmac_sha2_512;
351		break;
352	default:
353		axf = NULL;
354		break;
355	}
356	return (axf);
357}
358
359bool
360padlock_hash_check(const struct crypto_session_params *csp)
361{
362
363	return (padlock_hash_lookup(csp->csp_auth_alg) != NULL);
364}
365
366int
367padlock_hash_setup(struct padlock_session *ses,
368    const struct crypto_session_params *csp)
369{
370
371	ses->ses_axf = padlock_hash_lookup(csp->csp_auth_alg);
372	if (csp->csp_auth_mlen == 0)
373		ses->ses_mlen = ses->ses_axf->hashsize;
374	else
375		ses->ses_mlen = csp->csp_auth_mlen;
376
377	/* Allocate memory for HMAC inner and outer contexts. */
378	ses->ses_ictx = malloc(ses->ses_axf->ctxsize, M_PADLOCK,
379	    M_ZERO | M_NOWAIT);
380	ses->ses_octx = malloc(ses->ses_axf->ctxsize, M_PADLOCK,
381	    M_ZERO | M_NOWAIT);
382	if (ses->ses_ictx == NULL || ses->ses_octx == NULL)
383		return (ENOMEM);
384
385	/* Setup key if given. */
386	if (csp->csp_auth_key != NULL) {
387		padlock_hash_key_setup(ses, csp->csp_auth_key,
388		    csp->csp_auth_klen);
389	}
390	return (0);
391}
392
393int
394padlock_hash_process(struct padlock_session *ses, struct cryptop *crp,
395    const struct crypto_session_params *csp)
396{
397	struct thread *td;
398	int error;
399
400	td = curthread;
401	fpu_kern_enter(td, ses->ses_fpu_ctx, FPU_KERN_NORMAL | FPU_KERN_KTHR);
402	if (crp->crp_auth_key != NULL)
403		padlock_hash_key_setup(ses, crp->crp_auth_key,
404		    csp->csp_auth_klen);
405
406	error = padlock_authcompute(ses, crp);
407	fpu_kern_leave(td, ses->ses_fpu_ctx);
408	return (error);
409}
410
411void
412padlock_hash_free(struct padlock_session *ses)
413{
414
415	if (ses->ses_ictx != NULL) {
416		padlock_free_ctx(ses->ses_axf, ses->ses_ictx);
417		zfree(ses->ses_ictx, M_PADLOCK);
418		ses->ses_ictx = NULL;
419	}
420	if (ses->ses_octx != NULL) {
421		padlock_free_ctx(ses->ses_axf, ses->ses_octx);
422		zfree(ses->ses_octx, M_PADLOCK);
423		ses->ses_octx = NULL;
424	}
425}
426