1/*-
2 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/param.h>
28#include <sys/systm.h>
29#include <sys/kernel.h>
30#include <sys/module.h>
31#include <sys/malloc.h>
32#include <sys/libkern.h>
33#include <sys/endian.h>
34#include <sys/pcpu.h>
35#if defined(__amd64__) || defined(__i386__)
36#include <machine/cpufunc.h>
37#include <machine/cputypes.h>
38#include <machine/fpu.h>
39#include <machine/md_var.h>
40#include <machine/specialreg.h>
41#endif
42#include <machine/pcb.h>
43
44#include <opencrypto/cryptodev.h>
45#include <opencrypto/xform.h>
46
47#include <crypto/via/padlock.h>
48
49/*
50 * Implementation notes.
51 *
 * Some VIA CPUs provide SHA1 and SHA256 acceleration.
 * We implement all HMAC algorithms provided by the crypto(9) framework, but we
 * do the crypto work in software unless this is HMAC/SHA1 or HMAC/SHA256 and
 * our CPU can accelerate it.
 *
 * The additional CPU instructions which perform SHA1 and SHA256 are one-shot
 * functions - we have only one chance to give the data; the CPU itself will
 * add the padding and calculate the hash automatically.
 * This means it is not possible to implement the common init(), update(),
 * final() methods directly.
 * The way I've chosen is to keep adding data to the buffer on update()
 * (reallocating the buffer if necessary) and call the XSHA{1,256} instruction
 * on final().
65 */
66
/*
 * Hash context used by the PadLock SHA callbacks.  Data handed to
 * padlock_sha_update() is appended to a heap buffer; the final() routines
 * hash the whole buffer in one go and then release it.
 */
struct padlock_sha_ctx {
	uint8_t	*psc_buf;	/* accumulated input; NULL when nothing is allocated */
	int	 psc_offset;	/* number of bytes currently stored in psc_buf */
	int	 psc_size;	/* allocated size of psc_buf, in bytes */
};
/* Contexts are kept in storage sized for union authctx, so this must fit. */
CTASSERT(sizeof(struct padlock_sha_ctx) <= sizeof(union authctx));
73
/*
 * Forward declarations of the callbacks referenced by the auth_hash
 * descriptors defined below.
 */
static void padlock_sha_init(void *vctx);
static int padlock_sha_update(void *vctx, const void *buf, u_int bufsize);
static void padlock_sha1_final(uint8_t *hash, void *vctx);
static void padlock_sha256_final(uint8_t *hash, void *vctx);
78
79static const struct auth_hash padlock_hmac_sha1 = {
80	.type = CRYPTO_SHA1_HMAC,
81	.name = "HMAC-SHA1",
82	.keysize = SHA1_BLOCK_LEN,
83	.hashsize = SHA1_HASH_LEN,
84	.ctxsize = sizeof(struct padlock_sha_ctx),
85	.blocksize = SHA1_BLOCK_LEN,
86        .Init = padlock_sha_init,
87	.Update = padlock_sha_update,
88	.Final = padlock_sha1_final,
89};
90
91static const struct auth_hash padlock_hmac_sha256 = {
92	.type = CRYPTO_SHA2_256_HMAC,
93	.name = "HMAC-SHA2-256",
94	.keysize = SHA2_256_BLOCK_LEN,
95	.hashsize = SHA2_256_HASH_LEN,
96	.ctxsize = sizeof(struct padlock_sha_ctx),
97	.blocksize = SHA2_256_BLOCK_LEN,
98        .Init = padlock_sha_init,
99	.Update = padlock_sha_update,
100	.Final = padlock_sha256_final,
101};
102
/*
 * Malloc type used for every allocation in this file.  Only declared here;
 * the matching definition is expected to live elsewhere in the driver.
 */
MALLOC_DECLARE(M_PADLOCK);
104
/*
 * Copy `count' 32-bit words from src to dst, reversing the byte order of
 * every word on the way.
 */
static __inline void
padlock_output_block(uint32_t *src, uint32_t *dst, size_t count)
{
	size_t i;

	for (i = 0; i < count; i++)
		dst[i] = bswap32(src[i]);
}
112
/*
 * Hash `count' bytes starting at `in' with the PadLock XSHA1 instruction and
 * store the SHA1_HASH_LEN-byte digest at `out'.
 *
 * The state buffer is seeded with the standard SHA-1 initial hash values,
 * the instruction is issued once over the whole input, and the resulting
 * state words are byte-swapped into `out' by padlock_output_block().
 * `out' is written through a uint32_t pointer.
 */
static void
padlock_do_sha1(const u_char *in, u_char *out, int count)
{
	/*
	 * The extra 16 bytes give PADLOCK_ALIGN() room to move the start of
	 * the working area (presumably up to a 16-byte boundary - confirm
	 * against padlock.h).
	 */
	u_char buf[128+16];	/* PadLock needs at least 128 bytes buffer. */
	u_char *result = PADLOCK_ALIGN(buf);

	/* SHA-1 initial hash values (H0..H4). */
	((uint32_t *)result)[0] = 0x67452301;
	((uint32_t *)result)[1] = 0xEFCDAB89;
	((uint32_t *)result)[2] = 0x98BADCFE;
	((uint32_t *)result)[3] = 0x10325476;
	((uint32_t *)result)[4] = 0xC3D2E1F0;

	/*
	 * The instruction is emitted as raw opcode bytes.  Register setup via
	 * the constraints: "S" = input pointer, "D" = state buffer,
	 * "c" = byte count, "a" = 0.  Both pointers are read-write operands.
	 * NOTE(review): there is no "memory" clobber although the instruction
	 * accesses memory through both pointers - confirm this is sufficient
	 * for the compilers in use.
	 */
	__asm __volatile(
		".byte  0xf3, 0x0f, 0xa6, 0xc8" /* rep xsha1 */
			: "+S"(in), "+D"(result)
			: "c"(count), "a"(0)
		);

	/* Copy the five state words out, byte-swapping each one. */
	padlock_output_block((uint32_t *)result, (uint32_t *)out,
	    SHA1_HASH_LEN / sizeof(uint32_t));
}
134
/*
 * Hash `count' bytes starting at `in' with the PadLock XSHA256 instruction
 * and store the SHA2_256_HASH_LEN-byte digest at `out'.
 *
 * The state buffer is seeded with the standard SHA-256 initial hash values,
 * the instruction is issued once over the whole input, and the resulting
 * state words are byte-swapped into `out' by padlock_output_block().
 * `out' is written through a uint32_t pointer.
 */
static void
padlock_do_sha256(const char *in, char *out, int count)
{
	/*
	 * The extra 16 bytes give PADLOCK_ALIGN() room to move the start of
	 * the working area (presumably up to a 16-byte boundary - confirm
	 * against padlock.h).
	 */
	char buf[128+16];	/* PadLock needs at least 128 bytes buffer. */
	char *result = PADLOCK_ALIGN(buf);

	/* SHA-256 initial hash values (H0..H7). */
	((uint32_t *)result)[0] = 0x6A09E667;
	((uint32_t *)result)[1] = 0xBB67AE85;
	((uint32_t *)result)[2] = 0x3C6EF372;
	((uint32_t *)result)[3] = 0xA54FF53A;
	((uint32_t *)result)[4] = 0x510E527F;
	((uint32_t *)result)[5] = 0x9B05688C;
	((uint32_t *)result)[6] = 0x1F83D9AB;
	((uint32_t *)result)[7] = 0x5BE0CD19;

	/*
	 * The instruction is emitted as raw opcode bytes.  Register setup via
	 * the constraints: "S" = input pointer, "D" = state buffer,
	 * "c" = byte count, "a" = 0.  Both pointers are read-write operands.
	 * NOTE(review): there is no "memory" clobber although the instruction
	 * accesses memory through both pointers - confirm this is sufficient
	 * for the compilers in use.
	 */
	__asm __volatile(
		".byte  0xf3, 0x0f, 0xa6, 0xd0" /* rep xsha256 */
			: "+S"(in), "+D"(result)
			: "c"(count), "a"(0)
		);

	/* Copy the eight state words out, byte-swapping each one. */
	padlock_output_block((uint32_t *)result, (uint32_t *)out,
	    SHA2_256_HASH_LEN / sizeof(uint32_t));
}
159
160static void
161padlock_sha_init(void *vctx)
162{
163	struct padlock_sha_ctx *ctx;
164
165	ctx = vctx;
166	ctx->psc_buf = NULL;
167	ctx->psc_offset = 0;
168	ctx->psc_size = 0;
169}
170
171static int
172padlock_sha_update(void *vctx, const void *buf, u_int bufsize)
173{
174	struct padlock_sha_ctx *ctx;
175
176	ctx = vctx;
177	if (ctx->psc_size - ctx->psc_offset < bufsize) {
178		ctx->psc_size = MAX(ctx->psc_size * 2, ctx->psc_size + bufsize);
179		ctx->psc_buf = realloc(ctx->psc_buf, ctx->psc_size, M_PADLOCK,
180		    M_NOWAIT);
181		if(ctx->psc_buf == NULL)
182			return (ENOMEM);
183	}
184	bcopy(buf, ctx->psc_buf + ctx->psc_offset, bufsize);
185	ctx->psc_offset += bufsize;
186	return (0);
187}
188
189static void
190padlock_sha_free(void *vctx)
191{
192	struct padlock_sha_ctx *ctx;
193
194	ctx = vctx;
195	if (ctx->psc_buf != NULL) {
196		zfree(ctx->psc_buf, M_PADLOCK);
197		ctx->psc_buf = NULL;
198		ctx->psc_offset = 0;
199		ctx->psc_size = 0;
200	}
201}
202
203static void
204padlock_sha1_final(uint8_t *hash, void *vctx)
205{
206	struct padlock_sha_ctx *ctx;
207
208	ctx = vctx;
209	padlock_do_sha1(ctx->psc_buf, hash, ctx->psc_offset);
210	padlock_sha_free(ctx);
211}
212
213static void
214padlock_sha256_final(uint8_t *hash, void *vctx)
215{
216	struct padlock_sha_ctx *ctx;
217
218	ctx = vctx;
219	padlock_do_sha256(ctx->psc_buf, hash, ctx->psc_offset);
220	padlock_sha_free(ctx);
221}
222
223static void
224padlock_copy_ctx(const struct auth_hash *axf, void *sctx, void *dctx)
225{
226
227	if ((via_feature_xcrypt & VIA_HAS_SHA) != 0 &&
228	    (axf->type == CRYPTO_SHA1_HMAC ||
229	     axf->type == CRYPTO_SHA2_256_HMAC)) {
230		struct padlock_sha_ctx *spctx = sctx, *dpctx = dctx;
231
232		dpctx->psc_offset = spctx->psc_offset;
233		dpctx->psc_size = spctx->psc_size;
234		dpctx->psc_buf = malloc(dpctx->psc_size, M_PADLOCK, M_WAITOK);
235		bcopy(spctx->psc_buf, dpctx->psc_buf, dpctx->psc_size);
236	} else {
237		bcopy(sctx, dctx, axf->ctxsize);
238	}
239}
240
241static void
242padlock_free_ctx(const struct auth_hash *axf, void *ctx)
243{
244
245	if ((via_feature_xcrypt & VIA_HAS_SHA) != 0 &&
246	    (axf->type == CRYPTO_SHA1_HMAC ||
247	     axf->type == CRYPTO_SHA2_256_HMAC)) {
248		padlock_sha_free(ctx);
249	}
250}
251
252static void
253padlock_hash_key_setup(struct padlock_session *ses, const uint8_t *key,
254    int klen)
255{
256	const struct auth_hash *axf;
257
258	axf = ses->ses_axf;
259
260	/*
261	 * Try to free contexts before using them, because
262	 * padlock_hash_key_setup() can be called twice - once from
263	 * padlock_newsession() and again from padlock_process().
264	 */
265	padlock_free_ctx(axf, ses->ses_ictx);
266	padlock_free_ctx(axf, ses->ses_octx);
267
268	hmac_init_ipad(axf, key, klen, ses->ses_ictx);
269	hmac_init_opad(axf, key, klen, ses->ses_octx);
270}
271
272/*
273 * Compute keyed-hash authenticator.
274 */
275static int
276padlock_authcompute(struct padlock_session *ses, struct cryptop *crp)
277{
278	u_char hash[HASH_MAX_LEN], hash2[HASH_MAX_LEN];
279	const struct auth_hash *axf;
280	union authctx ctx;
281	int error;
282
283	axf = ses->ses_axf;
284
285	padlock_copy_ctx(axf, ses->ses_ictx, &ctx);
286	error = crypto_apply(crp, crp->crp_aad_start, crp->crp_aad_length,
287	    axf->Update, &ctx);
288	if (error != 0) {
289		padlock_free_ctx(axf, &ctx);
290		return (error);
291	}
292	error = crypto_apply(crp, crp->crp_payload_start,
293	    crp->crp_payload_length, axf->Update, &ctx);
294	if (error != 0) {
295		padlock_free_ctx(axf, &ctx);
296		return (error);
297	}
298	axf->Final(hash, &ctx);
299
300	padlock_copy_ctx(axf, ses->ses_octx, &ctx);
301	axf->Update(&ctx, hash, axf->hashsize);
302	axf->Final(hash, &ctx);
303
304	if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) {
305		crypto_copydata(crp, crp->crp_digest_start, ses->ses_mlen,
306		    hash2);
307		if (timingsafe_bcmp(hash, hash2, ses->ses_mlen) != 0)
308			return (EBADMSG);
309	} else
310		crypto_copyback(crp, crp->crp_digest_start, ses->ses_mlen,
311		    hash);
312	return (0);
313}
314
315/* Find software structure which describes HMAC algorithm. */
316static const struct auth_hash *
317padlock_hash_lookup(int alg)
318{
319	const struct auth_hash *axf;
320
321	switch (alg) {
322	case CRYPTO_NULL_HMAC:
323		axf = &auth_hash_null;
324		break;
325	case CRYPTO_SHA1_HMAC:
326		if ((via_feature_xcrypt & VIA_HAS_SHA) != 0)
327			axf = &padlock_hmac_sha1;
328		else
329			axf = &auth_hash_hmac_sha1;
330		break;
331	case CRYPTO_RIPEMD160_HMAC:
332		axf = &auth_hash_hmac_ripemd_160;
333		break;
334	case CRYPTO_SHA2_256_HMAC:
335		if ((via_feature_xcrypt & VIA_HAS_SHA) != 0)
336			axf = &padlock_hmac_sha256;
337		else
338			axf = &auth_hash_hmac_sha2_256;
339		break;
340	case CRYPTO_SHA2_384_HMAC:
341		axf = &auth_hash_hmac_sha2_384;
342		break;
343	case CRYPTO_SHA2_512_HMAC:
344		axf = &auth_hash_hmac_sha2_512;
345		break;
346	default:
347		axf = NULL;
348		break;
349	}
350	return (axf);
351}
352
353bool
354padlock_hash_check(const struct crypto_session_params *csp)
355{
356
357	return (padlock_hash_lookup(csp->csp_auth_alg) != NULL);
358}
359
360int
361padlock_hash_setup(struct padlock_session *ses,
362    const struct crypto_session_params *csp)
363{
364
365	ses->ses_axf = padlock_hash_lookup(csp->csp_auth_alg);
366	if (csp->csp_auth_mlen == 0)
367		ses->ses_mlen = ses->ses_axf->hashsize;
368	else
369		ses->ses_mlen = csp->csp_auth_mlen;
370
371	/* Allocate memory for HMAC inner and outer contexts. */
372	ses->ses_ictx = malloc(ses->ses_axf->ctxsize, M_PADLOCK,
373	    M_ZERO | M_NOWAIT);
374	ses->ses_octx = malloc(ses->ses_axf->ctxsize, M_PADLOCK,
375	    M_ZERO | M_NOWAIT);
376	if (ses->ses_ictx == NULL || ses->ses_octx == NULL)
377		return (ENOMEM);
378
379	/* Setup key if given. */
380	if (csp->csp_auth_key != NULL) {
381		padlock_hash_key_setup(ses, csp->csp_auth_key,
382		    csp->csp_auth_klen);
383	}
384	return (0);
385}
386
387int
388padlock_hash_process(struct padlock_session *ses, struct cryptop *crp,
389    const struct crypto_session_params *csp)
390{
391	struct thread *td;
392	int error;
393
394	td = curthread;
395	fpu_kern_enter(td, NULL, FPU_KERN_NORMAL | FPU_KERN_NOCTX);
396	if (crp->crp_auth_key != NULL)
397		padlock_hash_key_setup(ses, crp->crp_auth_key,
398		    csp->csp_auth_klen);
399
400	error = padlock_authcompute(ses, crp);
401	fpu_kern_leave(td, NULL);
402	return (error);
403}
404
405void
406padlock_hash_free(struct padlock_session *ses)
407{
408
409	if (ses->ses_ictx != NULL) {
410		padlock_free_ctx(ses->ses_axf, ses->ses_ictx);
411		zfree(ses->ses_ictx, M_PADLOCK);
412		ses->ses_ictx = NULL;
413	}
414	if (ses->ses_octx != NULL) {
415		padlock_free_ctx(ses->ses_axf, ses->ses_octx);
416		zfree(ses->ses_octx, M_PADLOCK);
417		ses->ses_octx = NULL;
418	}
419}
420