1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
4 * instructions. This file contains glue code.
5 *
6 * Copyright (c) 2009 Intel Corp.
7 *   Author: Huang Ying <ying.huang@intel.com>
8 */
9
10#include <linux/err.h>
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/kernel.h>
14#include <linux/crypto.h>
15#include <crypto/algapi.h>
16#include <crypto/cryptd.h>
17#include <crypto/gf128mul.h>
18#include <crypto/internal/hash.h>
19#include <crypto/internal/simd.h>
20#include <asm/cpu_device_id.h>
21#include <asm/simd.h>
22#include <asm/unaligned.h>
23
/* Both the block size and the digest size used by this driver are 16 bytes. */
#define GHASH_BLOCK_SIZE	16
#define GHASH_DIGEST_SIZE	16

/*
 * The two routines below are only declared here; their definitions live
 * outside this file (presumably in the accompanying PCLMULQDQ assembly).
 * Every call site in this file brackets them with kernel_fpu_begin() /
 * kernel_fpu_end().
 *
 * clmul_ghash_mul() is invoked on the 16-byte state buffer once a block has
 * been completely XORed into it; @shash is the value prepared by
 * ghash_setkey().
 */
void clmul_ghash_mul(char *dst, const le128 *shash);

/*
 * clmul_ghash_update() is handed the state buffer plus @srclen bytes of input.
 * The caller folds any trailing (@srclen & 0xf) bytes into the state itself
 * afterwards, so this routine is assumed to consume whole 16-byte blocks only
 * -- TODO confirm against the implementation.
 */
void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
			const le128 *shash);
31
/*
 * Per-transform context of the public asynchronous "ghash" algorithm.
 * @cryptd_tfm is allocated in ghash_async_init_tfm() and released in
 * ghash_async_exit_tfm().
 */
struct ghash_async_ctx {
	struct cryptd_ahash *cryptd_tfm;
};
35
/*
 * Per-transform context of the internal synchronous "__ghash" algorithm.
 * @shash holds the key-derived multiplier written by ghash_setkey() and is
 * passed unchanged to the clmul_ghash_*() routines.
 */
struct ghash_ctx {
	le128 shash;
};
39
/*
 * Per-request hashing state.  This struct is also what the asynchronous
 * export/import callbacks copy verbatim.
 *
 * @buffer: running 16-byte state; input bytes are XORed into it and it is
 *          copied out as the digest by ghash_final().
 * @bytes:  number of bytes still missing to complete the block currently
 *          being accumulated in @buffer; 0 when no partial block is pending.
 */
struct ghash_desc_ctx {
	u8 buffer[GHASH_BLOCK_SIZE];
	u32 bytes;
};
44
45static int ghash_init(struct shash_desc *desc)
46{
47	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
48
49	memset(dctx, 0, sizeof(*dctx));
50
51	return 0;
52}
53
54static int ghash_setkey(struct crypto_shash *tfm,
55			const u8 *key, unsigned int keylen)
56{
57	struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
58	u64 a, b;
59
60	if (keylen != GHASH_BLOCK_SIZE)
61		return -EINVAL;
62
63	/*
64	 * GHASH maps bits to polynomial coefficients backwards, which makes it
65	 * hard to implement.  But it can be shown that the GHASH multiplication
66	 *
67	 *	D * K (mod x^128 + x^7 + x^2 + x + 1)
68	 *
69	 * (where D is a data block and K is the key) is equivalent to:
70	 *
71	 *	bitreflect(D) * bitreflect(K) * x^(-127)
72	 *		(mod x^128 + x^127 + x^126 + x^121 + 1)
73	 *
74	 * So, the code below precomputes:
75	 *
76	 *	bitreflect(K) * x^(-127) (mod x^128 + x^127 + x^126 + x^121 + 1)
77	 *
78	 * ... but in Montgomery form (so that Montgomery multiplication can be
79	 * used), i.e. with an extra x^128 factor, which means actually:
80	 *
81	 *	bitreflect(K) * x (mod x^128 + x^127 + x^126 + x^121 + 1)
82	 *
83	 * The within-a-byte part of bitreflect() cancels out GHASH's built-in
84	 * reflection, and thus bitreflect() is actually a byteswap.
85	 */
86	a = get_unaligned_be64(key);
87	b = get_unaligned_be64(key + 8);
88	ctx->shash.a = cpu_to_le64((a << 1) | (b >> 63));
89	ctx->shash.b = cpu_to_le64((b << 1) | (a >> 63));
90	if (a >> 63)
91		ctx->shash.a ^= cpu_to_le64((u64)0xc2 << 56);
92	return 0;
93}
94
95static int ghash_update(struct shash_desc *desc,
96			 const u8 *src, unsigned int srclen)
97{
98	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
99	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
100	u8 *dst = dctx->buffer;
101
102	kernel_fpu_begin();
103	if (dctx->bytes) {
104		int n = min(srclen, dctx->bytes);
105		u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
106
107		dctx->bytes -= n;
108		srclen -= n;
109
110		while (n--)
111			*pos++ ^= *src++;
112
113		if (!dctx->bytes)
114			clmul_ghash_mul(dst, &ctx->shash);
115	}
116
117	clmul_ghash_update(dst, src, srclen, &ctx->shash);
118	kernel_fpu_end();
119
120	if (srclen & 0xf) {
121		src += srclen - (srclen & 0xf);
122		srclen &= 0xf;
123		dctx->bytes = GHASH_BLOCK_SIZE - srclen;
124		while (srclen--)
125			*dst++ ^= *src++;
126	}
127
128	return 0;
129}
130
131static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
132{
133	u8 *dst = dctx->buffer;
134
135	if (dctx->bytes) {
136		u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
137
138		while (dctx->bytes--)
139			*tmp++ ^= 0;
140
141		kernel_fpu_begin();
142		clmul_ghash_mul(dst, &ctx->shash);
143		kernel_fpu_end();
144	}
145
146	dctx->bytes = 0;
147}
148
149static int ghash_final(struct shash_desc *desc, u8 *dst)
150{
151	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
152	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
153	u8 *buf = dctx->buffer;
154
155	ghash_flush(ctx, dctx);
156	memcpy(dst, buf, GHASH_BLOCK_SIZE);
157
158	return 0;
159}
160
161static struct shash_alg ghash_alg = {
162	.digestsize	= GHASH_DIGEST_SIZE,
163	.init		= ghash_init,
164	.update		= ghash_update,
165	.final		= ghash_final,
166	.setkey		= ghash_setkey,
167	.descsize	= sizeof(struct ghash_desc_ctx),
168	.base		= {
169		.cra_name		= "__ghash",
170		.cra_driver_name	= "__ghash-pclmulqdqni",
171		.cra_priority		= 0,
172		.cra_flags		= CRYPTO_ALG_INTERNAL,
173		.cra_blocksize		= GHASH_BLOCK_SIZE,
174		.cra_ctxsize		= sizeof(struct ghash_ctx),
175		.cra_module		= THIS_MODULE,
176	},
177};
178
179static int ghash_async_init(struct ahash_request *req)
180{
181	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
182	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
183	struct ahash_request *cryptd_req = ahash_request_ctx(req);
184	struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
185	struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
186	struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
187
188	desc->tfm = child;
189	return crypto_shash_init(desc);
190}
191
192static int ghash_async_update(struct ahash_request *req)
193{
194	struct ahash_request *cryptd_req = ahash_request_ctx(req);
195	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
196	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
197	struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
198
199	if (!crypto_simd_usable() ||
200	    (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
201		memcpy(cryptd_req, req, sizeof(*req));
202		ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
203		return crypto_ahash_update(cryptd_req);
204	} else {
205		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
206		return shash_ahash_update(req, desc);
207	}
208}
209
210static int ghash_async_final(struct ahash_request *req)
211{
212	struct ahash_request *cryptd_req = ahash_request_ctx(req);
213	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
214	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
215	struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
216
217	if (!crypto_simd_usable() ||
218	    (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
219		memcpy(cryptd_req, req, sizeof(*req));
220		ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
221		return crypto_ahash_final(cryptd_req);
222	} else {
223		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
224		return crypto_shash_final(desc, req->result);
225	}
226}
227
228static int ghash_async_import(struct ahash_request *req, const void *in)
229{
230	struct ahash_request *cryptd_req = ahash_request_ctx(req);
231	struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
232	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
233
234	ghash_async_init(req);
235	memcpy(dctx, in, sizeof(*dctx));
236	return 0;
237
238}
239
240static int ghash_async_export(struct ahash_request *req, void *out)
241{
242	struct ahash_request *cryptd_req = ahash_request_ctx(req);
243	struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
244	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
245
246	memcpy(out, dctx, sizeof(*dctx));
247	return 0;
248
249}
250
251static int ghash_async_digest(struct ahash_request *req)
252{
253	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
254	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
255	struct ahash_request *cryptd_req = ahash_request_ctx(req);
256	struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
257
258	if (!crypto_simd_usable() ||
259	    (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
260		memcpy(cryptd_req, req, sizeof(*req));
261		ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
262		return crypto_ahash_digest(cryptd_req);
263	} else {
264		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
265		struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
266
267		desc->tfm = child;
268		return shash_ahash_digest(req, desc);
269	}
270}
271
272static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
273			      unsigned int keylen)
274{
275	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
276	struct crypto_ahash *child = &ctx->cryptd_tfm->base;
277
278	crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
279	crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
280			       & CRYPTO_TFM_REQ_MASK);
281	return crypto_ahash_setkey(child, key, keylen);
282}
283
284static int ghash_async_init_tfm(struct crypto_tfm *tfm)
285{
286	struct cryptd_ahash *cryptd_tfm;
287	struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
288
289	cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni",
290					CRYPTO_ALG_INTERNAL,
291					CRYPTO_ALG_INTERNAL);
292	if (IS_ERR(cryptd_tfm))
293		return PTR_ERR(cryptd_tfm);
294	ctx->cryptd_tfm = cryptd_tfm;
295	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
296				 sizeof(struct ahash_request) +
297				 crypto_ahash_reqsize(&cryptd_tfm->base));
298
299	return 0;
300}
301
302static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
303{
304	struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
305
306	cryptd_free_ahash(ctx->cryptd_tfm);
307}
308
309static struct ahash_alg ghash_async_alg = {
310	.init		= ghash_async_init,
311	.update		= ghash_async_update,
312	.final		= ghash_async_final,
313	.setkey		= ghash_async_setkey,
314	.digest		= ghash_async_digest,
315	.export		= ghash_async_export,
316	.import		= ghash_async_import,
317	.halg = {
318		.digestsize	= GHASH_DIGEST_SIZE,
319		.statesize = sizeof(struct ghash_desc_ctx),
320		.base = {
321			.cra_name		= "ghash",
322			.cra_driver_name	= "ghash-clmulni",
323			.cra_priority		= 400,
324			.cra_ctxsize		= sizeof(struct ghash_async_ctx),
325			.cra_flags		= CRYPTO_ALG_ASYNC,
326			.cra_blocksize		= GHASH_BLOCK_SIZE,
327			.cra_module		= THIS_MODULE,
328			.cra_init		= ghash_async_init_tfm,
329			.cra_exit		= ghash_async_exit_tfm,
330		},
331	},
332};
333
/*
 * CPU match table: a single entry requiring X86_FEATURE_PCLMULQDQ, followed
 * by the empty terminator.  ghash_pclmulqdqni_mod_init() checks it with
 * x86_match_cpu(), and it is also published via MODULE_DEVICE_TABLE().
 */
static const struct x86_cpu_id pcmul_cpu_id[] = {
	X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), /* Pickle-Mickle-Duck */
	{}
};
MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
339
340static int __init ghash_pclmulqdqni_mod_init(void)
341{
342	int err;
343
344	if (!x86_match_cpu(pcmul_cpu_id))
345		return -ENODEV;
346
347	err = crypto_register_shash(&ghash_alg);
348	if (err)
349		goto err_out;
350	err = crypto_register_ahash(&ghash_async_alg);
351	if (err)
352		goto err_shash;
353
354	return 0;
355
356err_shash:
357	crypto_unregister_shash(&ghash_alg);
358err_out:
359	return err;
360}
361
/*
 * Module exit: unregister both algorithms in the reverse order of their
 * registration in ghash_pclmulqdqni_mod_init().
 */
static void __exit ghash_pclmulqdqni_mod_exit(void)
{
	crypto_unregister_ahash(&ghash_async_alg);
	crypto_unregister_shash(&ghash_alg);
}
367
/* Module entry and exit points. */
module_init(ghash_pclmulqdqni_mod_init);
module_exit(ghash_pclmulqdqni_mod_exit);

/* Module metadata; the crypto alias matches the public algorithm name. */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("GHASH hash function, accelerated by PCLMULQDQ-NI");
MODULE_ALIAS_CRYPTO("ghash");
374