1// SPDX-License-Identifier: GPL-2.0
2/*
3 * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
4 *
5 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
6 */
7
8#include <asm/hwcap.h>
9#include <asm/neon.h>
10#include <asm/simd.h>
11#include <asm/unaligned.h>
12#include <crypto/algapi.h>
13#include <crypto/internal/hash.h>
14#include <crypto/internal/poly1305.h>
15#include <crypto/internal/simd.h>
16#include <linux/cpufeature.h>
17#include <linux/crypto.h>
18#include <linux/jump_label.h>
19#include <linux/module.h>
20
/*
 * Low-level Poly1305 primitives. They are declared asmlinkage and defined
 * outside this file (presumably the OpenSSL/Cryptogams assembly named in the
 * file header - confirm against the build).
 *
 * How this file uses them (every call passes &dctx->h as @state):
 *  - poly1305_init_arm64(): given the key pointer, or the first block of
 *    shash input when the key arrives in-band.
 *  - poly1305_blocks() / poly1305_blocks_neon(): given a byte count that is a
 *    multiple of POLY1305_BLOCK_SIZE; @hibit is 1 for ordinary message blocks
 *    and 0 for the manually padded final block.
 *  - poly1305_emit(): given the output buffer and dctx->s as @nonce;
 *    presumably writes the final tag to @digest.
 */
asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);
25
/*
 * Static branch key, false by default. neon_poly1305_mod_init() enables it
 * when the CPU reports ASIMD; the update paths test it before calling
 * poly1305_blocks_neon().
 */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
27
28void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
29{
30	poly1305_init_arm64(&dctx->h, key);
31	dctx->s[0] = get_unaligned_le32(key + 16);
32	dctx->s[1] = get_unaligned_le32(key + 20);
33	dctx->s[2] = get_unaligned_le32(key + 24);
34	dctx->s[3] = get_unaligned_le32(key + 28);
35	dctx->buflen = 0;
36}
37EXPORT_SYMBOL(poly1305_init_arch);
38
39static int neon_poly1305_init(struct shash_desc *desc)
40{
41	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
42
43	dctx->buflen = 0;
44	dctx->rset = 0;
45	dctx->sset = false;
46
47	return 0;
48}
49
50static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
51				 u32 len, u32 hibit, bool do_neon)
52{
53	if (unlikely(!dctx->sset)) {
54		if (!dctx->rset) {
55			poly1305_init_arm64(&dctx->h, src);
56			src += POLY1305_BLOCK_SIZE;
57			len -= POLY1305_BLOCK_SIZE;
58			dctx->rset = 1;
59		}
60		if (len >= POLY1305_BLOCK_SIZE) {
61			dctx->s[0] = get_unaligned_le32(src +  0);
62			dctx->s[1] = get_unaligned_le32(src +  4);
63			dctx->s[2] = get_unaligned_le32(src +  8);
64			dctx->s[3] = get_unaligned_le32(src + 12);
65			src += POLY1305_BLOCK_SIZE;
66			len -= POLY1305_BLOCK_SIZE;
67			dctx->sset = true;
68		}
69		if (len < POLY1305_BLOCK_SIZE)
70			return;
71	}
72
73	len &= ~(POLY1305_BLOCK_SIZE - 1);
74
75	if (static_branch_likely(&have_neon) && likely(do_neon))
76		poly1305_blocks_neon(&dctx->h, src, len, hibit);
77	else
78		poly1305_blocks(&dctx->h, src, len, hibit);
79}
80
81static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
82				    const u8 *src, u32 len, bool do_neon)
83{
84	if (unlikely(dctx->buflen)) {
85		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
86
87		memcpy(dctx->buf + dctx->buflen, src, bytes);
88		src += bytes;
89		len -= bytes;
90		dctx->buflen += bytes;
91
92		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
93			neon_poly1305_blocks(dctx, dctx->buf,
94					     POLY1305_BLOCK_SIZE, 1, false);
95			dctx->buflen = 0;
96		}
97	}
98
99	if (likely(len >= POLY1305_BLOCK_SIZE)) {
100		neon_poly1305_blocks(dctx, src, len, 1, do_neon);
101		src += round_down(len, POLY1305_BLOCK_SIZE);
102		len %= POLY1305_BLOCK_SIZE;
103	}
104
105	if (unlikely(len)) {
106		dctx->buflen = len;
107		memcpy(dctx->buf, src, len);
108	}
109}
110
111static int neon_poly1305_update(struct shash_desc *desc,
112				const u8 *src, unsigned int srclen)
113{
114	bool do_neon = crypto_simd_usable() && srclen > 128;
115	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
116
117	if (static_branch_likely(&have_neon) && do_neon)
118		kernel_neon_begin();
119	neon_poly1305_do_update(dctx, src, srclen, do_neon);
120	if (static_branch_likely(&have_neon) && do_neon)
121		kernel_neon_end();
122	return 0;
123}
124
125void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
126			  unsigned int nbytes)
127{
128	if (unlikely(dctx->buflen)) {
129		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
130
131		memcpy(dctx->buf + dctx->buflen, src, bytes);
132		src += bytes;
133		nbytes -= bytes;
134		dctx->buflen += bytes;
135
136		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
137			poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
138			dctx->buflen = 0;
139		}
140	}
141
142	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
143		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
144
145		if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
146			do {
147				unsigned int todo = min_t(unsigned int, len, SZ_4K);
148
149				kernel_neon_begin();
150				poly1305_blocks_neon(&dctx->h, src, todo, 1);
151				kernel_neon_end();
152
153				len -= todo;
154				src += todo;
155			} while (len);
156		} else {
157			poly1305_blocks(&dctx->h, src, len, 1);
158			src += len;
159		}
160		nbytes %= POLY1305_BLOCK_SIZE;
161	}
162
163	if (unlikely(nbytes)) {
164		dctx->buflen = nbytes;
165		memcpy(dctx->buf, src, nbytes);
166	}
167}
168EXPORT_SYMBOL(poly1305_update_arch);
169
170void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
171{
172	if (unlikely(dctx->buflen)) {
173		dctx->buf[dctx->buflen++] = 1;
174		memset(dctx->buf + dctx->buflen, 0,
175		       POLY1305_BLOCK_SIZE - dctx->buflen);
176		poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
177	}
178
179	poly1305_emit(&dctx->h, dst, dctx->s);
180	memzero_explicit(dctx, sizeof(*dctx));
181}
182EXPORT_SYMBOL(poly1305_final_arch);
183
184static int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
185{
186	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
187
188	if (unlikely(!dctx->sset))
189		return -ENOKEY;
190
191	poly1305_final_arch(dctx, dst);
192	return 0;
193}
194
195static struct shash_alg neon_poly1305_alg = {
196	.init			= neon_poly1305_init,
197	.update			= neon_poly1305_update,
198	.final			= neon_poly1305_final,
199	.digestsize		= POLY1305_DIGEST_SIZE,
200	.descsize		= sizeof(struct poly1305_desc_ctx),
201
202	.base.cra_name		= "poly1305",
203	.base.cra_driver_name	= "poly1305-neon",
204	.base.cra_priority	= 200,
205	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
206	.base.cra_module	= THIS_MODULE,
207};
208
209static int __init neon_poly1305_mod_init(void)
210{
211	if (!cpu_have_named_feature(ASIMD))
212		return 0;
213
214	static_branch_enable(&have_neon);
215
216	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
217		crypto_register_shash(&neon_poly1305_alg) : 0;
218}
219
220static void __exit neon_poly1305_mod_exit(void)
221{
222	if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD))
223		crypto_unregister_shash(&neon_poly1305_alg);
224}
225
/* Hook the init/exit routines above into module load and unload. */
module_init(neon_poly1305_mod_init);
module_exit(neon_poly1305_mod_exit);

MODULE_LICENSE("GPL v2");
/* Aliases matching cra_name and cra_driver_name of neon_poly1305_alg. */
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-neon");
232