1// SPDX-License-Identifier: GPL-2.0
2/*
3 * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
4 *
5 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
6 */
7
8#include <asm/hwcap.h>
9#include <asm/neon.h>
10#include <asm/simd.h>
11#include <asm/unaligned.h>
12#include <crypto/algapi.h>
13#include <crypto/internal/hash.h>
14#include <crypto/internal/poly1305.h>
15#include <crypto/internal/simd.h>
16#include <linux/cpufeature.h>
17#include <linux/crypto.h>
18#include <linux/jump_label.h>
19#include <linux/module.h>
20
/*
 * Scalar and NEON Poly1305 primitives implemented in assembly elsewhere
 * in this directory (OpenSSL/CRYPTOGAMS derived, per the file header).
 */
void poly1305_init_arm(void *state, const u8 *key);
void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce);

/*
 * Weak no-op fallback so this file links when the NEON asm object is not
 * built.  NOTE(review): all NEON call sites below are additionally guarded
 * by the 'have_neon' static key, so this stub is presumably never reached
 * at runtime — confirm against the Makefile wiring.
 */
void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
{
}

/* Flipped to true at module init when the CPU advertises HWCAP_NEON. */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
31
32void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
33{
34	poly1305_init_arm(&dctx->h, key);
35	dctx->s[0] = get_unaligned_le32(key + 16);
36	dctx->s[1] = get_unaligned_le32(key + 20);
37	dctx->s[2] = get_unaligned_le32(key + 24);
38	dctx->s[3] = get_unaligned_le32(key + 28);
39	dctx->buflen = 0;
40}
41EXPORT_SYMBOL(poly1305_init_arch);
42
43static int arm_poly1305_init(struct shash_desc *desc)
44{
45	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
46
47	dctx->buflen = 0;
48	dctx->rset = 0;
49	dctx->sset = false;
50
51	return 0;
52}
53
/*
 * Process whole blocks from src.  For the shash interface the key is not
 * supplied via setkey() but consumed from the start of the data stream:
 * the first 16-byte block is r (fed to poly1305_init_arm) and the second
 * is the nonce s; only data after those is hashed.
 */
static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
				 u32 len, u32 hibit, bool do_neon)
{
	if (unlikely(!dctx->sset)) {
		if (!dctx->rset) {
			/* First block carries r: derive the multiplier state. */
			poly1305_init_arm(&dctx->h, src);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->rset = 1;
		}
		if (len >= POLY1305_BLOCK_SIZE) {
			/* Second block carries the 128-bit nonce s. */
			dctx->s[0] = get_unaligned_le32(src +  0);
			dctx->s[1] = get_unaligned_le32(src +  4);
			dctx->s[2] = get_unaligned_le32(src +  8);
			dctx->s[3] = get_unaligned_le32(src + 12);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->sset = true;
		}
		/* No full data block left after key consumption: done for now. */
		if (len < POLY1305_BLOCK_SIZE)
			return;
	}

	/* Round down to a whole number of 16-byte blocks. */
	len &= ~(POLY1305_BLOCK_SIZE - 1);

	/* NEON only if the CPU has it and the caller entered NEON context. */
	if (static_branch_likely(&have_neon) && likely(do_neon))
		poly1305_blocks_neon(&dctx->h, src, len, hibit);
	else
		poly1305_blocks_arm(&dctx->h, src, len, hibit);
}
84
/*
 * Common update path for the shash entry points: drain any partial block
 * buffered in dctx->buf, process all whole blocks directly from src, and
 * stash the remaining tail (< 16 bytes) back into the buffer.
 */
static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
				    const u8 *src, u32 len, bool do_neon)
{
	if (unlikely(dctx->buflen)) {
		/* Top up the partial block first. */
		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		len -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			/* Single buffered block: not worth entering NEON. */
			arm_poly1305_blocks(dctx, dctx->buf,
					    POLY1305_BLOCK_SIZE, 1, false);
			dctx->buflen = 0;
		}
	}

	if (likely(len >= POLY1305_BLOCK_SIZE)) {
		arm_poly1305_blocks(dctx, src, len, 1, do_neon);
		src += round_down(len, POLY1305_BLOCK_SIZE);
		len %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(len)) {
		/* Buffer the sub-block tail for the next update/final. */
		dctx->buflen = len;
		memcpy(dctx->buf, src, len);
	}
}
114
115static int arm_poly1305_update(struct shash_desc *desc,
116			       const u8 *src, unsigned int srclen)
117{
118	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
119
120	arm_poly1305_do_update(dctx, src, srclen, false);
121	return 0;
122}
123
124static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
125						   const u8 *src,
126						   unsigned int srclen)
127{
128	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
129	bool do_neon = crypto_simd_usable() && srclen > 128;
130
131	if (static_branch_likely(&have_neon) && do_neon)
132		kernel_neon_begin();
133	arm_poly1305_do_update(dctx, src, srclen, do_neon);
134	if (static_branch_likely(&have_neon) && do_neon)
135		kernel_neon_end();
136	return 0;
137}
138
/*
 * Library-API update: drain any buffered partial block, process whole
 * blocks (via NEON when possible), and buffer the sub-block tail.
 */
void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
			  unsigned int nbytes)
{
	bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
		       crypto_simd_usable();

	if (unlikely(dctx->buflen)) {
		/* Top up the buffered partial block first. */
		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		nbytes -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			/* One block only: scalar routine, no NEON setup cost. */
			poly1305_blocks_arm(&dctx->h, dctx->buf,
					    POLY1305_BLOCK_SIZE, 1);
			dctx->buflen = 0;
		}
	}

	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);

		if (static_branch_likely(&have_neon) && do_neon) {
			/*
			 * Process in chunks of at most 4 KiB, dropping the
			 * NEON context between chunks — presumably to keep
			 * preemption/softirq latency bounded while SIMD
			 * registers are held; confirm against kernel-mode
			 * NEON usage guidelines.
			 */
			do {
				unsigned int todo = min_t(unsigned int, len, SZ_4K);

				kernel_neon_begin();
				poly1305_blocks_neon(&dctx->h, src, todo, 1);
				kernel_neon_end();

				len -= todo;
				src += todo;
			} while (len);
		} else {
			poly1305_blocks_arm(&dctx->h, src, len, 1);
			src += len;
		}
		nbytes %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(nbytes)) {
		/* Stash the sub-block tail for the next update/final. */
		dctx->buflen = nbytes;
		memcpy(dctx->buf, src, nbytes);
	}
}
EXPORT_SYMBOL(poly1305_update_arch);
187
/*
 * Library-API finalization: pad any buffered partial block (append 0x01
 * then zeroes; processed with hibit == 0 because the pad bit is already
 * in the data), emit the 16-byte tag using the nonce s, and wipe the
 * descriptor state.
 */
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
	if (unlikely(dctx->buflen)) {
		dctx->buf[dctx->buflen++] = 1;
		memset(dctx->buf + dctx->buflen, 0,
		       POLY1305_BLOCK_SIZE - dctx->buflen);
		poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit_arm(&dctx->h, dst, dctx->s);
	/*
	 * Clear key/state.  NOTE(review): plain struct assignment, not
	 * memzero_explicit() — relies on the compiler not eliding the
	 * store; verify this matches the intended zeroization guarantee.
	 */
	*dctx = (struct poly1305_desc_ctx){};
}
EXPORT_SYMBOL(poly1305_final_arch);
201
202static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
203{
204	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
205
206	if (unlikely(!dctx->sset))
207		return -ENOKEY;
208
209	poly1305_final_arch(dctx, dst);
210	return 0;
211}
212
/*
 * shash instances: a scalar-only implementation (priority 150) and, when
 * kernel-mode NEON is configured, a higher-priority NEON one (200).
 * Neither provides setkey(); the key is consumed from the head of the
 * data stream (see arm_poly1305_blocks()).
 */
static struct shash_alg arm_poly1305_algs[] = {{
	.init			= arm_poly1305_init,
	.update			= arm_poly1305_update,
	.final			= arm_poly1305_final,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.descsize		= sizeof(struct poly1305_desc_ctx),

	.base.cra_name		= "poly1305",
	.base.cra_driver_name	= "poly1305-arm",
	.base.cra_priority	= 150,
	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
#ifdef CONFIG_KERNEL_MODE_NEON
}, {
	.init			= arm_poly1305_init,
	.update			= arm_poly1305_update_neon,
	.final			= arm_poly1305_final,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.descsize		= sizeof(struct poly1305_desc_ctx),

	.base.cra_name		= "poly1305",
	.base.cra_driver_name	= "poly1305-neon",
	.base.cra_priority	= 200,
	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
#endif
}};
240
241static int __init arm_poly1305_mod_init(void)
242{
243	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
244	    (elf_hwcap & HWCAP_NEON))
245		static_branch_enable(&have_neon);
246	else if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
247		/* register only the first entry */
248		return crypto_register_shash(&arm_poly1305_algs[0]);
249
250	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
251		crypto_register_shashes(arm_poly1305_algs,
252					ARRAY_SIZE(arm_poly1305_algs)) : 0;
253}
254
255static void __exit arm_poly1305_mod_exit(void)
256{
257	if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
258		return;
259	if (!static_branch_likely(&have_neon)) {
260		crypto_unregister_shash(&arm_poly1305_algs[0]);
261		return;
262	}
263	crypto_unregister_shashes(arm_poly1305_algs,
264				  ARRAY_SIZE(arm_poly1305_algs));
265}
266
267module_init(arm_poly1305_mod_init);
268module_exit(arm_poly1305_mod_exit);
269
270MODULE_LICENSE("GPL v2");
271MODULE_ALIAS_CRYPTO("poly1305");
272MODULE_ALIAS_CRYPTO("poly1305-arm");
273MODULE_ALIAS_CRYPTO("poly1305-neon");
274