1/*	$NetBSD: chacha_ref.c,v 1.1 2020/07/25 22:46:34 riastradh Exp $	*/
2
3/*-
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * ChaCha pseudorandom function family and stream cipher portable C
31 * implementation.  Derived from the specification,
32 *
33 *	Daniel J. Bernstein, `ChaCha, a variant of Salsa20', Workshop
34 *	Record of the State of the Art in Stream Ciphers -- SASC 2008.
35 *	https://cr.yp.to/papers.html#chacha
36 *
37 * which in turn builds on the specification of Salsa20 available at
38 * <https://cr.yp.to/snuffle.html>.  The particular parametrization of
39 * the stream cipher, with a 32-bit block counter and 96-bit nonce, is
40 * described in
41 *
42 *	Y. Nir and A. Langley, `ChaCha20 and Poly1305 for IETF
43 *	Protocols', IETF RFC 8439, June 2018.
44 *	https://tools.ietf.org/html/rfc8439
45 */
46
47#include "chacha_ref.h"
48
/*
 * rol32(u, c)
 *
 *	Rotate the 32-bit word u left by c bit positions and return the
 *	result.
 *
 *	Both shift counts are reduced modulo 32, so c = 0 (or any other
 *	multiple of 32) returns u unchanged rather than evaluating a
 *	shift by the full width of the type, which C leaves undefined.
 *	For 0 < c < 32 the result is the same as the plain
 *	(u << c) | (u >> (32 - c)) formulation.
 */
static uint32_t
rol32(uint32_t u, unsigned c)
{

	/* -c is computed in unsigned arithmetic: (-c & 31) == (32 - c) % 32 */
	return (u << (c & 31)) | (u >> (-c & 31));
}
55
/*
 * CHACHA_QUARTERROUND(a, b, c, d)
 *
 *	ChaCha quarter-round on four 32-bit state words, updated in
 *	place.  It is four add / xor / rotate-left steps with rotation
 *	counts 16, 12, 8 and 7, in that order.
 *
 *	Every argument is evaluated more than once, so each must be a
 *	plain lvalue without side effects.
 */
#define	CHACHA_QUARTERROUND(a, b, c, d) do				      \
{									      \
	(a) += (b);	(d) = rol32((d) ^ (a), 16);			      \
	(c) += (d);	(b) = rol32((b) ^ (c), 12);			      \
	(a) += (b);	(d) = rol32((d) ^ (a),  8);			      \
	(c) += (d);	(b) = rol32((b) ^ (c),  7);			      \
} while (/*CONSTCOND*/0)
63
/*
 * The ASCII text `expand 32-byte k': exactly 16 bytes, with no NUL
 * terminator.  The stream functions in this file load it as state
 * words 0-3, and the XChaCha functions pass it to hchacha_ref as the
 * constant input.
 */
const uint8_t chacha_const32[16] = {
	'e', 'x', 'p', 'a', 'n', 'd', ' ', '3',
	'2', '-', 'b', 'y', 't', 'e', ' ', 'k',
};
65
/*
 * chacha_core_ref(out, in, k, c, nr)
 *
 *	Compute one 64-byte ChaCha block.  The 16-word initial state is
 *	read little-endian from c (words 0-3), k (words 4-11) and in
 *	(words 12-15).  A working copy is permuted with nr rounds, and
 *	the word-wise sum of the initial and permuted states is stored
 *	little-endian in out.
 *
 *	Each loop iteration performs two rounds (four column
 *	quarter-rounds, then four diagonal ones) and subtracts 2 from
 *	nr, so nr must be even: an odd unsigned count never reaches zero.
 */
static void
chacha_core_ref(uint8_t out[restrict static 64], const uint8_t in[static 16],
    const uint8_t k[static 32], const uint8_t c[static 16], unsigned nr)
{
	uint32_t init[16], work[16];
	unsigned w;

	/* Load the initial state: constant, key, then input block.  */
	for (w = 0; w < 4; w++)
		init[w] = le32dec(c + 4*w);
	for (w = 0; w < 8; w++)
		init[4 + w] = le32dec(k + 4*w);
	for (w = 0; w < 4; w++)
		init[12 + w] = le32dec(in + 4*w);

	/* The rounds operate on a copy; init is needed for the final sum.  */
	for (w = 0; w < 16; w++)
		work[w] = init[w];

	while (nr > 0) {
		/* Column round.  */
		CHACHA_QUARTERROUND(work[0], work[4], work[8], work[12]);
		CHACHA_QUARTERROUND(work[1], work[5], work[9], work[13]);
		CHACHA_QUARTERROUND(work[2], work[6], work[10], work[14]);
		CHACHA_QUARTERROUND(work[3], work[7], work[11], work[15]);
		/* Diagonal round.  */
		CHACHA_QUARTERROUND(work[0], work[5], work[10], work[15]);
		CHACHA_QUARTERROUND(work[1], work[6], work[11], work[12]);
		CHACHA_QUARTERROUND(work[2], work[7], work[8], work[13]);
		CHACHA_QUARTERROUND(work[3], work[4], work[9], work[14]);
		nr -= 2;
	}

	/* Feed-forward: add the initial state and serialize.  */
	for (w = 0; w < 16; w++)
		le32enc(out + 4*w, init[w] + work[w]);
}
118
119/* ChaCha stream cipher (IETF style, 96-bit nonce and 32-bit block counter) */
120
121static void
122chacha_stream_ref(uint8_t *restrict s, size_t nbytes,
123    uint32_t blkno,
124    const uint8_t nonce[static 12],
125    const uint8_t k[static 32],
126    unsigned nr)
127{
128	const uint8_t *c = chacha_const32;
129	uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
130	uint32_t y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15;
131	unsigned i;
132
133	x0 = le32dec(c + 0);
134	x1 = le32dec(c + 4);
135	x2 = le32dec(c + 8);
136	x3 = le32dec(c + 12);
137	x4 = le32dec(k + 0);
138	x5 = le32dec(k + 4);
139	x6 = le32dec(k + 8);
140	x7 = le32dec(k + 12);
141	x8 = le32dec(k + 16);
142	x9 = le32dec(k + 20);
143	x10 = le32dec(k + 24);
144	x11 = le32dec(k + 28);
145	/* x12 = blkno */
146	x13 = le32dec(nonce + 0);
147	x14 = le32dec(nonce + 4);
148	x15 = le32dec(nonce + 8);
149
150	for (; nbytes >= 64; nbytes -= 64, s += 64, blkno++) {
151		y0 = x0;
152		y1 = x1;
153		y2 = x2;
154		y3 = x3;
155		y4 = x4;
156		y5 = x5;
157		y6 = x6;
158		y7 = x7;
159		y8 = x8;
160		y9 = x9;
161		y10 = x10;
162		y11 = x11;
163		y12 = x12 = blkno;
164		y13 = x13;
165		y14 = x14;
166		y15 = x15;
167		for (i = nr; i > 0; i -= 2) {
168			CHACHA_QUARTERROUND( y0, y4, y8,y12);
169			CHACHA_QUARTERROUND( y1, y5, y9,y13);
170			CHACHA_QUARTERROUND( y2, y6,y10,y14);
171			CHACHA_QUARTERROUND( y3, y7,y11,y15);
172			CHACHA_QUARTERROUND( y0, y5,y10,y15);
173			CHACHA_QUARTERROUND( y1, y6,y11,y12);
174			CHACHA_QUARTERROUND( y2, y7, y8,y13);
175			CHACHA_QUARTERROUND( y3, y4, y9,y14);
176		}
177		le32enc(s + 0, x0 + y0);
178		le32enc(s + 4, x1 + y1);
179		le32enc(s + 8, x2 + y2);
180		le32enc(s + 12, x3 + y3);
181		le32enc(s + 16, x4 + y4);
182		le32enc(s + 20, x5 + y5);
183		le32enc(s + 24, x6 + y6);
184		le32enc(s + 28, x7 + y7);
185		le32enc(s + 32, x8 + y8);
186		le32enc(s + 36, x9 + y9);
187		le32enc(s + 40, x10 + y10);
188		le32enc(s + 44, x11 + y11);
189		le32enc(s + 48, x12 + y12);
190		le32enc(s + 52, x13 + y13);
191		le32enc(s + 56, x14 + y14);
192		le32enc(s + 60, x15 + y15);
193	}
194
195	if (nbytes) {
196		uint8_t buf[64];
197
198		y0 = x0;
199		y1 = x1;
200		y2 = x2;
201		y3 = x3;
202		y4 = x4;
203		y5 = x5;
204		y6 = x6;
205		y7 = x7;
206		y8 = x8;
207		y9 = x9;
208		y10 = x10;
209		y11 = x11;
210		y12 = x12 = blkno;
211		y13 = x13;
212		y14 = x14;
213		y15 = x15;
214		for (i = nr; i > 0; i -= 2) {
215			CHACHA_QUARTERROUND( y0, y4, y8,y12);
216			CHACHA_QUARTERROUND( y1, y5, y9,y13);
217			CHACHA_QUARTERROUND( y2, y6,y10,y14);
218			CHACHA_QUARTERROUND( y3, y7,y11,y15);
219			CHACHA_QUARTERROUND( y0, y5,y10,y15);
220			CHACHA_QUARTERROUND( y1, y6,y11,y12);
221			CHACHA_QUARTERROUND( y2, y7, y8,y13);
222			CHACHA_QUARTERROUND( y3, y4, y9,y14);
223		}
224		le32enc(buf + 0, x0 + y0);
225		le32enc(buf + 4, x1 + y1);
226		le32enc(buf + 8, x2 + y2);
227		le32enc(buf + 12, x3 + y3);
228		le32enc(buf + 16, x4 + y4);
229		le32enc(buf + 20, x5 + y5);
230		le32enc(buf + 24, x6 + y6);
231		le32enc(buf + 28, x7 + y7);
232		le32enc(buf + 32, x8 + y8);
233		le32enc(buf + 36, x9 + y9);
234		le32enc(buf + 40, x10 + y10);
235		le32enc(buf + 44, x11 + y11);
236		le32enc(buf + 48, x12 + y12);
237		le32enc(buf + 52, x13 + y13);
238		le32enc(buf + 56, x14 + y14);
239		le32enc(buf + 60, x15 + y15);
240		memcpy(s, buf, nbytes);
241	}
242}
243
244static void
245chacha_stream_xor_ref(uint8_t *s, const uint8_t *p, size_t nbytes,
246    uint32_t blkno,
247    const uint8_t nonce[static 12],
248    const uint8_t k[static 32],
249    unsigned nr)
250{
251	const uint8_t *c = chacha_const32;
252	uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
253	uint32_t y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15;
254	unsigned i;
255
256	x0 = le32dec(c + 0);
257	x1 = le32dec(c + 4);
258	x2 = le32dec(c + 8);
259	x3 = le32dec(c + 12);
260	x4 = le32dec(k + 0);
261	x5 = le32dec(k + 4);
262	x6 = le32dec(k + 8);
263	x7 = le32dec(k + 12);
264	x8 = le32dec(k + 16);
265	x9 = le32dec(k + 20);
266	x10 = le32dec(k + 24);
267	x11 = le32dec(k + 28);
268	/* x12 = blkno */
269	x13 = le32dec(nonce + 0);
270	x14 = le32dec(nonce + 4);
271	x15 = le32dec(nonce + 8);
272
273	for (; nbytes >= 64; nbytes -= 64, s += 64, p += 64, blkno++) {
274		y0 = x0;
275		y1 = x1;
276		y2 = x2;
277		y3 = x3;
278		y4 = x4;
279		y5 = x5;
280		y6 = x6;
281		y7 = x7;
282		y8 = x8;
283		y9 = x9;
284		y10 = x10;
285		y11 = x11;
286		y12 = x12 = blkno;
287		y13 = x13;
288		y14 = x14;
289		y15 = x15;
290		for (i = nr; i > 0; i -= 2) {
291			CHACHA_QUARTERROUND( y0, y4, y8,y12);
292			CHACHA_QUARTERROUND( y1, y5, y9,y13);
293			CHACHA_QUARTERROUND( y2, y6,y10,y14);
294			CHACHA_QUARTERROUND( y3, y7,y11,y15);
295			CHACHA_QUARTERROUND( y0, y5,y10,y15);
296			CHACHA_QUARTERROUND( y1, y6,y11,y12);
297			CHACHA_QUARTERROUND( y2, y7, y8,y13);
298			CHACHA_QUARTERROUND( y3, y4, y9,y14);
299		}
300		le32enc(s + 0, (x0 + y0) ^ le32dec(p + 0));
301		le32enc(s + 4, (x1 + y1) ^ le32dec(p + 4));
302		le32enc(s + 8, (x2 + y2) ^ le32dec(p + 8));
303		le32enc(s + 12, (x3 + y3) ^ le32dec(p + 12));
304		le32enc(s + 16, (x4 + y4) ^ le32dec(p + 16));
305		le32enc(s + 20, (x5 + y5) ^ le32dec(p + 20));
306		le32enc(s + 24, (x6 + y6) ^ le32dec(p + 24));
307		le32enc(s + 28, (x7 + y7) ^ le32dec(p + 28));
308		le32enc(s + 32, (x8 + y8) ^ le32dec(p + 32));
309		le32enc(s + 36, (x9 + y9) ^ le32dec(p + 36));
310		le32enc(s + 40, (x10 + y10) ^ le32dec(p + 40));
311		le32enc(s + 44, (x11 + y11) ^ le32dec(p + 44));
312		le32enc(s + 48, (x12 + y12) ^ le32dec(p + 48));
313		le32enc(s + 52, (x13 + y13) ^ le32dec(p + 52));
314		le32enc(s + 56, (x14 + y14) ^ le32dec(p + 56));
315		le32enc(s + 60, (x15 + y15) ^ le32dec(p + 60));
316	}
317
318	if (nbytes) {
319		uint8_t buf[64];
320
321		y0 = x0;
322		y1 = x1;
323		y2 = x2;
324		y3 = x3;
325		y4 = x4;
326		y5 = x5;
327		y6 = x6;
328		y7 = x7;
329		y8 = x8;
330		y9 = x9;
331		y10 = x10;
332		y11 = x11;
333		y12 = x12 = blkno;
334		y13 = x13;
335		y14 = x14;
336		y15 = x15;
337		for (i = nr; i > 0; i -= 2) {
338			CHACHA_QUARTERROUND( y0, y4, y8,y12);
339			CHACHA_QUARTERROUND( y1, y5, y9,y13);
340			CHACHA_QUARTERROUND( y2, y6,y10,y14);
341			CHACHA_QUARTERROUND( y3, y7,y11,y15);
342			CHACHA_QUARTERROUND( y0, y5,y10,y15);
343			CHACHA_QUARTERROUND( y1, y6,y11,y12);
344			CHACHA_QUARTERROUND( y2, y7, y8,y13);
345			CHACHA_QUARTERROUND( y3, y4, y9,y14);
346		}
347		le32enc(buf + 0, x0 + y0);
348		le32enc(buf + 4, x1 + y1);
349		le32enc(buf + 8, x2 + y2);
350		le32enc(buf + 12, x3 + y3);
351		le32enc(buf + 16, x4 + y4);
352		le32enc(buf + 20, x5 + y5);
353		le32enc(buf + 24, x6 + y6);
354		le32enc(buf + 28, x7 + y7);
355		le32enc(buf + 32, x8 + y8);
356		le32enc(buf + 36, x9 + y9);
357		le32enc(buf + 40, x10 + y10);
358		le32enc(buf + 44, x11 + y11);
359		le32enc(buf + 48, x12 + y12);
360		le32enc(buf + 52, x13 + y13);
361		le32enc(buf + 56, x14 + y14);
362		le32enc(buf + 60, x15 + y15);
363		for (i = 0; i < nbytes - nbytes%4; i += 4)
364			le32enc(s + i, le32dec(p + i) ^ le32dec(buf + i));
365		for (; i < nbytes; i++)
366			s[i] = p[i] ^ buf[i];
367	}
368}
369
370/* HChaCha */
371
/*
 * hchacha_ref(out, in, k, c, nr)
 *
 *	HChaCha: build the 16-word state from c (words 0-3), k (words
 *	4-11) and in (words 12-15), all read little-endian, and apply nr
 *	rounds.  Words 0-3 and 12-15 of the permuted state are then
 *	stored little-endian as the 32-byte out.  Unlike
 *	chacha_core_ref, the initial state is not added back in.
 *
 *	nr is consumed two rounds per iteration, so it must be even.
 */
static void
hchacha_ref(uint8_t out[restrict static 32], const uint8_t in[static 16],
    const uint8_t k[static 32], const uint8_t c[static 16], unsigned nr)
{
	uint32_t st[16];
	unsigned w;

	for (w = 0; w < 4; w++)
		st[w] = le32dec(c + 4*w);
	for (w = 0; w < 8; w++)
		st[4 + w] = le32dec(k + 4*w);
	for (w = 0; w < 4; w++)
		st[12 + w] = le32dec(in + 4*w);

	while (nr > 0) {
		/* Column round.  */
		CHACHA_QUARTERROUND(st[0], st[4], st[8], st[12]);
		CHACHA_QUARTERROUND(st[1], st[5], st[9], st[13]);
		CHACHA_QUARTERROUND(st[2], st[6], st[10], st[14]);
		CHACHA_QUARTERROUND(st[3], st[7], st[11], st[15]);
		/* Diagonal round.  */
		CHACHA_QUARTERROUND(st[0], st[5], st[10], st[15]);
		CHACHA_QUARTERROUND(st[1], st[6], st[11], st[12]);
		CHACHA_QUARTERROUND(st[2], st[7], st[8], st[13]);
		CHACHA_QUARTERROUND(st[3], st[4], st[9], st[14]);
		nr -= 2;
	}

	/* First row of the state, then the last row.  */
	for (w = 0; w < 4; w++)
		le32enc(out + 4*w, st[w]);
	for (w = 0; w < 4; w++)
		le32enc(out + 16 + 4*w, st[12 + w]);
}
415
416/* XChaCha stream cipher */
417
418/* https://tools.ietf.org/html/draft-irtf-cfrg-xchacha-03 */
419
420static void
421xchacha_stream_ref(uint8_t *restrict s, size_t nbytes, uint32_t blkno,
422    const uint8_t nonce[static 24], const uint8_t k[static 32], unsigned nr)
423{
424	uint8_t subkey[32];
425	uint8_t subnonce[12];
426
427	hchacha_ref(subkey, nonce/*[0:16)*/, k, chacha_const32, nr);
428	memset(subnonce, 0, 4);
429	memcpy(subnonce + 4, nonce + 16, 8);
430	chacha_stream_ref(s, nbytes, blkno, subnonce, subkey, nr);
431}
432
433static void
434xchacha_stream_xor_ref(uint8_t *restrict c, const uint8_t *p, size_t nbytes,
435    uint32_t blkno,
436    const uint8_t nonce[static 24],
437    const uint8_t k[static 32],
438    unsigned nr)
439{
440	uint8_t subkey[32];
441	uint8_t subnonce[12];
442
443	hchacha_ref(subkey, nonce/*[0:16)*/, k, chacha_const32, nr);
444	memset(subnonce, 0, 4);
445	memcpy(subnonce + 4, nonce + 16, 8);
446	chacha_stream_xor_ref(c, p, nbytes, blkno, subnonce, subkey, nr);
447}
448
/*
 * chacha_probe_ref()
 *
 *	Probe routine for the portable C implementation.  The reference
 *	code is always available, so this returns 0 unconditionally.
 */
static int
chacha_probe_ref(void)
{
	const int error = 0;	/* nothing to detect, nothing can fail */

	return error;
}
456
/*
 * Method table for the portable C implementation.  Every function
 * member points at the corresponding static routine defined in this
 * file.  This table is the only non-static function-related object the
 * file defines.
 *
 * NOTE(review): struct chacha_impl is declared outside this file;
 * presumably the ChaCha dispatch code selects among such tables --
 * confirm against the header.
 */
const struct chacha_impl chacha_ref_impl = {
	.ci_name = "Portable C ChaCha",
	/* Always reports 0; see chacha_probe_ref.  */
	.ci_probe = chacha_probe_ref,
	/* Single-block primitives.  */
	.ci_chacha_core = chacha_core_ref,
	.ci_hchacha = hchacha_ref,
	/* 96-bit-nonce ChaCha: raw keystream, and keystream xor data.  */
	.ci_chacha_stream = chacha_stream_ref,
	.ci_chacha_stream_xor = chacha_stream_xor_ref,
	/* 192-bit-nonce XChaCha: raw keystream, and keystream xor data.  */
	.ci_xchacha_stream = xchacha_stream_ref,
	.ci_xchacha_stream_xor = xchacha_stream_xor_ref,
};
467