1/* $OpenBSD: chacha-merged.c,v 1.13 2024/06/05 19:43:06 tb Exp $ */
2/*
3chacha-merged.c version 20080118
4D. J. Bernstein
5Public domain.
6*/
7
8#include <stdint.h>
9
#define CHACHA_MINKEYLEN	16	/* minimum key length in bytes (128-bit key) */
#define CHACHA_NONCELEN		8	/* nonce length in bytes (input[14..15]) */
#define CHACHA_CTRLEN		8	/* block counter length in bytes (input[12..13]) */
#define CHACHA_STATELEN		(CHACHA_NONCELEN+CHACHA_CTRLEN)
#define CHACHA_BLOCKLEN		64	/* keystream block size in bytes (16 x 32-bit words) */
15
typedef uint8_t u8;
typedef uint32_t u32;

/*
 * ChaCha cipher state: 16 32-bit words (constants, key, counter, nonce)
 * plus a cache of leftover keystream from the last partial block.
 */
struct chacha_ctx {
	u32 input[16];		/* [0..3] constants, [4..11] key, [12..13] counter, [14..15] nonce */
	u8 ks[CHACHA_BLOCKLEN];	/* keystream of the last block, saved when bytes < 64 */
	u8 unused;		/* number of keystream bytes in ks not yet consumed (64 - bytes) */
};
24
/*
 * Forward declarations.  The OpenBSD __bounded__ attribute lets the
 * compiler check that callers pass buffers of at least the stated size
 * (argument index, minimum byte count / length-argument index).
 */
static inline void chacha_keysetup(struct chacha_ctx *x, const u8 *k, u32 kbits)
    __attribute__((__bounded__(__minbytes__, 2, CHACHA_MINKEYLEN)));
static inline void chacha_ivsetup(struct chacha_ctx *x, const u8 *iv,
    const u8 *ctr)
    __attribute__((__bounded__(__minbytes__, 2, CHACHA_NONCELEN)))
    __attribute__((__bounded__(__minbytes__, 3, CHACHA_CTRLEN)));
static inline void chacha_encrypt_bytes(struct chacha_ctx *x, const u8 *m,
    u8 *c, u32 bytes)
    __attribute__((__bounded__(__buffer__, 2, 4)))
    __attribute__((__bounded__(__buffer__, 3, 4)));

typedef struct chacha_ctx chacha_ctx;
37
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Truncate a value to 8 / 32 bits (no-ops on exact-width types, kept for clarity). */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/* Rotate a 32-bit value left by n bits (n in 1..31 at all call sites). */
#define ROTL32(v, n) \
  (U32V((v) << (n)) | ((v) >> (32 - (n))))

/* Load 4 bytes as a little-endian 32-bit word, endian-independently. */
#define U8TO32_LITTLE(p) \
  (((u32)((p)[0])) | \
   ((u32)((p)[1]) <<  8) | \
   ((u32)((p)[2]) << 16) | \
   ((u32)((p)[3]) << 24))

/* Store a 32-bit word as 4 little-endian bytes. */
#define U32TO8_LITTLE(p, v) \
  do { \
    (p)[0] = U8V((v)); \
    (p)[1] = U8V((v) >>  8); \
    (p)[2] = U8V((v) >> 16); \
    (p)[3] = U8V((v) >> 24); \
  } while (0)

#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * The ChaCha quarter round: mixes four state words with add/xor/rotate
 * using the rotation constants 16, 12, 8, 7.
 */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
71
/*
 * ChaCha constants, spelled out as bytes rather than string literals so
 * no NUL terminator is embedded.
 */

/* Initialise with "expand 32-byte k". */
static const char sigma[16] = {
	0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x33,
	0x32, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b,
};

/* Initialise with "expand 16-byte k". */
static const char tau[16] = {
	0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x31,
	0x36, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b,
};
83
84static inline void
85chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits)
86{
87	const char *constants;
88
89	x->input[4] = U8TO32_LITTLE(k + 0);
90	x->input[5] = U8TO32_LITTLE(k + 4);
91	x->input[6] = U8TO32_LITTLE(k + 8);
92	x->input[7] = U8TO32_LITTLE(k + 12);
93	if (kbits == 256) { /* recommended */
94		k += 16;
95		constants = sigma;
96	} else { /* kbits == 128 */
97		constants = tau;
98	}
99	x->input[8] = U8TO32_LITTLE(k + 0);
100	x->input[9] = U8TO32_LITTLE(k + 4);
101	x->input[10] = U8TO32_LITTLE(k + 8);
102	x->input[11] = U8TO32_LITTLE(k + 12);
103	x->input[0] = U8TO32_LITTLE(constants + 0);
104	x->input[1] = U8TO32_LITTLE(constants + 4);
105	x->input[2] = U8TO32_LITTLE(constants + 8);
106	x->input[3] = U8TO32_LITTLE(constants + 12);
107}
108
109static inline void
110chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
111{
112	x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
113	x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
114	x->input[14] = U8TO32_LITTLE(iv + 0);
115	x->input[15] = U8TO32_LITTLE(iv + 4);
116}
117
/*
 * Encrypt (or decrypt -- the operation is its own inverse) |bytes| bytes
 * from |m| into |c| by XORing with the ChaCha keystream.  Operates on
 * 64-byte blocks; a trailing partial block is staged through a local
 * buffer so only |bytes| output bytes are written, and the full 64-byte
 * keystream block is saved in x->ks with the unconsumed count in
 * x->unused.  The 64-bit block counter in input[12..13] is advanced once
 * per block and written back on return.  m and c may be equal (in-place).
 */
static inline void
chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes)
{
	u32 x0, x1, x2, x3, x4, x5, x6, x7;
	u32 x8, x9, x10, x11, x12, x13, x14, x15;
	u32 j0, j1, j2, j3, j4, j5, j6, j7;
	u32 j8, j9, j10, j11, j12, j13, j14, j15;
	u8 *ctarget = NULL;	/* real destination while c is redirected to tmp */
	u8 tmp[64];		/* staging buffer for a final partial block */
	u32 i;

	if (!bytes)
		return;

	/* Snapshot the state into locals; j12/j13 (counter) are written back. */
	j0 = x->input[0];
	j1 = x->input[1];
	j2 = x->input[2];
	j3 = x->input[3];
	j4 = x->input[4];
	j5 = x->input[5];
	j6 = x->input[6];
	j7 = x->input[7];
	j8 = x->input[8];
	j9 = x->input[9];
	j10 = x->input[10];
	j11 = x->input[11];
	j12 = x->input[12];
	j13 = x->input[13];
	j14 = x->input[14];
	j15 = x->input[15];

	for (;;) {
		/*
		 * Partial final block: copy the remaining input into tmp
		 * (zero-padding is unnecessary -- only |bytes| output bytes
		 * are copied out below) and redirect both m and c to tmp so
		 * the full-block code can read/write 64 bytes safely.
		 */
		if (bytes < 64) {
			for (i = 0; i < bytes; ++i)
				tmp[i] = m[i];
			m = tmp;
			ctarget = c;
			c = tmp;
		}
		x0 = j0;
		x1 = j1;
		x2 = j2;
		x3 = j3;
		x4 = j4;
		x5 = j5;
		x6 = j6;
		x7 = j7;
		x8 = j8;
		x9 = j9;
		x10 = j10;
		x11 = j11;
		x12 = j12;
		x13 = j13;
		x14 = j14;
		x15 = j15;
		/* 20 rounds: each iteration is one column round + one diagonal round. */
		for (i = 20; i > 0; i -= 2) {
			QUARTERROUND(x0, x4, x8, x12)
			QUARTERROUND(x1, x5, x9, x13)
			QUARTERROUND(x2, x6, x10, x14)
			QUARTERROUND(x3, x7, x11, x15)
			QUARTERROUND(x0, x5, x10, x15)
			QUARTERROUND(x1, x6, x11, x12)
			QUARTERROUND(x2, x7, x8, x13)
			QUARTERROUND(x3, x4, x9, x14)
		}
		/* Feed-forward: add the original state to the permuted state. */
		x0 = PLUS(x0, j0);
		x1 = PLUS(x1, j1);
		x2 = PLUS(x2, j2);
		x3 = PLUS(x3, j3);
		x4 = PLUS(x4, j4);
		x5 = PLUS(x5, j5);
		x6 = PLUS(x6, j6);
		x7 = PLUS(x7, j7);
		x8 = PLUS(x8, j8);
		x9 = PLUS(x9, j9);
		x10 = PLUS(x10, j10);
		x11 = PLUS(x11, j11);
		x12 = PLUS(x12, j12);
		x13 = PLUS(x13, j13);
		x14 = PLUS(x14, j14);
		x15 = PLUS(x15, j15);

		/*
		 * On a partial block, save the raw keystream (before the
		 * message XOR) so a caller can consume the remainder later.
		 */
		if (bytes < 64) {
			U32TO8_LITTLE(x->ks + 0, x0);
			U32TO8_LITTLE(x->ks + 4, x1);
			U32TO8_LITTLE(x->ks + 8, x2);
			U32TO8_LITTLE(x->ks + 12, x3);
			U32TO8_LITTLE(x->ks + 16, x4);
			U32TO8_LITTLE(x->ks + 20, x5);
			U32TO8_LITTLE(x->ks + 24, x6);
			U32TO8_LITTLE(x->ks + 28, x7);
			U32TO8_LITTLE(x->ks + 32, x8);
			U32TO8_LITTLE(x->ks + 36, x9);
			U32TO8_LITTLE(x->ks + 40, x10);
			U32TO8_LITTLE(x->ks + 44, x11);
			U32TO8_LITTLE(x->ks + 48, x12);
			U32TO8_LITTLE(x->ks + 52, x13);
			U32TO8_LITTLE(x->ks + 56, x14);
			U32TO8_LITTLE(x->ks + 60, x15);
		}

		/* XOR the keystream with the message block. */
		x0 = XOR(x0, U8TO32_LITTLE(m + 0));
		x1 = XOR(x1, U8TO32_LITTLE(m + 4));
		x2 = XOR(x2, U8TO32_LITTLE(m + 8));
		x3 = XOR(x3, U8TO32_LITTLE(m + 12));
		x4 = XOR(x4, U8TO32_LITTLE(m + 16));
		x5 = XOR(x5, U8TO32_LITTLE(m + 20));
		x6 = XOR(x6, U8TO32_LITTLE(m + 24));
		x7 = XOR(x7, U8TO32_LITTLE(m + 28));
		x8 = XOR(x8, U8TO32_LITTLE(m + 32));
		x9 = XOR(x9, U8TO32_LITTLE(m + 36));
		x10 = XOR(x10, U8TO32_LITTLE(m + 40));
		x11 = XOR(x11, U8TO32_LITTLE(m + 44));
		x12 = XOR(x12, U8TO32_LITTLE(m + 48));
		x13 = XOR(x13, U8TO32_LITTLE(m + 52));
		x14 = XOR(x14, U8TO32_LITTLE(m + 56));
		x15 = XOR(x15, U8TO32_LITTLE(m + 60));

		/* Advance the 64-bit block counter with carry into j13. */
		j12 = PLUSONE(j12);
		if (!j12) {
			j13 = PLUSONE(j13);
			/*
			 * Stopping at 2^70 bytes per nonce is the user's
			 * responsibility.
			 */
		}

		U32TO8_LITTLE(c + 0, x0);
		U32TO8_LITTLE(c + 4, x1);
		U32TO8_LITTLE(c + 8, x2);
		U32TO8_LITTLE(c + 12, x3);
		U32TO8_LITTLE(c + 16, x4);
		U32TO8_LITTLE(c + 20, x5);
		U32TO8_LITTLE(c + 24, x6);
		U32TO8_LITTLE(c + 28, x7);
		U32TO8_LITTLE(c + 32, x8);
		U32TO8_LITTLE(c + 36, x9);
		U32TO8_LITTLE(c + 40, x10);
		U32TO8_LITTLE(c + 44, x11);
		U32TO8_LITTLE(c + 48, x12);
		U32TO8_LITTLE(c + 52, x13);
		U32TO8_LITTLE(c + 56, x14);
		U32TO8_LITTLE(c + 60, x15);

		if (bytes <= 64) {
			/* Partial block: copy only the requested bytes out of tmp. */
			if (bytes < 64) {
				for (i = 0; i < bytes; ++i)
					ctarget[i] = c[i];
			}
			/* Persist the advanced counter and leftover-keystream count. */
			x->input[12] = j12;
			x->input[13] = j13;
			x->unused = 64 - bytes;
			return;
		}
		bytes -= 64;
		c += 64;
		m += 64;
	}
}
277
278void
279CRYPTO_hchacha_20(unsigned char subkey[32], const unsigned char key[32],
280    const unsigned char nonce[16])
281{
282	uint32_t x[16];
283	int i;
284
285	x[0] = U8TO32_LITTLE(sigma + 0);
286	x[1] = U8TO32_LITTLE(sigma + 4);
287	x[2] = U8TO32_LITTLE(sigma + 8);
288	x[3] = U8TO32_LITTLE(sigma + 12);
289	x[4] = U8TO32_LITTLE(key + 0);
290	x[5] = U8TO32_LITTLE(key + 4);
291	x[6] = U8TO32_LITTLE(key + 8);
292	x[7] = U8TO32_LITTLE(key + 12);
293	x[8] = U8TO32_LITTLE(key + 16);
294	x[9] = U8TO32_LITTLE(key + 20);
295	x[10] = U8TO32_LITTLE(key + 24);
296	x[11] = U8TO32_LITTLE(key + 28);
297	x[12] = U8TO32_LITTLE(nonce + 0);
298	x[13] = U8TO32_LITTLE(nonce + 4);
299	x[14] = U8TO32_LITTLE(nonce + 8);
300	x[15] = U8TO32_LITTLE(nonce + 12);
301
302	for (i = 20; i > 0; i -= 2) {
303		QUARTERROUND(x[0], x[4], x[8], x[12])
304		QUARTERROUND(x[1], x[5], x[9], x[13])
305		QUARTERROUND(x[2], x[6], x[10], x[14])
306		QUARTERROUND(x[3], x[7], x[11], x[15])
307		QUARTERROUND(x[0], x[5], x[10], x[15])
308		QUARTERROUND(x[1], x[6], x[11], x[12])
309		QUARTERROUND(x[2], x[7], x[8], x[13])
310		QUARTERROUND(x[3], x[4], x[9], x[14])
311	}
312
313	U32TO8_LITTLE(subkey + 0, x[0]);
314	U32TO8_LITTLE(subkey + 4, x[1]);
315	U32TO8_LITTLE(subkey + 8, x[2]);
316	U32TO8_LITTLE(subkey + 12, x[3]);
317
318	U32TO8_LITTLE(subkey + 16, x[12]);
319	U32TO8_LITTLE(subkey + 20, x[13]);
320	U32TO8_LITTLE(subkey + 24, x[14]);
321	U32TO8_LITTLE(subkey + 28, x[15]);
322}
323LCRYPTO_ALIAS(CRYPTO_hchacha_20);
324