1/* $NetBSD: chacha_ref.c,v 1.1 2020/07/25 22:46:34 riastradh Exp $ */ 2 3/*- 4 * Copyright (c) 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 * ChaCha pseudorandom function family and stream cipher portable C 31 * implementation. Derived from the specification, 32 * 33 * Daniel J. Bernstein, `ChaCha, a variant of Salsa20', Workshop 34 * Record of the State of the Art in Stream Ciphers -- SASC 2008. 35 * https://cr.yp.to/papers.html#chacha 36 * 37 * which in turn builds on the specification of Salsa20 available at 38 * <https://cr.yp.to/snuffle.html>. The particular parametrization of 39 * the stream cipher, with a 32-bit block counter and 96-bit nonce, is 40 * described in 41 * 42 * Y. Nir and A. Langley, `ChaCha20 and Poly1305 for IETF 43 * Protocols', IETF RFC 8439, June 2018. 44 * https://tools.ietf.org/html/rfc8439 45 */ 46 47#include "chacha_ref.h" 48 49static uint32_t 50rol32(uint32_t u, unsigned c) 51{ 52 53 return (u << c) | (u >> (32 - c)); 54} 55 56#define CHACHA_QUARTERROUND(a, b, c, d) do \ 57{ \ 58 (a) += (b); (d) ^= (a); (d) = rol32((d), 16); \ 59 (c) += (d); (b) ^= (c); (b) = rol32((b), 12); \ 60 (a) += (b); (d) ^= (a); (d) = rol32((d), 8); \ 61 (c) += (d); (b) ^= (c); (b) = rol32((b), 7); \ 62} while (/*CONSTCOND*/0) 63 64const uint8_t chacha_const32[16] = "expand 32-byte k"; 65 66static void 67chacha_core_ref(uint8_t out[restrict static 64], const uint8_t in[static 16], 68 const uint8_t k[static 32], const uint8_t c[static 16], unsigned nr) 69{ 70 uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15; 71 uint32_t y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15; 72 73 x0 = y0 = le32dec(c + 0); 74 x1 = y1 = le32dec(c + 4); 75 x2 = y2 = le32dec(c + 8); 76 x3 = y3 = le32dec(c + 12); 77 x4 = y4 = le32dec(k + 0); 78 x5 = y5 = le32dec(k + 4); 79 x6 = y6 = le32dec(k + 8); 80 x7 = y7 = le32dec(k + 12); 81 x8 = y8 = le32dec(k + 16); 82 x9 = y9 = le32dec(k + 20); 83 x10 = y10 = le32dec(k + 24); 84 x11 = y11 = le32dec(k + 28); 85 x12 = y12 = le32dec(in + 0); 86 x13 = y13 = le32dec(in + 4); 87 x14 = y14 = le32dec(in + 8); 88 x15 = y15 = le32dec(in + 12); 89 90 for (; nr > 0; nr -= 2) { 91 CHACHA_QUARTERROUND( y0, y4, y8,y12); 92 CHACHA_QUARTERROUND( y1, y5, y9,y13); 93 CHACHA_QUARTERROUND( y2, y6,y10,y14); 94 CHACHA_QUARTERROUND( y3, y7,y11,y15); 95 CHACHA_QUARTERROUND( y0, y5,y10,y15); 96 CHACHA_QUARTERROUND( y1, y6,y11,y12); 97 CHACHA_QUARTERROUND( y2, y7, y8,y13); 98 CHACHA_QUARTERROUND( y3, y4, y9,y14); 99 } 100 101 le32enc(out + 0, x0 + y0); 102 le32enc(out + 4, x1 + y1); 103 le32enc(out + 8, x2 + y2); 104 le32enc(out + 12, x3 + y3); 105 le32enc(out + 16, x4 + y4); 106 le32enc(out + 20, x5 + y5); 107 le32enc(out + 24, x6 + y6); 108 le32enc(out + 28, x7 + y7); 109 le32enc(out + 32, x8 + y8); 110 le32enc(out + 36, x9 + y9); 111 le32enc(out + 40, x10 + y10); 112 le32enc(out + 44, x11 + y11); 113 le32enc(out + 48, x12 + y12); 114 le32enc(out + 52, x13 + y13); 115 le32enc(out + 56, x14 + y14); 116 le32enc(out + 60, x15 + y15); 117} 118 119/* ChaCha stream cipher (IETF style, 96-bit nonce and 32-bit block counter) */ 120 121static void 122chacha_stream_ref(uint8_t *restrict s, size_t nbytes, 123 uint32_t blkno, 124 const uint8_t nonce[static 12], 125 const uint8_t k[static 32], 126 unsigned nr) 127{ 128 const uint8_t *c = chacha_const32; 129 uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15; 130 uint32_t y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15; 131 unsigned i; 132 133 x0 = le32dec(c + 0); 134 x1 = le32dec(c + 4); 135 x2 = le32dec(c + 8); 136 x3 = le32dec(c + 12); 137 x4 = le32dec(k + 0); 138 x5 = le32dec(k + 4); 139 x6 = le32dec(k + 8); 140 x7 = le32dec(k + 12); 141 x8 = le32dec(k + 16); 142 x9 = le32dec(k + 20); 143 x10 = le32dec(k + 24); 144 x11 = le32dec(k + 28); 145 /* x12 = blkno */ 146 x13 = le32dec(nonce + 0); 147 x14 = le32dec(nonce + 4); 148 x15 = le32dec(nonce + 8); 149 150 for (; nbytes >= 64; nbytes -= 64, s += 64, blkno++) { 151 y0 = x0; 152 y1 = x1; 153 y2 = x2; 154 y3 = x3; 155 y4 = x4; 156 y5 = x5; 157 y6 = x6; 158 y7 = x7; 159 y8 = x8; 160 y9 = x9; 161 y10 = x10; 162 y11 = x11; 163 y12 = x12 = blkno; 164 y13 = x13; 165 y14 = x14; 166 y15 = x15; 167 for (i = nr; i > 0; i -= 2) { 168 CHACHA_QUARTERROUND( y0, y4, y8,y12); 169 CHACHA_QUARTERROUND( y1, y5, y9,y13); 170 CHACHA_QUARTERROUND( y2, y6,y10,y14); 171 CHACHA_QUARTERROUND( y3, y7,y11,y15); 172 CHACHA_QUARTERROUND( y0, y5,y10,y15); 173 CHACHA_QUARTERROUND( y1, y6,y11,y12); 174 CHACHA_QUARTERROUND( y2, y7, y8,y13); 175 CHACHA_QUARTERROUND( y3, y4, y9,y14); 176 } 177 le32enc(s + 0, x0 + y0); 178 le32enc(s + 4, x1 + y1); 179 le32enc(s + 8, x2 + y2); 180 le32enc(s + 12, x3 + y3); 181 le32enc(s + 16, x4 + y4); 182 le32enc(s + 20, x5 + y5); 183 le32enc(s + 24, x6 + y6); 184 le32enc(s + 28, x7 + y7); 185 le32enc(s + 32, x8 + y8); 186 le32enc(s + 36, x9 + y9); 187 le32enc(s + 40, x10 + y10); 188 le32enc(s + 44, x11 + y11); 189 le32enc(s + 48, x12 + y12); 190 le32enc(s + 52, x13 + y13); 191 le32enc(s + 56, x14 + y14); 192 le32enc(s + 60, x15 + y15); 193 } 194 195 if (nbytes) { 196 uint8_t buf[64]; 197 198 y0 = x0; 199 y1 = x1; 200 y2 = x2; 201 y3 = x3; 202 y4 = x4; 203 y5 = x5; 204 y6 = x6; 205 y7 = x7; 206 y8 = x8; 207 y9 = x9; 208 y10 = x10; 209 y11 = x11; 210 y12 = x12 = blkno; 211 y13 = x13; 212 y14 = x14; 213 y15 = x15; 214 for (i = nr; i > 0; i -= 2) { 215 CHACHA_QUARTERROUND( y0, y4, y8,y12); 216 CHACHA_QUARTERROUND( y1, y5, y9,y13); 217 CHACHA_QUARTERROUND( y2, y6,y10,y14); 218 CHACHA_QUARTERROUND( y3, y7,y11,y15); 219 CHACHA_QUARTERROUND( y0, y5,y10,y15); 220 CHACHA_QUARTERROUND( y1, y6,y11,y12); 221 CHACHA_QUARTERROUND( y2, y7, y8,y13); 222 CHACHA_QUARTERROUND( y3, y4, y9,y14); 223 } 224 le32enc(buf + 0, x0 + y0); 225 le32enc(buf + 4, x1 + y1); 226 le32enc(buf + 8, x2 + y2); 227 le32enc(buf + 12, x3 + y3); 228 le32enc(buf + 16, x4 + y4); 229 le32enc(buf + 20, x5 + y5); 230 le32enc(buf + 24, x6 + y6); 231 le32enc(buf + 28, x7 + y7); 232 le32enc(buf + 32, x8 + y8); 233 le32enc(buf + 36, x9 + y9); 234 le32enc(buf + 40, x10 + y10); 235 le32enc(buf + 44, x11 + y11); 236 le32enc(buf + 48, x12 + y12); 237 le32enc(buf + 52, x13 + y13); 238 le32enc(buf + 56, x14 + y14); 239 le32enc(buf + 60, x15 + y15); 240 memcpy(s, buf, nbytes); 241 } 242} 243 244static void 245chacha_stream_xor_ref(uint8_t *s, const uint8_t *p, size_t nbytes, 246 uint32_t blkno, 247 const uint8_t nonce[static 12], 248 const uint8_t k[static 32], 249 unsigned nr) 250{ 251 const uint8_t *c = chacha_const32; 252 uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15; 253 uint32_t y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15; 254 unsigned i; 255 256 x0 = le32dec(c + 0); 257 x1 = le32dec(c + 4); 258 x2 = le32dec(c + 8); 259 x3 = le32dec(c + 12); 260 x4 = le32dec(k + 0); 261 x5 = le32dec(k + 4); 262 x6 = le32dec(k + 8); 263 x7 = le32dec(k + 12); 264 x8 = le32dec(k + 16); 265 x9 = le32dec(k + 20); 266 x10 = le32dec(k + 24); 267 x11 = le32dec(k + 28); 268 /* x12 = blkno */ 269 x13 = le32dec(nonce + 0); 270 x14 = le32dec(nonce + 4); 271 x15 = le32dec(nonce + 8); 272 273 for (; nbytes >= 64; nbytes -= 64, s += 64, p += 64, blkno++) { 274 y0 = x0; 275 y1 = x1; 276 y2 = x2; 277 y3 = x3; 278 y4 = x4; 279 y5 = x5; 280 y6 = x6; 281 y7 = x7; 282 y8 = x8; 283 y9 = x9; 284 y10 = x10; 285 y11 = x11; 286 y12 = x12 = blkno; 287 y13 = x13; 288 y14 = x14; 289 y15 = x15; 290 for (i = nr; i > 0; i -= 2) { 291 CHACHA_QUARTERROUND( y0, y4, y8,y12); 292 CHACHA_QUARTERROUND( y1, y5, y9,y13); 293 CHACHA_QUARTERROUND( y2, y6,y10,y14); 294 CHACHA_QUARTERROUND( y3, y7,y11,y15); 295 CHACHA_QUARTERROUND( y0, y5,y10,y15); 296 CHACHA_QUARTERROUND( y1, y6,y11,y12); 297 CHACHA_QUARTERROUND( y2, y7, y8,y13); 298 CHACHA_QUARTERROUND( y3, y4, y9,y14); 299 } 300 le32enc(s + 0, (x0 + y0) ^ le32dec(p + 0)); 301 le32enc(s + 4, (x1 + y1) ^ le32dec(p + 4)); 302 le32enc(s + 8, (x2 + y2) ^ le32dec(p + 8)); 303 le32enc(s + 12, (x3 + y3) ^ le32dec(p + 12)); 304 le32enc(s + 16, (x4 + y4) ^ le32dec(p + 16)); 305 le32enc(s + 20, (x5 + y5) ^ le32dec(p + 20)); 306 le32enc(s + 24, (x6 + y6) ^ le32dec(p + 24)); 307 le32enc(s + 28, (x7 + y7) ^ le32dec(p + 28)); 308 le32enc(s + 32, (x8 + y8) ^ le32dec(p + 32)); 309 le32enc(s + 36, (x9 + y9) ^ le32dec(p + 36)); 310 le32enc(s + 40, (x10 + y10) ^ le32dec(p + 40)); 311 le32enc(s + 44, (x11 + y11) ^ le32dec(p + 44)); 312 le32enc(s + 48, (x12 + y12) ^ le32dec(p + 48)); 313 le32enc(s + 52, (x13 + y13) ^ le32dec(p + 52)); 314 le32enc(s + 56, (x14 + y14) ^ le32dec(p + 56)); 315 le32enc(s + 60, (x15 + y15) ^ le32dec(p + 60)); 316 } 317 318 if (nbytes) { 319 uint8_t buf[64]; 320 321 y0 = x0; 322 y1 = x1; 323 y2 = x2; 324 y3 = x3; 325 y4 = x4; 326 y5 = x5; 327 y6 = x6; 328 y7 = x7; 329 y8 = x8; 330 y9 = x9; 331 y10 = x10; 332 y11 = x11; 333 y12 = x12 = blkno; 334 y13 = x13; 335 y14 = x14; 336 y15 = x15; 337 for (i = nr; i > 0; i -= 2) { 338 CHACHA_QUARTERROUND( y0, y4, y8,y12); 339 CHACHA_QUARTERROUND( y1, y5, y9,y13); 340 CHACHA_QUARTERROUND( y2, y6,y10,y14); 341 CHACHA_QUARTERROUND( y3, y7,y11,y15); 342 CHACHA_QUARTERROUND( y0, y5,y10,y15); 343 CHACHA_QUARTERROUND( y1, y6,y11,y12); 344 CHACHA_QUARTERROUND( y2, y7, y8,y13); 345 CHACHA_QUARTERROUND( y3, y4, y9,y14); 346 } 347 le32enc(buf + 0, x0 + y0); 348 le32enc(buf + 4, x1 + y1); 349 le32enc(buf + 8, x2 + y2); 350 le32enc(buf + 12, x3 + y3); 351 le32enc(buf + 16, x4 + y4); 352 le32enc(buf + 20, x5 + y5); 353 le32enc(buf + 24, x6 + y6); 354 le32enc(buf + 28, x7 + y7); 355 le32enc(buf + 32, x8 + y8); 356 le32enc(buf + 36, x9 + y9); 357 le32enc(buf + 40, x10 + y10); 358 le32enc(buf + 44, x11 + y11); 359 le32enc(buf + 48, x12 + y12); 360 le32enc(buf + 52, x13 + y13); 361 le32enc(buf + 56, x14 + y14); 362 le32enc(buf + 60, x15 + y15); 363 for (i = 0; i < nbytes - nbytes%4; i += 4) 364 le32enc(s + i, le32dec(p + i) ^ le32dec(buf + i)); 365 for (; i < nbytes; i++) 366 s[i] = p[i] ^ buf[i]; 367 } 368} 369 370/* HChaCha */ 371 372static void 373hchacha_ref(uint8_t out[restrict static 32], const uint8_t in[static 16], 374 const uint8_t k[static 32], const uint8_t c[static 16], unsigned nr) 375{ 376 uint32_t y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15; 377 378 y0 = le32dec(c + 0); 379 y1 = le32dec(c + 4); 380 y2 = le32dec(c + 8); 381 y3 = le32dec(c + 12); 382 y4 = le32dec(k + 0); 383 y5 = le32dec(k + 4); 384 y6 = le32dec(k + 8); 385 y7 = le32dec(k + 12); 386 y8 = le32dec(k + 16); 387 y9 = le32dec(k + 20); 388 y10 = le32dec(k + 24); 389 y11 = le32dec(k + 28); 390 y12 = le32dec(in + 0); 391 y13 = le32dec(in + 4); 392 y14 = le32dec(in + 8); 393 y15 = le32dec(in + 12); 394 395 for (; nr > 0; nr -= 2) { 396 CHACHA_QUARTERROUND( y0, y4, y8,y12); 397 CHACHA_QUARTERROUND( y1, y5, y9,y13); 398 CHACHA_QUARTERROUND( y2, y6,y10,y14); 399 CHACHA_QUARTERROUND( y3, y7,y11,y15); 400 CHACHA_QUARTERROUND( y0, y5,y10,y15); 401 CHACHA_QUARTERROUND( y1, y6,y11,y12); 402 CHACHA_QUARTERROUND( y2, y7, y8,y13); 403 CHACHA_QUARTERROUND( y3, y4, y9,y14); 404 } 405 406 le32enc(out + 0, y0); 407 le32enc(out + 4, y1); 408 le32enc(out + 8, y2); 409 le32enc(out + 12, y3); 410 le32enc(out + 16, y12); 411 le32enc(out + 20, y13); 412 le32enc(out + 24, y14); 413 le32enc(out + 28, y15); 414} 415 416/* XChaCha stream cipher */ 417 418/* https://tools.ietf.org/html/draft-irtf-cfrg-xchacha-03 */ 419 420static void 421xchacha_stream_ref(uint8_t *restrict s, size_t nbytes, uint32_t blkno, 422 const uint8_t nonce[static 24], const uint8_t k[static 32], unsigned nr) 423{ 424 uint8_t subkey[32]; 425 uint8_t subnonce[12]; 426 427 hchacha_ref(subkey, nonce/*[0:16)*/, k, chacha_const32, nr); 428 memset(subnonce, 0, 4); 429 memcpy(subnonce + 4, nonce + 16, 8); 430 chacha_stream_ref(s, nbytes, blkno, subnonce, subkey, nr); 431} 432 433static void 434xchacha_stream_xor_ref(uint8_t *restrict c, const uint8_t *p, size_t nbytes, 435 uint32_t blkno, 436 const uint8_t nonce[static 24], 437 const uint8_t k[static 32], 438 unsigned nr) 439{ 440 uint8_t subkey[32]; 441 uint8_t subnonce[12]; 442 443 hchacha_ref(subkey, nonce/*[0:16)*/, k, chacha_const32, nr); 444 memset(subnonce, 0, 4); 445 memcpy(subnonce + 4, nonce + 16, 8); 446 chacha_stream_xor_ref(c, p, nbytes, blkno, subnonce, subkey, nr); 447} 448 449static int 450chacha_probe_ref(void) 451{ 452 453 /* The reference implementation is always available. */ 454 return 0; 455} 456 457const struct chacha_impl chacha_ref_impl = { 458 .ci_name = "Portable C ChaCha", 459 .ci_probe = chacha_probe_ref, 460 .ci_chacha_core = chacha_core_ref, 461 .ci_hchacha = hchacha_ref, 462 .ci_chacha_stream = chacha_stream_ref, 463 .ci_chacha_stream_xor = chacha_stream_xor_ref, 464 .ci_xchacha_stream = xchacha_stream_ref, 465 .ci_xchacha_stream_xor = xchacha_stream_xor_ref, 466}; 467