1/*
2 * Copyright 2015-2020 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License").  You may not use
5 * this file except in compliance with the License.  You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10/* Adapted from the public domain code by D. Bernstein from SUPERCOP. */
11
12#include <string.h>
13
14#include "internal/endian.h"
15#include "crypto/chacha.h"
16#include "crypto/ctype.h"
17
/* Short aliases for the word and byte types used throughout this file. */
typedef unsigned int u32;
typedef unsigned char u8;

/* One 64-byte block, addressable either as 16 words or as 64 bytes. */
typedef union {
    u32 u[16];
    u8 c[64];
} chacha_buf;

/*
 * ROTATE rotates |v| left by |n| bits, treating |v| as a 32-bit quantity.
 * |n| must be in the range 1..31: a shift by 0 or 32 would make one of the
 * two shifts a full-width shift. Both arguments are evaluated twice.
 */
# define ROTATE(v, n) (((v) << (n)) | ((v) >> (32 - (n))))

/*
 * U32TO8_LITTLE stores the low 32 bits of |v| into the four bytes at |p|,
 * least significant byte first. |v| is parenthesized on every use so that
 * an argument such as |a + b| is shifted as a whole instead of having only
 * its last operand shifted. Both |p| and |v| are evaluated four times, so
 * neither argument may have side effects.
 */
# define U32TO8_LITTLE(p, v) do { \
                                (p)[0] = (u8)((v) >>  0); \
                                (p)[1] = (u8)((v) >>  8); \
                                (p)[2] = (u8)((v) >> 16); \
                                (p)[3] = (u8)((v) >> 24); \
                                } while(0)

/*
 * QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round.
 * The arguments are indices into an array named |x| that must be in scope
 * at the point of use; the macro expands to a single comma expression.
 */
# define QUARTERROUND(a,b,c,d) ( \
                x[a] += x[b], x[d] = ROTATE((x[d] ^ x[a]),16), \
                x[c] += x[d], x[b] = ROTATE((x[b] ^ x[c]),12), \
                x[a] += x[b], x[d] = ROTATE((x[d] ^ x[a]), 8), \
                x[c] += x[d], x[b] = ROTATE((x[b] ^ x[c]), 7)  )
40
41/* chacha_core performs 20 rounds of ChaCha on the input words in
42 * |input| and writes the 64 output bytes to |output|. */
43static void chacha20_core(chacha_buf *output, const u32 input[16])
44{
45    u32 x[16];
46    int i;
47    DECLARE_IS_ENDIAN;
48
49    memcpy(x, input, sizeof(x));
50
51    for (i = 20; i > 0; i -= 2) {
52        QUARTERROUND(0, 4, 8, 12);
53        QUARTERROUND(1, 5, 9, 13);
54        QUARTERROUND(2, 6, 10, 14);
55        QUARTERROUND(3, 7, 11, 15);
56        QUARTERROUND(0, 5, 10, 15);
57        QUARTERROUND(1, 6, 11, 12);
58        QUARTERROUND(2, 7, 8, 13);
59        QUARTERROUND(3, 4, 9, 14);
60    }
61
62    if (IS_LITTLE_ENDIAN) {
63        for (i = 0; i < 16; ++i)
64            output->u[i] = x[i] + input[i];
65    } else {
66        for (i = 0; i < 16; ++i)
67            U32TO8_LITTLE(output->c + 4 * i, (x[i] + input[i]));
68    }
69}
70
71void ChaCha20_ctr32(unsigned char *out, const unsigned char *inp,
72                    size_t len, const unsigned int key[8],
73                    const unsigned int counter[4])
74{
75    u32 input[16];
76    chacha_buf buf;
77    size_t todo, i;
78
79    /* sigma constant "expand 32-byte k" in little-endian encoding */
80    input[0] = ((u32)ossl_toascii('e')) | ((u32)ossl_toascii('x') << 8)
81               | ((u32)ossl_toascii('p') << 16)
82               | ((u32)ossl_toascii('a') << 24);
83    input[1] = ((u32)ossl_toascii('n')) | ((u32)ossl_toascii('d') << 8)
84               | ((u32)ossl_toascii(' ') << 16)
85               | ((u32)ossl_toascii('3') << 24);
86    input[2] = ((u32)ossl_toascii('2')) | ((u32)ossl_toascii('-') << 8)
87               | ((u32)ossl_toascii('b') << 16)
88               | ((u32)ossl_toascii('y') << 24);
89    input[3] = ((u32)ossl_toascii('t')) | ((u32)ossl_toascii('e') << 8)
90               | ((u32)ossl_toascii(' ') << 16)
91               | ((u32)ossl_toascii('k') << 24);
92
93    input[4] = key[0];
94    input[5] = key[1];
95    input[6] = key[2];
96    input[7] = key[3];
97    input[8] = key[4];
98    input[9] = key[5];
99    input[10] = key[6];
100    input[11] = key[7];
101
102    input[12] = counter[0];
103    input[13] = counter[1];
104    input[14] = counter[2];
105    input[15] = counter[3];
106
107    while (len > 0) {
108        todo = sizeof(buf);
109        if (len < todo)
110            todo = len;
111
112        chacha20_core(&buf, input);
113
114        for (i = 0; i < todo; i++)
115            out[i] = inp[i] ^ buf.c[i];
116        out += todo;
117        inp += todo;
118        len -= todo;
119
120        /*
121         * Advance 32-bit counter. Note that as subroutine is so to
122         * say nonce-agnostic, this limited counter width doesn't
123         * prevent caller from implementing wider counter. It would
124         * simply take two calls split on counter overflow...
125         */
126        input[12]++;
127    }
128}
129