/*	$OpenBSD: chacha_private.h,v 1.4 2020/07/22 13:54:30 tobhe Exp $	*/
/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/

#include <sys/systm.h>

/* Short integer aliases used by the ChaCha code in this file. */
typedef unsigned char u8;  /* one byte of key, IV, counter or message data */
typedef unsigned int u32;  /* one state word; the U32V() macro masks it to 32 bits */

/*
 * ChaCha cipher state: sixteen 32-bit words.
 *
 * As filled in by chacha_keysetup() and chacha_ivsetup():
 *   input[0..3]    constant words (sigma or tau)
 *   input[4..11]   key words
 *   input[12..13]  block counter, low word in input[12]
 *   input[14..15]  IV words
 */
typedef struct
{
  u32 input[16]; /* could be compressed */
} chacha_ctx;

/* Attach the unsigned suffix to an integer literal. */
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Reduce a value to its low 8 / low 32 bits. */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/*
 * Rotate v left by n bits within 32 bits.
 * Only the left-shifted half is masked, so v is expected to be a u32.
 * n must be in 1..31: the right shift count is (32 - n).
 */
#define ROTL32(v, n) \
  (U32V((v) << (n)) | ((v) >> (32 - (n))))

/*
 * Assemble a 32-bit word from the four bytes at p, least significant
 * byte first.  p is evaluated four times.
 */
#define U8TO32_LITTLE(p) \
  (((u32)((p)[0])      ) | \
   ((u32)((p)[1]) <<  8) | \
   ((u32)((p)[2]) << 16) | \
   ((u32)((p)[3]) << 24))

/*
 * Store the 32-bit word v into the four bytes at p, least significant
 * byte first.  Both p and v are evaluated four times.
 */
#define U32TO8_LITTLE(p, v) \
  do { \
    (p)[0] = U8V((v)      ); \
    (p)[1] = U8V((v) >>  8); \
    (p)[2] = U8V((v) >> 16); \
    (p)[3] = U8V((v) >> 24); \
  } while (0)

/* Word operations used by the round function; PLUS wraps modulo 2^32. */
#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * ChaCha quarter round: updates the four words a, b, c, d in place, so
 * each argument must be a modifiable u32 lvalue.
 *
 * The expansion is eight statements and already ends in ';'.  It is
 * invoked without a trailing semicolon and must not be used as the
 * unbraced body of an if, else or loop.
 */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/*
 * ChaCha constant words, kept as ASCII text.  Each array is exactly 16
 * bytes, so the string literal's terminating NUL is not stored.
 * chacha_keysetup() selects sigma for 256-bit keys and tau otherwise.
 */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";

55static inline void
56hchacha20(u32 derived_key[8], const u8 nonce[16], const u8 key[32])
57{
58  int i;
59  uint32_t x[] = {
60    U8TO32_LITTLE(sigma + 0),
61    U8TO32_LITTLE(sigma + 4),
62    U8TO32_LITTLE(sigma + 8),
63    U8TO32_LITTLE(sigma + 12),
64    U8TO32_LITTLE(key + 0),
65    U8TO32_LITTLE(key + 4),
66    U8TO32_LITTLE(key + 8),
67    U8TO32_LITTLE(key + 12),
68    U8TO32_LITTLE(key + 16),
69    U8TO32_LITTLE(key + 20),
70    U8TO32_LITTLE(key + 24),
71    U8TO32_LITTLE(key + 28),
72    U8TO32_LITTLE(nonce + 0),
73    U8TO32_LITTLE(nonce + 4),
74    U8TO32_LITTLE(nonce + 8),
75    U8TO32_LITTLE(nonce + 12)
76  };
77
78  for (i = 20;i > 0;i -= 2) {
79    QUARTERROUND( x[0], x[4], x[8],x[12])
80    QUARTERROUND( x[1], x[5], x[9],x[13])
81    QUARTERROUND( x[2], x[6],x[10],x[14])
82    QUARTERROUND( x[3], x[7],x[11],x[15])
83    QUARTERROUND( x[0], x[5],x[10],x[15])
84    QUARTERROUND( x[1], x[6],x[11],x[12])
85    QUARTERROUND( x[2], x[7], x[8],x[13])
86    QUARTERROUND( x[3], x[4], x[9],x[14])
87  }
88
89  memcpy(derived_key + 0, x +  0, sizeof(u32) * 4);
90  memcpy(derived_key + 4, x + 12, sizeof(u32) * 4);
91}
92
93static void
94chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
95{
96  const char *constants;
97
98  x->input[4] = U8TO32_LITTLE(k + 0);
99  x->input[5] = U8TO32_LITTLE(k + 4);
100  x->input[6] = U8TO32_LITTLE(k + 8);
101  x->input[7] = U8TO32_LITTLE(k + 12);
102  if (kbits == 256) { /* recommended */
103    k += 16;
104    constants = sigma;
105  } else { /* kbits == 128 */
106    constants = tau;
107  }
108  x->input[8] = U8TO32_LITTLE(k + 0);
109  x->input[9] = U8TO32_LITTLE(k + 4);
110  x->input[10] = U8TO32_LITTLE(k + 8);
111  x->input[11] = U8TO32_LITTLE(k + 12);
112  x->input[0] = U8TO32_LITTLE(constants + 0);
113  x->input[1] = U8TO32_LITTLE(constants + 4);
114  x->input[2] = U8TO32_LITTLE(constants + 8);
115  x->input[3] = U8TO32_LITTLE(constants + 12);
116}
117
118static void
119chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
120{
121  x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
122  x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
123  x->input[14] = U8TO32_LITTLE(iv + 0);
124  x->input[15] = U8TO32_LITTLE(iv + 4);
125}
126
127static void
128chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
129{
130  u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
131  u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
132  u8 *ctarget = NULL;
133  u8 tmp[64];
134  u_int i;
135
136  if (!bytes) return;
137
138  j0 = x->input[0];
139  j1 = x->input[1];
140  j2 = x->input[2];
141  j3 = x->input[3];
142  j4 = x->input[4];
143  j5 = x->input[5];
144  j6 = x->input[6];
145  j7 = x->input[7];
146  j8 = x->input[8];
147  j9 = x->input[9];
148  j10 = x->input[10];
149  j11 = x->input[11];
150  j12 = x->input[12];
151  j13 = x->input[13];
152  j14 = x->input[14];
153  j15 = x->input[15];
154
155  for (;;) {
156    if (bytes < 64) {
157      for (i = 0;i < bytes;++i) tmp[i] = m[i];
158      m = tmp;
159      ctarget = c;
160      c = tmp;
161    }
162    x0 = j0;
163    x1 = j1;
164    x2 = j2;
165    x3 = j3;
166    x4 = j4;
167    x5 = j5;
168    x6 = j6;
169    x7 = j7;
170    x8 = j8;
171    x9 = j9;
172    x10 = j10;
173    x11 = j11;
174    x12 = j12;
175    x13 = j13;
176    x14 = j14;
177    x15 = j15;
178    for (i = 20;i > 0;i -= 2) {
179      QUARTERROUND( x0, x4, x8,x12)
180      QUARTERROUND( x1, x5, x9,x13)
181      QUARTERROUND( x2, x6,x10,x14)
182      QUARTERROUND( x3, x7,x11,x15)
183      QUARTERROUND( x0, x5,x10,x15)
184      QUARTERROUND( x1, x6,x11,x12)
185      QUARTERROUND( x2, x7, x8,x13)
186      QUARTERROUND( x3, x4, x9,x14)
187    }
188    x0 = PLUS(x0,j0);
189    x1 = PLUS(x1,j1);
190    x2 = PLUS(x2,j2);
191    x3 = PLUS(x3,j3);
192    x4 = PLUS(x4,j4);
193    x5 = PLUS(x5,j5);
194    x6 = PLUS(x6,j6);
195    x7 = PLUS(x7,j7);
196    x8 = PLUS(x8,j8);
197    x9 = PLUS(x9,j9);
198    x10 = PLUS(x10,j10);
199    x11 = PLUS(x11,j11);
200    x12 = PLUS(x12,j12);
201    x13 = PLUS(x13,j13);
202    x14 = PLUS(x14,j14);
203    x15 = PLUS(x15,j15);
204
205#ifndef KEYSTREAM_ONLY
206    x0 = XOR(x0,U8TO32_LITTLE(m + 0));
207    x1 = XOR(x1,U8TO32_LITTLE(m + 4));
208    x2 = XOR(x2,U8TO32_LITTLE(m + 8));
209    x3 = XOR(x3,U8TO32_LITTLE(m + 12));
210    x4 = XOR(x4,U8TO32_LITTLE(m + 16));
211    x5 = XOR(x5,U8TO32_LITTLE(m + 20));
212    x6 = XOR(x6,U8TO32_LITTLE(m + 24));
213    x7 = XOR(x7,U8TO32_LITTLE(m + 28));
214    x8 = XOR(x8,U8TO32_LITTLE(m + 32));
215    x9 = XOR(x9,U8TO32_LITTLE(m + 36));
216    x10 = XOR(x10,U8TO32_LITTLE(m + 40));
217    x11 = XOR(x11,U8TO32_LITTLE(m + 44));
218    x12 = XOR(x12,U8TO32_LITTLE(m + 48));
219    x13 = XOR(x13,U8TO32_LITTLE(m + 52));
220    x14 = XOR(x14,U8TO32_LITTLE(m + 56));
221    x15 = XOR(x15,U8TO32_LITTLE(m + 60));
222#endif
223
224    j12 = PLUSONE(j12);
225    if (!j12) {
226      j13 = PLUSONE(j13);
227      /* stopping at 2^70 bytes per nonce is user's responsibility */
228    }
229
230    U32TO8_LITTLE(c + 0,x0);
231    U32TO8_LITTLE(c + 4,x1);
232    U32TO8_LITTLE(c + 8,x2);
233    U32TO8_LITTLE(c + 12,x3);
234    U32TO8_LITTLE(c + 16,x4);
235    U32TO8_LITTLE(c + 20,x5);
236    U32TO8_LITTLE(c + 24,x6);
237    U32TO8_LITTLE(c + 28,x7);
238    U32TO8_LITTLE(c + 32,x8);
239    U32TO8_LITTLE(c + 36,x9);
240    U32TO8_LITTLE(c + 40,x10);
241    U32TO8_LITTLE(c + 44,x11);
242    U32TO8_LITTLE(c + 48,x12);
243    U32TO8_LITTLE(c + 52,x13);
244    U32TO8_LITTLE(c + 56,x14);
245    U32TO8_LITTLE(c + 60,x15);
246
247    if (bytes <= 64) {
248      if (bytes < 64) {
249        for (i = 0;i < bytes;++i) ctarget[i] = c[i];
250      }
251      x->input[12] = j12;
252      x->input[13] = j13;
253      return;
254    }
255    bytes -= 64;
256    c += 64;
257#ifndef KEYSTREAM_ONLY
258    m += 64;
259#endif
260  }
261}
262