1/* OPENBSD ORIGINAL: lib/libc/crypt/chacha_private.h */
2
3/*
4chacha-merged.c version 20080118
5D. J. Bernstein
6Public domain.
7*/
8
9/* $OpenBSD: chacha_private.h,v 1.3 2022/02/28 21:56:29 dtucker Exp $ */
10
/* Byte and word types used throughout this file. */
typedef unsigned char u8;
typedef unsigned int u32;

/*
 * Cipher state: sixteen 32-bit words.  As filled in by the setup
 * routines in this file, words 0-3 hold the constant, 4-11 the key,
 * 12-13 the block counter and 14-15 the IV.
 */
typedef struct
{
  u32 input[16]; /* could be compressed */
} chacha_ctx;

/* Attach an unsigned suffix to an integer literal. */
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Reduce a value to its low 8 / low 32 bits. */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/* Rotate the 32-bit value v left by n bits; n must satisfy 0 < n < 32. */
#define ROTL32(v, n) \
  (U32V((v) << (n)) | ((v) >> (32 - (n))))

/*
 * Assemble a 32-bit word from the four bytes at p, least significant
 * byte first.  Every byte is narrowed with U8V before widening so that
 * p may point at plain or signed char data without a byte >= 0x80
 * sign-extending into the upper bits of the result.
 */
#define U8TO32_LITTLE(p) \
  (((u32)U8V((p)[0])      ) | \
   ((u32)U8V((p)[1]) <<  8) | \
   ((u32)U8V((p)[2]) << 16) | \
   ((u32)U8V((p)[3]) << 24))

/* Store the 32-bit word v into the four bytes at p, least significant first. */
#define U32TO8_LITTLE(p, v) \
  do { \
    (p)[0] = U8V((v)      ); \
    (p)[1] = U8V((v) >>  8); \
    (p)[2] = U8V((v) >> 16); \
    (p)[3] = U8V((v) >> 24); \
  } while (0)

/* Word operations used by the round function; PLUS wraps modulo 2^32. */
#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * One quarter round applied in place to four state words.  Expands to
 * several statements and already carries its own trailing semicolon,
 * so it must only be used where a statement sequence is valid.
 */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
52
/*
 * 16-byte constants loaded into state words 0-3 by chacha_keysetup.
 * Spelled out byte by byte: the arrays are exactly 16 bytes long and
 * carry no terminating NUL.
 */

/* "expand 32-byte k" -- selected for 256-bit keys */
static const char sigma[16] = {
  'e', 'x', 'p', 'a', 'n', 'd', ' ', '3',
  '2', '-', 'b', 'y', 't', 'e', ' ', 'k'
};

/* "expand 16-byte k" -- selected for every other key size */
static const char tau[16] = {
  'e', 'x', 'p', 'a', 'n', 'd', ' ', '1',
  '6', '-', 'b', 'y', 't', 'e', ' ', 'k'
};
55
56static void
57chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
58{
59  const char *constants;
60
61  x->input[4] = U8TO32_LITTLE(k + 0);
62  x->input[5] = U8TO32_LITTLE(k + 4);
63  x->input[6] = U8TO32_LITTLE(k + 8);
64  x->input[7] = U8TO32_LITTLE(k + 12);
65  if (kbits == 256) { /* recommended */
66    k += 16;
67    constants = sigma;
68  } else { /* kbits == 128 */
69    constants = tau;
70  }
71  x->input[8] = U8TO32_LITTLE(k + 0);
72  x->input[9] = U8TO32_LITTLE(k + 4);
73  x->input[10] = U8TO32_LITTLE(k + 8);
74  x->input[11] = U8TO32_LITTLE(k + 12);
75  x->input[0] = U8TO32_LITTLE(constants + 0);
76  x->input[1] = U8TO32_LITTLE(constants + 4);
77  x->input[2] = U8TO32_LITTLE(constants + 8);
78  x->input[3] = U8TO32_LITTLE(constants + 12);
79}
80
81static void
82chacha_ivsetup(chacha_ctx *x,const u8 *iv)
83{
84  x->input[12] = 0;
85  x->input[13] = 0;
86  x->input[14] = U8TO32_LITTLE(iv + 0);
87  x->input[15] = U8TO32_LITTLE(iv + 4);
88}
89
90static void
91chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
92{
93  u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
94  u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
95  u8 *ctarget = NULL;
96  u8 tmp[64];
97  u_int i;
98
99  if (!bytes) return;
100
101  j0 = x->input[0];
102  j1 = x->input[1];
103  j2 = x->input[2];
104  j3 = x->input[3];
105  j4 = x->input[4];
106  j5 = x->input[5];
107  j6 = x->input[6];
108  j7 = x->input[7];
109  j8 = x->input[8];
110  j9 = x->input[9];
111  j10 = x->input[10];
112  j11 = x->input[11];
113  j12 = x->input[12];
114  j13 = x->input[13];
115  j14 = x->input[14];
116  j15 = x->input[15];
117
118  for (;;) {
119    if (bytes < 64) {
120      for (i = 0;i < bytes;++i) tmp[i] = m[i];
121      m = tmp;
122      ctarget = c;
123      c = tmp;
124    }
125    x0 = j0;
126    x1 = j1;
127    x2 = j2;
128    x3 = j3;
129    x4 = j4;
130    x5 = j5;
131    x6 = j6;
132    x7 = j7;
133    x8 = j8;
134    x9 = j9;
135    x10 = j10;
136    x11 = j11;
137    x12 = j12;
138    x13 = j13;
139    x14 = j14;
140    x15 = j15;
141    for (i = 20;i > 0;i -= 2) {
142      QUARTERROUND( x0, x4, x8,x12)
143      QUARTERROUND( x1, x5, x9,x13)
144      QUARTERROUND( x2, x6,x10,x14)
145      QUARTERROUND( x3, x7,x11,x15)
146      QUARTERROUND( x0, x5,x10,x15)
147      QUARTERROUND( x1, x6,x11,x12)
148      QUARTERROUND( x2, x7, x8,x13)
149      QUARTERROUND( x3, x4, x9,x14)
150    }
151    x0 = PLUS(x0,j0);
152    x1 = PLUS(x1,j1);
153    x2 = PLUS(x2,j2);
154    x3 = PLUS(x3,j3);
155    x4 = PLUS(x4,j4);
156    x5 = PLUS(x5,j5);
157    x6 = PLUS(x6,j6);
158    x7 = PLUS(x7,j7);
159    x8 = PLUS(x8,j8);
160    x9 = PLUS(x9,j9);
161    x10 = PLUS(x10,j10);
162    x11 = PLUS(x11,j11);
163    x12 = PLUS(x12,j12);
164    x13 = PLUS(x13,j13);
165    x14 = PLUS(x14,j14);
166    x15 = PLUS(x15,j15);
167
168#ifndef KEYSTREAM_ONLY
169    x0 = XOR(x0,U8TO32_LITTLE(m + 0));
170    x1 = XOR(x1,U8TO32_LITTLE(m + 4));
171    x2 = XOR(x2,U8TO32_LITTLE(m + 8));
172    x3 = XOR(x3,U8TO32_LITTLE(m + 12));
173    x4 = XOR(x4,U8TO32_LITTLE(m + 16));
174    x5 = XOR(x5,U8TO32_LITTLE(m + 20));
175    x6 = XOR(x6,U8TO32_LITTLE(m + 24));
176    x7 = XOR(x7,U8TO32_LITTLE(m + 28));
177    x8 = XOR(x8,U8TO32_LITTLE(m + 32));
178    x9 = XOR(x9,U8TO32_LITTLE(m + 36));
179    x10 = XOR(x10,U8TO32_LITTLE(m + 40));
180    x11 = XOR(x11,U8TO32_LITTLE(m + 44));
181    x12 = XOR(x12,U8TO32_LITTLE(m + 48));
182    x13 = XOR(x13,U8TO32_LITTLE(m + 52));
183    x14 = XOR(x14,U8TO32_LITTLE(m + 56));
184    x15 = XOR(x15,U8TO32_LITTLE(m + 60));
185#endif
186
187    j12 = PLUSONE(j12);
188    if (!j12) {
189      j13 = PLUSONE(j13);
190      /* stopping at 2^70 bytes per nonce is user's responsibility */
191    }
192
193    U32TO8_LITTLE(c + 0,x0);
194    U32TO8_LITTLE(c + 4,x1);
195    U32TO8_LITTLE(c + 8,x2);
196    U32TO8_LITTLE(c + 12,x3);
197    U32TO8_LITTLE(c + 16,x4);
198    U32TO8_LITTLE(c + 20,x5);
199    U32TO8_LITTLE(c + 24,x6);
200    U32TO8_LITTLE(c + 28,x7);
201    U32TO8_LITTLE(c + 32,x8);
202    U32TO8_LITTLE(c + 36,x9);
203    U32TO8_LITTLE(c + 40,x10);
204    U32TO8_LITTLE(c + 44,x11);
205    U32TO8_LITTLE(c + 48,x12);
206    U32TO8_LITTLE(c + 52,x13);
207    U32TO8_LITTLE(c + 56,x14);
208    U32TO8_LITTLE(c + 60,x15);
209
210    if (bytes <= 64) {
211      if (bytes < 64) {
212        for (i = 0;i < bytes;++i) ctarget[i] = c[i];
213      }
214      x->input[12] = j12;
215      x->input[13] = j13;
216      return;
217    }
218    bytes -= 64;
219    c += 64;
220#ifndef KEYSTREAM_ONLY
221    m += 64;
222#endif
223  }
224}
225