1/*
2chacha-merged.c version 20080118
3D. J. Bernstein
4Public domain.
5*/
6
7/* $OpenBSD: chacha.c,v 1.1 2013/11/21 00:45:44 djm Exp $ */
8
9#include <sys/cdefs.h>
10__FBSDID("$FreeBSD$");
11
12#include <sys/param.h>
13#include <sys/types.h>
14
15#include <crypto/chacha20/chacha.h>
16
/* Compact names for the fixed-width integer types used by the cipher. */
typedef uint8_t u8;
typedef uint32_t u32;

typedef struct chacha_ctx chacha_ctx;

/* Attach an unsigned suffix to an integer literal. */
#define U8C(lit) (lit##U)
#define U32C(lit) (lit##U)

/* Reduce a value to its low 8 bits / low 32 bits. */
#define U8V(val) ((u8)(val) & U8C(0xFF))
#define U32V(val) ((u32)(val) & U32C(0xFFFFFFFF))

/*
 * Rotate a 32-bit word left.  The word argument is evaluated twice; the
 * quarter round below only uses the rotation amounts 16, 12, 8 and 7.
 */
#define ROTL32(word, bits) \
  (U32V((word) << (bits)) | ((word) >> (32 - (bits))))

/* Assemble a 32-bit word from four bytes stored least-significant first. */
#define U8TO32_LITTLE(src) \
  (((u32)((src)[0])      ) | \
   ((u32)((src)[1]) <<  8) | \
   ((u32)((src)[2]) << 16) | \
   ((u32)((src)[3]) << 24))

/* Store a 32-bit word as four bytes, least-significant byte first. */
#define U32TO8_LITTLE(dst, word) \
  do { \
    (dst)[0] = U8V((word)      ); \
    (dst)[1] = U8V((word) >>  8); \
    (dst)[2] = U8V((word) >> 16); \
    (dst)[3] = U8V((word) >> 24); \
  } while (0)

/* The three word operations the cipher is built from (all modulo 2^32). */
#define ROTATE(word,bits) (ROTL32(word,bits))
#define XOR(lhs,rhs) ((lhs) ^ (rhs))
#define PLUS(lhs,rhs) (U32V((lhs) + (rhs)))
#define PLUSONE(word) (PLUS((word),1))

/*
 * One quarter round on four state words.  The expansion is a sequence of
 * plain statements that already ends in a semicolon, so it is invoked
 * without a trailing ';' and must not be used as the unbraced body of an
 * if/for.
 */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); \
  d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); \
  b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); \
  d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); \
  b = ROTATE(XOR(b,c), 7);

/*
 * State constants for 256-bit (sigma) and 128-bit (tau) keys.  Each is
 * exactly 16 bytes: the arrays deliberately have no room for a NUL.
 */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
58
59LOCAL void
60chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
61{
62  const char *constants;
63
64  x->input[4] = U8TO32_LITTLE(k + 0);
65  x->input[5] = U8TO32_LITTLE(k + 4);
66  x->input[6] = U8TO32_LITTLE(k + 8);
67  x->input[7] = U8TO32_LITTLE(k + 12);
68  if (kbits == 256) { /* recommended */
69    k += 16;
70    constants = sigma;
71  } else { /* kbits == 128 */
72    constants = tau;
73  }
74  x->input[8] = U8TO32_LITTLE(k + 0);
75  x->input[9] = U8TO32_LITTLE(k + 4);
76  x->input[10] = U8TO32_LITTLE(k + 8);
77  x->input[11] = U8TO32_LITTLE(k + 12);
78  x->input[0] = U8TO32_LITTLE(constants + 0);
79  x->input[1] = U8TO32_LITTLE(constants + 4);
80  x->input[2] = U8TO32_LITTLE(constants + 8);
81  x->input[3] = U8TO32_LITTLE(constants + 12);
82}
83
84LOCAL void
85chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
86{
87  x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
88  x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
89  x->input[14] = U8TO32_LITTLE(iv + 0);
90  x->input[15] = U8TO32_LITTLE(iv + 4);
91}
92
93LOCAL void
94chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
95{
96  u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
97  u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
98  u8 *ctarget = NULL;
99  u8 tmp[64];
100  u_int i;
101
102  if (!bytes) return;
103
104  j0 = x->input[0];
105  j1 = x->input[1];
106  j2 = x->input[2];
107  j3 = x->input[3];
108  j4 = x->input[4];
109  j5 = x->input[5];
110  j6 = x->input[6];
111  j7 = x->input[7];
112  j8 = x->input[8];
113  j9 = x->input[9];
114  j10 = x->input[10];
115  j11 = x->input[11];
116  j12 = x->input[12];
117  j13 = x->input[13];
118  j14 = x->input[14];
119  j15 = x->input[15];
120
121  for (;;) {
122    if (bytes < 64) {
123      for (i = 0;i < bytes;++i) tmp[i] = m[i];
124      m = tmp;
125      ctarget = c;
126      c = tmp;
127    }
128    x0 = j0;
129    x1 = j1;
130    x2 = j2;
131    x3 = j3;
132    x4 = j4;
133    x5 = j5;
134    x6 = j6;
135    x7 = j7;
136    x8 = j8;
137    x9 = j9;
138    x10 = j10;
139    x11 = j11;
140    x12 = j12;
141    x13 = j13;
142    x14 = j14;
143    x15 = j15;
144    for (i = 20;i > 0;i -= 2) {
145      QUARTERROUND( x0, x4, x8,x12)
146      QUARTERROUND( x1, x5, x9,x13)
147      QUARTERROUND( x2, x6,x10,x14)
148      QUARTERROUND( x3, x7,x11,x15)
149      QUARTERROUND( x0, x5,x10,x15)
150      QUARTERROUND( x1, x6,x11,x12)
151      QUARTERROUND( x2, x7, x8,x13)
152      QUARTERROUND( x3, x4, x9,x14)
153    }
154    x0 = PLUS(x0,j0);
155    x1 = PLUS(x1,j1);
156    x2 = PLUS(x2,j2);
157    x3 = PLUS(x3,j3);
158    x4 = PLUS(x4,j4);
159    x5 = PLUS(x5,j5);
160    x6 = PLUS(x6,j6);
161    x7 = PLUS(x7,j7);
162    x8 = PLUS(x8,j8);
163    x9 = PLUS(x9,j9);
164    x10 = PLUS(x10,j10);
165    x11 = PLUS(x11,j11);
166    x12 = PLUS(x12,j12);
167    x13 = PLUS(x13,j13);
168    x14 = PLUS(x14,j14);
169    x15 = PLUS(x15,j15);
170
171#ifndef KEYSTREAM_ONLY
172    x0 = XOR(x0,U8TO32_LITTLE(m + 0));
173    x1 = XOR(x1,U8TO32_LITTLE(m + 4));
174    x2 = XOR(x2,U8TO32_LITTLE(m + 8));
175    x3 = XOR(x3,U8TO32_LITTLE(m + 12));
176    x4 = XOR(x4,U8TO32_LITTLE(m + 16));
177    x5 = XOR(x5,U8TO32_LITTLE(m + 20));
178    x6 = XOR(x6,U8TO32_LITTLE(m + 24));
179    x7 = XOR(x7,U8TO32_LITTLE(m + 28));
180    x8 = XOR(x8,U8TO32_LITTLE(m + 32));
181    x9 = XOR(x9,U8TO32_LITTLE(m + 36));
182    x10 = XOR(x10,U8TO32_LITTLE(m + 40));
183    x11 = XOR(x11,U8TO32_LITTLE(m + 44));
184    x12 = XOR(x12,U8TO32_LITTLE(m + 48));
185    x13 = XOR(x13,U8TO32_LITTLE(m + 52));
186    x14 = XOR(x14,U8TO32_LITTLE(m + 56));
187    x15 = XOR(x15,U8TO32_LITTLE(m + 60));
188#endif
189
190    j12 = PLUSONE(j12);
191    if (!j12) {
192      j13 = PLUSONE(j13);
193      /* stopping at 2^70 bytes per nonce is user's responsibility */
194    }
195
196    U32TO8_LITTLE(c + 0,x0);
197    U32TO8_LITTLE(c + 4,x1);
198    U32TO8_LITTLE(c + 8,x2);
199    U32TO8_LITTLE(c + 12,x3);
200    U32TO8_LITTLE(c + 16,x4);
201    U32TO8_LITTLE(c + 20,x5);
202    U32TO8_LITTLE(c + 24,x6);
203    U32TO8_LITTLE(c + 28,x7);
204    U32TO8_LITTLE(c + 32,x8);
205    U32TO8_LITTLE(c + 36,x9);
206    U32TO8_LITTLE(c + 40,x10);
207    U32TO8_LITTLE(c + 44,x11);
208    U32TO8_LITTLE(c + 48,x12);
209    U32TO8_LITTLE(c + 52,x13);
210    U32TO8_LITTLE(c + 56,x14);
211    U32TO8_LITTLE(c + 60,x15);
212
213    if (bytes <= 64) {
214      if (bytes < 64) {
215        for (i = 0;i < bytes;++i) ctarget[i] = c[i];
216      }
217      x->input[12] = j12;
218      x->input[13] = j13;
219      return;
220    }
221    bytes -= 64;
222    c += 64;
223#ifndef KEYSTREAM_ONLY
224    m += 64;
225#endif
226  }
227}
228