1/* $OpenBSD: chacha.c,v 1.2 2023/07/17 05:26:38 djm Exp $ */
2/*
3chacha-merged.c version 20080118
4D. J. Bernstein
5Public domain.
6*/
7
8#include "includes.h"
9__RCSID("$NetBSD: chacha.c,v 1.6 2023/10/25 20:19:57 christos Exp $");
10
11#include <stdio.h>	/* for NULL */
12#include "chacha.h"
13
/* Short aliases for the byte and word types used in this file. */
typedef unsigned char u8;
typedef unsigned int u32;

typedef struct chacha_ctx chacha_ctx;

/* Paste an unsigned suffix onto an integer literal. */
#define U8C(lit) (lit##U)
#define U32C(lit) (lit##U)

/* Mask a value down to its low 8 / low 32 bits. */
#define U8V(val) ((u8)(val) & U8C(0xFF))
#define U32V(val) ((u32)(val) & U32C(0xFFFFFFFF))

/*
 * Rotate a 32-bit word left by `bits`.  A count of 0 would shift right by
 * 32; the quarter round below only uses 16, 12, 8 and 7.
 */
#define ROTL32(word, bits) \
  (U32V((word) << (bits)) | ((word) >> (32 - (bits))))

/* Assemble a u32 from the four bytes at src, least significant byte first. */
#define U8TO32_LITTLE(src) \
  (((u32)((src)[0])      ) | \
   ((u32)((src)[1]) <<  8) | \
   ((u32)((src)[2]) << 16) | \
   ((u32)((src)[3]) << 24))

/* Store word into the four bytes at dst, least significant byte first. */
#define U32TO8_LITTLE(dst, word) \
  do { \
    (dst)[0] = U8V((word)      ); \
    (dst)[1] = U8V((word) >>  8); \
    (dst)[2] = U8V((word) >> 16); \
    (dst)[3] = U8V((word) >> 24); \
  } while (0)

/* Word-level primitives; additions wrap modulo 2^32. */
#define ROTATE(word,bits) (ROTL32(word,bits))
#define XOR(lhs,rhs) ((lhs) ^ (rhs))
#define PLUS(lhs,rhs) (U32V((lhs) + (rhs)))
#define PLUSONE(word) (PLUS((word),1))

/*
 * One quarter round over four state words, updated in place.  The
 * expansion already ends in ';', so call sites need no semicolon.
 */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/*
 * 16-byte constants loaded into state words 0..3: sigma for 256-bit keys,
 * tau otherwise.  The arrays are exactly 16 bytes, so there is no
 * terminating NUL.
 */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
55
56void
57chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
58{
59  const char *constants;
60
61  x->input[4] = U8TO32_LITTLE(k + 0);
62  x->input[5] = U8TO32_LITTLE(k + 4);
63  x->input[6] = U8TO32_LITTLE(k + 8);
64  x->input[7] = U8TO32_LITTLE(k + 12);
65  if (kbits == 256) { /* recommended */
66    k += 16;
67    constants = sigma;
68  } else { /* kbits == 128 */
69    constants = tau;
70  }
71  x->input[8] = U8TO32_LITTLE(k + 0);
72  x->input[9] = U8TO32_LITTLE(k + 4);
73  x->input[10] = U8TO32_LITTLE(k + 8);
74  x->input[11] = U8TO32_LITTLE(k + 12);
75  x->input[0] = U8TO32_LITTLE(constants + 0);
76  x->input[1] = U8TO32_LITTLE(constants + 4);
77  x->input[2] = U8TO32_LITTLE(constants + 8);
78  x->input[3] = U8TO32_LITTLE(constants + 12);
79}
80
81void
82chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
83{
84  x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
85  x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
86  x->input[14] = U8TO32_LITTLE(iv + 0);
87  x->input[15] = U8TO32_LITTLE(iv + 4);
88}
89
90void
91chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
92{
93  u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
94  u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
95  u8 *ctarget = NULL;
96  u8 tmp[64];
97  u_int i;
98
99  if (!bytes) return;
100
101  j0 = x->input[0];
102  j1 = x->input[1];
103  j2 = x->input[2];
104  j3 = x->input[3];
105  j4 = x->input[4];
106  j5 = x->input[5];
107  j6 = x->input[6];
108  j7 = x->input[7];
109  j8 = x->input[8];
110  j9 = x->input[9];
111  j10 = x->input[10];
112  j11 = x->input[11];
113  j12 = x->input[12];
114  j13 = x->input[13];
115  j14 = x->input[14];
116  j15 = x->input[15];
117
118  for (;;) {
119    if (bytes < 64) {
120      for (i = 0;i < bytes;++i) tmp[i] = m[i];
121      m = tmp;
122      ctarget = c;
123      c = tmp;
124    }
125    x0 = j0;
126    x1 = j1;
127    x2 = j2;
128    x3 = j3;
129    x4 = j4;
130    x5 = j5;
131    x6 = j6;
132    x7 = j7;
133    x8 = j8;
134    x9 = j9;
135    x10 = j10;
136    x11 = j11;
137    x12 = j12;
138    x13 = j13;
139    x14 = j14;
140    x15 = j15;
141    for (i = 20;i > 0;i -= 2) {
142      QUARTERROUND( x0, x4, x8,x12)
143      QUARTERROUND( x1, x5, x9,x13)
144      QUARTERROUND( x2, x6,x10,x14)
145      QUARTERROUND( x3, x7,x11,x15)
146      QUARTERROUND( x0, x5,x10,x15)
147      QUARTERROUND( x1, x6,x11,x12)
148      QUARTERROUND( x2, x7, x8,x13)
149      QUARTERROUND( x3, x4, x9,x14)
150    }
151    x0 = PLUS(x0,j0);
152    x1 = PLUS(x1,j1);
153    x2 = PLUS(x2,j2);
154    x3 = PLUS(x3,j3);
155    x4 = PLUS(x4,j4);
156    x5 = PLUS(x5,j5);
157    x6 = PLUS(x6,j6);
158    x7 = PLUS(x7,j7);
159    x8 = PLUS(x8,j8);
160    x9 = PLUS(x9,j9);
161    x10 = PLUS(x10,j10);
162    x11 = PLUS(x11,j11);
163    x12 = PLUS(x12,j12);
164    x13 = PLUS(x13,j13);
165    x14 = PLUS(x14,j14);
166    x15 = PLUS(x15,j15);
167
168    x0 = XOR(x0,U8TO32_LITTLE(m + 0));
169    x1 = XOR(x1,U8TO32_LITTLE(m + 4));
170    x2 = XOR(x2,U8TO32_LITTLE(m + 8));
171    x3 = XOR(x3,U8TO32_LITTLE(m + 12));
172    x4 = XOR(x4,U8TO32_LITTLE(m + 16));
173    x5 = XOR(x5,U8TO32_LITTLE(m + 20));
174    x6 = XOR(x6,U8TO32_LITTLE(m + 24));
175    x7 = XOR(x7,U8TO32_LITTLE(m + 28));
176    x8 = XOR(x8,U8TO32_LITTLE(m + 32));
177    x9 = XOR(x9,U8TO32_LITTLE(m + 36));
178    x10 = XOR(x10,U8TO32_LITTLE(m + 40));
179    x11 = XOR(x11,U8TO32_LITTLE(m + 44));
180    x12 = XOR(x12,U8TO32_LITTLE(m + 48));
181    x13 = XOR(x13,U8TO32_LITTLE(m + 52));
182    x14 = XOR(x14,U8TO32_LITTLE(m + 56));
183    x15 = XOR(x15,U8TO32_LITTLE(m + 60));
184
185    j12 = PLUSONE(j12);
186    if (!j12) {
187      j13 = PLUSONE(j13);
188      /* stopping at 2^70 bytes per nonce is user's responsibility */
189    }
190
191    U32TO8_LITTLE(c + 0,x0);
192    U32TO8_LITTLE(c + 4,x1);
193    U32TO8_LITTLE(c + 8,x2);
194    U32TO8_LITTLE(c + 12,x3);
195    U32TO8_LITTLE(c + 16,x4);
196    U32TO8_LITTLE(c + 20,x5);
197    U32TO8_LITTLE(c + 24,x6);
198    U32TO8_LITTLE(c + 28,x7);
199    U32TO8_LITTLE(c + 32,x8);
200    U32TO8_LITTLE(c + 36,x9);
201    U32TO8_LITTLE(c + 40,x10);
202    U32TO8_LITTLE(c + 44,x11);
203    U32TO8_LITTLE(c + 48,x12);
204    U32TO8_LITTLE(c + 52,x13);
205    U32TO8_LITTLE(c + 56,x14);
206    U32TO8_LITTLE(c + 60,x15);
207
208    if (bytes <= 64) {
209      if (bytes < 64) {
210        for (i = 0;i < bytes;++i) ctarget[i] = c[i];
211      }
212      x->input[12] = j12;
213      x->input[13] = j13;
214      return;
215    }
216    bytes -= 64;
217    c += 64;
218    m += 64;
219  }
220}
221