1261287Sdes/* $OpenBSD: smult_curve25519_ref.c,v 1.2 2013/11/02 22:02:14 markus Exp $ */
2261287Sdes/*
3261287Sdesversion 20081011
4261287SdesMatthew Dempsky
5261287SdesPublic domain.
6261287SdesDerived from public domain code by D. J. Bernstein.
7261287Sdes*/
8261287Sdes
9261287Sdesint crypto_scalarmult_curve25519(unsigned char *, const unsigned char *, const unsigned char *);
10261287Sdes
11261287Sdesstatic void add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
12261287Sdes{
13261287Sdes  unsigned int j;
14261287Sdes  unsigned int u;
15261287Sdes  u = 0;
16261287Sdes  for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; }
17261287Sdes  u += a[31] + b[31]; out[31] = u;
18261287Sdes}
19261287Sdes
20261287Sdesstatic void sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
21261287Sdes{
22261287Sdes  unsigned int j;
23261287Sdes  unsigned int u;
24261287Sdes  u = 218;
25261287Sdes  for (j = 0;j < 31;++j) {
26261287Sdes    u += a[j] + 65280 - b[j];
27261287Sdes    out[j] = u & 255;
28261287Sdes    u >>= 8;
29261287Sdes  }
30261287Sdes  u += a[31] - b[31];
31261287Sdes  out[31] = u;
32261287Sdes}
33261287Sdes
34261287Sdesstatic void squeeze(unsigned int a[32])
35261287Sdes{
36261287Sdes  unsigned int j;
37261287Sdes  unsigned int u;
38261287Sdes  u = 0;
39261287Sdes  for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
40261287Sdes  u += a[31]; a[31] = u & 127;
41261287Sdes  u = 19 * (u >> 7);
42261287Sdes  for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
43261287Sdes  u += a[31]; a[31] = u;
44261287Sdes}
45261287Sdes
46261287Sdesstatic const unsigned int minusp[32] = {
47261287Sdes 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128
48261287Sdes} ;
49261287Sdes
50261287Sdesstatic void freeze(unsigned int a[32])
51261287Sdes{
52261287Sdes  unsigned int aorig[32];
53261287Sdes  unsigned int j;
54261287Sdes  unsigned int negative;
55261287Sdes
56261287Sdes  for (j = 0;j < 32;++j) aorig[j] = a[j];
57261287Sdes  add(a,a,minusp);
58261287Sdes  negative = -((a[31] >> 7) & 1);
59261287Sdes  for (j = 0;j < 32;++j) a[j] ^= negative & (aorig[j] ^ a[j]);
60261287Sdes}
61261287Sdes
62261287Sdesstatic void mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
63261287Sdes{
64261287Sdes  unsigned int i;
65261287Sdes  unsigned int j;
66261287Sdes  unsigned int u;
67261287Sdes
68261287Sdes  for (i = 0;i < 32;++i) {
69261287Sdes    u = 0;
70261287Sdes    for (j = 0;j <= i;++j) u += a[j] * b[i - j];
71261287Sdes    for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j];
72261287Sdes    out[i] = u;
73261287Sdes  }
74261287Sdes  squeeze(out);
75261287Sdes}
76261287Sdes
77261287Sdesstatic void mult121665(unsigned int out[32],const unsigned int a[32])
78261287Sdes{
79261287Sdes  unsigned int j;
80261287Sdes  unsigned int u;
81261287Sdes
82261287Sdes  u = 0;
83261287Sdes  for (j = 0;j < 31;++j) { u += 121665 * a[j]; out[j] = u & 255; u >>= 8; }
84261287Sdes  u += 121665 * a[31]; out[31] = u & 127;
85261287Sdes  u = 19 * (u >> 7);
86261287Sdes  for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; }
87261287Sdes  u += out[j]; out[j] = u;
88261287Sdes}
89261287Sdes
90261287Sdesstatic void square(unsigned int out[32],const unsigned int a[32])
91261287Sdes{
92261287Sdes  unsigned int i;
93261287Sdes  unsigned int j;
94261287Sdes  unsigned int u;
95261287Sdes
96261287Sdes  for (i = 0;i < 32;++i) {
97261287Sdes    u = 0;
98261287Sdes    for (j = 0;j < i - j;++j) u += a[j] * a[i - j];
99261287Sdes    for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j];
100261287Sdes    u *= 2;
101261287Sdes    if ((i & 1) == 0) {
102261287Sdes      u += a[i / 2] * a[i / 2];
103261287Sdes      u += 38 * a[i / 2 + 16] * a[i / 2 + 16];
104261287Sdes    }
105261287Sdes    out[i] = u;
106261287Sdes  }
107261287Sdes  squeeze(out);
108261287Sdes}
109261287Sdes
110261287Sdesstatic void select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b)
111261287Sdes{
112261287Sdes  unsigned int j;
113261287Sdes  unsigned int t;
114261287Sdes  unsigned int bminus1;
115261287Sdes
116261287Sdes  bminus1 = b - 1;
117261287Sdes  for (j = 0;j < 64;++j) {
118261287Sdes    t = bminus1 & (r[j] ^ s[j]);
119261287Sdes    p[j] = s[j] ^ t;
120261287Sdes    q[j] = r[j] ^ t;
121261287Sdes  }
122261287Sdes}
123261287Sdes
124261287Sdesstatic void mainloop(unsigned int work[64],const unsigned char e[32])
125261287Sdes{
126261287Sdes  unsigned int xzm1[64];
127261287Sdes  unsigned int xzm[64];
128261287Sdes  unsigned int xzmb[64];
129261287Sdes  unsigned int xzm1b[64];
130261287Sdes  unsigned int xznb[64];
131261287Sdes  unsigned int xzn1b[64];
132261287Sdes  unsigned int a0[64];
133261287Sdes  unsigned int a1[64];
134261287Sdes  unsigned int b0[64];
135261287Sdes  unsigned int b1[64];
136261287Sdes  unsigned int c1[64];
137261287Sdes  unsigned int r[32];
138261287Sdes  unsigned int s[32];
139261287Sdes  unsigned int t[32];
140261287Sdes  unsigned int u[32];
141261287Sdes  unsigned int j;
142261287Sdes  unsigned int b;
143261287Sdes  int pos;
144261287Sdes
145261287Sdes  for (j = 0;j < 32;++j) xzm1[j] = work[j];
146261287Sdes  xzm1[32] = 1;
147261287Sdes  for (j = 33;j < 64;++j) xzm1[j] = 0;
148261287Sdes
149261287Sdes  xzm[0] = 1;
150261287Sdes  for (j = 1;j < 64;++j) xzm[j] = 0;
151261287Sdes
152261287Sdes  for (pos = 254;pos >= 0;--pos) {
153261287Sdes    b = e[pos / 8] >> (pos & 7);
154261287Sdes    b &= 1;
155261287Sdes    select(xzmb,xzm1b,xzm,xzm1,b);
156261287Sdes    add(a0,xzmb,xzmb + 32);
157261287Sdes    sub(a0 + 32,xzmb,xzmb + 32);
158261287Sdes    add(a1,xzm1b,xzm1b + 32);
159261287Sdes    sub(a1 + 32,xzm1b,xzm1b + 32);
160261287Sdes    square(b0,a0);
161261287Sdes    square(b0 + 32,a0 + 32);
162261287Sdes    mult(b1,a1,a0 + 32);
163261287Sdes    mult(b1 + 32,a1 + 32,a0);
164261287Sdes    add(c1,b1,b1 + 32);
165261287Sdes    sub(c1 + 32,b1,b1 + 32);
166261287Sdes    square(r,c1 + 32);
167261287Sdes    sub(s,b0,b0 + 32);
168261287Sdes    mult121665(t,s);
169261287Sdes    add(u,t,b0);
170261287Sdes    mult(xznb,b0,b0 + 32);
171261287Sdes    mult(xznb + 32,s,u);
172261287Sdes    square(xzn1b,c1);
173261287Sdes    mult(xzn1b + 32,r,work);
174261287Sdes    select(xzm,xzm1,xznb,xzn1b,b);
175261287Sdes  }
176261287Sdes
177261287Sdes  for (j = 0;j < 64;++j) work[j] = xzm[j];
178261287Sdes}
179261287Sdes
180261287Sdesstatic void recip(unsigned int out[32],const unsigned int z[32])
181261287Sdes{
182261287Sdes  unsigned int z2[32];
183261287Sdes  unsigned int z9[32];
184261287Sdes  unsigned int z11[32];
185261287Sdes  unsigned int z2_5_0[32];
186261287Sdes  unsigned int z2_10_0[32];
187261287Sdes  unsigned int z2_20_0[32];
188261287Sdes  unsigned int z2_50_0[32];
189261287Sdes  unsigned int z2_100_0[32];
190261287Sdes  unsigned int t0[32];
191261287Sdes  unsigned int t1[32];
192261287Sdes  int i;
193261287Sdes
194261287Sdes  /* 2 */ square(z2,z);
195261287Sdes  /* 4 */ square(t1,z2);
196261287Sdes  /* 8 */ square(t0,t1);
197261287Sdes  /* 9 */ mult(z9,t0,z);
198261287Sdes  /* 11 */ mult(z11,z9,z2);
199261287Sdes  /* 22 */ square(t0,z11);
200261287Sdes  /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9);
201261287Sdes
202261287Sdes  /* 2^6 - 2^1 */ square(t0,z2_5_0);
203261287Sdes  /* 2^7 - 2^2 */ square(t1,t0);
204261287Sdes  /* 2^8 - 2^3 */ square(t0,t1);
205261287Sdes  /* 2^9 - 2^4 */ square(t1,t0);
206261287Sdes  /* 2^10 - 2^5 */ square(t0,t1);
207261287Sdes  /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0);
208261287Sdes
209261287Sdes  /* 2^11 - 2^1 */ square(t0,z2_10_0);
210261287Sdes  /* 2^12 - 2^2 */ square(t1,t0);
211261287Sdes  /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); }
212261287Sdes  /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0);
213261287Sdes
214261287Sdes  /* 2^21 - 2^1 */ square(t0,z2_20_0);
215261287Sdes  /* 2^22 - 2^2 */ square(t1,t0);
216261287Sdes  /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); }
217261287Sdes  /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0);
218261287Sdes
219261287Sdes  /* 2^41 - 2^1 */ square(t1,t0);
220261287Sdes  /* 2^42 - 2^2 */ square(t0,t1);
221261287Sdes  /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); }
222261287Sdes  /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0);
223261287Sdes
224261287Sdes  /* 2^51 - 2^1 */ square(t0,z2_50_0);
225261287Sdes  /* 2^52 - 2^2 */ square(t1,t0);
226261287Sdes  /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
227261287Sdes  /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0);
228261287Sdes
229261287Sdes  /* 2^101 - 2^1 */ square(t1,z2_100_0);
230261287Sdes  /* 2^102 - 2^2 */ square(t0,t1);
231261287Sdes  /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); }
232261287Sdes  /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0);
233261287Sdes
234261287Sdes  /* 2^201 - 2^1 */ square(t0,t1);
235261287Sdes  /* 2^202 - 2^2 */ square(t1,t0);
236261287Sdes  /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
237261287Sdes  /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0);
238261287Sdes
239261287Sdes  /* 2^251 - 2^1 */ square(t1,t0);
240261287Sdes  /* 2^252 - 2^2 */ square(t0,t1);
241261287Sdes  /* 2^253 - 2^3 */ square(t1,t0);
242261287Sdes  /* 2^254 - 2^4 */ square(t0,t1);
243261287Sdes  /* 2^255 - 2^5 */ square(t1,t0);
244261287Sdes  /* 2^255 - 21 */ mult(out,t1,z11);
245261287Sdes}
246261287Sdes
247261287Sdesint crypto_scalarmult_curve25519(unsigned char *q,
248261287Sdes  const unsigned char *n,
249261287Sdes  const unsigned char *p)
250261287Sdes{
251261287Sdes  unsigned int work[96];
252261287Sdes  unsigned char e[32];
253261287Sdes  unsigned int i;
254261287Sdes  for (i = 0;i < 32;++i) e[i] = n[i];
255261287Sdes  e[0] &= 248;
256261287Sdes  e[31] &= 127;
257261287Sdes  e[31] |= 64;
258261287Sdes  for (i = 0;i < 32;++i) work[i] = p[i];
259261287Sdes  mainloop(work,e);
260261287Sdes  recip(work + 32,work + 32);
261261287Sdes  mult(work + 64,work,work + 32);
262261287Sdes  freeze(work + 64);
263261287Sdes  for (i = 0;i < 32;++i) q[i] = work[64 + i];
264261287Sdes  return 0;
265261287Sdes}
266