1#include <stddef.h>
2#include <stdint.h>
3#include <stdlib.h>
4#include <string.h>
5
6#include "crypto_verify_32.h"
7#include "private/common.h"
8#include "private/ed25519_ref10.h"
9#include "utils.h"
10
11static inline uint64_t
12load_3(const unsigned char *in)
13{
14    uint64_t result;
15
16    result = (uint64_t) in[0];
17    result |= ((uint64_t) in[1]) << 8;
18    result |= ((uint64_t) in[2]) << 16;
19
20    return result;
21}
22
23static inline uint64_t
24load_4(const unsigned char *in)
25{
26    uint64_t result;
27
28    result = (uint64_t) in[0];
29    result |= ((uint64_t) in[1]) << 8;
30    result |= ((uint64_t) in[2]) << 16;
31    result |= ((uint64_t) in[3]) << 24;
32
33    return result;
34}
35
36/*
37 * Field arithmetic:
38 * Use 5*51 bit limbs on 64-bit systems with support for 128 bit arithmetic,
39 * and 10*25.5 bit limbs elsewhere.
40 *
41 * Functions used elsewhere that are candidates for inlining are defined
42 * via "private/curve25519_ref10.h".
43 */
44
45#ifdef HAVE_TI_MODE
46# include "fe_51/constants.h"
47# include "fe_51/fe.h"
48#else
49# include "fe_25_5/constants.h"
50# include "fe_25_5/fe.h"
51#endif
52
53void
54fe25519_invert(fe25519 out, const fe25519 z)
55{
56    fe25519 t0;
57    fe25519 t1;
58    fe25519 t2;
59    fe25519 t3;
60    int     i;
61
62    fe25519_sq(t0, z);
63    fe25519_sq(t1, t0);
64    fe25519_sq(t1, t1);
65    fe25519_mul(t1, z, t1);
66    fe25519_mul(t0, t0, t1);
67    fe25519_sq(t2, t0);
68    fe25519_mul(t1, t1, t2);
69    fe25519_sq(t2, t1);
70    for (i = 1; i < 5; ++i) {
71        fe25519_sq(t2, t2);
72    }
73    fe25519_mul(t1, t2, t1);
74    fe25519_sq(t2, t1);
75    for (i = 1; i < 10; ++i) {
76        fe25519_sq(t2, t2);
77    }
78    fe25519_mul(t2, t2, t1);
79    fe25519_sq(t3, t2);
80    for (i = 1; i < 20; ++i) {
81        fe25519_sq(t3, t3);
82    }
83    fe25519_mul(t2, t3, t2);
84    fe25519_sq(t2, t2);
85    for (i = 1; i < 10; ++i) {
86        fe25519_sq(t2, t2);
87    }
88    fe25519_mul(t1, t2, t1);
89    fe25519_sq(t2, t1);
90    for (i = 1; i < 50; ++i) {
91        fe25519_sq(t2, t2);
92    }
93    fe25519_mul(t2, t2, t1);
94    fe25519_sq(t3, t2);
95    for (i = 1; i < 100; ++i) {
96        fe25519_sq(t3, t3);
97    }
98    fe25519_mul(t2, t3, t2);
99    fe25519_sq(t2, t2);
100    for (i = 1; i < 50; ++i) {
101        fe25519_sq(t2, t2);
102    }
103    fe25519_mul(t1, t2, t1);
104    fe25519_sq(t1, t1);
105    for (i = 1; i < 5; ++i) {
106        fe25519_sq(t1, t1);
107    }
108    fe25519_mul(out, t1, t0);
109}
110
111static void
112fe25519_pow22523(fe25519 out, const fe25519 z)
113{
114    fe25519 t0;
115    fe25519 t1;
116    fe25519 t2;
117    int     i;
118
119    fe25519_sq(t0, z);
120    fe25519_sq(t1, t0);
121    fe25519_sq(t1, t1);
122    fe25519_mul(t1, z, t1);
123    fe25519_mul(t0, t0, t1);
124    fe25519_sq(t0, t0);
125    fe25519_mul(t0, t1, t0);
126    fe25519_sq(t1, t0);
127    for (i = 1; i < 5; ++i) {
128        fe25519_sq(t1, t1);
129    }
130    fe25519_mul(t0, t1, t0);
131    fe25519_sq(t1, t0);
132    for (i = 1; i < 10; ++i) {
133        fe25519_sq(t1, t1);
134    }
135    fe25519_mul(t1, t1, t0);
136    fe25519_sq(t2, t1);
137    for (i = 1; i < 20; ++i) {
138        fe25519_sq(t2, t2);
139    }
140    fe25519_mul(t1, t2, t1);
141    fe25519_sq(t1, t1);
142    for (i = 1; i < 10; ++i) {
143        fe25519_sq(t1, t1);
144    }
145    fe25519_mul(t0, t1, t0);
146    fe25519_sq(t1, t0);
147    for (i = 1; i < 50; ++i) {
148        fe25519_sq(t1, t1);
149    }
150    fe25519_mul(t1, t1, t0);
151    fe25519_sq(t2, t1);
152    for (i = 1; i < 100; ++i) {
153        fe25519_sq(t2, t2);
154    }
155    fe25519_mul(t1, t2, t1);
156    fe25519_sq(t1, t1);
157    for (i = 1; i < 50; ++i) {
158        fe25519_sq(t1, t1);
159    }
160    fe25519_mul(t0, t1, t0);
161    fe25519_sq(t0, t0);
162    fe25519_sq(t0, t0);
163    fe25519_mul(out, t0, z);
164}
165
166/*
167 r = p + q
168 */
169
170void
171ge25519_add(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_cached *q)
172{
173    fe25519 t0;
174
175    fe25519_add(r->X, p->Y, p->X);
176    fe25519_sub(r->Y, p->Y, p->X);
177    fe25519_mul(r->Z, r->X, q->YplusX);
178    fe25519_mul(r->Y, r->Y, q->YminusX);
179    fe25519_mul(r->T, q->T2d, p->T);
180    fe25519_mul(r->X, p->Z, q->Z);
181    fe25519_add(t0, r->X, r->X);
182    fe25519_sub(r->X, r->Z, r->Y);
183    fe25519_add(r->Y, r->Z, r->Y);
184    fe25519_add(r->Z, t0, r->T);
185    fe25519_sub(r->T, t0, r->T);
186}
187
188static void
189slide_vartime(signed char *r, const unsigned char *a)
190{
191    int i;
192    int b;
193    int k;
194    int ribs;
195    int cmp;
196
197    for (i = 0; i < 256; ++i) {
198        r[i] = 1 & (a[i >> 3] >> (i & 7));
199    }
200    for (i = 0; i < 256; ++i) {
201        if (! r[i]) {
202            continue;
203        }
204        for (b = 1; b <= 6 && i + b < 256; ++b) {
205            if (! r[i + b]) {
206                continue;
207            }
208            ribs = r[i + b] << b;
209            cmp = r[i] + ribs;
210            if (cmp <= 15) {
211                r[i] = cmp;
212                r[i + b] = 0;
213            } else {
214                cmp = r[i] - ribs;
215                if (cmp < -15) {
216                    break;
217                }
218                r[i] = cmp;
219                for (k = i + b; k < 256; ++k) {
220                    if (! r[k]) {
221                        r[k] = 1;
222                        break;
223                    }
224                    r[k] = 0;
225                }
226            }
227        }
228    }
229}
230
231int
232ge25519_frombytes(ge25519_p3 *h, const unsigned char *s)
233{
234    fe25519 u;
235    fe25519 v;
236    fe25519 v3;
237    fe25519 vxx;
238    fe25519 m_root_check, p_root_check;
239    fe25519 negx;
240    fe25519 x_sqrtm1;
241    int     has_m_root, has_p_root;
242
243    fe25519_frombytes(h->Y, s);
244    fe25519_1(h->Z);
245    fe25519_sq(u, h->Y);
246    fe25519_mul(v, u, d);
247    fe25519_sub(u, u, h->Z); /* u = y^2-1 */
248    fe25519_add(v, v, h->Z); /* v = dy^2+1 */
249
250    fe25519_sq(v3, v);
251    fe25519_mul(v3, v3, v); /* v3 = v^3 */
252    fe25519_sq(h->X, v3);
253    fe25519_mul(h->X, h->X, v);
254    fe25519_mul(h->X, h->X, u); /* x = uv^7 */
255
256    fe25519_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */
257    fe25519_mul(h->X, h->X, v3);
258    fe25519_mul(h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */
259
260    fe25519_sq(vxx, h->X);
261    fe25519_mul(vxx, vxx, v);
262    fe25519_sub(m_root_check, vxx, u); /* vx^2-u */
263    fe25519_add(p_root_check, vxx, u); /* vx^2+u */
264    has_m_root = fe25519_iszero(m_root_check);
265    has_p_root = fe25519_iszero(p_root_check);
266    fe25519_mul(x_sqrtm1, h->X, sqrtm1); /* x*sqrt(-1) */
267    fe25519_cmov(h->X, x_sqrtm1, 1 - has_m_root);
268
269    fe25519_neg(negx, h->X);
270    fe25519_cmov(h->X, negx, fe25519_isnegative(h->X) ^ (s[31] >> 7));
271    fe25519_mul(h->T, h->X, h->Y);
272
273    return (has_m_root | has_p_root) - 1;
274}
275
276int
277ge25519_frombytes_negate_vartime(ge25519_p3 *h, const unsigned char *s)
278{
279    fe25519 u;
280    fe25519 v;
281    fe25519 v3;
282    fe25519 vxx;
283    fe25519 m_root_check, p_root_check;
284
285    fe25519_frombytes(h->Y, s);
286    fe25519_1(h->Z);
287    fe25519_sq(u, h->Y);
288    fe25519_mul(v, u, d);
289    fe25519_sub(u, u, h->Z); /* u = y^2-1 */
290    fe25519_add(v, v, h->Z); /* v = dy^2+1 */
291
292    fe25519_sq(v3, v);
293    fe25519_mul(v3, v3, v); /* v3 = v^3 */
294    fe25519_sq(h->X, v3);
295    fe25519_mul(h->X, h->X, v);
296    fe25519_mul(h->X, h->X, u); /* x = uv^7 */
297
298    fe25519_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */
299    fe25519_mul(h->X, h->X, v3);
300    fe25519_mul(h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */
301
302    fe25519_sq(vxx, h->X);
303    fe25519_mul(vxx, vxx, v);
304    fe25519_sub(m_root_check, vxx, u); /* vx^2-u */
305    if (fe25519_iszero(m_root_check) == 0) {
306        fe25519_add(p_root_check, vxx, u); /* vx^2+u */
307        if (fe25519_iszero(p_root_check) == 0) {
308            return -1;
309        }
310        fe25519_mul(h->X, h->X, sqrtm1);
311    }
312
313    if (fe25519_isnegative(h->X) == (s[31] >> 7)) {
314        fe25519_neg(h->X, h->X);
315    }
316    fe25519_mul(h->T, h->X, h->Y);
317
318    return 0;
319}
320
321/*
322 r = p + q
323 */
324
325static void
326ge25519_madd(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_precomp *q)
327{
328    fe25519 t0;
329
330    fe25519_add(r->X, p->Y, p->X);
331    fe25519_sub(r->Y, p->Y, p->X);
332    fe25519_mul(r->Z, r->X, q->yplusx);
333    fe25519_mul(r->Y, r->Y, q->yminusx);
334    fe25519_mul(r->T, q->xy2d, p->T);
335    fe25519_add(t0, p->Z, p->Z);
336    fe25519_sub(r->X, r->Z, r->Y);
337    fe25519_add(r->Y, r->Z, r->Y);
338    fe25519_add(r->Z, t0, r->T);
339    fe25519_sub(r->T, t0, r->T);
340}
341
342/*
343 r = p - q
344 */
345
346static void
347ge25519_msub(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_precomp *q)
348{
349    fe25519 t0;
350
351    fe25519_add(r->X, p->Y, p->X);
352    fe25519_sub(r->Y, p->Y, p->X);
353    fe25519_mul(r->Z, r->X, q->yminusx);
354    fe25519_mul(r->Y, r->Y, q->yplusx);
355    fe25519_mul(r->T, q->xy2d, p->T);
356    fe25519_add(t0, p->Z, p->Z);
357    fe25519_sub(r->X, r->Z, r->Y);
358    fe25519_add(r->Y, r->Z, r->Y);
359    fe25519_sub(r->Z, t0, r->T);
360    fe25519_add(r->T, t0, r->T);
361}
362
363/*
364 r = p
365 */
366
367void
368ge25519_p1p1_to_p2(ge25519_p2 *r, const ge25519_p1p1 *p)
369{
370    fe25519_mul(r->X, p->X, p->T);
371    fe25519_mul(r->Y, p->Y, p->Z);
372    fe25519_mul(r->Z, p->Z, p->T);
373}
374
375/*
376 r = p
377 */
378
379void
380ge25519_p1p1_to_p3(ge25519_p3 *r, const ge25519_p1p1 *p)
381{
382    fe25519_mul(r->X, p->X, p->T);
383    fe25519_mul(r->Y, p->Y, p->Z);
384    fe25519_mul(r->Z, p->Z, p->T);
385    fe25519_mul(r->T, p->X, p->Y);
386}
387
388static void
389ge25519_p2_0(ge25519_p2 *h)
390{
391    fe25519_0(h->X);
392    fe25519_1(h->Y);
393    fe25519_1(h->Z);
394}
395
396/*
397 r = 2 * p
398 */
399
400static void
401ge25519_p2_dbl(ge25519_p1p1 *r, const ge25519_p2 *p)
402{
403    fe25519 t0;
404
405    fe25519_sq(r->X, p->X);
406    fe25519_sq(r->Z, p->Y);
407    fe25519_sq2(r->T, p->Z);
408    fe25519_add(r->Y, p->X, p->Y);
409    fe25519_sq(t0, r->Y);
410    fe25519_add(r->Y, r->Z, r->X);
411    fe25519_sub(r->Z, r->Z, r->X);
412    fe25519_sub(r->X, t0, r->Y);
413    fe25519_sub(r->T, r->T, r->Z);
414}
415
416static void
417ge25519_p3_0(ge25519_p3 *h)
418{
419    fe25519_0(h->X);
420    fe25519_1(h->Y);
421    fe25519_1(h->Z);
422    fe25519_0(h->T);
423}
424
425static void
426ge25519_cached_0(ge25519_cached *h)
427{
428    fe25519_1(h->YplusX);
429    fe25519_1(h->YminusX);
430    fe25519_1(h->Z);
431    fe25519_0(h->T2d);
432}
433
434/*
435 r = p
436 */
437
438void
439ge25519_p3_to_cached(ge25519_cached *r, const ge25519_p3 *p)
440{
441    fe25519_add(r->YplusX, p->Y, p->X);
442    fe25519_sub(r->YminusX, p->Y, p->X);
443    fe25519_copy(r->Z, p->Z);
444    fe25519_mul(r->T2d, p->T, d2);
445}
446
447static void
448ge25519_p3_to_precomp(ge25519_precomp *pi, const ge25519_p3 *p)
449{
450    fe25519 recip;
451    fe25519 x;
452    fe25519 y;
453    fe25519 xy;
454
455    fe25519_invert(recip, p->Z);
456    fe25519_mul(x, p->X, recip);
457    fe25519_mul(y, p->Y, recip);
458    fe25519_add(pi->yplusx, y, x);
459    fe25519_sub(pi->yminusx, y, x);
460    fe25519_mul(xy, x, y);
461    fe25519_mul(pi->xy2d, xy, d2);
462}
463
464/*
465 r = p
466 */
467
468static void
469ge25519_p3_to_p2(ge25519_p2 *r, const ge25519_p3 *p)
470{
471    fe25519_copy(r->X, p->X);
472    fe25519_copy(r->Y, p->Y);
473    fe25519_copy(r->Z, p->Z);
474}
475
476void
477ge25519_p3_tobytes(unsigned char *s, const ge25519_p3 *h)
478{
479    fe25519 recip;
480    fe25519 x;
481    fe25519 y;
482
483    fe25519_invert(recip, h->Z);
484    fe25519_mul(x, h->X, recip);
485    fe25519_mul(y, h->Y, recip);
486    fe25519_tobytes(s, y);
487    s[31] ^= fe25519_isnegative(x) << 7;
488}
489
490/*
491 r = 2 * p
492 */
493
494static void
495ge25519_p3_dbl(ge25519_p1p1 *r, const ge25519_p3 *p)
496{
497    ge25519_p2 q;
498    ge25519_p3_to_p2(&q, p);
499    ge25519_p2_dbl(r, &q);
500}
501
502static void
503ge25519_precomp_0(ge25519_precomp *h)
504{
505    fe25519_1(h->yplusx);
506    fe25519_1(h->yminusx);
507    fe25519_0(h->xy2d);
508}
509
510static unsigned char
511equal(signed char b, signed char c)
512{
513    unsigned char ub = b;
514    unsigned char uc = c;
515    unsigned char x  = ub ^ uc; /* 0: yes; 1..255: no */
516    uint32_t      y  = x;       /* 0: yes; 1..255: no */
517
518    y -= 1;   /* 4294967295: yes; 0..254: no */
519    y >>= 31; /* 1: yes; 0: no */
520
521    return y;
522}
523
524static unsigned char
525negative(signed char b)
526{
527    /* 18446744073709551361..18446744073709551615: yes; 0..255: no */
528    uint64_t x = b;
529
530    x >>= 63; /* 1: yes; 0: no */
531
532    return x;
533}
534
535static void
536ge25519_cmov(ge25519_precomp *t, const ge25519_precomp *u, unsigned char b)
537{
538    fe25519_cmov(t->yplusx, u->yplusx, b);
539    fe25519_cmov(t->yminusx, u->yminusx, b);
540    fe25519_cmov(t->xy2d, u->xy2d, b);
541}
542
543static void
544ge25519_cmov_cached(ge25519_cached *t, const ge25519_cached *u, unsigned char b)
545{
546    fe25519_cmov(t->YplusX, u->YplusX, b);
547    fe25519_cmov(t->YminusX, u->YminusX, b);
548    fe25519_cmov(t->Z, u->Z, b);
549    fe25519_cmov(t->T2d, u->T2d, b);
550}
551
552static void
553ge25519_select(ge25519_precomp *t, const ge25519_precomp precomp[8], const signed char b)
554{
555    ge25519_precomp     minust;
556    const unsigned char bnegative = negative(b);
557    const unsigned char babs      = b - (((-bnegative) & b) * ((signed char) 1 << 1));
558
559    ge25519_precomp_0(t);
560    ge25519_cmov(t, &precomp[0], equal(babs, 1));
561    ge25519_cmov(t, &precomp[1], equal(babs, 2));
562    ge25519_cmov(t, &precomp[2], equal(babs, 3));
563    ge25519_cmov(t, &precomp[3], equal(babs, 4));
564    ge25519_cmov(t, &precomp[4], equal(babs, 5));
565    ge25519_cmov(t, &precomp[5], equal(babs, 6));
566    ge25519_cmov(t, &precomp[6], equal(babs, 7));
567    ge25519_cmov(t, &precomp[7], equal(babs, 8));
568    fe25519_copy(minust.yplusx, t->yminusx);
569    fe25519_copy(minust.yminusx, t->yplusx);
570    fe25519_neg(minust.xy2d, t->xy2d);
571    ge25519_cmov(t, &minust, bnegative);
572}
573
574static void
575ge25519_select_base(ge25519_precomp *t, const int pos, const signed char b)
576{
577    static const ge25519_precomp base[32][8] = { /* base[i][j] = (j+1)*256^i*B */
578#ifdef HAVE_TI_MODE
579# include "fe_51/base.h"
580#else
581# include "fe_25_5/base.h"
582#endif
583    };
584    ge25519_select(t, base[pos], b);
585}
586
587static void
588ge25519_select_cached(ge25519_cached *t, const ge25519_cached cached[8], const signed char b)
589{
590    ge25519_cached      minust;
591    const unsigned char bnegative = negative(b);
592    const unsigned char babs      = b - (((-bnegative) & b) * ((signed char) 1 << 1));
593
594    ge25519_cached_0(t);
595    ge25519_cmov_cached(t, &cached[0], equal(babs, 1));
596    ge25519_cmov_cached(t, &cached[1], equal(babs, 2));
597    ge25519_cmov_cached(t, &cached[2], equal(babs, 3));
598    ge25519_cmov_cached(t, &cached[3], equal(babs, 4));
599    ge25519_cmov_cached(t, &cached[4], equal(babs, 5));
600    ge25519_cmov_cached(t, &cached[5], equal(babs, 6));
601    ge25519_cmov_cached(t, &cached[6], equal(babs, 7));
602    ge25519_cmov_cached(t, &cached[7], equal(babs, 8));
603    fe25519_copy(minust.YplusX, t->YminusX);
604    fe25519_copy(minust.YminusX, t->YplusX);
605    fe25519_copy(minust.Z, t->Z);
606    fe25519_neg(minust.T2d, t->T2d);
607    ge25519_cmov_cached(t, &minust, bnegative);
608}
609
610/*
611 r = p - q
612 */
613
614void
615ge25519_sub(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_cached *q)
616{
617    fe25519 t0;
618
619    fe25519_add(r->X, p->Y, p->X);
620    fe25519_sub(r->Y, p->Y, p->X);
621    fe25519_mul(r->Z, r->X, q->YminusX);
622    fe25519_mul(r->Y, r->Y, q->YplusX);
623    fe25519_mul(r->T, q->T2d, p->T);
624    fe25519_mul(r->X, p->Z, q->Z);
625    fe25519_add(t0, r->X, r->X);
626    fe25519_sub(r->X, r->Z, r->Y);
627    fe25519_add(r->Y, r->Z, r->Y);
628    fe25519_sub(r->Z, t0, r->T);
629    fe25519_add(r->T, t0, r->T);
630}
631
632void
633ge25519_tobytes(unsigned char *s, const ge25519_p2 *h)
634{
635    fe25519 recip;
636    fe25519 x;
637    fe25519 y;
638
639    fe25519_invert(recip, h->Z);
640    fe25519_mul(x, h->X, recip);
641    fe25519_mul(y, h->Y, recip);
642    fe25519_tobytes(s, y);
643    s[31] ^= fe25519_isnegative(x) << 7;
644}
645
646/*
647 r = a * A + b * B
648 where a = a[0]+256*a[1]+...+256^31 a[31].
649 and b = b[0]+256*b[1]+...+256^31 b[31].
650 B is the Ed25519 base point (x,4/5) with x positive.
651
652 Only used for signatures verification.
653 */
654
655void
656ge25519_double_scalarmult_vartime(ge25519_p2 *r, const unsigned char *a,
657                                  const ge25519_p3 *A, const unsigned char *b)
658{
659    static const ge25519_precomp Bi[8] = {
660#ifdef HAVE_TI_MODE
661# include "fe_51/base2.h"
662#else
663# include "fe_25_5/base2.h"
664#endif
665    };
666    signed char    aslide[256];
667    signed char    bslide[256];
668    ge25519_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
669    ge25519_p1p1   t;
670    ge25519_p3     u;
671    ge25519_p3     A2;
672    int            i;
673
674    slide_vartime(aslide, a);
675    slide_vartime(bslide, b);
676
677    ge25519_p3_to_cached(&Ai[0], A);
678
679    ge25519_p3_dbl(&t, A);
680    ge25519_p1p1_to_p3(&A2, &t);
681
682    ge25519_add(&t, &A2, &Ai[0]);
683    ge25519_p1p1_to_p3(&u, &t);
684    ge25519_p3_to_cached(&Ai[1], &u);
685
686    ge25519_add(&t, &A2, &Ai[1]);
687    ge25519_p1p1_to_p3(&u, &t);
688    ge25519_p3_to_cached(&Ai[2], &u);
689
690    ge25519_add(&t, &A2, &Ai[2]);
691    ge25519_p1p1_to_p3(&u, &t);
692    ge25519_p3_to_cached(&Ai[3], &u);
693
694    ge25519_add(&t, &A2, &Ai[3]);
695    ge25519_p1p1_to_p3(&u, &t);
696    ge25519_p3_to_cached(&Ai[4], &u);
697
698    ge25519_add(&t, &A2, &Ai[4]);
699    ge25519_p1p1_to_p3(&u, &t);
700    ge25519_p3_to_cached(&Ai[5], &u);
701
702    ge25519_add(&t, &A2, &Ai[5]);
703    ge25519_p1p1_to_p3(&u, &t);
704    ge25519_p3_to_cached(&Ai[6], &u);
705
706    ge25519_add(&t, &A2, &Ai[6]);
707    ge25519_p1p1_to_p3(&u, &t);
708    ge25519_p3_to_cached(&Ai[7], &u);
709
710    ge25519_p2_0(r);
711
712    for (i = 255; i >= 0; --i) {
713        if (aslide[i] || bslide[i]) {
714            break;
715        }
716    }
717
718    for (; i >= 0; --i) {
719        ge25519_p2_dbl(&t, r);
720
721        if (aslide[i] > 0) {
722            ge25519_p1p1_to_p3(&u, &t);
723            ge25519_add(&t, &u, &Ai[aslide[i] / 2]);
724        } else if (aslide[i] < 0) {
725            ge25519_p1p1_to_p3(&u, &t);
726            ge25519_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
727        }
728
729        if (bslide[i] > 0) {
730            ge25519_p1p1_to_p3(&u, &t);
731            ge25519_madd(&t, &u, &Bi[bslide[i] / 2]);
732        } else if (bslide[i] < 0) {
733            ge25519_p1p1_to_p3(&u, &t);
734            ge25519_msub(&t, &u, &Bi[(-bslide[i]) / 2]);
735        }
736
737        ge25519_p1p1_to_p2(r, &t);
738    }
739}
740
741/*
742 h = a * p
743 where a = a[0]+256*a[1]+...+256^31 a[31]
744
745 Preconditions:
746 a[31] <= 127
747
748 p is public
749 */
750
751void
752ge25519_scalarmult(ge25519_p3 *h, const unsigned char *a, const ge25519_p3 *p)
753{
754    signed char     e[64];
755    signed char     carry;
756    ge25519_p1p1    r;
757    ge25519_p2      s;
758    ge25519_p1p1    t2, t3, t4, t5, t6, t7, t8;
759    ge25519_p3      p2, p3, p4, p5, p6, p7, p8;
760    ge25519_cached  pi[8];
761    ge25519_cached  t;
762    int             i;
763
764    ge25519_p3_to_cached(&pi[1 - 1], p);   /* p */
765
766    ge25519_p3_dbl(&t2, p);
767    ge25519_p1p1_to_p3(&p2, &t2);
768    ge25519_p3_to_cached(&pi[2 - 1], &p2); /* 2p = 2*p */
769
770    ge25519_add(&t3, p, &pi[2 - 1]);
771    ge25519_p1p1_to_p3(&p3, &t3);
772    ge25519_p3_to_cached(&pi[3 - 1], &p3); /* 3p = 2p+p */
773
774    ge25519_p3_dbl(&t4, &p2);
775    ge25519_p1p1_to_p3(&p4, &t4);
776    ge25519_p3_to_cached(&pi[4 - 1], &p4); /* 4p = 2*2p */
777
778    ge25519_add(&t5, p, &pi[4 - 1]);
779    ge25519_p1p1_to_p3(&p5, &t5);
780    ge25519_p3_to_cached(&pi[5 - 1], &p5); /* 5p = 4p+p */
781
782    ge25519_p3_dbl(&t6, &p3);
783    ge25519_p1p1_to_p3(&p6, &t6);
784    ge25519_p3_to_cached(&pi[6 - 1], &p6); /* 6p = 2*3p */
785
786    ge25519_add(&t7, p, &pi[6 - 1]);
787    ge25519_p1p1_to_p3(&p7, &t7);
788    ge25519_p3_to_cached(&pi[7 - 1], &p7); /* 7p = 6p+p */
789
790    ge25519_p3_dbl(&t8, &p4);
791    ge25519_p1p1_to_p3(&p8, &t8);
792    ge25519_p3_to_cached(&pi[8 - 1], &p8); /* 8p = 2*4p */
793
794    for (i = 0; i < 32; ++i) {
795        e[2 * i + 0] = (a[i] >> 0) & 15;
796        e[2 * i + 1] = (a[i] >> 4) & 15;
797    }
798    /* each e[i] is between 0 and 15 */
799    /* e[63] is between 0 and 7 */
800
801    carry = 0;
802    for (i = 0; i < 63; ++i) {
803        e[i] += carry;
804        carry = e[i] + 8;
805        carry >>= 4;
806        e[i] -= carry * ((signed char) 1 << 4);
807    }
808    e[63] += carry;
809    /* each e[i] is between -8 and 8 */
810
811    ge25519_p3_0(h);
812
813    for (i = 63; i != 0; i--) {
814        ge25519_select_cached(&t, pi, e[i]);
815        ge25519_add(&r, h, &t);
816
817        ge25519_p1p1_to_p2(&s, &r);
818        ge25519_p2_dbl(&r, &s);
819        ge25519_p1p1_to_p2(&s, &r);
820        ge25519_p2_dbl(&r, &s);
821        ge25519_p1p1_to_p2(&s, &r);
822        ge25519_p2_dbl(&r, &s);
823        ge25519_p1p1_to_p2(&s, &r);
824        ge25519_p2_dbl(&r, &s);
825
826        ge25519_p1p1_to_p3(h, &r);  /* *16 */
827    }
828    ge25519_select_cached(&t, pi, e[i]);
829    ge25519_add(&r, h, &t);
830
831    ge25519_p1p1_to_p3(h, &r);
832}
833
834/*
835 h = a * B (with precomputation)
836 where a = a[0]+256*a[1]+...+256^31 a[31]
837 B is the Ed25519 base point (x,4/5) with x positive
838 (as bytes: 0x5866666666666666666666666666666666666666666666666666666666666666)
839
840 Preconditions:
841 a[31] <= 127
842 */
843
844void
845ge25519_scalarmult_base(ge25519_p3 *h, const unsigned char *a)
846{
847    signed char     e[64];
848    signed char     carry;
849    ge25519_p1p1    r;
850    ge25519_p2      s;
851    ge25519_precomp t;
852    int             i;
853
854    for (i = 0; i < 32; ++i) {
855        e[2 * i + 0] = (a[i] >> 0) & 15;
856        e[2 * i + 1] = (a[i] >> 4) & 15;
857    }
858    /* each e[i] is between 0 and 15 */
859    /* e[63] is between 0 and 7 */
860
861    carry = 0;
862    for (i = 0; i < 63; ++i) {
863        e[i] += carry;
864        carry = e[i] + 8;
865        carry >>= 4;
866        e[i] -= carry * ((signed char) 1 << 4);
867    }
868    e[63] += carry;
869    /* each e[i] is between -8 and 8 */
870
871    ge25519_p3_0(h);
872
873    for (i = 1; i < 64; i += 2) {
874        ge25519_select_base(&t, i / 2, e[i]);
875        ge25519_madd(&r, h, &t);
876        ge25519_p1p1_to_p3(h, &r);
877    }
878
879    ge25519_p3_dbl(&r, h);
880    ge25519_p1p1_to_p2(&s, &r);
881    ge25519_p2_dbl(&r, &s);
882    ge25519_p1p1_to_p2(&s, &r);
883    ge25519_p2_dbl(&r, &s);
884    ge25519_p1p1_to_p2(&s, &r);
885    ge25519_p2_dbl(&r, &s);
886    ge25519_p1p1_to_p3(h, &r);
887
888    for (i = 0; i < 64; i += 2) {
889        ge25519_select_base(&t, i / 2, e[i]);
890        ge25519_madd(&r, h, &t);
891        ge25519_p1p1_to_p3(h, &r);
892    }
893}
894
895/* multiply by the order of the main subgroup l = 2^252+27742317777372353535851937790883648493 */
896static void
897ge25519_mul_l(ge25519_p3 *r, const ge25519_p3 *A)
898{
899    static const signed char aslide[253] = {
900        13, 0, 0, 0, 0, -1, 0, 0, 0, 0, -11, 0, 0, 0, 0, 0, 0, -5, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 0, -13, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, -13, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, -13, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 3, 0, 0, 0, 0, -11, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, 0, 7, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
901    };
902    ge25519_cached Ai[8];
903    ge25519_p1p1   t;
904    ge25519_p3     u;
905    ge25519_p3     A2;
906    int            i;
907
908    ge25519_p3_to_cached(&Ai[0], A);
909    ge25519_p3_dbl(&t, A);
910    ge25519_p1p1_to_p3(&A2, &t);
911    ge25519_add(&t, &A2, &Ai[0]);
912    ge25519_p1p1_to_p3(&u, &t);
913    ge25519_p3_to_cached(&Ai[1], &u);
914    ge25519_add(&t, &A2, &Ai[1]);
915    ge25519_p1p1_to_p3(&u, &t);
916    ge25519_p3_to_cached(&Ai[2], &u);
917    ge25519_add(&t, &A2, &Ai[2]);
918    ge25519_p1p1_to_p3(&u, &t);
919    ge25519_p3_to_cached(&Ai[3], &u);
920    ge25519_add(&t, &A2, &Ai[3]);
921    ge25519_p1p1_to_p3(&u, &t);
922    ge25519_p3_to_cached(&Ai[4], &u);
923    ge25519_add(&t, &A2, &Ai[4]);
924    ge25519_p1p1_to_p3(&u, &t);
925    ge25519_p3_to_cached(&Ai[5], &u);
926    ge25519_add(&t, &A2, &Ai[5]);
927    ge25519_p1p1_to_p3(&u, &t);
928    ge25519_p3_to_cached(&Ai[6], &u);
929    ge25519_add(&t, &A2, &Ai[6]);
930    ge25519_p1p1_to_p3(&u, &t);
931    ge25519_p3_to_cached(&Ai[7], &u);
932
933    ge25519_p3_0(r);
934
935    for (i = 252; i >= 0; --i) {
936        ge25519_p3_dbl(&t, r);
937
938        if (aslide[i] > 0) {
939            ge25519_p1p1_to_p3(&u, &t);
940            ge25519_add(&t, &u, &Ai[aslide[i] / 2]);
941        } else if (aslide[i] < 0) {
942            ge25519_p1p1_to_p3(&u, &t);
943            ge25519_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
944        }
945
946        ge25519_p1p1_to_p3(r, &t);
947    }
948}
949
950int
951ge25519_is_on_curve(const ge25519_p3 *p)
952{
953    fe25519 x2;
954    fe25519 y2;
955    fe25519 z2;
956    fe25519 z4;
957    fe25519 t0;
958    fe25519 t1;
959
960    fe25519_sq(x2, p->X);
961    fe25519_sq(y2, p->Y);
962    fe25519_sq(z2, p->Z);
963    fe25519_sub(t0, y2, x2);
964    fe25519_mul(t0, t0, z2);
965
966    fe25519_mul(t1, x2, y2);
967    fe25519_mul(t1, t1, d);
968    fe25519_sq(z4, z2);
969    fe25519_add(t1, t1, z4);
970    fe25519_sub(t0, t0, t1);
971
972    return fe25519_iszero(t0);
973}
974
975int
976ge25519_is_on_main_subgroup(const ge25519_p3 *p)
977{
978    ge25519_p3 pl;
979
980    ge25519_mul_l(&pl, p);
981
982    return fe25519_iszero(pl.X);
983}
984
985int
986ge25519_is_canonical(const unsigned char *s)
987{
988    unsigned char c;
989    unsigned char d;
990    unsigned int  i;
991
992    c = (s[31] & 0x7f) ^ 0x7f;
993    for (i = 30; i > 0; i--) {
994        c |= s[i] ^ 0xff;
995    }
996    c = (((unsigned int) c) - 1U) >> 8;
997    d = (0xed - 1U - (unsigned int) s[0]) >> 8;
998
999    return 1 - (c & d & 1);
1000}
1001
1002int
1003ge25519_has_small_order(const unsigned char s[32])
1004{
1005    CRYPTO_ALIGN(16)
1006    static const unsigned char blacklist[][32] = {
1007        /* 0 (order 4) */
1008        { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1009          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1010          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
1011        /* 1 (order 1) */
1012        { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1013          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1014          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
1015        /* 2707385501144840649318225287225658788936804267575313519463743609750303402022
1016           (order 8) */
1017        { 0x26, 0xe8, 0x95, 0x8f, 0xc2, 0xb2, 0x27, 0xb0, 0x45, 0xc3, 0xf4,
1018          0x89, 0xf2, 0xef, 0x98, 0xf0, 0xd5, 0xdf, 0xac, 0x05, 0xd3, 0xc6,
1019          0x33, 0x39, 0xb1, 0x38, 0x02, 0x88, 0x6d, 0x53, 0xfc, 0x05 },
1020        /* 55188659117513257062467267217118295137698188065244968500265048394206261417927
1021           (order 8) */
1022        { 0xc7, 0x17, 0x6a, 0x70, 0x3d, 0x4d, 0xd8, 0x4f, 0xba, 0x3c, 0x0b,
1023          0x76, 0x0d, 0x10, 0x67, 0x0f, 0x2a, 0x20, 0x53, 0xfa, 0x2c, 0x39,
1024          0xcc, 0xc6, 0x4e, 0xc7, 0xfd, 0x77, 0x92, 0xac, 0x03, 0x7a },
1025        /* p-1 (order 2) */
1026        { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1027          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1028          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
1029        /* p (=0, order 4) */
1030        { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1031          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1032          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
1033        /* p+1 (=1, order 1) */
1034        { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1035          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1036          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }
1037    };
1038    unsigned char c[7] = { 0 };
1039    unsigned int  k;
1040    size_t        i, j;
1041
1042    COMPILER_ASSERT(7 == sizeof blacklist / sizeof blacklist[0]);
1043    for (j = 0; j < 31; j++) {
1044        for (i = 0; i < sizeof blacklist / sizeof blacklist[0]; i++) {
1045            c[i] |= s[j] ^ blacklist[i][j];
1046        }
1047    }
1048    for (i = 0; i < sizeof blacklist / sizeof blacklist[0]; i++) {
1049        c[i] |= (s[j] & 0x7f) ^ blacklist[i][j];
1050    }
1051    k = 0;
1052    for (i = 0; i < sizeof blacklist / sizeof blacklist[0]; i++) {
1053        k |= (c[i] - 1);
1054    }
1055    return (int) ((k >> 8) & 1);
1056}
1057
1058/*
1059 Input:
1060 a[0]+256*a[1]+...+256^31*a[31] = a
1061 b[0]+256*b[1]+...+256^31*b[31] = b
1062 c[0]+256*c[1]+...+256^31*c[31] = c
1063 *
1064 Output:
1065 s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
1066 where l = 2^252 + 27742317777372353535851937790883648493.
1067 */
1068
1069void
1070sc25519_muladd(unsigned char *s, const unsigned char *a,
1071               const unsigned char *b, const unsigned char *c)
1072{
1073    int64_t a0  = 2097151 & load_3(a);
1074    int64_t a1  = 2097151 & (load_4(a + 2) >> 5);
1075    int64_t a2  = 2097151 & (load_3(a + 5) >> 2);
1076    int64_t a3  = 2097151 & (load_4(a + 7) >> 7);
1077    int64_t a4  = 2097151 & (load_4(a + 10) >> 4);
1078    int64_t a5  = 2097151 & (load_3(a + 13) >> 1);
1079    int64_t a6  = 2097151 & (load_4(a + 15) >> 6);
1080    int64_t a7  = 2097151 & (load_3(a + 18) >> 3);
1081    int64_t a8  = 2097151 & load_3(a + 21);
1082    int64_t a9  = 2097151 & (load_4(a + 23) >> 5);
1083    int64_t a10 = 2097151 & (load_3(a + 26) >> 2);
1084    int64_t a11 = (load_4(a + 28) >> 7);
1085
1086    int64_t b0  = 2097151 & load_3(b);
1087    int64_t b1  = 2097151 & (load_4(b + 2) >> 5);
1088    int64_t b2  = 2097151 & (load_3(b + 5) >> 2);
1089    int64_t b3  = 2097151 & (load_4(b + 7) >> 7);
1090    int64_t b4  = 2097151 & (load_4(b + 10) >> 4);
1091    int64_t b5  = 2097151 & (load_3(b + 13) >> 1);
1092    int64_t b6  = 2097151 & (load_4(b + 15) >> 6);
1093    int64_t b7  = 2097151 & (load_3(b + 18) >> 3);
1094    int64_t b8  = 2097151 & load_3(b + 21);
1095    int64_t b9  = 2097151 & (load_4(b + 23) >> 5);
1096    int64_t b10 = 2097151 & (load_3(b + 26) >> 2);
1097    int64_t b11 = (load_4(b + 28) >> 7);
1098
1099    int64_t c0  = 2097151 & load_3(c);
1100    int64_t c1  = 2097151 & (load_4(c + 2) >> 5);
1101    int64_t c2  = 2097151 & (load_3(c + 5) >> 2);
1102    int64_t c3  = 2097151 & (load_4(c + 7) >> 7);
1103    int64_t c4  = 2097151 & (load_4(c + 10) >> 4);
1104    int64_t c5  = 2097151 & (load_3(c + 13) >> 1);
1105    int64_t c6  = 2097151 & (load_4(c + 15) >> 6);
1106    int64_t c7  = 2097151 & (load_3(c + 18) >> 3);
1107    int64_t c8  = 2097151 & load_3(c + 21);
1108    int64_t c9  = 2097151 & (load_4(c + 23) >> 5);
1109    int64_t c10 = 2097151 & (load_3(c + 26) >> 2);
1110    int64_t c11 = (load_4(c + 28) >> 7);
1111
1112    int64_t s0;
1113    int64_t s1;
1114    int64_t s2;
1115    int64_t s3;
1116    int64_t s4;
1117    int64_t s5;
1118    int64_t s6;
1119    int64_t s7;
1120    int64_t s8;
1121    int64_t s9;
1122    int64_t s10;
1123    int64_t s11;
1124    int64_t s12;
1125    int64_t s13;
1126    int64_t s14;
1127    int64_t s15;
1128    int64_t s16;
1129    int64_t s17;
1130    int64_t s18;
1131    int64_t s19;
1132    int64_t s20;
1133    int64_t s21;
1134    int64_t s22;
1135    int64_t s23;
1136
1137    int64_t carry0;
1138    int64_t carry1;
1139    int64_t carry2;
1140    int64_t carry3;
1141    int64_t carry4;
1142    int64_t carry5;
1143    int64_t carry6;
1144    int64_t carry7;
1145    int64_t carry8;
1146    int64_t carry9;
1147    int64_t carry10;
1148    int64_t carry11;
1149    int64_t carry12;
1150    int64_t carry13;
1151    int64_t carry14;
1152    int64_t carry15;
1153    int64_t carry16;
1154    int64_t carry17;
1155    int64_t carry18;
1156    int64_t carry19;
1157    int64_t carry20;
1158    int64_t carry21;
1159    int64_t carry22;
1160
1161    s0 = c0 + a0 * b0;
1162    s1 = c1 + a0 * b1 + a1 * b0;
1163    s2 = c2 + a0 * b2 + a1 * b1 + a2 * b0;
1164    s3 = c3 + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
1165    s4 = c4 + a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0;
1166    s5 = c5 + a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0;
1167    s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 +
1168         a6 * b0;
1169    s7 = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 +
1170         a6 * b1 + a7 * b0;
1171    s8 = c8 + a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 +
1172         a6 * b2 + a7 * b1 + a8 * b0;
1173    s9 = c9 + a0 * b9 + a1 * b8 + a2 * b7 + a3 * b6 + a4 * b5 + a5 * b4 +
1174         a6 * b3 + a7 * b2 + a8 * b1 + a9 * b0;
1175    s10 = c10 + a0 * b10 + a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 +
1176          a6 * b4 + a7 * b3 + a8 * b2 + a9 * b1 + a10 * b0;
1177    s11 = c11 + a0 * b11 + a1 * b10 + a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 +
1178          a6 * b5 + a7 * b4 + a8 * b3 + a9 * b2 + a10 * b1 + a11 * b0;
1179    s12 = a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 +
1180          a7 * b5 + a8 * b4 + a9 * b3 + a10 * b2 + a11 * b1;
1181    s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 +
1182          a8 * b5 + a9 * b4 + a10 * b3 + a11 * b2;
1183    s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 +
1184          a9 * b5 + a10 * b4 + a11 * b3;
1185    s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 +
1186          a10 * b5 + a11 * b4;
1187    s16 =
1188        a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 + a11 * b5;
1189    s17 = a6 * b11 + a7 * b10 + a8 * b9 + a9 * b8 + a10 * b7 + a11 * b6;
1190    s18 = a7 * b11 + a8 * b10 + a9 * b9 + a10 * b8 + a11 * b7;
1191    s19 = a8 * b11 + a9 * b10 + a10 * b9 + a11 * b8;
1192    s20 = a9 * b11 + a10 * b10 + a11 * b9;
1193    s21 = a10 * b11 + a11 * b10;
1194    s22 = a11 * b11;
1195    s23 = 0;
1196
1197    carry0 = (s0 + (int64_t) (1L << 20)) >> 21;
1198    s1 += carry0;
1199    s0 -= carry0 * ((uint64_t) 1L << 21);
1200    carry2 = (s2 + (int64_t) (1L << 20)) >> 21;
1201    s3 += carry2;
1202    s2 -= carry2 * ((uint64_t) 1L << 21);
1203    carry4 = (s4 + (int64_t) (1L << 20)) >> 21;
1204    s5 += carry4;
1205    s4 -= carry4 * ((uint64_t) 1L << 21);
1206    carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
1207    s7 += carry6;
1208    s6 -= carry6 * ((uint64_t) 1L << 21);
1209    carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
1210    s9 += carry8;
1211    s8 -= carry8 * ((uint64_t) 1L << 21);
1212    carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
1213    s11 += carry10;
1214    s10 -= carry10 * ((uint64_t) 1L << 21);
1215    carry12 = (s12 + (int64_t) (1L << 20)) >> 21;
1216    s13 += carry12;
1217    s12 -= carry12 * ((uint64_t) 1L << 21);
1218    carry14 = (s14 + (int64_t) (1L << 20)) >> 21;
1219    s15 += carry14;
1220    s14 -= carry14 * ((uint64_t) 1L << 21);
1221    carry16 = (s16 + (int64_t) (1L << 20)) >> 21;
1222    s17 += carry16;
1223    s16 -= carry16 * ((uint64_t) 1L << 21);
1224    carry18 = (s18 + (int64_t) (1L << 20)) >> 21;
1225    s19 += carry18;
1226    s18 -= carry18 * ((uint64_t) 1L << 21);
1227    carry20 = (s20 + (int64_t) (1L << 20)) >> 21;
1228    s21 += carry20;
1229    s20 -= carry20 * ((uint64_t) 1L << 21);
1230    carry22 = (s22 + (int64_t) (1L << 20)) >> 21;
1231    s23 += carry22;
1232    s22 -= carry22 * ((uint64_t) 1L << 21);
1233
1234    carry1 = (s1 + (int64_t) (1L << 20)) >> 21;
1235    s2 += carry1;
1236    s1 -= carry1 * ((uint64_t) 1L << 21);
1237    carry3 = (s3 + (int64_t) (1L << 20)) >> 21;
1238    s4 += carry3;
1239    s3 -= carry3 * ((uint64_t) 1L << 21);
1240    carry5 = (s5 + (int64_t) (1L << 20)) >> 21;
1241    s6 += carry5;
1242    s5 -= carry5 * ((uint64_t) 1L << 21);
1243    carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
1244    s8 += carry7;
1245    s7 -= carry7 * ((uint64_t) 1L << 21);
1246    carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
1247    s10 += carry9;
1248    s9 -= carry9 * ((uint64_t) 1L << 21);
1249    carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
1250    s12 += carry11;
1251    s11 -= carry11 * ((uint64_t) 1L << 21);
1252    carry13 = (s13 + (int64_t) (1L << 20)) >> 21;
1253    s14 += carry13;
1254    s13 -= carry13 * ((uint64_t) 1L << 21);
1255    carry15 = (s15 + (int64_t) (1L << 20)) >> 21;
1256    s16 += carry15;
1257    s15 -= carry15 * ((uint64_t) 1L << 21);
1258    carry17 = (s17 + (int64_t) (1L << 20)) >> 21;
1259    s18 += carry17;
1260    s17 -= carry17 * ((uint64_t) 1L << 21);
1261    carry19 = (s19 + (int64_t) (1L << 20)) >> 21;
1262    s20 += carry19;
1263    s19 -= carry19 * ((uint64_t) 1L << 21);
1264    carry21 = (s21 + (int64_t) (1L << 20)) >> 21;
1265    s22 += carry21;
1266    s21 -= carry21 * ((uint64_t) 1L << 21);
1267
1268    s11 += s23 * 666643;
1269    s12 += s23 * 470296;
1270    s13 += s23 * 654183;
1271    s14 -= s23 * 997805;
1272    s15 += s23 * 136657;
1273    s16 -= s23 * 683901;
1274
1275    s10 += s22 * 666643;
1276    s11 += s22 * 470296;
1277    s12 += s22 * 654183;
1278    s13 -= s22 * 997805;
1279    s14 += s22 * 136657;
1280    s15 -= s22 * 683901;
1281
1282    s9 += s21 * 666643;
1283    s10 += s21 * 470296;
1284    s11 += s21 * 654183;
1285    s12 -= s21 * 997805;
1286    s13 += s21 * 136657;
1287    s14 -= s21 * 683901;
1288
1289    s8 += s20 * 666643;
1290    s9 += s20 * 470296;
1291    s10 += s20 * 654183;
1292    s11 -= s20 * 997805;
1293    s12 += s20 * 136657;
1294    s13 -= s20 * 683901;
1295
1296    s7 += s19 * 666643;
1297    s8 += s19 * 470296;
1298    s9 += s19 * 654183;
1299    s10 -= s19 * 997805;
1300    s11 += s19 * 136657;
1301    s12 -= s19 * 683901;
1302
1303    s6 += s18 * 666643;
1304    s7 += s18 * 470296;
1305    s8 += s18 * 654183;
1306    s9 -= s18 * 997805;
1307    s10 += s18 * 136657;
1308    s11 -= s18 * 683901;
1309
1310    carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
1311    s7 += carry6;
1312    s6 -= carry6 * ((uint64_t) 1L << 21);
1313    carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
1314    s9 += carry8;
1315    s8 -= carry8 * ((uint64_t) 1L << 21);
1316    carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
1317    s11 += carry10;
1318    s10 -= carry10 * ((uint64_t) 1L << 21);
1319    carry12 = (s12 + (int64_t) (1L << 20)) >> 21;
1320    s13 += carry12;
1321    s12 -= carry12 * ((uint64_t) 1L << 21);
1322    carry14 = (s14 + (int64_t) (1L << 20)) >> 21;
1323    s15 += carry14;
1324    s14 -= carry14 * ((uint64_t) 1L << 21);
1325    carry16 = (s16 + (int64_t) (1L << 20)) >> 21;
1326    s17 += carry16;
1327    s16 -= carry16 * ((uint64_t) 1L << 21);
1328
1329    carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
1330    s8 += carry7;
1331    s7 -= carry7 * ((uint64_t) 1L << 21);
1332    carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
1333    s10 += carry9;
1334    s9 -= carry9 * ((uint64_t) 1L << 21);
1335    carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
1336    s12 += carry11;
1337    s11 -= carry11 * ((uint64_t) 1L << 21);
1338    carry13 = (s13 + (int64_t) (1L << 20)) >> 21;
1339    s14 += carry13;
1340    s13 -= carry13 * ((uint64_t) 1L << 21);
1341    carry15 = (s15 + (int64_t) (1L << 20)) >> 21;
1342    s16 += carry15;
1343    s15 -= carry15 * ((uint64_t) 1L << 21);
1344
1345    s5 += s17 * 666643;
1346    s6 += s17 * 470296;
1347    s7 += s17 * 654183;
1348    s8 -= s17 * 997805;
1349    s9 += s17 * 136657;
1350    s10 -= s17 * 683901;
1351
1352    s4 += s16 * 666643;
1353    s5 += s16 * 470296;
1354    s6 += s16 * 654183;
1355    s7 -= s16 * 997805;
1356    s8 += s16 * 136657;
1357    s9 -= s16 * 683901;
1358
1359    s3 += s15 * 666643;
1360    s4 += s15 * 470296;
1361    s5 += s15 * 654183;
1362    s6 -= s15 * 997805;
1363    s7 += s15 * 136657;
1364    s8 -= s15 * 683901;
1365
1366    s2 += s14 * 666643;
1367    s3 += s14 * 470296;
1368    s4 += s14 * 654183;
1369    s5 -= s14 * 997805;
1370    s6 += s14 * 136657;
1371    s7 -= s14 * 683901;
1372
1373    s1 += s13 * 666643;
1374    s2 += s13 * 470296;
1375    s3 += s13 * 654183;
1376    s4 -= s13 * 997805;
1377    s5 += s13 * 136657;
1378    s6 -= s13 * 683901;
1379
1380    s0 += s12 * 666643;
1381    s1 += s12 * 470296;
1382    s2 += s12 * 654183;
1383    s3 -= s12 * 997805;
1384    s4 += s12 * 136657;
1385    s5 -= s12 * 683901;
1386    s12 = 0;
1387
1388    carry0 = (s0 + (int64_t) (1L << 20)) >> 21;
1389    s1 += carry0;
1390    s0 -= carry0 * ((uint64_t) 1L << 21);
1391    carry2 = (s2 + (int64_t) (1L << 20)) >> 21;
1392    s3 += carry2;
1393    s2 -= carry2 * ((uint64_t) 1L << 21);
1394    carry4 = (s4 + (int64_t) (1L << 20)) >> 21;
1395    s5 += carry4;
1396    s4 -= carry4 * ((uint64_t) 1L << 21);
1397    carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
1398    s7 += carry6;
1399    s6 -= carry6 * ((uint64_t) 1L << 21);
1400    carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
1401    s9 += carry8;
1402    s8 -= carry8 * ((uint64_t) 1L << 21);
1403    carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
1404    s11 += carry10;
1405    s10 -= carry10 * ((uint64_t) 1L << 21);
1406
1407    carry1 = (s1 + (int64_t) (1L << 20)) >> 21;
1408    s2 += carry1;
1409    s1 -= carry1 * ((uint64_t) 1L << 21);
1410    carry3 = (s3 + (int64_t) (1L << 20)) >> 21;
1411    s4 += carry3;
1412    s3 -= carry3 * ((uint64_t) 1L << 21);
1413    carry5 = (s5 + (int64_t) (1L << 20)) >> 21;
1414    s6 += carry5;
1415    s5 -= carry5 * ((uint64_t) 1L << 21);
1416    carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
1417    s8 += carry7;
1418    s7 -= carry7 * ((uint64_t) 1L << 21);
1419    carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
1420    s10 += carry9;
1421    s9 -= carry9 * ((uint64_t) 1L << 21);
1422    carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
1423    s12 += carry11;
1424    s11 -= carry11 * ((uint64_t) 1L << 21);
1425
1426    s0 += s12 * 666643;
1427    s1 += s12 * 470296;
1428    s2 += s12 * 654183;
1429    s3 -= s12 * 997805;
1430    s4 += s12 * 136657;
1431    s5 -= s12 * 683901;
1432    s12 = 0;
1433
1434    carry0 = s0 >> 21;
1435    s1 += carry0;
1436    s0 -= carry0 * ((uint64_t) 1L << 21);
1437    carry1 = s1 >> 21;
1438    s2 += carry1;
1439    s1 -= carry1 * ((uint64_t) 1L << 21);
1440    carry2 = s2 >> 21;
1441    s3 += carry2;
1442    s2 -= carry2 * ((uint64_t) 1L << 21);
1443    carry3 = s3 >> 21;
1444    s4 += carry3;
1445    s3 -= carry3 * ((uint64_t) 1L << 21);
1446    carry4 = s4 >> 21;
1447    s5 += carry4;
1448    s4 -= carry4 * ((uint64_t) 1L << 21);
1449    carry5 = s5 >> 21;
1450    s6 += carry5;
1451    s5 -= carry5 * ((uint64_t) 1L << 21);
1452    carry6 = s6 >> 21;
1453    s7 += carry6;
1454    s6 -= carry6 * ((uint64_t) 1L << 21);
1455    carry7 = s7 >> 21;
1456    s8 += carry7;
1457    s7 -= carry7 * ((uint64_t) 1L << 21);
1458    carry8 = s8 >> 21;
1459    s9 += carry8;
1460    s8 -= carry8 * ((uint64_t) 1L << 21);
1461    carry9 = s9 >> 21;
1462    s10 += carry9;
1463    s9 -= carry9 * ((uint64_t) 1L << 21);
1464    carry10 = s10 >> 21;
1465    s11 += carry10;
1466    s10 -= carry10 * ((uint64_t) 1L << 21);
1467    carry11 = s11 >> 21;
1468    s12 += carry11;
1469    s11 -= carry11 * ((uint64_t) 1L << 21);
1470
1471    s0 += s12 * 666643;
1472    s1 += s12 * 470296;
1473    s2 += s12 * 654183;
1474    s3 -= s12 * 997805;
1475    s4 += s12 * 136657;
1476    s5 -= s12 * 683901;
1477
1478    carry0 = s0 >> 21;
1479    s1 += carry0;
1480    s0 -= carry0 * ((uint64_t) 1L << 21);
1481    carry1 = s1 >> 21;
1482    s2 += carry1;
1483    s1 -= carry1 * ((uint64_t) 1L << 21);
1484    carry2 = s2 >> 21;
1485    s3 += carry2;
1486    s2 -= carry2 * ((uint64_t) 1L << 21);
1487    carry3 = s3 >> 21;
1488    s4 += carry3;
1489    s3 -= carry3 * ((uint64_t) 1L << 21);
1490    carry4 = s4 >> 21;
1491    s5 += carry4;
1492    s4 -= carry4 * ((uint64_t) 1L << 21);
1493    carry5 = s5 >> 21;
1494    s6 += carry5;
1495    s5 -= carry5 * ((uint64_t) 1L << 21);
1496    carry6 = s6 >> 21;
1497    s7 += carry6;
1498    s6 -= carry6 * ((uint64_t) 1L << 21);
1499    carry7 = s7 >> 21;
1500    s8 += carry7;
1501    s7 -= carry7 * ((uint64_t) 1L << 21);
1502    carry8 = s8 >> 21;
1503    s9 += carry8;
1504    s8 -= carry8 * ((uint64_t) 1L << 21);
1505    carry9 = s9 >> 21;
1506    s10 += carry9;
1507    s9 -= carry9 * ((uint64_t) 1L << 21);
1508    carry10 = s10 >> 21;
1509    s11 += carry10;
1510    s10 -= carry10 * ((uint64_t) 1L << 21);
1511
1512    s[0]  = s0 >> 0;
1513    s[1]  = s0 >> 8;
1514    s[2]  = (s0 >> 16) | (s1 * ((uint64_t) 1 << 5));
1515    s[3]  = s1 >> 3;
1516    s[4]  = s1 >> 11;
1517    s[5]  = (s1 >> 19) | (s2 * ((uint64_t) 1 << 2));
1518    s[6]  = s2 >> 6;
1519    s[7]  = (s2 >> 14) | (s3 * ((uint64_t) 1 << 7));
1520    s[8]  = s3 >> 1;
1521    s[9]  = s3 >> 9;
1522    s[10] = (s3 >> 17) | (s4 * ((uint64_t) 1 << 4));
1523    s[11] = s4 >> 4;
1524    s[12] = s4 >> 12;
1525    s[13] = (s4 >> 20) | (s5 * ((uint64_t) 1 << 1));
1526    s[14] = s5 >> 7;
1527    s[15] = (s5 >> 15) | (s6 * ((uint64_t) 1 << 6));
1528    s[16] = s6 >> 2;
1529    s[17] = s6 >> 10;
1530    s[18] = (s6 >> 18) | (s7 * ((uint64_t) 1 << 3));
1531    s[19] = s7 >> 5;
1532    s[20] = s7 >> 13;
1533    s[21] = s8 >> 0;
1534    s[22] = s8 >> 8;
1535    s[23] = (s8 >> 16) | (s9 * ((uint64_t) 1 << 5));
1536    s[24] = s9 >> 3;
1537    s[25] = s9 >> 11;
1538    s[26] = (s9 >> 19) | (s10 * ((uint64_t) 1 << 2));
1539    s[27] = s10 >> 6;
1540    s[28] = (s10 >> 14) | (s11 * ((uint64_t) 1 << 7));
1541    s[29] = s11 >> 1;
1542    s[30] = s11 >> 9;
1543    s[31] = s11 >> 17;
1544}
1545
1546/*
1547 Input:
1548 s[0]+256*s[1]+...+256^63*s[63] = s
1549 *
1550 Output:
1551 s[0]+256*s[1]+...+256^31*s[31] = s mod l
1552 where l = 2^252 + 27742317777372353535851937790883648493.
1553 Overwrites s in place.
1554 */
1555
1556void
1557sc25519_reduce(unsigned char *s)
1558{
1559    int64_t s0  = 2097151 & load_3(s);
1560    int64_t s1  = 2097151 & (load_4(s + 2) >> 5);
1561    int64_t s2  = 2097151 & (load_3(s + 5) >> 2);
1562    int64_t s3  = 2097151 & (load_4(s + 7) >> 7);
1563    int64_t s4  = 2097151 & (load_4(s + 10) >> 4);
1564    int64_t s5  = 2097151 & (load_3(s + 13) >> 1);
1565    int64_t s6  = 2097151 & (load_4(s + 15) >> 6);
1566    int64_t s7  = 2097151 & (load_3(s + 18) >> 3);
1567    int64_t s8  = 2097151 & load_3(s + 21);
1568    int64_t s9  = 2097151 & (load_4(s + 23) >> 5);
1569    int64_t s10 = 2097151 & (load_3(s + 26) >> 2);
1570    int64_t s11 = 2097151 & (load_4(s + 28) >> 7);
1571    int64_t s12 = 2097151 & (load_4(s + 31) >> 4);
1572    int64_t s13 = 2097151 & (load_3(s + 34) >> 1);
1573    int64_t s14 = 2097151 & (load_4(s + 36) >> 6);
1574    int64_t s15 = 2097151 & (load_3(s + 39) >> 3);
1575    int64_t s16 = 2097151 & load_3(s + 42);
1576    int64_t s17 = 2097151 & (load_4(s + 44) >> 5);
1577    int64_t s18 = 2097151 & (load_3(s + 47) >> 2);
1578    int64_t s19 = 2097151 & (load_4(s + 49) >> 7);
1579    int64_t s20 = 2097151 & (load_4(s + 52) >> 4);
1580    int64_t s21 = 2097151 & (load_3(s + 55) >> 1);
1581    int64_t s22 = 2097151 & (load_4(s + 57) >> 6);
1582    int64_t s23 = (load_4(s + 60) >> 3);
1583
1584    int64_t carry0;
1585    int64_t carry1;
1586    int64_t carry2;
1587    int64_t carry3;
1588    int64_t carry4;
1589    int64_t carry5;
1590    int64_t carry6;
1591    int64_t carry7;
1592    int64_t carry8;
1593    int64_t carry9;
1594    int64_t carry10;
1595    int64_t carry11;
1596    int64_t carry12;
1597    int64_t carry13;
1598    int64_t carry14;
1599    int64_t carry15;
1600    int64_t carry16;
1601
1602    s11 += s23 * 666643;
1603    s12 += s23 * 470296;
1604    s13 += s23 * 654183;
1605    s14 -= s23 * 997805;
1606    s15 += s23 * 136657;
1607    s16 -= s23 * 683901;
1608
1609    s10 += s22 * 666643;
1610    s11 += s22 * 470296;
1611    s12 += s22 * 654183;
1612    s13 -= s22 * 997805;
1613    s14 += s22 * 136657;
1614    s15 -= s22 * 683901;
1615
1616    s9 += s21 * 666643;
1617    s10 += s21 * 470296;
1618    s11 += s21 * 654183;
1619    s12 -= s21 * 997805;
1620    s13 += s21 * 136657;
1621    s14 -= s21 * 683901;
1622
1623    s8 += s20 * 666643;
1624    s9 += s20 * 470296;
1625    s10 += s20 * 654183;
1626    s11 -= s20 * 997805;
1627    s12 += s20 * 136657;
1628    s13 -= s20 * 683901;
1629
1630    s7 += s19 * 666643;
1631    s8 += s19 * 470296;
1632    s9 += s19 * 654183;
1633    s10 -= s19 * 997805;
1634    s11 += s19 * 136657;
1635    s12 -= s19 * 683901;
1636
1637    s6 += s18 * 666643;
1638    s7 += s18 * 470296;
1639    s8 += s18 * 654183;
1640    s9 -= s18 * 997805;
1641    s10 += s18 * 136657;
1642    s11 -= s18 * 683901;
1643
1644    carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
1645    s7 += carry6;
1646    s6 -= carry6 * ((uint64_t) 1L << 21);
1647    carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
1648    s9 += carry8;
1649    s8 -= carry8 * ((uint64_t) 1L << 21);
1650    carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
1651    s11 += carry10;
1652    s10 -= carry10 * ((uint64_t) 1L << 21);
1653    carry12 = (s12 + (int64_t) (1L << 20)) >> 21;
1654    s13 += carry12;
1655    s12 -= carry12 * ((uint64_t) 1L << 21);
1656    carry14 = (s14 + (int64_t) (1L << 20)) >> 21;
1657    s15 += carry14;
1658    s14 -= carry14 * ((uint64_t) 1L << 21);
1659    carry16 = (s16 + (int64_t) (1L << 20)) >> 21;
1660    s17 += carry16;
1661    s16 -= carry16 * ((uint64_t) 1L << 21);
1662
1663    carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
1664    s8 += carry7;
1665    s7 -= carry7 * ((uint64_t) 1L << 21);
1666    carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
1667    s10 += carry9;
1668    s9 -= carry9 * ((uint64_t) 1L << 21);
1669    carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
1670    s12 += carry11;
1671    s11 -= carry11 * ((uint64_t) 1L << 21);
1672    carry13 = (s13 + (int64_t) (1L << 20)) >> 21;
1673    s14 += carry13;
1674    s13 -= carry13 * ((uint64_t) 1L << 21);
1675    carry15 = (s15 + (int64_t) (1L << 20)) >> 21;
1676    s16 += carry15;
1677    s15 -= carry15 * ((uint64_t) 1L << 21);
1678
1679    s5 += s17 * 666643;
1680    s6 += s17 * 470296;
1681    s7 += s17 * 654183;
1682    s8 -= s17 * 997805;
1683    s9 += s17 * 136657;
1684    s10 -= s17 * 683901;
1685
1686    s4 += s16 * 666643;
1687    s5 += s16 * 470296;
1688    s6 += s16 * 654183;
1689    s7 -= s16 * 997805;
1690    s8 += s16 * 136657;
1691    s9 -= s16 * 683901;
1692
1693    s3 += s15 * 666643;
1694    s4 += s15 * 470296;
1695    s5 += s15 * 654183;
1696    s6 -= s15 * 997805;
1697    s7 += s15 * 136657;
1698    s8 -= s15 * 683901;
1699
1700    s2 += s14 * 666643;
1701    s3 += s14 * 470296;
1702    s4 += s14 * 654183;
1703    s5 -= s14 * 997805;
1704    s6 += s14 * 136657;
1705    s7 -= s14 * 683901;
1706
1707    s1 += s13 * 666643;
1708    s2 += s13 * 470296;
1709    s3 += s13 * 654183;
1710    s4 -= s13 * 997805;
1711    s5 += s13 * 136657;
1712    s6 -= s13 * 683901;
1713
1714    s0 += s12 * 666643;
1715    s1 += s12 * 470296;
1716    s2 += s12 * 654183;
1717    s3 -= s12 * 997805;
1718    s4 += s12 * 136657;
1719    s5 -= s12 * 683901;
1720    s12 = 0;
1721
1722    carry0 = (s0 + (int64_t) (1L << 20)) >> 21;
1723    s1 += carry0;
1724    s0 -= carry0 * ((uint64_t) 1L << 21);
1725    carry2 = (s2 + (int64_t) (1L << 20)) >> 21;
1726    s3 += carry2;
1727    s2 -= carry2 * ((uint64_t) 1L << 21);
1728    carry4 = (s4 + (int64_t) (1L << 20)) >> 21;
1729    s5 += carry4;
1730    s4 -= carry4 * ((uint64_t) 1L << 21);
1731    carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
1732    s7 += carry6;
1733    s6 -= carry6 * ((uint64_t) 1L << 21);
1734    carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
1735    s9 += carry8;
1736    s8 -= carry8 * ((uint64_t) 1L << 21);
1737    carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
1738    s11 += carry10;
1739    s10 -= carry10 * ((uint64_t) 1L << 21);
1740
1741    carry1 = (s1 + (int64_t) (1L << 20)) >> 21;
1742    s2 += carry1;
1743    s1 -= carry1 * ((uint64_t) 1L << 21);
1744    carry3 = (s3 + (int64_t) (1L << 20)) >> 21;
1745    s4 += carry3;
1746    s3 -= carry3 * ((uint64_t) 1L << 21);
1747    carry5 = (s5 + (int64_t) (1L << 20)) >> 21;
1748    s6 += carry5;
1749    s5 -= carry5 * ((uint64_t) 1L << 21);
1750    carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
1751    s8 += carry7;
1752    s7 -= carry7 * ((uint64_t) 1L << 21);
1753    carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
1754    s10 += carry9;
1755    s9 -= carry9 * ((uint64_t) 1L << 21);
1756    carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
1757    s12 += carry11;
1758    s11 -= carry11 * ((uint64_t) 1L << 21);
1759
1760    s0 += s12 * 666643;
1761    s1 += s12 * 470296;
1762    s2 += s12 * 654183;
1763    s3 -= s12 * 997805;
1764    s4 += s12 * 136657;
1765    s5 -= s12 * 683901;
1766    s12 = 0;
1767
1768    carry0 = s0 >> 21;
1769    s1 += carry0;
1770    s0 -= carry0 * ((uint64_t) 1L << 21);
1771    carry1 = s1 >> 21;
1772    s2 += carry1;
1773    s1 -= carry1 * ((uint64_t) 1L << 21);
1774    carry2 = s2 >> 21;
1775    s3 += carry2;
1776    s2 -= carry2 * ((uint64_t) 1L << 21);
1777    carry3 = s3 >> 21;
1778    s4 += carry3;
1779    s3 -= carry3 * ((uint64_t) 1L << 21);
1780    carry4 = s4 >> 21;
1781    s5 += carry4;
1782    s4 -= carry4 * ((uint64_t) 1L << 21);
1783    carry5 = s5 >> 21;
1784    s6 += carry5;
1785    s5 -= carry5 * ((uint64_t) 1L << 21);
1786    carry6 = s6 >> 21;
1787    s7 += carry6;
1788    s6 -= carry6 * ((uint64_t) 1L << 21);
1789    carry7 = s7 >> 21;
1790    s8 += carry7;
1791    s7 -= carry7 * ((uint64_t) 1L << 21);
1792    carry8 = s8 >> 21;
1793    s9 += carry8;
1794    s8 -= carry8 * ((uint64_t) 1L << 21);
1795    carry9 = s9 >> 21;
1796    s10 += carry9;
1797    s9 -= carry9 * ((uint64_t) 1L << 21);
1798    carry10 = s10 >> 21;
1799    s11 += carry10;
1800    s10 -= carry10 * ((uint64_t) 1L << 21);
1801    carry11 = s11 >> 21;
1802    s12 += carry11;
1803    s11 -= carry11 * ((uint64_t) 1L << 21);
1804
1805    s0 += s12 * 666643;
1806    s1 += s12 * 470296;
1807    s2 += s12 * 654183;
1808    s3 -= s12 * 997805;
1809    s4 += s12 * 136657;
1810    s5 -= s12 * 683901;
1811
1812    carry0 = s0 >> 21;
1813    s1 += carry0;
1814    s0 -= carry0 * ((uint64_t) 1L << 21);
1815    carry1 = s1 >> 21;
1816    s2 += carry1;
1817    s1 -= carry1 * ((uint64_t) 1L << 21);
1818    carry2 = s2 >> 21;
1819    s3 += carry2;
1820    s2 -= carry2 * ((uint64_t) 1L << 21);
1821    carry3 = s3 >> 21;
1822    s4 += carry3;
1823    s3 -= carry3 * ((uint64_t) 1L << 21);
1824    carry4 = s4 >> 21;
1825    s5 += carry4;
1826    s4 -= carry4 * ((uint64_t) 1L << 21);
1827    carry5 = s5 >> 21;
1828    s6 += carry5;
1829    s5 -= carry5 * ((uint64_t) 1L << 21);
1830    carry6 = s6 >> 21;
1831    s7 += carry6;
1832    s6 -= carry6 * ((uint64_t) 1L << 21);
1833    carry7 = s7 >> 21;
1834    s8 += carry7;
1835    s7 -= carry7 * ((uint64_t) 1L << 21);
1836    carry8 = s8 >> 21;
1837    s9 += carry8;
1838    s8 -= carry8 * ((uint64_t) 1L << 21);
1839    carry9 = s9 >> 21;
1840    s10 += carry9;
1841    s9 -= carry9 * ((uint64_t) 1L << 21);
1842    carry10 = s10 >> 21;
1843    s11 += carry10;
1844    s10 -= carry10 * ((uint64_t) 1L << 21);
1845
1846    s[0]  = s0 >> 0;
1847    s[1]  = s0 >> 8;
1848    s[2]  = (s0 >> 16) | (s1 * ((uint64_t) 1 << 5));
1849    s[3]  = s1 >> 3;
1850    s[4]  = s1 >> 11;
1851    s[5]  = (s1 >> 19) | (s2 * ((uint64_t) 1 << 2));
1852    s[6]  = s2 >> 6;
1853    s[7]  = (s2 >> 14) | (s3 * ((uint64_t) 1 << 7));
1854    s[8]  = s3 >> 1;
1855    s[9]  = s3 >> 9;
1856    s[10] = (s3 >> 17) | (s4 * ((uint64_t) 1 << 4));
1857    s[11] = s4 >> 4;
1858    s[12] = s4 >> 12;
1859    s[13] = (s4 >> 20) | (s5 * ((uint64_t) 1 << 1));
1860    s[14] = s5 >> 7;
1861    s[15] = (s5 >> 15) | (s6 * ((uint64_t) 1 << 6));
1862    s[16] = s6 >> 2;
1863    s[17] = s6 >> 10;
1864    s[18] = (s6 >> 18) | (s7 * ((uint64_t) 1 << 3));
1865    s[19] = s7 >> 5;
1866    s[20] = s7 >> 13;
1867    s[21] = s8 >> 0;
1868    s[22] = s8 >> 8;
1869    s[23] = (s8 >> 16) | (s9 * ((uint64_t) 1 << 5));
1870    s[24] = s9 >> 3;
1871    s[25] = s9 >> 11;
1872    s[26] = (s9 >> 19) | (s10 * ((uint64_t) 1 << 2));
1873    s[27] = s10 >> 6;
1874    s[28] = (s10 >> 14) | (s11 * ((uint64_t) 1 << 7));
1875    s[29] = s11 >> 1;
1876    s[30] = s11 >> 9;
1877    s[31] = s11 >> 17;
1878}
1879
1880int
1881sc25519_is_canonical(const unsigned char *s)
1882{
1883    /* 2^252+27742317777372353535851937790883648493 */
1884    static const unsigned char L[32] = {
1885        0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58, 0xd6, 0x9c, 0xf7,
1886        0xa2, 0xde, 0xf9, 0xde, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1887        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10
1888    };
1889    unsigned char c = 0;
1890    unsigned char n = 1;
1891    unsigned int  i = 32;
1892
1893    do {
1894        i--;
1895        c |= ((s[i] - L[i]) >> 8) & n;
1896        n &= ((s[i] ^ L[i]) - 1) >> 8;
1897    } while (i != 0);
1898
1899    return (c != 0);
1900}
1901
1902static void
1903chi25519(fe25519 out, const fe25519 z)
1904{
1905    fe25519 t0, t1, t2, t3;
1906    int     i;
1907
1908    fe25519_sq(t0, z);
1909    fe25519_mul(t1, t0, z);
1910    fe25519_sq(t0, t1);
1911    fe25519_sq(t2, t0);
1912    fe25519_sq(t2, t2);
1913    fe25519_mul(t2, t2, t0);
1914    fe25519_mul(t1, t2, z);
1915    fe25519_sq(t2, t1);
1916
1917    for (i = 1; i < 5; i++) {
1918        fe25519_sq(t2, t2);
1919    }
1920    fe25519_mul(t1, t2, t1);
1921    fe25519_sq(t2, t1);
1922    for (i = 1; i < 10; i++) {
1923        fe25519_sq(t2, t2);
1924    }
1925    fe25519_mul(t2, t2, t1);
1926    fe25519_sq(t3, t2);
1927    for (i = 1; i < 20; i++) {
1928        fe25519_sq(t3, t3);
1929    }
1930    fe25519_mul(t2, t3, t2);
1931    fe25519_sq(t2, t2);
1932    for (i = 1; i < 10; i++) {
1933        fe25519_sq(t2, t2);
1934    }
1935    fe25519_mul(t1, t2, t1);
1936    fe25519_sq(t2, t1);
1937    for (i = 1; i < 50; i++) {
1938        fe25519_sq(t2, t2);
1939    }
1940    fe25519_mul(t2, t2, t1);
1941    fe25519_sq(t3, t2);
1942    for (i = 1; i < 100; i++) {
1943        fe25519_sq(t3, t3);
1944    }
1945    fe25519_mul(t2, t3, t2);
1946    fe25519_sq(t2, t2);
1947    for (i = 1; i < 50; i++) {
1948        fe25519_sq(t2, t2);
1949    }
1950    fe25519_mul(t1, t2, t1);
1951    fe25519_sq(t1, t1);
1952    for (i = 1; i < 4; i++) {
1953        fe25519_sq(t1, t1);
1954    }
1955    fe25519_mul(out, t1, t0);
1956}
1957
1958void
1959ge25519_from_uniform(unsigned char s[32], const unsigned char r[32])
1960{
1961    fe25519       e;
1962    fe25519       negx;
1963    fe25519       rr2;
1964    fe25519       x, x2, x3;
1965    ge25519_p3    p3;
1966    ge25519_p1p1  p1;
1967    ge25519_p2    p2;
1968    unsigned int  e_is_minus_1;
1969    unsigned char x_sign;
1970
1971    memcpy(s, r, 32);
1972    x_sign = s[31] & 0x80;
1973    s[31] &= 0x7f;
1974
1975    fe25519_frombytes(rr2, s);
1976
1977    /* elligator */
1978    fe25519_sq2(rr2, rr2);
1979    rr2[0]++;
1980    fe25519_invert(rr2, rr2);
1981    fe25519_mul(x, curve25519_A, rr2);
1982    fe25519_neg(x, x);
1983
1984    fe25519_sq(x2, x);
1985    fe25519_mul(x3, x, x2);
1986    fe25519_add(e, x3, x);
1987    fe25519_mul(x2, x2, curve25519_A);
1988    fe25519_add(e, x2, e);
1989
1990    chi25519(e, e);
1991
1992    fe25519_tobytes(s, e);
1993    e_is_minus_1 = s[1] & 1;
1994    fe25519_neg(negx, x);
1995    fe25519_cmov(x, negx, e_is_minus_1);
1996    fe25519_0(x2);
1997    fe25519_cmov(x2, curve25519_A, e_is_minus_1);
1998    fe25519_sub(x, x, x2);
1999
2000    /* yed = (x-1)/(x+1) */
2001    {
2002        fe25519 one;
2003        fe25519 x_plus_one;
2004        fe25519 x_plus_one_inv;
2005        fe25519 x_minus_one;
2006        fe25519 yed;
2007
2008        fe25519_1(one);
2009        fe25519_add(x_plus_one, x, one);
2010        fe25519_sub(x_minus_one, x, one);
2011        fe25519_invert(x_plus_one_inv, x_plus_one);
2012        fe25519_mul(yed, x_minus_one, x_plus_one_inv);
2013        fe25519_tobytes(s, yed);
2014    }
2015
2016    /* recover x */
2017    s[31] |= x_sign;
2018    if (ge25519_frombytes(&p3, s) != 0) {
2019        abort(); /* LCOV_EXCL_LINE */
2020    }
2021
2022    /* multiply by the cofactor */
2023    ge25519_p3_dbl(&p1, &p3);
2024    ge25519_p1p1_to_p2(&p2, &p1);
2025    ge25519_p2_dbl(&p1, &p2);
2026    ge25519_p1p1_to_p2(&p2, &p1);
2027    ge25519_p2_dbl(&p1, &p2);
2028    ge25519_p1p1_to_p3(&p3, &p1);
2029
2030    ge25519_p3_tobytes(s, &p3);
2031}
2032