1/*
2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#include "inner.h"
26
27/*
28 * Parameters for supported curves:
29 *   - field modulus p
30 *   - R^2 mod p (R = 2^(15k) for the smallest k such that R >= p)
31 *   - b*R mod p (b is the second curve equation parameter)
32 */
33
34static const uint16_t P256_P[] = {
35	0x0111,
36	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x003F, 0x0000,
37	0x0000, 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x4000, 0x7FFF,
38	0x7FFF, 0x0001
39};
40
41static const uint16_t P256_R2[] = {
42	0x0111,
43	0x0000, 0x6000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7FFC, 0x7FFF,
44	0x7FBF, 0x7FFF, 0x7FBF, 0x7FFF, 0x7FFF, 0x7FFF, 0x77FF, 0x7FFF,
45	0x4FFF, 0x0000
46};
47
48static const uint16_t P256_B[] = {
49	0x0111,
50	0x770C, 0x5EEF, 0x29C4, 0x3EC4, 0x6273, 0x0486, 0x4543, 0x3993,
51	0x3C01, 0x6B56, 0x212E, 0x57EE, 0x4882, 0x204B, 0x7483, 0x3C16,
52	0x0187, 0x0000
53};
54
55static const uint16_t P384_P[] = {
56	0x0199,
57	0x7FFF, 0x7FFF, 0x0003, 0x0000, 0x0000, 0x0000, 0x7FC0, 0x7FFF,
58	0x7EFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
59	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
60	0x7FFF, 0x01FF
61};
62
63static const uint16_t P384_R2[] = {
64	0x0199,
65	0x1000, 0x0000, 0x0000, 0x7FFF, 0x7FFF, 0x0001, 0x0000, 0x0010,
66	0x0000, 0x0000, 0x0000, 0x7F00, 0x7FFF, 0x01FF, 0x0000, 0x1000,
67	0x0000, 0x2000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
68	0x0000, 0x0000
69};
70
71static const uint16_t P384_B[] = {
72	0x0199,
73	0x7333, 0x2096, 0x70D1, 0x2310, 0x3020, 0x6197, 0x1464, 0x35BB,
74	0x70CA, 0x0117, 0x1920, 0x4136, 0x5FC8, 0x5713, 0x4938, 0x7DD2,
75	0x4DD2, 0x4A71, 0x0220, 0x683E, 0x2C87, 0x4DB1, 0x7BFF, 0x6C09,
76	0x0452, 0x0084
77};
78
79static const uint16_t P521_P[] = {
80	0x022B,
81	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
82	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
83	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
84	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
85	0x7FFF, 0x7FFF, 0x07FF
86};
87
88static const uint16_t P521_R2[] = {
89	0x022B,
90	0x0100, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
91	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
92	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
93	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
94	0x0000, 0x0000, 0x0000
95};
96
97static const uint16_t P521_B[] = {
98	0x022B,
99	0x7002, 0x6A07, 0x751A, 0x228F, 0x71EF, 0x5869, 0x20F4, 0x1EFC,
100	0x7357, 0x37E0, 0x4EEC, 0x605E, 0x1652, 0x26F6, 0x31FA, 0x4A8F,
101	0x6193, 0x3C2A, 0x3C42, 0x48C7, 0x3489, 0x6771, 0x4C57, 0x5CCD,
102	0x2725, 0x545B, 0x503B, 0x5B42, 0x21A0, 0x2534, 0x687E, 0x70E4,
103	0x1618, 0x27D7, 0x0465
104};
105
106typedef struct {
107	const uint16_t *p;
108	const uint16_t *b;
109	const uint16_t *R2;
110	uint16_t p0i;
111	size_t point_len;
112} curve_params;
113
114static inline const curve_params *
115id_to_curve(int curve)
116{
117	static const curve_params pp[] = {
118		{ P256_P, P256_B, P256_R2, 0x0001,  65 },
119		{ P384_P, P384_B, P384_R2, 0x0001,  97 },
120		{ P521_P, P521_B, P521_R2, 0x0001, 133 }
121	};
122
123	return &pp[curve - BR_EC_secp256r1];
124}
125
126#define I15_LEN   ((BR_MAX_EC_SIZE + 29) / 15)
127
128/*
129 * Type for a point in Jacobian coordinates:
130 * -- three values, x, y and z, in Montgomery representation
131 * -- affine coordinates are X = x / z^2 and Y = y / z^3
132 * -- for the point at infinity, z = 0
133 */
134typedef struct {
135	uint16_t c[3][I15_LEN];
136} jacobian;
137
138/*
139 * We use a custom interpreter that uses a dozen registers, and
140 * only six operations:
141 *    MSET(d, a)       copy a into d
142 *    MADD(d, a)       d = d+a (modular)
143 *    MSUB(d, a)       d = d-a (modular)
144 *    MMUL(d, a, b)    d = a*b (Montgomery multiplication)
145 *    MINV(d, a, b)    invert d modulo p; a and b are used as scratch registers
146 *    MTZ(d)           clear return value if d = 0
147 * Destination of MMUL (d) must be distinct from operands (a and b).
148 * There is no such constraint for MSUB and MADD.
149 *
150 * Registers include the operand coordinates, and temporaries.
151 */
152#define MSET(d, a)      (0x0000 + ((d) << 8) + ((a) << 4))
153#define MADD(d, a)      (0x1000 + ((d) << 8) + ((a) << 4))
154#define MSUB(d, a)      (0x2000 + ((d) << 8) + ((a) << 4))
155#define MMUL(d, a, b)   (0x3000 + ((d) << 8) + ((a) << 4) + (b))
156#define MINV(d, a, b)   (0x4000 + ((d) << 8) + ((a) << 4) + (b))
157#define MTZ(d)          (0x5000 + ((d) << 8))
158#define ENDCODE         0
159
160/*
161 * Registers for the input operands.
162 */
163#define P1x    0
164#define P1y    1
165#define P1z    2
166#define P2x    3
167#define P2y    4
168#define P2z    5
169
170/*
171 * Alternate names for the first input operand.
172 */
173#define Px     0
174#define Py     1
175#define Pz     2
176
177/*
178 * Temporaries.
179 */
180#define t1     6
181#define t2     7
182#define t3     8
183#define t4     9
184#define t5    10
185#define t6    11
186#define t7    12
187
188/*
189 * Extra scratch registers available when there is no second operand (e.g.
190 * for "double" and "affine").
191 */
192#define t8     3
193#define t9     4
194#define t10    5
195
196/*
197 * Doubling formulas are:
198 *
199 *   s = 4*x*y^2
200 *   m = 3*(x + z^2)*(x - z^2)
201 *   x' = m^2 - 2*s
202 *   y' = m*(s - x') - 8*y^4
203 *   z' = 2*y*z
204 *
205 * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
206 * should. This case should not happen anyway, because our curves have
207 * prime order, and thus do not contain any point of order 2.
208 *
209 * If P is infinity (z = 0), then again the formulas yield infinity,
210 * which is correct. Thus, this code works for all points.
211 *
212 * Cost: 8 multiplications
213 */
214static const uint16_t code_double[] = {
215	/*
216	 * Compute z^2 (in t1).
217	 */
218	MMUL(t1, Pz, Pz),
219
220	/*
221	 * Compute x-z^2 (in t2) and then x+z^2 (in t1).
222	 */
223	MSET(t2, Px),
224	MSUB(t2, t1),
225	MADD(t1, Px),
226
227	/*
228	 * Compute m = 3*(x+z^2)*(x-z^2) (in t1).
229	 */
230	MMUL(t3, t1, t2),
231	MSET(t1, t3),
232	MADD(t1, t3),
233	MADD(t1, t3),
234
235	/*
236	 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
237	 */
238	MMUL(t3, Py, Py),
239	MADD(t3, t3),
240	MMUL(t2, Px, t3),
241	MADD(t2, t2),
242
243	/*
244	 * Compute x' = m^2 - 2*s.
245	 */
246	MMUL(Px, t1, t1),
247	MSUB(Px, t2),
248	MSUB(Px, t2),
249
250	/*
251	 * Compute z' = 2*y*z.
252	 */
253	MMUL(t4, Py, Pz),
254	MSET(Pz, t4),
255	MADD(Pz, t4),
256
257	/*
258	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
259	 * 2*y^2 in t3.
260	 */
261	MSUB(t2, Px),
262	MMUL(Py, t1, t2),
263	MMUL(t4, t3, t3),
264	MSUB(Py, t4),
265	MSUB(Py, t4),
266
267	ENDCODE
268};
269
270/*
271 * Addtions formulas are:
272 *
273 *   u1 = x1 * z2^2
274 *   u2 = x2 * z1^2
275 *   s1 = y1 * z2^3
276 *   s2 = y2 * z1^3
277 *   h = u2 - u1
278 *   r = s2 - s1
279 *   x3 = r^2 - h^3 - 2 * u1 * h^2
280 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
281 *   z3 = h * z1 * z2
282 *
283 * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
284 * z3 == 0, so the result is correct.
285 * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
286 * not correct.
287 * h == 0 only if u1 == u2; this happens in two cases:
288 * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
289 * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity)
290 *
291 * Thus, the following situations are not handled correctly:
292 * -- P1 = 0 and P2 != 0
293 * -- P1 != 0 and P2 = 0
294 * -- P1 = P2
295 * All other cases are properly computed. However, even in "incorrect"
296 * situations, the three coordinates still are properly formed field
297 * elements.
298 *
299 * The returned flag is cleared if r == 0. This happens in the following
300 * cases:
301 * -- Both points are on the same horizontal line (same Y coordinate).
302 * -- Both points are infinity.
303 * -- One point is infinity and the other is on line Y = 0.
304 * The third case cannot happen with our curves (there is no valid point
305 * on line Y = 0 since that would be a point of order 2). If the two
306 * source points are non-infinity, then remains only the case where the
307 * two points are on the same horizontal line.
308 *
309 * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
310 * P2 != 0:
311 * -- If the returned value is not the point at infinity, then it was properly
312 * computed.
313 * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
314 * is indeed the point at infinity.
315 * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
316 * use the 'double' code.
317 *
318 * Cost: 16 multiplications
319 */
320static const uint16_t code_add[] = {
321	/*
322	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
323	 */
324	MMUL(t3, P2z, P2z),
325	MMUL(t1, P1x, t3),
326	MMUL(t4, P2z, t3),
327	MMUL(t3, P1y, t4),
328
329	/*
330	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
331	 */
332	MMUL(t4, P1z, P1z),
333	MMUL(t2, P2x, t4),
334	MMUL(t5, P1z, t4),
335	MMUL(t4, P2y, t5),
336
337	/*
338	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
339	 */
340	MSUB(t2, t1),
341	MSUB(t4, t3),
342
343	/*
344	 * Report cases where r = 0 through the returned flag.
345	 */
346	MTZ(t4),
347
348	/*
349	 * Compute u1*h^2 (in t6) and h^3 (in t5).
350	 */
351	MMUL(t7, t2, t2),
352	MMUL(t6, t1, t7),
353	MMUL(t5, t7, t2),
354
355	/*
356	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
357	 * t1 and t7 can be used as scratch registers.
358	 */
359	MMUL(P1x, t4, t4),
360	MSUB(P1x, t5),
361	MSUB(P1x, t6),
362	MSUB(P1x, t6),
363
364	/*
365	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
366	 */
367	MSUB(t6, P1x),
368	MMUL(P1y, t4, t6),
369	MMUL(t1, t5, t3),
370	MSUB(P1y, t1),
371
372	/*
373	 * Compute z3 = h*z1*z2.
374	 */
375	MMUL(t1, P1z, P2z),
376	MMUL(P1z, t1, t2),
377
378	ENDCODE
379};
380
381/*
382 * Check that the point is on the curve. This code snippet assumes the
383 * following conventions:
384 * -- Coordinates x and y have been freshly decoded in P1 (but not
385 * converted to Montgomery coordinates yet).
386 * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
387 */
388static const uint16_t code_check[] = {
389
390	/* Convert x and y to Montgomery representation. */
391	MMUL(t1, P1x, P2x),
392	MMUL(t2, P1y, P2x),
393	MSET(P1x, t1),
394	MSET(P1y, t2),
395
396	/* Compute x^3 in t1. */
397	MMUL(t2, P1x, P1x),
398	MMUL(t1, P1x, t2),
399
400	/* Subtract 3*x from t1. */
401	MSUB(t1, P1x),
402	MSUB(t1, P1x),
403	MSUB(t1, P1x),
404
405	/* Add b. */
406	MADD(t1, P2y),
407
408	/* Compute y^2 in t2. */
409	MMUL(t2, P1y, P1y),
410
411	/* Compare y^2 with x^3 - 3*x + b; they must match. */
412	MSUB(t1, t2),
413	MTZ(t1),
414
415	/* Set z to 1 (in Montgomery representation). */
416	MMUL(P1z, P2x, P2z),
417
418	ENDCODE
419};
420
421/*
422 * Conversion back to affine coordinates. This code snippet assumes that
423 * the z coordinate of P2 is set to 1 (not in Montgomery representation).
424 */
425static const uint16_t code_affine[] = {
426
427	/* Save z*R in t1. */
428	MSET(t1, P1z),
429
430	/* Compute z^3 in t2. */
431	MMUL(t2, P1z, P1z),
432	MMUL(t3, P1z, t2),
433	MMUL(t2, t3, P2z),
434
435	/* Invert to (1/z^3) in t2. */
436	MINV(t2, t3, t4),
437
438	/* Compute y. */
439	MSET(t3, P1y),
440	MMUL(P1y, t2, t3),
441
442	/* Compute (1/z^2) in t3. */
443	MMUL(t3, t2, t1),
444
445	/* Compute x. */
446	MSET(t2, P1x),
447	MMUL(P1x, t2, t3),
448
449	ENDCODE
450};
451
452static uint32_t
453run_code(jacobian *P1, const jacobian *P2,
454	const curve_params *cc, const uint16_t *code)
455{
456	uint32_t r;
457	uint16_t t[13][I15_LEN];
458	size_t u;
459
460	r = 1;
461
462	/*
463	 * Copy the two operands in the dedicated registers.
464	 */
465	memcpy(t[P1x], P1->c, 3 * I15_LEN * sizeof(uint16_t));
466	memcpy(t[P2x], P2->c, 3 * I15_LEN * sizeof(uint16_t));
467
468	/*
469	 * Run formulas.
470	 */
471	for (u = 0;; u ++) {
472		unsigned op, d, a, b;
473
474		op = code[u];
475		if (op == 0) {
476			break;
477		}
478		d = (op >> 8) & 0x0F;
479		a = (op >> 4) & 0x0F;
480		b = op & 0x0F;
481		op >>= 12;
482		switch (op) {
483			uint32_t ctl;
484			size_t plen;
485			unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3];
486
487		case 0:
488			memcpy(t[d], t[a], I15_LEN * sizeof(uint16_t));
489			break;
490		case 1:
491			ctl = br_i15_add(t[d], t[a], 1);
492			ctl |= NOT(br_i15_sub(t[d], cc->p, 0));
493			br_i15_sub(t[d], cc->p, ctl);
494			break;
495		case 2:
496			br_i15_add(t[d], cc->p, br_i15_sub(t[d], t[a], 1));
497			break;
498		case 3:
499			br_i15_montymul(t[d], t[a], t[b], cc->p, cc->p0i);
500			break;
501		case 4:
502			plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
503			br_i15_encode(tp, plen, cc->p);
504			tp[plen - 1] -= 2;
505			br_i15_modpow(t[d], tp, plen,
506				cc->p, cc->p0i, t[a], t[b]);
507			break;
508		default:
509			r &= ~br_i15_iszero(t[d]);
510			break;
511		}
512	}
513
514	/*
515	 * Copy back result.
516	 */
517	memcpy(P1->c, t[P1x], 3 * I15_LEN * sizeof(uint16_t));
518	return r;
519}
520
521static void
522set_one(uint16_t *x, const uint16_t *p)
523{
524	size_t plen;
525
526	plen = (p[0] + 31) >> 4;
527	memset(x, 0, plen * sizeof *x);
528	x[0] = p[0];
529	x[1] = 0x0001;
530}
531
532static void
533point_zero(jacobian *P, const curve_params *cc)
534{
535	memset(P, 0, sizeof *P);
536	P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0];
537}
538
539static inline void
540point_double(jacobian *P, const curve_params *cc)
541{
542	run_code(P, P, cc, code_double);
543}
544
545static inline uint32_t
546point_add(jacobian *P1, const jacobian *P2, const curve_params *cc)
547{
548	return run_code(P1, P2, cc, code_add);
549}
550
551static void
552point_mul(jacobian *P, const unsigned char *x, size_t xlen,
553	const curve_params *cc)
554{
555	/*
556	 * We do a simple double-and-add ladder with a 2-bit window
557	 * to make only one add every two doublings. We thus first
558	 * precompute 2P and 3P in some local buffers.
559	 *
560	 * We always perform two doublings and one addition; the
561	 * addition is with P, 2P and 3P and is done in a temporary
562	 * array.
563	 *
564	 * The addition code cannot handle cases where one of the
565	 * operands is infinity, which is the case at the start of the
566	 * ladder. We therefore need to maintain a flag that controls
567	 * this situation.
568	 */
569	uint32_t qz;
570	jacobian P2, P3, Q, T, U;
571
572	memcpy(&P2, P, sizeof P2);
573	point_double(&P2, cc);
574	memcpy(&P3, P, sizeof P3);
575	point_add(&P3, &P2, cc);
576
577	point_zero(&Q, cc);
578	qz = 1;
579	while (xlen -- > 0) {
580		int k;
581
582		for (k = 6; k >= 0; k -= 2) {
583			uint32_t bits;
584			uint32_t bnz;
585
586			point_double(&Q, cc);
587			point_double(&Q, cc);
588			memcpy(&T, P, sizeof T);
589			memcpy(&U, &Q, sizeof U);
590			bits = (*x >> k) & (uint32_t)3;
591			bnz = NEQ(bits, 0);
592			CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
593			CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
594			point_add(&U, &T, cc);
595			CCOPY(bnz & qz, &Q, &T, sizeof Q);
596			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);
597			qz &= ~bnz;
598		}
599		x ++;
600	}
601	memcpy(P, &Q, sizeof Q);
602}
603
604/*
605 * Decode point into Jacobian coordinates. This function does not support
606 * the point at infinity. If the point is invalid then this returns 0, but
607 * the coordinates are still set to properly formed field elements.
608 */
609static uint32_t
610point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc)
611{
612	/*
613	 * Points must use uncompressed format:
614	 * -- first byte is 0x04;
615	 * -- coordinates X and Y use unsigned big-endian, with the same
616	 *    length as the field modulus.
617	 *
618	 * We don't support hybrid format (uncompressed, but first byte
619	 * has value 0x06 or 0x07, depending on the least significant bit
620	 * of Y) because it is rather useless, and explicitly forbidden
621	 * by PKIX (RFC 5480, section 2.2).
622	 *
623	 * We don't support compressed format either, because it is not
624	 * much used in practice (there are or were patent-related
625	 * concerns about point compression, which explains the lack of
626	 * generalised support). Also, point compression support would
627	 * need a bit more code.
628	 */
629	const unsigned char *buf;
630	size_t plen, zlen;
631	uint32_t r;
632	jacobian Q;
633
634	buf = src;
635	point_zero(P, cc);
636	plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
637	if (len != 1 + (plen << 1)) {
638		return 0;
639	}
640	r = br_i15_decode_mod(P->c[0], buf + 1, plen, cc->p);
641	r &= br_i15_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p);
642
643	/*
644	 * Check first byte.
645	 */
646	r &= EQ(buf[0], 0x04);
647	/* obsolete
648	r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06)
649		& ~(uint32_t)(buf[0] ^ buf[plen << 1]));
650	*/
651
652	/*
653	 * Convert coordinates and check that the point is valid.
654	 */
655	zlen = ((cc->p[0] + 31) >> 4) * sizeof(uint16_t);
656	memcpy(Q.c[0], cc->R2, zlen);
657	memcpy(Q.c[1], cc->b, zlen);
658	set_one(Q.c[2], cc->p);
659	r &= ~run_code(P, &Q, cc, code_check);
660	return r;
661}
662
663/*
664 * Encode a point. This method assumes that the point is correct and is
665 * not the point at infinity. Encoded size is always 1+2*plen, where
666 * plen is the field modulus length, in bytes.
667 */
668static void
669point_encode(void *dst, const jacobian *P, const curve_params *cc)
670{
671	unsigned char *buf;
672	size_t plen;
673	jacobian Q, T;
674
675	buf = dst;
676	plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
677	buf[0] = 0x04;
678	memcpy(&Q, P, sizeof *P);
679	set_one(T.c[2], cc->p);
680	run_code(&Q, &T, cc, code_affine);
681	br_i15_encode(buf + 1, plen, Q.c[0]);
682	br_i15_encode(buf + 1 + plen, plen, Q.c[1]);
683}
684
685static const br_ec_curve_def *
686id_to_curve_def(int curve)
687{
688	switch (curve) {
689	case BR_EC_secp256r1:
690		return &br_secp256r1;
691	case BR_EC_secp384r1:
692		return &br_secp384r1;
693	case BR_EC_secp521r1:
694		return &br_secp521r1;
695	}
696	return NULL;
697}
698
699static const unsigned char *
700api_generator(int curve, size_t *len)
701{
702	const br_ec_curve_def *cd;
703
704	cd = id_to_curve_def(curve);
705	*len = cd->generator_len;
706	return cd->generator;
707}
708
709static const unsigned char *
710api_order(int curve, size_t *len)
711{
712	const br_ec_curve_def *cd;
713
714	cd = id_to_curve_def(curve);
715	*len = cd->order_len;
716	return cd->order;
717}
718
719static size_t
720api_xoff(int curve, size_t *len)
721{
722	api_generator(curve, len);
723	*len >>= 1;
724	return 1;
725}
726
727static uint32_t
728api_mul(unsigned char *G, size_t Glen,
729	const unsigned char *x, size_t xlen, int curve)
730{
731	uint32_t r;
732	const curve_params *cc;
733	jacobian P;
734
735	cc = id_to_curve(curve);
736	r = point_decode(&P, G, Glen, cc);
737	point_mul(&P, x, xlen, cc);
738	if (Glen == cc->point_len) {
739		point_encode(G, &P, cc);
740	}
741	return r;
742}
743
744static size_t
745api_mulgen(unsigned char *R,
746	const unsigned char *x, size_t xlen, int curve)
747{
748	const unsigned char *G;
749	size_t Glen;
750
751	G = api_generator(curve, &Glen);
752	memcpy(R, G, Glen);
753	api_mul(R, Glen, x, xlen, curve);
754	return Glen;
755}
756
757static uint32_t
758api_muladd(unsigned char *A, const unsigned char *B, size_t len,
759	const unsigned char *x, size_t xlen,
760	const unsigned char *y, size_t ylen, int curve)
761{
762	uint32_t r, t, z;
763	const curve_params *cc;
764	jacobian P, Q;
765
766	/*
767	 * TODO: see about merging the two ladders. Right now, we do
768	 * two independent point multiplications, which is a bit
769	 * wasteful of CPU resources (but yields short code).
770	 */
771
772	cc = id_to_curve(curve);
773	r = point_decode(&P, A, len, cc);
774	if (B == NULL) {
775		size_t Glen;
776
777		B = api_generator(curve, &Glen);
778	}
779	r &= point_decode(&Q, B, len, cc);
780	point_mul(&P, x, xlen, cc);
781	point_mul(&Q, y, ylen, cc);
782
783	/*
784	 * We want to compute P+Q. Since the base points A and B are distinct
785	 * from infinity, and the multipliers are non-zero and lower than the
786	 * curve order, then we know that P and Q are non-infinity. This
787	 * leaves two special situations to test for:
788	 * -- If P = Q then we must use point_double().
789	 * -- If P+Q = 0 then we must report an error.
790	 */
791	t = point_add(&P, &Q, cc);
792	point_double(&Q, cc);
793	z = br_i15_iszero(P.c[2]);
794
795	/*
796	 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
797	 * have the following:
798	 *
799	 *   z = 0, t = 0   return P (normal addition)
800	 *   z = 0, t = 1   return P (normal addition)
801	 *   z = 1, t = 0   return Q (a 'double' case)
802	 *   z = 1, t = 1   report an error (P+Q = 0)
803	 */
804	CCOPY(z & ~t, &P, &Q, sizeof Q);
805	point_encode(A, &P, cc);
806	r &= ~(z & t);
807
808	return r;
809}
810
811/* see bearssl_ec.h */
812const br_ec_impl br_ec_prime_i15 = {
813	(uint32_t)0x03800000,
814	&api_generator,
815	&api_order,
816	&api_xoff,
817	&api_mul,
818	&api_mulgen,
819	&api_muladd
820};
821