1/*
2 * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#include "inner.h"
26
27/*
28 * Parameters for supported curves (field modulus, and 'b' equation
29 * parameter; both values use the 'i31' format, and 'b' is in Montgomery
30 * representation).
31 */
32
33static const uint32_t P256_P[] = {
34	0x00000108,
35	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x00000007,
36	0x00000000, 0x00000000, 0x00000040, 0x7FFFFF80,
37	0x000000FF
38};
39
40static const uint32_t P256_R2[] = {
41	0x00000108,
42	0x00014000, 0x00018000, 0x00000000, 0x7FF40000,
43	0x7FEFFFFF, 0x7FF7FFFF, 0x7FAFFFFF, 0x005FFFFF,
44	0x00000000
45};
46
47static const uint32_t P256_B[] = {
48	0x00000108,
49	0x6FEE1803, 0x6229C4BD, 0x21B139BE, 0x327150AA,
50	0x3567802E, 0x3F7212ED, 0x012E4355, 0x782DD38D,
51	0x0000000E
52};
53
54static const uint32_t P384_P[] = {
55	0x0000018C,
56	0x7FFFFFFF, 0x00000001, 0x00000000, 0x7FFFFFF8,
57	0x7FFFFFEF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
58	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
59	0x00000FFF
60};
61
62static const uint32_t P384_R2[] = {
63	0x0000018C,
64	0x00000000, 0x00000080, 0x7FFFFE00, 0x000001FF,
65	0x00000800, 0x00000000, 0x7FFFE000, 0x00001FFF,
66	0x00008000, 0x00008000, 0x00000000, 0x00000000,
67	0x00000000
68};
69
70static const uint32_t P384_B[] = {
71	0x0000018C,
72	0x6E666840, 0x070D0392, 0x5D810231, 0x7651D50C,
73	0x17E218D6, 0x1B192002, 0x44EFE441, 0x3A524E2B,
74	0x2719BA5F, 0x41F02209, 0x36C5643E, 0x5813EFFE,
75	0x000008A5
76};
77
78static const uint32_t P521_P[] = {
79	0x00000219,
80	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
81	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
82	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
83	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
84	0x01FFFFFF
85};
86
87static const uint32_t P521_R2[] = {
88	0x00000219,
89	0x00001000, 0x00000000, 0x00000000, 0x00000000,
90	0x00000000, 0x00000000, 0x00000000, 0x00000000,
91	0x00000000, 0x00000000, 0x00000000, 0x00000000,
92	0x00000000, 0x00000000, 0x00000000, 0x00000000,
93	0x00000000
94};
95
96static const uint32_t P521_B[] = {
97	0x00000219,
98	0x540FC00A, 0x228FEA35, 0x2C34F1EF, 0x67BF107A,
99	0x46FC1CD5, 0x1605E9DD, 0x6937B165, 0x272A3D8F,
100	0x42785586, 0x44C8C778, 0x15F3B8B4, 0x64B73366,
101	0x03BA8B69, 0x0D05B42A, 0x21F929A2, 0x2C31C393,
102	0x00654FAE
103};
104
105typedef struct {
106	const uint32_t *p;
107	const uint32_t *b;
108	const uint32_t *R2;
109	uint32_t p0i;
110} curve_params;
111
112static inline const curve_params *
113id_to_curve(int curve)
114{
115	static const curve_params pp[] = {
116		{ P256_P, P256_B, P256_R2, 0x00000001 },
117		{ P384_P, P384_B, P384_R2, 0x00000001 },
118		{ P521_P, P521_B, P521_R2, 0x00000001 }
119	};
120
121	return &pp[curve - BR_EC_secp256r1];
122}
123
124#define I31_LEN   ((BR_MAX_EC_SIZE + 61) / 31)
125
126/*
127 * Type for a point in Jacobian coordinates:
128 * -- three values, x, y and z, in Montgomery representation
129 * -- affine coordinates are X = x / z^2 and Y = y / z^3
130 * -- for the point at infinity, z = 0
131 */
132typedef struct {
133	uint32_t c[3][I31_LEN];
134} jacobian;
135
136/*
137 * We use a custom interpreter that uses a dozen registers, and
138 * only six operations:
139 *    MSET(d, a)       copy a into d
140 *    MADD(d, a)       d = d+a (modular)
141 *    MSUB(d, a)       d = d-a (modular)
142 *    MMUL(d, a, b)    d = a*b (Montgomery multiplication)
143 *    MINV(d, a, b)    invert d modulo p; a and b are used as scratch registers
144 *    MTZ(d)           clear return value if d = 0
145 * Destination of MMUL (d) must be distinct from operands (a and b).
146 * There is no such constraint for MSUB and MADD.
147 *
148 * Registers include the operand coordinates, and temporaries.
149 */
150#define MSET(d, a)      (0x0000 + ((d) << 8) + ((a) << 4))
151#define MADD(d, a)      (0x1000 + ((d) << 8) + ((a) << 4))
152#define MSUB(d, a)      (0x2000 + ((d) << 8) + ((a) << 4))
153#define MMUL(d, a, b)   (0x3000 + ((d) << 8) + ((a) << 4) + (b))
154#define MINV(d, a, b)   (0x4000 + ((d) << 8) + ((a) << 4) + (b))
155#define MTZ(d)          (0x5000 + ((d) << 8))
156#define ENDCODE         0
157
158/*
159 * Registers for the input operands.
160 */
161#define P1x    0
162#define P1y    1
163#define P1z    2
164#define P2x    3
165#define P2y    4
166#define P2z    5
167
168/*
169 * Alternate names for the first input operand.
170 */
171#define Px     0
172#define Py     1
173#define Pz     2
174
175/*
176 * Temporaries.
177 */
178#define t1     6
179#define t2     7
180#define t3     8
181#define t4     9
182#define t5    10
183#define t6    11
184#define t7    12
185
186/*
187 * Extra scratch registers available when there is no second operand (e.g.
188 * for "double" and "affine").
189 */
190#define t8     3
191#define t9     4
192#define t10    5
193
194/*
195 * Doubling formulas are:
196 *
197 *   s = 4*x*y^2
198 *   m = 3*(x + z^2)*(x - z^2)
199 *   x' = m^2 - 2*s
200 *   y' = m*(s - x') - 8*y^4
201 *   z' = 2*y*z
202 *
203 * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
204 * should. This case should not happen anyway, because our curves have
205 * prime order, and thus do not contain any point of order 2.
206 *
207 * If P is infinity (z = 0), then again the formulas yield infinity,
208 * which is correct. Thus, this code works for all points.
209 *
210 * Cost: 8 multiplications
211 */
212static const uint16_t code_double[] = {
213	/*
214	 * Compute z^2 (in t1).
215	 */
216	MMUL(t1, Pz, Pz),
217
218	/*
219	 * Compute x-z^2 (in t2) and then x+z^2 (in t1).
220	 */
221	MSET(t2, Px),
222	MSUB(t2, t1),
223	MADD(t1, Px),
224
225	/*
226	 * Compute m = 3*(x+z^2)*(x-z^2) (in t1).
227	 */
228	MMUL(t3, t1, t2),
229	MSET(t1, t3),
230	MADD(t1, t3),
231	MADD(t1, t3),
232
233	/*
234	 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
235	 */
236	MMUL(t3, Py, Py),
237	MADD(t3, t3),
238	MMUL(t2, Px, t3),
239	MADD(t2, t2),
240
241	/*
242	 * Compute x' = m^2 - 2*s.
243	 */
244	MMUL(Px, t1, t1),
245	MSUB(Px, t2),
246	MSUB(Px, t2),
247
248	/*
249	 * Compute z' = 2*y*z.
250	 */
251	MMUL(t4, Py, Pz),
252	MSET(Pz, t4),
253	MADD(Pz, t4),
254
255	/*
256	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
257	 * 2*y^2 in t3.
258	 */
259	MSUB(t2, Px),
260	MMUL(Py, t1, t2),
261	MMUL(t4, t3, t3),
262	MSUB(Py, t4),
263	MSUB(Py, t4),
264
265	ENDCODE
266};
267
268/*
269 * Addtions formulas are:
270 *
271 *   u1 = x1 * z2^2
272 *   u2 = x2 * z1^2
273 *   s1 = y1 * z2^3
274 *   s2 = y2 * z1^3
275 *   h = u2 - u1
276 *   r = s2 - s1
277 *   x3 = r^2 - h^3 - 2 * u1 * h^2
278 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
279 *   z3 = h * z1 * z2
280 *
281 * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
282 * z3 == 0, so the result is correct.
283 * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
284 * not correct.
285 * h == 0 only if u1 == u2; this happens in two cases:
286 * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
287 * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity)
288 *
289 * Thus, the following situations are not handled correctly:
290 * -- P1 = 0 and P2 != 0
291 * -- P1 != 0 and P2 = 0
292 * -- P1 = P2
293 * All other cases are properly computed. However, even in "incorrect"
294 * situations, the three coordinates still are properly formed field
295 * elements.
296 *
297 * The returned flag is cleared if r == 0. This happens in the following
298 * cases:
299 * -- Both points are on the same horizontal line (same Y coordinate).
300 * -- Both points are infinity.
301 * -- One point is infinity and the other is on line Y = 0.
302 * The third case cannot happen with our curves (there is no valid point
303 * on line Y = 0 since that would be a point of order 2). If the two
304 * source points are non-infinity, then remains only the case where the
305 * two points are on the same horizontal line.
306 *
307 * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
308 * P2 != 0:
309 * -- If the returned value is not the point at infinity, then it was properly
310 * computed.
311 * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
312 * is indeed the point at infinity.
313 * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
314 * use the 'double' code.
315 *
316 * Cost: 16 multiplications
317 */
318static const uint16_t code_add[] = {
319	/*
320	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
321	 */
322	MMUL(t3, P2z, P2z),
323	MMUL(t1, P1x, t3),
324	MMUL(t4, P2z, t3),
325	MMUL(t3, P1y, t4),
326
327	/*
328	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
329	 */
330	MMUL(t4, P1z, P1z),
331	MMUL(t2, P2x, t4),
332	MMUL(t5, P1z, t4),
333	MMUL(t4, P2y, t5),
334
335	/*
336	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
337	 */
338	MSUB(t2, t1),
339	MSUB(t4, t3),
340
341	/*
342	 * Report cases where r = 0 through the returned flag.
343	 */
344	MTZ(t4),
345
346	/*
347	 * Compute u1*h^2 (in t6) and h^3 (in t5).
348	 */
349	MMUL(t7, t2, t2),
350	MMUL(t6, t1, t7),
351	MMUL(t5, t7, t2),
352
353	/*
354	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
355	 * t1 and t7 can be used as scratch registers.
356	 */
357	MMUL(P1x, t4, t4),
358	MSUB(P1x, t5),
359	MSUB(P1x, t6),
360	MSUB(P1x, t6),
361
362	/*
363	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
364	 */
365	MSUB(t6, P1x),
366	MMUL(P1y, t4, t6),
367	MMUL(t1, t5, t3),
368	MSUB(P1y, t1),
369
370	/*
371	 * Compute z3 = h*z1*z2.
372	 */
373	MMUL(t1, P1z, P2z),
374	MMUL(P1z, t1, t2),
375
376	ENDCODE
377};
378
379/*
380 * Check that the point is on the curve. This code snippet assumes the
381 * following conventions:
382 * -- Coordinates x and y have been freshly decoded in P1 (but not
383 * converted to Montgomery coordinates yet).
384 * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
385 */
386static const uint16_t code_check[] = {
387
388	/* Convert x and y to Montgomery representation. */
389	MMUL(t1, P1x, P2x),
390	MMUL(t2, P1y, P2x),
391	MSET(P1x, t1),
392	MSET(P1y, t2),
393
394	/* Compute x^3 in t1. */
395	MMUL(t2, P1x, P1x),
396	MMUL(t1, P1x, t2),
397
398	/* Subtract 3*x from t1. */
399	MSUB(t1, P1x),
400	MSUB(t1, P1x),
401	MSUB(t1, P1x),
402
403	/* Add b. */
404	MADD(t1, P2y),
405
406	/* Compute y^2 in t2. */
407	MMUL(t2, P1y, P1y),
408
409	/* Compare y^2 with x^3 - 3*x + b; they must match. */
410	MSUB(t1, t2),
411	MTZ(t1),
412
413	/* Set z to 1 (in Montgomery representation). */
414	MMUL(P1z, P2x, P2z),
415
416	ENDCODE
417};
418
419/*
420 * Conversion back to affine coordinates. This code snippet assumes that
421 * the z coordinate of P2 is set to 1 (not in Montgomery representation).
422 */
423static const uint16_t code_affine[] = {
424
425	/* Save z*R in t1. */
426	MSET(t1, P1z),
427
428	/* Compute z^3 in t2. */
429	MMUL(t2, P1z, P1z),
430	MMUL(t3, P1z, t2),
431	MMUL(t2, t3, P2z),
432
433	/* Invert to (1/z^3) in t2. */
434	MINV(t2, t3, t4),
435
436	/* Compute y. */
437	MSET(t3, P1y),
438	MMUL(P1y, t2, t3),
439
440	/* Compute (1/z^2) in t3. */
441	MMUL(t3, t2, t1),
442
443	/* Compute x. */
444	MSET(t2, P1x),
445	MMUL(P1x, t2, t3),
446
447	ENDCODE
448};
449
450static uint32_t
451run_code(jacobian *P1, const jacobian *P2,
452	const curve_params *cc, const uint16_t *code)
453{
454	uint32_t r;
455	uint32_t t[13][I31_LEN];
456	size_t u;
457
458	r = 1;
459
460	/*
461	 * Copy the two operands in the dedicated registers.
462	 */
463	memcpy(t[P1x], P1->c, 3 * I31_LEN * sizeof(uint32_t));
464	memcpy(t[P2x], P2->c, 3 * I31_LEN * sizeof(uint32_t));
465
466	/*
467	 * Run formulas.
468	 */
469	for (u = 0;; u ++) {
470		unsigned op, d, a, b;
471
472		op = code[u];
473		if (op == 0) {
474			break;
475		}
476		d = (op >> 8) & 0x0F;
477		a = (op >> 4) & 0x0F;
478		b = op & 0x0F;
479		op >>= 12;
480		switch (op) {
481			uint32_t ctl;
482			size_t plen;
483			unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3];
484
485		case 0:
486			memcpy(t[d], t[a], I31_LEN * sizeof(uint32_t));
487			break;
488		case 1:
489			ctl = br_i31_add(t[d], t[a], 1);
490			ctl |= NOT(br_i31_sub(t[d], cc->p, 0));
491			br_i31_sub(t[d], cc->p, ctl);
492			break;
493		case 2:
494			br_i31_add(t[d], cc->p, br_i31_sub(t[d], t[a], 1));
495			break;
496		case 3:
497			br_i31_montymul(t[d], t[a], t[b], cc->p, cc->p0i);
498			break;
499		case 4:
500			plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3;
501			br_i31_encode(tp, plen, cc->p);
502			tp[plen - 1] -= 2;
503			br_i31_modpow(t[d], tp, plen,
504				cc->p, cc->p0i, t[a], t[b]);
505			break;
506		default:
507			r &= ~br_i31_iszero(t[d]);
508			break;
509		}
510	}
511
512	/*
513	 * Copy back result.
514	 */
515	memcpy(P1->c, t[P1x], 3 * I31_LEN * sizeof(uint32_t));
516	return r;
517}
518
519static void
520set_one(uint32_t *x, const uint32_t *p)
521{
522	size_t plen;
523
524	plen = (p[0] + 63) >> 5;
525	memset(x, 0, plen * sizeof *x);
526	x[0] = p[0];
527	x[1] = 0x00000001;
528}
529
530static void
531point_zero(jacobian *P, const curve_params *cc)
532{
533	memset(P, 0, sizeof *P);
534	P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0];
535}
536
537static inline void
538point_double(jacobian *P, const curve_params *cc)
539{
540	run_code(P, P, cc, code_double);
541}
542
543static inline uint32_t
544point_add(jacobian *P1, const jacobian *P2, const curve_params *cc)
545{
546	return run_code(P1, P2, cc, code_add);
547}
548
549static void
550point_mul(jacobian *P, const unsigned char *x, size_t xlen,
551	const curve_params *cc)
552{
553	/*
554	 * We do a simple double-and-add ladder with a 2-bit window
555	 * to make only one add every two doublings. We thus first
556	 * precompute 2P and 3P in some local buffers.
557	 *
558	 * We always perform two doublings and one addition; the
559	 * addition is with P, 2P and 3P and is done in a temporary
560	 * array.
561	 *
562	 * The addition code cannot handle cases where one of the
563	 * operands is infinity, which is the case at the start of the
564	 * ladder. We therefore need to maintain a flag that controls
565	 * this situation.
566	 */
567	uint32_t qz;
568	jacobian P2, P3, Q, T, U;
569
570	memcpy(&P2, P, sizeof P2);
571	point_double(&P2, cc);
572	memcpy(&P3, P, sizeof P3);
573	point_add(&P3, &P2, cc);
574
575	point_zero(&Q, cc);
576	qz = 1;
577	while (xlen -- > 0) {
578		int k;
579
580		for (k = 6; k >= 0; k -= 2) {
581			uint32_t bits;
582			uint32_t bnz;
583
584			point_double(&Q, cc);
585			point_double(&Q, cc);
586			memcpy(&T, P, sizeof T);
587			memcpy(&U, &Q, sizeof U);
588			bits = (*x >> k) & (uint32_t)3;
589			bnz = NEQ(bits, 0);
590			CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
591			CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
592			point_add(&U, &T, cc);
593			CCOPY(bnz & qz, &Q, &T, sizeof Q);
594			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);
595			qz &= ~bnz;
596		}
597		x ++;
598	}
599	memcpy(P, &Q, sizeof Q);
600}
601
602/*
603 * Decode point into Jacobian coordinates. This function does not support
604 * the point at infinity. If the point is invalid then this returns 0, but
605 * the coordinates are still set to properly formed field elements.
606 */
607static uint32_t
608point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc)
609{
610	/*
611	 * Points must use uncompressed format:
612	 * -- first byte is 0x04;
613	 * -- coordinates X and Y use unsigned big-endian, with the same
614	 *    length as the field modulus.
615	 *
616	 * We don't support hybrid format (uncompressed, but first byte
617	 * has value 0x06 or 0x07, depending on the least significant bit
618	 * of Y) because it is rather useless, and explicitly forbidden
619	 * by PKIX (RFC 5480, section 2.2).
620	 *
621	 * We don't support compressed format either, because it is not
622	 * much used in practice (there are or were patent-related
623	 * concerns about point compression, which explains the lack of
624	 * generalised support). Also, point compression support would
625	 * need a bit more code.
626	 */
627	const unsigned char *buf;
628	size_t plen, zlen;
629	uint32_t r;
630	jacobian Q;
631
632	buf = src;
633	point_zero(P, cc);
634	plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3;
635	if (len != 1 + (plen << 1)) {
636		return 0;
637	}
638	r = br_i31_decode_mod(P->c[0], buf + 1, plen, cc->p);
639	r &= br_i31_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p);
640
641	/*
642	 * Check first byte.
643	 */
644	r &= EQ(buf[0], 0x04);
645	/* obsolete
646	r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06)
647		& ~(uint32_t)(buf[0] ^ buf[plen << 1]));
648	*/
649
650	/*
651	 * Convert coordinates and check that the point is valid.
652	 */
653	zlen = ((cc->p[0] + 63) >> 5) * sizeof(uint32_t);
654	memcpy(Q.c[0], cc->R2, zlen);
655	memcpy(Q.c[1], cc->b, zlen);
656	set_one(Q.c[2], cc->p);
657	r &= ~run_code(P, &Q, cc, code_check);
658	return r;
659}
660
661/*
662 * Encode a point. This method assumes that the point is correct and is
663 * not the point at infinity. Encoded size is always 1+2*plen, where
664 * plen is the field modulus length, in bytes.
665 */
666static void
667point_encode(void *dst, const jacobian *P, const curve_params *cc)
668{
669	unsigned char *buf;
670	uint32_t xbl;
671	size_t plen;
672	jacobian Q, T;
673
674	buf = dst;
675	xbl = cc->p[0];
676	xbl -= (xbl >> 5);
677	plen = (xbl + 7) >> 3;
678	buf[0] = 0x04;
679	memcpy(&Q, P, sizeof *P);
680	set_one(T.c[2], cc->p);
681	run_code(&Q, &T, cc, code_affine);
682	br_i31_encode(buf + 1, plen, Q.c[0]);
683	br_i31_encode(buf + 1 + plen, plen, Q.c[1]);
684}
685
686static const br_ec_curve_def *
687id_to_curve_def(int curve)
688{
689	switch (curve) {
690	case BR_EC_secp256r1:
691		return &br_secp256r1;
692	case BR_EC_secp384r1:
693		return &br_secp384r1;
694	case BR_EC_secp521r1:
695		return &br_secp521r1;
696	}
697	return NULL;
698}
699
700static const unsigned char *
701api_generator(int curve, size_t *len)
702{
703	const br_ec_curve_def *cd;
704
705	cd = id_to_curve_def(curve);
706	*len = cd->generator_len;
707	return cd->generator;
708}
709
710static const unsigned char *
711api_order(int curve, size_t *len)
712{
713	const br_ec_curve_def *cd;
714
715	cd = id_to_curve_def(curve);
716	*len = cd->order_len;
717	return cd->order;
718}
719
720static size_t
721api_xoff(int curve, size_t *len)
722{
723	api_generator(curve, len);
724	*len >>= 1;
725	return 1;
726}
727
728static uint32_t
729api_mul(unsigned char *G, size_t Glen,
730	const unsigned char *x, size_t xlen, int curve)
731{
732	uint32_t r;
733	const curve_params *cc;
734	jacobian P;
735
736	cc = id_to_curve(curve);
737	r = point_decode(&P, G, Glen, cc);
738	point_mul(&P, x, xlen, cc);
739	point_encode(G, &P, cc);
740	return r;
741}
742
743static size_t
744api_mulgen(unsigned char *R,
745	const unsigned char *x, size_t xlen, int curve)
746{
747	const unsigned char *G;
748	size_t Glen;
749
750	G = api_generator(curve, &Glen);
751	memcpy(R, G, Glen);
752	api_mul(R, Glen, x, xlen, curve);
753	return Glen;
754}
755
756static uint32_t
757api_muladd(unsigned char *A, const unsigned char *B, size_t len,
758	const unsigned char *x, size_t xlen,
759	const unsigned char *y, size_t ylen, int curve)
760{
761	uint32_t r, t, z;
762	const curve_params *cc;
763	jacobian P, Q;
764
765	/*
766	 * TODO: see about merging the two ladders. Right now, we do
767	 * two independent point multiplications, which is a bit
768	 * wasteful of CPU resources (but yields short code).
769	 */
770
771	cc = id_to_curve(curve);
772	r = point_decode(&P, A, len, cc);
773	if (B == NULL) {
774		size_t Glen;
775
776		B = api_generator(curve, &Glen);
777	}
778	r &= point_decode(&Q, B, len, cc);
779	point_mul(&P, x, xlen, cc);
780	point_mul(&Q, y, ylen, cc);
781
782	/*
783	 * We want to compute P+Q. Since the base points A and B are distinct
784	 * from infinity, and the multipliers are non-zero and lower than the
785	 * curve order, then we know that P and Q are non-infinity. This
786	 * leaves two special situations to test for:
787	 * -- If P = Q then we must use point_double().
788	 * -- If P+Q = 0 then we must report an error.
789	 */
790	t = point_add(&P, &Q, cc);
791	point_double(&Q, cc);
792	z = br_i31_iszero(P.c[2]);
793
794	/*
795	 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
796	 * have the following:
797	 *
798	 *   z = 0, t = 0   return P (normal addition)
799	 *   z = 0, t = 1   return P (normal addition)
800	 *   z = 1, t = 0   return Q (a 'double' case)
801	 *   z = 1, t = 1   report an error (P+Q = 0)
802	 */
803	CCOPY(z & ~t, &P, &Q, sizeof Q);
804	point_encode(A, &P, cc);
805	r &= ~(z & t);
806
807	return r;
808}
809
810/* see bearssl_ec.h */
811const br_ec_impl br_ec_prime_i31 = {
812	(uint32_t)0x03800000,
813	&api_generator,
814	&api_order,
815	&api_xoff,
816	&api_mul,
817	&api_mulgen,
818	&api_muladd
819};
820