/*
 * Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/* *********************************************************************
 *
 * The Original Code is the elliptic curve math library for prime field curves.
 *
 * The Initial Developer of the Original Code is
 * Sun Microsystems, Inc.
 * Portions created by the Initial Developer are Copyright (C) 2003
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories
 *
 *********************************************************************** */

#include "ecp.h"
#include "mpi.h"
#include "mplogic.h"
#include "mpi-priv.h"
#ifndef _KERNEL
#include <stdlib.h>
#endif

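/*
 * ECP192_DIGITS below is the number of mp_digits needed to hold a 192-bit
 * field element: 6 when mp_digit is 32 bits wide (ECL_THIRTY_TWO_BIT), 3
 * when it is 64 bits wide.  A full product of two field elements fits in
 * 2*ECP192_DIGITS digits, which is the bound used by the fast reduction.
 */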
#define ECP192_DIGITS ECL_CURVE_DIGITS(192)

/* Fast modular reduction for p192 = 2^192 - 2^64 - 1.  a can be r.  Uses
 * algorithm 7 from Brown, Hankerson, Lopez, Menezes, "Software
 * Implementation of the NIST Elliptic Curves over Prime Fields". */
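/*
 * Why the folding below works (64-bit words shown; the 32-bit build just
 * splits each word in half): write
 *   a = a5*2^320 + a4*2^256 + a3*2^192 + a2*2^128 + a1*2^64 + a0.
 * Since p192 = 2^192 - 2^64 - 1, we have 2^192 == 2^64 + 1 (mod p192), so
 *   a3*2^192 == a3*(2^64 + 1)                            = (0,  a3, a3)
 *   a4*2^256 == a4*2^64*(2^64 + 1)                       = (a4, a4,  0)
 *   a5*2^320 == a5*2^128*(2^64 + 1) == a5*(2^128 + 2^64 + 1) = (a5, a5, a5)
 * written as (high, middle, low) words mod p192.  Hence
 *   a == (a2,a1,a0) + (a5,a5,a5) + (a4,a4,0) + (0,a3,a3)  (mod p192),
 * which is exactly the sum formed in the body of ec_GFp_nistp192_mod.
 */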
mp_err
ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
{
        mp_err res = MP_OKAY;
        mp_size a_used = MP_USED(a);
        mp_digit r3;
#ifndef MPI_AMD64_ADD
        mp_digit carry;
#endif
#ifdef ECL_THIRTY_TWO_BIT
        mp_digit a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0;
        mp_digit r0a, r0b, r1a, r1b, r2a, r2b;
#else
        mp_digit a5 = 0, a4 = 0, a3 = 0;
        mp_digit r0, r1, r2;
#endif

        /* no reduction is needed if a has fewer digits than the field */
        if (a_used < ECP192_DIGITS) {
                if (a == r) {
                        return MP_OKAY;
                }
                return mp_copy(a, r);
        }

        /* for values wider than twice the field size, use generic
         * reduction */
        if (a_used > ECP192_DIGITS*2) {
                MP_CHECKOK(mp_mod(a, &meth->irr, r));
        } else {
                /* copy out upper words of a */

#ifdef ECL_THIRTY_TWO_BIT

                /* in all the math below, nXb is the most significant half
                 * and nXa the least significant half of the 64-bit word nX */
                switch (a_used) {
                case 12:
                        a5b = MP_DIGIT(a, 11);
                case 11:
                        a5a = MP_DIGIT(a, 10);
                case 10:
                        a4b = MP_DIGIT(a, 9);
                case 9:
                        a4a = MP_DIGIT(a, 8);
                case 8:
                        a3b = MP_DIGIT(a, 7);
                case 7:
                        a3a = MP_DIGIT(a, 6);
                }

                r2b = MP_DIGIT(a, 5);
                r2a = MP_DIGIT(a, 4);
                r1b = MP_DIGIT(a, 3);
                r1a = MP_DIGIT(a, 2);
                r0b = MP_DIGIT(a, 1);
                r0a = MP_DIGIT(a, 0);

                /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
                MP_ADD_CARRY(r0a, a3a, r0a, 0,    carry);
                MP_ADD_CARRY(r0b, a3b, r0b, carry, carry);
                MP_ADD_CARRY(r1a, a3a, r1a, carry, carry);
                MP_ADD_CARRY(r1b, a3b, r1b, carry, carry);
                MP_ADD_CARRY(r2a, a4a, r2a, carry, carry);
                MP_ADD_CARRY(r2b, a4b, r2b, carry, carry);
                r3 = carry; carry = 0;
                MP_ADD_CARRY(r0a, a5a, r0a, 0,     carry);
                MP_ADD_CARRY(r0b, a5b, r0b, carry, carry);
                MP_ADD_CARRY(r1a, a5a, r1a, carry, carry);
                MP_ADD_CARRY(r1b, a5b, r1b, carry, carry);
                MP_ADD_CARRY(r2a, a5a, r2a, carry, carry);
                MP_ADD_CARRY(r2b, a5b, r2b, carry, carry);
                r3 += carry;
                MP_ADD_CARRY(r1a, a4a, r1a, 0,     carry);
                MP_ADD_CARRY(r1b, a4b, r1b, carry, carry);
                MP_ADD_CARRY(r2a,   0, r2a, carry, carry);
                MP_ADD_CARRY(r2b,   0, r2b, carry, carry);
                r3 += carry;

                /* fold the carry back in: r3 * 2^192 == r3 * 2^64 + r3
                 * (mod p192), so add r3 into the low and the second
                 * 64-bit words */
                while (r3) {
                        MP_ADD_CARRY(r0a, r3, r0a, 0,     carry);
                        MP_ADD_CARRY(r0b,  0, r0b, carry, carry);
                        MP_ADD_CARRY(r1a, r3, r1a, carry, carry);
                        MP_ADD_CARRY(r1b,  0, r1b, carry, carry);
                        MP_ADD_CARRY(r2a,  0, r2a, carry, carry);
                        MP_ADD_CARRY(r2b,  0, r2b, carry, carry);
                        r3 = carry;
                }

                /* check for final reduction */
                /*
                 * Our field prime is 0xffffffffffffffff, 0xfffffffffffffffe,
                 * 0xffffffffffffffff (as r2, r1, r0; each rN here means the
                 * 64-bit value rNb:rNa).  That means we can only be over and
                 * need one more reduction
                 *  if r2 == 0xffffffffffffffff (same as r2+1 == 0)
                 *     and
                 *     r1 == 0xffffffffffffffff   or
                 *     r1 == 0xfffffffffffffffe and r0 == 0xffffffffffffffff
                 * In all cases, we subtract the field prime, which is the
                 * same as adding its 2's complement 2^192 - p192 = 2^64 + 1,
                 * i.e. (1, 1, 0) as (r0, r1, r2).
                 */
                if (((r2b == 0xffffffff) && (r2a == 0xffffffff)
                        && (r1b == 0xffffffff)) &&
                           ((r1a == 0xffffffff) ||
                            ((r1a == 0xfffffffe) && (r0a == 0xffffffff) &&
                                        (r0b == 0xffffffff)))) {
                        /* do a quick subtract */
                        MP_ADD_CARRY(r0a, 1, r0a, 0, carry);
                        r0b += carry;
                        r1a = r1b = r2a = r2b = 0;
                }

                /* set the lower words of r */
                if (a != r) {
                        MP_CHECKOK(s_mp_pad(r, 6));
                }
                MP_DIGIT(r, 5) = r2b;
                MP_DIGIT(r, 4) = r2a;
                MP_DIGIT(r, 3) = r1b;
                MP_DIGIT(r, 2) = r1a;
                MP_DIGIT(r, 1) = r0b;
                MP_DIGIT(r, 0) = r0a;
                MP_USED(r) = 6;
#else
                switch (a_used) {
                case 6:
                        a5 = MP_DIGIT(a, 5);
                case 5:
                        a4 = MP_DIGIT(a, 4);
                case 4:
                        a3 = MP_DIGIT(a, 3);
                }

                r2 = MP_DIGIT(a, 2);
                r1 = MP_DIGIT(a, 1);
                r0 = MP_DIGIT(a, 0);

                /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
#ifndef MPI_AMD64_ADD
                MP_ADD_CARRY_ZERO(r0, a3, r0, carry);
                MP_ADD_CARRY(r1, a3, r1, carry, carry);
                MP_ADD_CARRY(r2, a4, r2, carry, carry);
                r3 = carry;
                MP_ADD_CARRY_ZERO(r0, a5, r0, carry);
                MP_ADD_CARRY(r1, a5, r1, carry, carry);
                MP_ADD_CARRY(r2, a5, r2, carry, carry);
                r3 += carry;
                MP_ADD_CARRY_ZERO(r1, a4, r1, carry);
                MP_ADD_CARRY(r2,  0, r2, carry, carry);
                r3 += carry;

#else
                r2 = MP_DIGIT(a, 2);
                r1 = MP_DIGIT(a, 1);
                r0 = MP_DIGIT(a, 0);

                /* same addition as above, with the carry out in r3 */
                __asm__ (
                "xorq   %3,%3           \n\t"
                "addq   %4,%0           \n\t"
                "adcq   %4,%1           \n\t"
                "adcq   %5,%2           \n\t"
                "adcq   $0,%3           \n\t"
                "addq   %6,%0           \n\t"
                "adcq   %6,%1           \n\t"
                "adcq   %6,%2           \n\t"
                "adcq   $0,%3           \n\t"
                "addq   %5,%1           \n\t"
                "adcq   $0,%2           \n\t"
                "adcq   $0,%3           \n\t"
                : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3),
                  "=r"(a4), "=r"(a5)
                : "0" (r0), "1" (r1), "2" (r2), "3" (r3),
                  "4" (a3), "5" (a4), "6"(a5)
                : "%cc" );
#endif

                /* fold the carry back in: r3 * 2^192 == r3 * 2^64 + r3
                 * (mod p192) */
                while (r3) {
#ifndef MPI_AMD64_ADD
                        MP_ADD_CARRY_ZERO(r0, r3, r0, carry);
                        MP_ADD_CARRY(r1, r3, r1, carry, carry);
                        MP_ADD_CARRY(r2,  0, r2, carry, carry);
                        r3 = carry;
#else
                        a3 = r3;
                        __asm__ (
                        "xorq   %3,%3           \n\t"
                        "addq   %4,%0           \n\t"
                        "adcq   %4,%1           \n\t"
                        "adcq   $0,%2           \n\t"
                        "adcq   $0,%3           \n\t"
                        : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3)
                        : "0" (r0), "1" (r1), "2" (r2), "3" (r3), "4"(a3)
                        : "%cc" );
#endif
                }

                /* check for final reduction */
                /*
                 * Our field prime is 0xffffffffffffffff, 0xfffffffffffffffe,
                 * 0xffffffffffffffff (as r2, r1, r0).  That means we can
                 * only be over and need one more reduction
                 *  if r2 == 0xffffffffffffffff (same as r2+1 == 0)
                 *     and
                 *     r1 == 0xffffffffffffffff   or
                 *     r1 == 0xfffffffffffffffe and r0 == 0xffffffffffffffff
                 * In all cases, we subtract the field prime, which is the
                 * same as adding its 2's complement 2^192 - p192 = 2^64 + 1,
                 * i.e. (1, 1, 0) as (r0, r1, r2).
                 */
                if (r3 || ((r2 == MP_DIGIT_MAX) &&
                      ((r1 == MP_DIGIT_MAX) ||
                        ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
                        /* do a quick subtract */
                        r0++;
                        r1 = r2 = 0;
                }
                /* set the lower words of r */
                if (a != r) {
                        MP_CHECKOK(s_mp_pad(r, 3));
                }
                MP_DIGIT(r, 2) = r2;
                MP_DIGIT(r, 1) = r1;
                MP_DIGIT(r, 0) = r0;
                MP_USED(r) = 3;
#endif
        }

  CLEANUP:
        return res;
}
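
/*
 * Illustrative sketch only (excluded from the build): one way to sanity-
 * check the fast reduction is to compare its result with the generic
 * mp_mod path on the same input.  The function name and the use of
 * MP_UNDEF as a "mismatch" return value are arbitrary choices; the only
 * requirement is that meth->irr holds the p192 prime.
 */
#if 0
static mp_err
ec_GFp_nistp192_mod_check(const mp_int *a, const GFMethod *meth)
{
        mp_err res = MP_OKAY;
        mp_int fast, slow;

        MP_DIGITS(&fast) = 0;
        MP_DIGITS(&slow) = 0;
        MP_CHECKOK(mp_init(&fast, FLAG(a)));
        MP_CHECKOK(mp_init(&slow, FLAG(a)));
        /* fast path under test */
        MP_CHECKOK(ec_GFp_nistp192_mod(a, &fast, meth));
        /* reference: generic reduction modulo the field prime */
        MP_CHECKOK(mp_mod(a, &meth->irr, &slow));
        if (mp_cmp(&fast, &slow) != 0) {
                res = MP_UNDEF;
        }
  CLEANUP:
        mp_clear(&fast);
        mp_clear(&slow);
        return res;
}
#endif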

#ifndef ECL_THIRTY_TWO_BIT
/* Compute the sum of two 192-bit field elements.  Do the work in-line
 * since the number of words is so small that we don't want the overhead
 * of mp function calls.  Uses optimized modular reduction for p192.
 */
mp_err
ec_GFp_nistp192_add(const mp_int *a, const mp_int *b, mp_int *r,
                        const GFMethod *meth)
{
        mp_err res = MP_OKAY;
        mp_digit a0 = 0, a1 = 0, a2 = 0;
        mp_digit r0 = 0, r1 = 0, r2 = 0;
        mp_digit carry;

        switch(MP_USED(a)) {
        case 3:
                a2 = MP_DIGIT(a,2);
        case 2:
                a1 = MP_DIGIT(a,1);
        case 1:
                a0 = MP_DIGIT(a,0);
        }
        switch(MP_USED(b)) {
        case 3:
                r2 = MP_DIGIT(b,2);
        case 2:
                r1 = MP_DIGIT(b,1);
        case 1:
                r0 = MP_DIGIT(b,0);
        }

#ifndef MPI_AMD64_ADD
        MP_ADD_CARRY_ZERO(a0, r0, r0, carry);
        MP_ADD_CARRY(a1, r1, r1, carry, carry);
        MP_ADD_CARRY(a2, r2, r2, carry, carry);
#else
        __asm__ (
                "xorq   %3,%3           \n\t"
                "addq   %4,%0           \n\t"
                "adcq   %5,%1           \n\t"
                "adcq   %6,%2           \n\t"
                "adcq   $0,%3           \n\t"
                : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
                : "r" (a0), "r" (a1), "r" (a2), "0" (r0),
                  "1" (r1), "2" (r2)
                : "%cc" );
#endif

        /* Do a quick 'subtract' if we've gone over: add the 2's complement
         * of the field prime, 2^192 - p192 = 2^64 + 1 */
        if (carry || ((r2 == MP_DIGIT_MAX) &&
                      ((r1 == MP_DIGIT_MAX) ||
                        ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
#ifndef MPI_AMD64_ADD
                MP_ADD_CARRY_ZERO(r0, 1, r0, carry);
                MP_ADD_CARRY(r1, 1, r1, carry, carry);
                MP_ADD_CARRY(r2, 0, r2, carry, carry);
#else
                __asm__ (
                        "addq   $1,%0           \n\t"
                        "adcq   $1,%1           \n\t"
                        "adcq   $0,%2           \n\t"
                        : "=r"(r0), "=r"(r1), "=r"(r2)
                        : "0" (r0), "1" (r1), "2" (r2)
                        : "%cc" );
#endif
        }

        MP_CHECKOK(s_mp_pad(r, 3));
        MP_DIGIT(r, 2) = r2;
        MP_DIGIT(r, 1) = r1;
        MP_DIGIT(r, 0) = r0;
        MP_SIGN(r) = MP_ZPOS;
        MP_USED(r) = 3;
        s_mp_clamp(r);

  CLEANUP:
        return res;
}
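
/*
 * Worked example for the reduction branch above (illustrative only): with
 * a = p192 - 1 and b = 2 the raw sum is p192 + 1, which appears as
 * (r2, r1, r0) = (MP_DIGIT_MAX, MP_DIGIT_MAX, 0) with no carry out.  The
 * overflow test fires, and adding the 2's complement of p192, i.e.
 * 2^64 + 1 (1 into r0 and 1 into r1, carries propagating into r2), wraps
 * the upper words to zero and leaves (0, 0, 1) = 1 = (p192 + 1) mod p192.
 */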

/* Compute the difference of two 192-bit field elements.  Do the work
 * in-line since the number of words is so small that we don't want the
 * overhead of mp function calls.  Uses optimized modular reduction for
 * p192.
 */
mp_err
ec_GFp_nistp192_sub(const mp_int *a, const mp_int *b, mp_int *r,
                        const GFMethod *meth)
{
        mp_err res = MP_OKAY;
        mp_digit b0 = 0, b1 = 0, b2 = 0;
        mp_digit r0 = 0, r1 = 0, r2 = 0;
        mp_digit borrow;

        switch(MP_USED(a)) {
        case 3:
                r2 = MP_DIGIT(a,2);
        case 2:
                r1 = MP_DIGIT(a,1);
        case 1:
                r0 = MP_DIGIT(a,0);
        }

        switch(MP_USED(b)) {
        case 3:
                b2 = MP_DIGIT(b,2);
        case 2:
                b1 = MP_DIGIT(b,1);
        case 1:
                b0 = MP_DIGIT(b,0);
        }

#ifndef MPI_AMD64_ADD
        MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
        MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
        MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
#else
        __asm__ (
                "xorq   %3,%3           \n\t"
                "subq   %4,%0           \n\t"
                "sbbq   %5,%1           \n\t"
                "sbbq   %6,%2           \n\t"
                "adcq   $0,%3           \n\t"
                : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow)
                : "r" (b0), "r" (b1), "r" (b2), "0" (r0),
                  "1" (r1), "2" (r2)
                : "%cc" );
#endif

        /* Do a quick 'add' of p192 if we've gone below zero: the register
         * value is now a - b + 2^192, so subtract the 2's complement of
         * the field prime, 2^192 - p192 = 2^64 + 1 */
        if (borrow) {
#ifndef MPI_AMD64_ADD
                MP_SUB_BORROW(r0, 1, r0, 0,     borrow);
                MP_SUB_BORROW(r1, 1, r1, borrow, borrow);
                MP_SUB_BORROW(r2, 0, r2, borrow, borrow);
#else
                __asm__ (
                        "subq   $1,%0           \n\t"
                        "sbbq   $1,%1           \n\t"
                        "sbbq   $0,%2           \n\t"
                        : "=r"(r0), "=r"(r1), "=r"(r2)
                        : "0" (r0), "1" (r1), "2" (r2)
                        : "%cc" );
#endif
        }

        MP_CHECKOK(s_mp_pad(r, 3));
        MP_DIGIT(r, 2) = r2;
        MP_DIGIT(r, 1) = r1;
        MP_DIGIT(r, 0) = r0;
        MP_SIGN(r) = MP_ZPOS;
        MP_USED(r) = 3;
        s_mp_clamp(r);

  CLEANUP:
        return res;
}

#endif

/* Compute the square of field element a, reduce modulo p192.  Store the
 * result in r.  r could be a.  Uses optimized modular reduction for p192.
 */
mp_err
ec_GFp_nistp192_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
{
        mp_err res = MP_OKAY;

        MP_CHECKOK(mp_sqr(a, r));
        MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
  CLEANUP:
        return res;
}

/* Compute the product of two field elements a and b, reduce modulo p192.
 * Store the result in r.  r could be a or b; a could be b.  Uses
 * optimized modular reduction for p192. */
mp_err
ec_GFp_nistp192_mul(const mp_int *a, const mp_int *b, mp_int *r,
                                        const GFMethod *meth)
{
        mp_err res = MP_OKAY;

        MP_CHECKOK(mp_mul(a, b, r));
        MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
  CLEANUP:
        return res;
}

/* Divides two field elements.  If a is NULL, then returns the inverse of
 * b. */
mp_err
ec_GFp_nistp192_div(const mp_int *a, const mp_int *b, mp_int *r,
                   const GFMethod *meth)
{
        mp_err res = MP_OKAY;
        mp_int t;

        /* If a is NULL, then return the inverse of b, otherwise return a/b. */
        if (a == NULL) {
                return mp_invmod(b, &meth->irr, r);
        } else {
                /* MPI doesn't support divmod, so we implement it using invmod
                 * and mulmod. */
                MP_CHECKOK(mp_init(&t, FLAG(b)));
                MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
                MP_CHECKOK(mp_mul(a, &t, r));
                MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
          CLEANUP:
                mp_clear(&t);
                return res;
        }
}

/* Wire in fast field arithmetic for the named curve. */
mp_err
ec_group_set_gfp192(ECGroup *group, ECCurveName name)
{
        if (name == ECCurve_NIST_P192) {
                group->meth->field_mod = &ec_GFp_nistp192_mod;
                group->meth->field_mul = &ec_GFp_nistp192_mul;
                group->meth->field_sqr = &ec_GFp_nistp192_sqr;
                group->meth->field_div = &ec_GFp_nistp192_div;
#ifndef ECL_THIRTY_TWO_BIT
                group->meth->field_add = &ec_GFp_nistp192_add;
                group->meth->field_sub = &ec_GFp_nistp192_sub;
#endif
        }
        return MP_OKAY;
}
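
/*
 * Usage sketch (illustrative only, not part of the library): once
 * ec_group_set_gfp192() has wired the fast routines into group->meth,
 * field arithmetic dispatched through the method table picks them up
 * transparently.  The example function name is arbitrary, and the ECGroup
 * is assumed to have been constructed for ECCurve_NIST_P192 elsewhere.
 */
#if 0
static mp_err
example_p192_field_mul(ECGroup *group, const mp_int *x, const mp_int *y,
                        mp_int *prod)
{
        mp_err res = MP_OKAY;

        MP_CHECKOK(ec_group_set_gfp192(group, ECCurve_NIST_P192));
        /* dispatches to ec_GFp_nistp192_mul for this curve */
        MP_CHECKOK(group->meth->field_mul(x, y, prod, group->meth));
  CLEANUP:
        return res;
}
#endif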