1/* 2 * Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved. 3 * Use is subject to license terms. 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Lesser General Public 7 * License as published by the Free Software Foundation; either 8 * version 2.1 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Lesser General Public License for more details. 14 * 15 * You should have received a copy of the GNU Lesser General Public License 16 * along with this library; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24/* ********************************************************************* 25 * 26 * The Original Code is the elliptic curve math library for prime field curves. 27 * 28 * The Initial Developer of the Original Code is 29 * Sun Microsystems, Inc. 30 * Portions created by the Initial Developer are Copyright (C) 2003 31 * the Initial Developer. All Rights Reserved. 32 * 33 * Contributor(s): 34 * Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories 35 * 36 *********************************************************************** */ 37 38#include "ecp.h" 39#include "mpi.h" 40#include "mplogic.h" 41#include "mpi-priv.h" 42#ifndef _KERNEL 43#include <stdlib.h> 44#endif 45 46#define ECP192_DIGITS ECL_CURVE_DIGITS(192) 47 48/* Fast modular reduction for p192 = 2^192 - 2^64 - 1. a can be r. Uses 49 * algorithm 7 from Brown, Hankerson, Lopez, Menezes. Software 50 * Implementation of the NIST Elliptic Curves over Prime Fields. */ 51mp_err 52ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth) 53{ 54 mp_err res = MP_OKAY; 55 mp_size a_used = MP_USED(a); 56 mp_digit r3; 57#ifndef MPI_AMD64_ADD 58 mp_digit carry; 59#endif 60#ifdef ECL_THIRTY_TWO_BIT 61 mp_digit a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0; 62 mp_digit r0a, r0b, r1a, r1b, r2a, r2b; 63#else 64 mp_digit a5 = 0, a4 = 0, a3 = 0; 65 mp_digit r0, r1, r2; 66#endif 67 68 /* reduction not needed if a is not larger than field size */ 69 if (a_used < ECP192_DIGITS) { 70 if (a == r) { 71 return MP_OKAY; 72 } 73 return mp_copy(a, r); 74 } 75 76 /* for polynomials larger than twice the field size, use regular 77 * reduction */ 78 if (a_used > ECP192_DIGITS*2) { 79 MP_CHECKOK(mp_mod(a, &meth->irr, r)); 80 } else { 81 /* copy out upper words of a */ 82 83#ifdef ECL_THIRTY_TWO_BIT 84 85 /* in all the math below, 86 * nXb is most signifiant, nXa is least significant */ 87 switch (a_used) { 88 case 12: 89 a5b = MP_DIGIT(a, 11); 90 case 11: 91 a5a = MP_DIGIT(a, 10); 92 case 10: 93 a4b = MP_DIGIT(a, 9); 94 case 9: 95 a4a = MP_DIGIT(a, 8); 96 case 8: 97 a3b = MP_DIGIT(a, 7); 98 case 7: 99 a3a = MP_DIGIT(a, 6); 100 } 101 102 103 r2b= MP_DIGIT(a, 5); 104 r2a= MP_DIGIT(a, 4); 105 r1b = MP_DIGIT(a, 3); 106 r1a = MP_DIGIT(a, 2); 107 r0b = MP_DIGIT(a, 1); 108 r0a = MP_DIGIT(a, 0); 109 110 /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */ 111 MP_ADD_CARRY(r0a, a3a, r0a, 0, carry); 112 MP_ADD_CARRY(r0b, a3b, r0b, carry, carry); 113 MP_ADD_CARRY(r1a, a3a, r1a, carry, carry); 114 MP_ADD_CARRY(r1b, a3b, r1b, carry, carry); 115 MP_ADD_CARRY(r2a, a4a, r2a, carry, carry); 116 MP_ADD_CARRY(r2b, a4b, r2b, carry, carry); 117 r3 = carry; carry = 0; 118 MP_ADD_CARRY(r0a, a5a, r0a, 0, carry); 119 MP_ADD_CARRY(r0b, a5b, r0b, carry, carry); 120 MP_ADD_CARRY(r1a, a5a, r1a, carry, carry); 121 MP_ADD_CARRY(r1b, a5b, r1b, carry, carry); 122 MP_ADD_CARRY(r2a, a5a, r2a, carry, carry); 123 MP_ADD_CARRY(r2b, a5b, r2b, carry, carry); 124 r3 += carry; 125 MP_ADD_CARRY(r1a, a4a, r1a, 0, carry); 126 MP_ADD_CARRY(r1b, a4b, r1b, carry, carry); 127 MP_ADD_CARRY(r2a, 0, r2a, carry, carry); 128 MP_ADD_CARRY(r2b, 0, r2b, carry, carry); 129 r3 += carry; 130 131 /* reduce out the carry */ 132 while (r3) { 133 MP_ADD_CARRY(r0a, r3, r0a, 0, carry); 134 MP_ADD_CARRY(r0b, 0, r0b, carry, carry); 135 MP_ADD_CARRY(r1a, r3, r1a, carry, carry); 136 MP_ADD_CARRY(r1b, 0, r1b, carry, carry); 137 MP_ADD_CARRY(r2a, 0, r2a, carry, carry); 138 MP_ADD_CARRY(r2b, 0, r2b, carry, carry); 139 r3 = carry; 140 } 141 142 /* check for final reduction */ 143 /* 144 * our field is 0xffffffffffffffff, 0xfffffffffffffffe, 145 * 0xffffffffffffffff. That means we can only be over and need 146 * one more reduction 147 * if r2 == 0xffffffffffffffffff (same as r2+1 == 0) 148 * and 149 * r1 == 0xffffffffffffffffff or 150 * r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff 151 * In all cases, we subtract the field (or add the 2's 152 * complement value (1,1,0)). (r0, r1, r2) 153 */ 154 if (((r2b == 0xffffffff) && (r2a == 0xffffffff) 155 && (r1b == 0xffffffff) ) && 156 ((r1a == 0xffffffff) || 157 (r1a == 0xfffffffe) && (r0a == 0xffffffff) && 158 (r0b == 0xffffffff)) ) { 159 /* do a quick subtract */ 160 MP_ADD_CARRY(r0a, 1, r0a, 0, carry); 161 r0b += carry; 162 r1a = r1b = r2a = r2b = 0; 163 } 164 165 /* set the lower words of r */ 166 if (a != r) { 167 MP_CHECKOK(s_mp_pad(r, 6)); 168 } 169 MP_DIGIT(r, 5) = r2b; 170 MP_DIGIT(r, 4) = r2a; 171 MP_DIGIT(r, 3) = r1b; 172 MP_DIGIT(r, 2) = r1a; 173 MP_DIGIT(r, 1) = r0b; 174 MP_DIGIT(r, 0) = r0a; 175 MP_USED(r) = 6; 176#else 177 switch (a_used) { 178 case 6: 179 a5 = MP_DIGIT(a, 5); 180 case 5: 181 a4 = MP_DIGIT(a, 4); 182 case 4: 183 a3 = MP_DIGIT(a, 3); 184 } 185 186 r2 = MP_DIGIT(a, 2); 187 r1 = MP_DIGIT(a, 1); 188 r0 = MP_DIGIT(a, 0); 189 190 /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */ 191#ifndef MPI_AMD64_ADD 192 MP_ADD_CARRY_ZERO(r0, a3, r0, carry); 193 MP_ADD_CARRY(r1, a3, r1, carry, carry); 194 MP_ADD_CARRY(r2, a4, r2, carry, carry); 195 r3 = carry; 196 MP_ADD_CARRY_ZERO(r0, a5, r0, carry); 197 MP_ADD_CARRY(r1, a5, r1, carry, carry); 198 MP_ADD_CARRY(r2, a5, r2, carry, carry); 199 r3 += carry; 200 MP_ADD_CARRY_ZERO(r1, a4, r1, carry); 201 MP_ADD_CARRY(r2, 0, r2, carry, carry); 202 r3 += carry; 203 204#else 205 r2 = MP_DIGIT(a, 2); 206 r1 = MP_DIGIT(a, 1); 207 r0 = MP_DIGIT(a, 0); 208 209 /* set the lower words of r */ 210 __asm__ ( 211 "xorq %3,%3 \n\t" 212 "addq %4,%0 \n\t" 213 "adcq %4,%1 \n\t" 214 "adcq %5,%2 \n\t" 215 "adcq $0,%3 \n\t" 216 "addq %6,%0 \n\t" 217 "adcq %6,%1 \n\t" 218 "adcq %6,%2 \n\t" 219 "adcq $0,%3 \n\t" 220 "addq %5,%1 \n\t" 221 "adcq $0,%2 \n\t" 222 "adcq $0,%3 \n\t" 223 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3), 224 "=r"(a4), "=r"(a5) 225 : "0" (r0), "1" (r1), "2" (r2), "3" (r3), 226 "4" (a3), "5" (a4), "6"(a5) 227 : "%cc" ); 228#endif 229 230 /* reduce out the carry */ 231 while (r3) { 232#ifndef MPI_AMD64_ADD 233 MP_ADD_CARRY_ZERO(r0, r3, r0, carry); 234 MP_ADD_CARRY(r1, r3, r1, carry, carry); 235 MP_ADD_CARRY(r2, 0, r2, carry, carry); 236 r3 = carry; 237#else 238 a3=r3; 239 __asm__ ( 240 "xorq %3,%3 \n\t" 241 "addq %4,%0 \n\t" 242 "adcq %4,%1 \n\t" 243 "adcq $0,%2 \n\t" 244 "adcq $0,%3 \n\t" 245 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3) 246 : "0" (r0), "1" (r1), "2" (r2), "3" (r3), "4"(a3) 247 : "%cc" ); 248#endif 249 } 250 251 /* check for final reduction */ 252 /* 253 * our field is 0xffffffffffffffff, 0xfffffffffffffffe, 254 * 0xffffffffffffffff. That means we can only be over and need 255 * one more reduction 256 * if r2 == 0xffffffffffffffffff (same as r2+1 == 0) 257 * and 258 * r1 == 0xffffffffffffffffff or 259 * r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff 260 * In all cases, we subtract the field (or add the 2's 261 * complement value (1,1,0)). (r0, r1, r2) 262 */ 263 if (r3 || ((r2 == MP_DIGIT_MAX) && 264 ((r1 == MP_DIGIT_MAX) || 265 ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) { 266 /* do a quick subtract */ 267 r0++; 268 r1 = r2 = 0; 269 } 270 /* set the lower words of r */ 271 if (a != r) { 272 MP_CHECKOK(s_mp_pad(r, 3)); 273 } 274 MP_DIGIT(r, 2) = r2; 275 MP_DIGIT(r, 1) = r1; 276 MP_DIGIT(r, 0) = r0; 277 MP_USED(r) = 3; 278#endif 279 } 280 281 CLEANUP: 282 return res; 283} 284 285#ifndef ECL_THIRTY_TWO_BIT 286/* Compute the sum of 192 bit curves. Do the work in-line since the 287 * number of words are so small, we don't want to overhead of mp function 288 * calls. Uses optimized modular reduction for p192. 289 */ 290mp_err 291ec_GFp_nistp192_add(const mp_int *a, const mp_int *b, mp_int *r, 292 const GFMethod *meth) 293{ 294 mp_err res = MP_OKAY; 295 mp_digit a0 = 0, a1 = 0, a2 = 0; 296 mp_digit r0 = 0, r1 = 0, r2 = 0; 297 mp_digit carry; 298 299 switch(MP_USED(a)) { 300 case 3: 301 a2 = MP_DIGIT(a,2); 302 case 2: 303 a1 = MP_DIGIT(a,1); 304 case 1: 305 a0 = MP_DIGIT(a,0); 306 } 307 switch(MP_USED(b)) { 308 case 3: 309 r2 = MP_DIGIT(b,2); 310 case 2: 311 r1 = MP_DIGIT(b,1); 312 case 1: 313 r0 = MP_DIGIT(b,0); 314 } 315 316#ifndef MPI_AMD64_ADD 317 MP_ADD_CARRY_ZERO(a0, r0, r0, carry); 318 MP_ADD_CARRY(a1, r1, r1, carry, carry); 319 MP_ADD_CARRY(a2, r2, r2, carry, carry); 320#else 321 __asm__ ( 322 "xorq %3,%3 \n\t" 323 "addq %4,%0 \n\t" 324 "adcq %5,%1 \n\t" 325 "adcq %6,%2 \n\t" 326 "adcq $0,%3 \n\t" 327 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry) 328 : "r" (a0), "r" (a1), "r" (a2), "0" (r0), 329 "1" (r1), "2" (r2) 330 : "%cc" ); 331#endif 332 333 /* Do quick 'subract' if we've gone over 334 * (add the 2's complement of the curve field) */ 335 if (carry || ((r2 == MP_DIGIT_MAX) && 336 ((r1 == MP_DIGIT_MAX) || 337 ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) { 338#ifndef MPI_AMD64_ADD 339 MP_ADD_CARRY_ZERO(r0, 1, r0, carry); 340 MP_ADD_CARRY(r1, 1, r1, carry, carry); 341 MP_ADD_CARRY(r2, 0, r2, carry, carry); 342#else 343 __asm__ ( 344 "addq $1,%0 \n\t" 345 "adcq $1,%1 \n\t" 346 "adcq $0,%2 \n\t" 347 : "=r"(r0), "=r"(r1), "=r"(r2) 348 : "0" (r0), "1" (r1), "2" (r2) 349 : "%cc" ); 350#endif 351 } 352 353 354 MP_CHECKOK(s_mp_pad(r, 3)); 355 MP_DIGIT(r, 2) = r2; 356 MP_DIGIT(r, 1) = r1; 357 MP_DIGIT(r, 0) = r0; 358 MP_SIGN(r) = MP_ZPOS; 359 MP_USED(r) = 3; 360 s_mp_clamp(r); 361 362 363 CLEANUP: 364 return res; 365} 366 367/* Compute the diff of 192 bit curves. Do the work in-line since the 368 * number of words are so small, we don't want to overhead of mp function 369 * calls. Uses optimized modular reduction for p192. 370 */ 371mp_err 372ec_GFp_nistp192_sub(const mp_int *a, const mp_int *b, mp_int *r, 373 const GFMethod *meth) 374{ 375 mp_err res = MP_OKAY; 376 mp_digit b0 = 0, b1 = 0, b2 = 0; 377 mp_digit r0 = 0, r1 = 0, r2 = 0; 378 mp_digit borrow; 379 380 switch(MP_USED(a)) { 381 case 3: 382 r2 = MP_DIGIT(a,2); 383 case 2: 384 r1 = MP_DIGIT(a,1); 385 case 1: 386 r0 = MP_DIGIT(a,0); 387 } 388 389 switch(MP_USED(b)) { 390 case 3: 391 b2 = MP_DIGIT(b,2); 392 case 2: 393 b1 = MP_DIGIT(b,1); 394 case 1: 395 b0 = MP_DIGIT(b,0); 396 } 397 398#ifndef MPI_AMD64_ADD 399 MP_SUB_BORROW(r0, b0, r0, 0, borrow); 400 MP_SUB_BORROW(r1, b1, r1, borrow, borrow); 401 MP_SUB_BORROW(r2, b2, r2, borrow, borrow); 402#else 403 __asm__ ( 404 "xorq %3,%3 \n\t" 405 "subq %4,%0 \n\t" 406 "sbbq %5,%1 \n\t" 407 "sbbq %6,%2 \n\t" 408 "adcq $0,%3 \n\t" 409 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow) 410 : "r" (b0), "r" (b1), "r" (b2), "0" (r0), 411 "1" (r1), "2" (r2) 412 : "%cc" ); 413#endif 414 415 /* Do quick 'add' if we've gone under 0 416 * (subtract the 2's complement of the curve field) */ 417 if (borrow) { 418#ifndef MPI_AMD64_ADD 419 MP_SUB_BORROW(r0, 1, r0, 0, borrow); 420 MP_SUB_BORROW(r1, 1, r1, borrow, borrow); 421 MP_SUB_BORROW(r2, 0, r2, borrow, borrow); 422#else 423 __asm__ ( 424 "subq $1,%0 \n\t" 425 "sbbq $1,%1 \n\t" 426 "sbbq $0,%2 \n\t" 427 : "=r"(r0), "=r"(r1), "=r"(r2) 428 : "0" (r0), "1" (r1), "2" (r2) 429 : "%cc" ); 430#endif 431 } 432 433 MP_CHECKOK(s_mp_pad(r, 3)); 434 MP_DIGIT(r, 2) = r2; 435 MP_DIGIT(r, 1) = r1; 436 MP_DIGIT(r, 0) = r0; 437 MP_SIGN(r) = MP_ZPOS; 438 MP_USED(r) = 3; 439 s_mp_clamp(r); 440 441 CLEANUP: 442 return res; 443} 444 445#endif 446 447/* Compute the square of polynomial a, reduce modulo p192. Store the 448 * result in r. r could be a. Uses optimized modular reduction for p192. 449 */ 450mp_err 451ec_GFp_nistp192_sqr(const mp_int *a, mp_int *r, const GFMethod *meth) 452{ 453 mp_err res = MP_OKAY; 454 455 MP_CHECKOK(mp_sqr(a, r)); 456 MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth)); 457 CLEANUP: 458 return res; 459} 460 461/* Compute the product of two polynomials a and b, reduce modulo p192. 462 * Store the result in r. r could be a or b; a could be b. Uses 463 * optimized modular reduction for p192. */ 464mp_err 465ec_GFp_nistp192_mul(const mp_int *a, const mp_int *b, mp_int *r, 466 const GFMethod *meth) 467{ 468 mp_err res = MP_OKAY; 469 470 MP_CHECKOK(mp_mul(a, b, r)); 471 MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth)); 472 CLEANUP: 473 return res; 474} 475 476/* Divides two field elements. If a is NULL, then returns the inverse of 477 * b. */ 478mp_err 479ec_GFp_nistp192_div(const mp_int *a, const mp_int *b, mp_int *r, 480 const GFMethod *meth) 481{ 482 mp_err res = MP_OKAY; 483 mp_int t; 484 485 /* If a is NULL, then return the inverse of b, otherwise return a/b. */ 486 if (a == NULL) { 487 return mp_invmod(b, &meth->irr, r); 488 } else { 489 /* MPI doesn't support divmod, so we implement it using invmod and 490 * mulmod. */ 491 MP_CHECKOK(mp_init(&t, FLAG(b))); 492 MP_CHECKOK(mp_invmod(b, &meth->irr, &t)); 493 MP_CHECKOK(mp_mul(a, &t, r)); 494 MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth)); 495 CLEANUP: 496 mp_clear(&t); 497 return res; 498 } 499} 500 501/* Wire in fast field arithmetic and precomputation of base point for 502 * named curves. */ 503mp_err 504ec_group_set_gfp192(ECGroup *group, ECCurveName name) 505{ 506 if (name == ECCurve_NIST_P192) { 507 group->meth->field_mod = &ec_GFp_nistp192_mod; 508 group->meth->field_mul = &ec_GFp_nistp192_mul; 509 group->meth->field_sqr = &ec_GFp_nistp192_sqr; 510 group->meth->field_div = &ec_GFp_nistp192_div; 511#ifndef ECL_THIRTY_TWO_BIT 512 group->meth->field_add = &ec_GFp_nistp192_add; 513 group->meth->field_sub = &ec_GFp_nistp192_sub; 514#endif 515 } 516 return MP_OKAY; 517} 518