bn_mul.c revision 1.30
1/* $OpenBSD: bn_mul.c,v 1.30 2023/01/23 12:17:57 jsing Exp $ */ 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3 * All rights reserved. 4 * 5 * This package is an SSL implementation written 6 * by Eric Young (eay@cryptsoft.com). 7 * The implementation was written so as to conform with Netscapes SSL. 8 * 9 * This library is free for commercial and non-commercial use as long as 10 * the following conditions are aheared to. The following conditions 11 * apply to all code found in this distribution, be it the RC4, RSA, 12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13 * included with this distribution is covered by the same copyright terms 14 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15 * 16 * Copyright remains Eric Young's, and as such any Copyright notices in 17 * the code are not to be removed. 18 * If this package is used in a product, Eric Young should be given attribution 19 * as the author of the parts of the library used. 20 * This can be in the form of a textual message at program startup or 21 * in documentation (online or textual) provided with the package. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the copyright 27 * notice, this list of conditions and the following disclaimer. 28 * 2. Redistributions in binary form must reproduce the above copyright 29 * notice, this list of conditions and the following disclaimer in the 30 * documentation and/or other materials provided with the distribution. 31 * 3. All advertising materials mentioning features or use of this software 32 * must display the following acknowledgement: 33 * "This product includes cryptographic software written by 34 * Eric Young (eay@cryptsoft.com)" 35 * The word 'cryptographic' can be left out if the rouines from the library 36 * being used are not cryptographic related :-). 37 * 4. If you include any Windows specific code (or a derivative thereof) from 38 * the apps directory (application code) you must include an acknowledgement: 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40 * 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 * The licence and distribution terms for any publically available version or 54 * derivative of this code cannot be changed. i.e. this code cannot simply be 55 * copied and put under another distribution licence 56 * [including the GNU Public Licence.] 57 */ 58 59#include <assert.h> 60#include <stdio.h> 61#include <string.h> 62 63#include <openssl/opensslconf.h> 64 65#include "bn_arch.h" 66#include "bn_local.h" 67 68#ifndef HAVE_BN_MUL_ADD_WORDS 69#if defined(BN_LLONG) || defined(BN_UMULT_HIGH) 70 71BN_ULONG 72bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) 73{ 74 BN_ULONG c1 = 0; 75 76 assert(num >= 0); 77 if (num <= 0) 78 return (c1); 79 80#ifndef OPENSSL_SMALL_FOOTPRINT 81 while (num & ~3) { 82 mul_add(rp[0], ap[0], w, c1); 83 mul_add(rp[1], ap[1], w, c1); 84 mul_add(rp[2], ap[2], w, c1); 85 mul_add(rp[3], ap[3], w, c1); 86 ap += 4; 87 rp += 4; 88 num -= 4; 89 } 90#endif 91 while (num) { 92 mul_add(rp[0], ap[0], w, c1); 93 ap++; 94 rp++; 95 num--; 96 } 97 98 return (c1); 99} 100 101#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ 102 103BN_ULONG 104bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) 105{ 106 BN_ULONG c = 0; 107 BN_ULONG bl, bh; 108 109 assert(num >= 0); 110 if (num <= 0) 111 return ((BN_ULONG)0); 112 113 bl = LBITS(w); 114 bh = HBITS(w); 115 116#ifndef OPENSSL_SMALL_FOOTPRINT 117 while (num & ~3) { 118 mul_add(rp[0], ap[0], bl, bh, c); 119 mul_add(rp[1], ap[1], bl, bh, c); 120 mul_add(rp[2], ap[2], bl, bh, c); 121 mul_add(rp[3], ap[3], bl, bh, c); 122 ap += 4; 123 rp += 4; 124 num -= 4; 125 } 126#endif 127 while (num) { 128 mul_add(rp[0], ap[0], bl, bh, c); 129 ap++; 130 rp++; 131 num--; 132 } 133 return (c); 134} 135 136#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ 137#endif 138 139#ifndef HAVE_BN_MUL_COMBA4 140void 141bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 142{ 143 BN_ULONG c1, c2, c3; 144 145 c1 = 0; 146 c2 = 0; 147 c3 = 0; 148 mul_add_c(a[0], b[0], c1, c2, c3); 149 r[0] = c1; 150 c1 = 0; 151 mul_add_c(a[0], b[1], c2, c3, c1); 152 mul_add_c(a[1], b[0], c2, c3, c1); 153 r[1] = c2; 154 c2 = 0; 155 mul_add_c(a[2], b[0], c3, c1, c2); 156 mul_add_c(a[1], b[1], c3, c1, c2); 157 mul_add_c(a[0], b[2], c3, c1, c2); 158 r[2] = c3; 159 c3 = 0; 160 mul_add_c(a[0], b[3], c1, c2, c3); 161 mul_add_c(a[1], b[2], c1, c2, c3); 162 mul_add_c(a[2], b[1], c1, c2, c3); 163 mul_add_c(a[3], b[0], c1, c2, c3); 164 r[3] = c1; 165 c1 = 0; 166 mul_add_c(a[3], b[1], c2, c3, c1); 167 mul_add_c(a[2], b[2], c2, c3, c1); 168 mul_add_c(a[1], b[3], c2, c3, c1); 169 r[4] = c2; 170 c2 = 0; 171 mul_add_c(a[2], b[3], c3, c1, c2); 172 mul_add_c(a[3], b[2], c3, c1, c2); 173 r[5] = c3; 174 c3 = 0; 175 mul_add_c(a[3], b[3], c1, c2, c3); 176 r[6] = c1; 177 r[7] = c2; 178} 179#endif 180 181#ifndef HAVE_BN_MUL_COMBA8 182void 183bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 184{ 185 BN_ULONG c1, c2, c3; 186 187 c1 = 0; 188 c2 = 0; 189 c3 = 0; 190 mul_add_c(a[0], b[0], c1, c2, c3); 191 r[0] = c1; 192 c1 = 0; 193 mul_add_c(a[0], b[1], c2, c3, c1); 194 mul_add_c(a[1], b[0], c2, c3, c1); 195 r[1] = c2; 196 c2 = 0; 197 mul_add_c(a[2], b[0], c3, c1, c2); 198 mul_add_c(a[1], b[1], c3, c1, c2); 199 mul_add_c(a[0], b[2], c3, c1, c2); 200 r[2] = c3; 201 c3 = 0; 202 mul_add_c(a[0], b[3], c1, c2, c3); 203 mul_add_c(a[1], b[2], c1, c2, c3); 204 mul_add_c(a[2], b[1], c1, c2, c3); 205 mul_add_c(a[3], b[0], c1, c2, c3); 206 r[3] = c1; 207 c1 = 0; 208 mul_add_c(a[4], b[0], c2, c3, c1); 209 mul_add_c(a[3], b[1], c2, c3, c1); 210 mul_add_c(a[2], b[2], c2, c3, c1); 211 mul_add_c(a[1], b[3], c2, c3, c1); 212 mul_add_c(a[0], b[4], c2, c3, c1); 213 r[4] = c2; 214 c2 = 0; 215 mul_add_c(a[0], b[5], c3, c1, c2); 216 mul_add_c(a[1], b[4], c3, c1, c2); 217 mul_add_c(a[2], b[3], c3, c1, c2); 218 mul_add_c(a[3], b[2], c3, c1, c2); 219 mul_add_c(a[4], b[1], c3, c1, c2); 220 mul_add_c(a[5], b[0], c3, c1, c2); 221 r[5] = c3; 222 c3 = 0; 223 mul_add_c(a[6], b[0], c1, c2, c3); 224 mul_add_c(a[5], b[1], c1, c2, c3); 225 mul_add_c(a[4], b[2], c1, c2, c3); 226 mul_add_c(a[3], b[3], c1, c2, c3); 227 mul_add_c(a[2], b[4], c1, c2, c3); 228 mul_add_c(a[1], b[5], c1, c2, c3); 229 mul_add_c(a[0], b[6], c1, c2, c3); 230 r[6] = c1; 231 c1 = 0; 232 mul_add_c(a[0], b[7], c2, c3, c1); 233 mul_add_c(a[1], b[6], c2, c3, c1); 234 mul_add_c(a[2], b[5], c2, c3, c1); 235 mul_add_c(a[3], b[4], c2, c3, c1); 236 mul_add_c(a[4], b[3], c2, c3, c1); 237 mul_add_c(a[5], b[2], c2, c3, c1); 238 mul_add_c(a[6], b[1], c2, c3, c1); 239 mul_add_c(a[7], b[0], c2, c3, c1); 240 r[7] = c2; 241 c2 = 0; 242 mul_add_c(a[7], b[1], c3, c1, c2); 243 mul_add_c(a[6], b[2], c3, c1, c2); 244 mul_add_c(a[5], b[3], c3, c1, c2); 245 mul_add_c(a[4], b[4], c3, c1, c2); 246 mul_add_c(a[3], b[5], c3, c1, c2); 247 mul_add_c(a[2], b[6], c3, c1, c2); 248 mul_add_c(a[1], b[7], c3, c1, c2); 249 r[8] = c3; 250 c3 = 0; 251 mul_add_c(a[2], b[7], c1, c2, c3); 252 mul_add_c(a[3], b[6], c1, c2, c3); 253 mul_add_c(a[4], b[5], c1, c2, c3); 254 mul_add_c(a[5], b[4], c1, c2, c3); 255 mul_add_c(a[6], b[3], c1, c2, c3); 256 mul_add_c(a[7], b[2], c1, c2, c3); 257 r[9] = c1; 258 c1 = 0; 259 mul_add_c(a[7], b[3], c2, c3, c1); 260 mul_add_c(a[6], b[4], c2, c3, c1); 261 mul_add_c(a[5], b[5], c2, c3, c1); 262 mul_add_c(a[4], b[6], c2, c3, c1); 263 mul_add_c(a[3], b[7], c2, c3, c1); 264 r[10] = c2; 265 c2 = 0; 266 mul_add_c(a[4], b[7], c3, c1, c2); 267 mul_add_c(a[5], b[6], c3, c1, c2); 268 mul_add_c(a[6], b[5], c3, c1, c2); 269 mul_add_c(a[7], b[4], c3, c1, c2); 270 r[11] = c3; 271 c3 = 0; 272 mul_add_c(a[7], b[5], c1, c2, c3); 273 mul_add_c(a[6], b[6], c1, c2, c3); 274 mul_add_c(a[5], b[7], c1, c2, c3); 275 r[12] = c1; 276 c1 = 0; 277 mul_add_c(a[6], b[7], c2, c3, c1); 278 mul_add_c(a[7], b[6], c2, c3, c1); 279 r[13] = c2; 280 c2 = 0; 281 mul_add_c(a[7], b[7], c3, c1, c2); 282 r[14] = c3; 283 r[15] = c1; 284} 285#endif 286 287#ifndef HAVE_BN_MUL_WORDS 288#if defined(BN_LLONG) || defined(BN_UMULT_HIGH) 289 290BN_ULONG 291bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) 292{ 293 BN_ULONG c1 = 0; 294 295 assert(num >= 0); 296 if (num <= 0) 297 return (c1); 298 299#ifndef OPENSSL_SMALL_FOOTPRINT 300 while (num & ~3) { 301 mul(rp[0], ap[0], w, c1); 302 mul(rp[1], ap[1], w, c1); 303 mul(rp[2], ap[2], w, c1); 304 mul(rp[3], ap[3], w, c1); 305 ap += 4; 306 rp += 4; 307 num -= 4; 308 } 309#endif 310 while (num) { 311 mul(rp[0], ap[0], w, c1); 312 ap++; 313 rp++; 314 num--; 315 } 316 return (c1); 317} 318#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ 319 320BN_ULONG 321bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) 322{ 323 BN_ULONG carry = 0; 324 BN_ULONG bl, bh; 325 326 assert(num >= 0); 327 if (num <= 0) 328 return ((BN_ULONG)0); 329 330 bl = LBITS(w); 331 bh = HBITS(w); 332 333#ifndef OPENSSL_SMALL_FOOTPRINT 334 while (num & ~3) { 335 mul(rp[0], ap[0], bl, bh, carry); 336 mul(rp[1], ap[1], bl, bh, carry); 337 mul(rp[2], ap[2], bl, bh, carry); 338 mul(rp[3], ap[3], bl, bh, carry); 339 ap += 4; 340 rp += 4; 341 num -= 4; 342 } 343#endif 344 while (num) { 345 mul(rp[0], ap[0], bl, bh, carry); 346 ap++; 347 rp++; 348 num--; 349 } 350 return (carry); 351} 352#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ 353#endif 354 355#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) 356/* 357 * Here follows a specialised variant of bn_sub_words(), which has the property 358 * performing operations on arrays of different sizes. The sizes of those arrays 359 * is expressed through cl, which is the common length (basically, 360 * min(len(a),len(b))), and dl, which is the delta between the two lengths, 361 * calculated as len(a)-len(b). All lengths are the number of BN_ULONGs. For the 362 * operations that require a result array as parameter, it must have the length 363 * cl+abs(dl). 364 */ 365BN_ULONG 366bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl, 367 int dl) 368{ 369 BN_ULONG c, t; 370 371 assert(cl >= 0); 372 c = bn_sub_words(r, a, b, cl); 373 374 if (dl == 0) 375 return c; 376 377 r += cl; 378 a += cl; 379 b += cl; 380 381 if (dl < 0) { 382 for (;;) { 383 t = b[0]; 384 r[0] = (0 - t - c) & BN_MASK2; 385 if (t != 0) 386 c = 1; 387 if (++dl >= 0) 388 break; 389 390 t = b[1]; 391 r[1] = (0 - t - c) & BN_MASK2; 392 if (t != 0) 393 c = 1; 394 if (++dl >= 0) 395 break; 396 397 t = b[2]; 398 r[2] = (0 - t - c) & BN_MASK2; 399 if (t != 0) 400 c = 1; 401 if (++dl >= 0) 402 break; 403 404 t = b[3]; 405 r[3] = (0 - t - c) & BN_MASK2; 406 if (t != 0) 407 c = 1; 408 if (++dl >= 0) 409 break; 410 411 b += 4; 412 r += 4; 413 } 414 } else { 415 int save_dl = dl; 416 while (c) { 417 t = a[0]; 418 r[0] = (t - c) & BN_MASK2; 419 if (t != 0) 420 c = 0; 421 if (--dl <= 0) 422 break; 423 424 t = a[1]; 425 r[1] = (t - c) & BN_MASK2; 426 if (t != 0) 427 c = 0; 428 if (--dl <= 0) 429 break; 430 431 t = a[2]; 432 r[2] = (t - c) & BN_MASK2; 433 if (t != 0) 434 c = 0; 435 if (--dl <= 0) 436 break; 437 438 t = a[3]; 439 r[3] = (t - c) & BN_MASK2; 440 if (t != 0) 441 c = 0; 442 if (--dl <= 0) 443 break; 444 445 save_dl = dl; 446 a += 4; 447 r += 4; 448 } 449 if (dl > 0) { 450 if (save_dl > dl) { 451 switch (save_dl - dl) { 452 case 1: 453 r[1] = a[1]; 454 if (--dl <= 0) 455 break; 456 case 2: 457 r[2] = a[2]; 458 if (--dl <= 0) 459 break; 460 case 3: 461 r[3] = a[3]; 462 if (--dl <= 0) 463 break; 464 } 465 a += 4; 466 r += 4; 467 } 468 } 469 if (dl > 0) { 470 for (;;) { 471 r[0] = a[0]; 472 if (--dl <= 0) 473 break; 474 r[1] = a[1]; 475 if (--dl <= 0) 476 break; 477 r[2] = a[2]; 478 if (--dl <= 0) 479 break; 480 r[3] = a[3]; 481 if (--dl <= 0) 482 break; 483 484 a += 4; 485 r += 4; 486 } 487 } 488 } 489 return c; 490} 491#endif 492 493void 494bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) 495{ 496 BN_ULONG *rr; 497 498 499 if (na < nb) { 500 int itmp; 501 BN_ULONG *ltmp; 502 503 itmp = na; 504 na = nb; 505 nb = itmp; 506 ltmp = a; 507 a = b; 508 b = ltmp; 509 510 } 511 rr = &(r[na]); 512 if (nb <= 0) { 513 (void)bn_mul_words(r, a, na, 0); 514 return; 515 } else 516 rr[0] = bn_mul_words(r, a, na, b[0]); 517 518 for (;;) { 519 if (--nb <= 0) 520 return; 521 rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]); 522 if (--nb <= 0) 523 return; 524 rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]); 525 if (--nb <= 0) 526 return; 527 rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]); 528 if (--nb <= 0) 529 return; 530 rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]); 531 rr += 4; 532 r += 4; 533 b += 4; 534 } 535} 536 537#ifdef BN_RECURSION 538/* Karatsuba recursive multiplication algorithm 539 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */ 540 541/* r is 2*n2 words in size, 542 * a and b are both n2 words in size. 543 * n2 must be a power of 2. 544 * We multiply and return the result. 545 * t must be 2*n2 words in size 546 * We calculate 547 * a[0]*b[0] 548 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0]) 549 * a[1]*b[1] 550 */ 551/* dnX may not be positive, but n2/2+dnX has to be */ 552void 553bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna, 554 int dnb, BN_ULONG *t) 555{ 556 int n = n2 / 2, c1, c2; 557 int tna = n + dna, tnb = n + dnb; 558 unsigned int neg, zero; 559 BN_ULONG ln, lo, *p; 560 561# ifdef BN_MUL_COMBA 562# if 0 563 if (n2 == 4) { 564 bn_mul_comba4(r, a, b); 565 return; 566 } 567# endif 568 /* Only call bn_mul_comba 8 if n2 == 8 and the 569 * two arrays are complete [steve] 570 */ 571 if (n2 == 8 && dna == 0 && dnb == 0) { 572 bn_mul_comba8(r, a, b); 573 return; 574 } 575# endif /* BN_MUL_COMBA */ 576 /* Else do normal multiply */ 577 if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) { 578 bn_mul_normal(r, a, n2 + dna, b, n2 + dnb); 579 if ((dna + dnb) < 0) 580 memset(&r[2*n2 + dna + dnb], 0, 581 sizeof(BN_ULONG) * -(dna + dnb)); 582 return; 583 } 584 /* r=(a[0]-a[1])*(b[1]-b[0]) */ 585 c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna); 586 c2 = bn_cmp_part_words(&(b[n]), b,tnb, tnb - n); 587 zero = neg = 0; 588 switch (c1 * 3 + c2) { 589 case -4: 590 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 591 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 592 break; 593 case -3: 594 zero = 1; 595 break; 596 case -2: 597 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 598 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ 599 neg = 1; 600 break; 601 case -1: 602 case 0: 603 case 1: 604 zero = 1; 605 break; 606 case 2: 607 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ 608 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 609 neg = 1; 610 break; 611 case 3: 612 zero = 1; 613 break; 614 case 4: 615 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); 616 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); 617 break; 618 } 619 620# ifdef BN_MUL_COMBA 621 if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take 622 extra args to do this well */ 623 { 624 if (!zero) 625 bn_mul_comba4(&(t[n2]), t, &(t[n])); 626 else 627 memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG)); 628 629 bn_mul_comba4(r, a, b); 630 bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n])); 631 } else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could 632 take extra args to do this 633 well */ 634 { 635 if (!zero) 636 bn_mul_comba8(&(t[n2]), t, &(t[n])); 637 else 638 memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG)); 639 640 bn_mul_comba8(r, a, b); 641 bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n])); 642 } else 643# endif /* BN_MUL_COMBA */ 644 { 645 p = &(t[n2 * 2]); 646 if (!zero) 647 bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p); 648 else 649 memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG)); 650 bn_mul_recursive(r, a, b, n, 0, 0, p); 651 bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p); 652 } 653 654 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign 655 * r[10] holds (a[0]*b[0]) 656 * r[32] holds (b[1]*b[1]) 657 */ 658 659 c1 = (int)(bn_add_words(t, r, &(r[n2]), n2)); 660 661 if (neg) /* if t[32] is negative */ 662 { 663 c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2)); 664 } else { 665 /* Might have a carry */ 666 c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2)); 667 } 668 669 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) 670 * r[10] holds (a[0]*b[0]) 671 * r[32] holds (b[1]*b[1]) 672 * c1 holds the carry bits 673 */ 674 c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2)); 675 if (c1) { 676 p = &(r[n + n2]); 677 lo= *p; 678 ln = (lo + c1) & BN_MASK2; 679 *p = ln; 680 681 /* The overflow will stop before we over write 682 * words we should not overwrite */ 683 if (ln < (BN_ULONG)c1) { 684 do { 685 p++; 686 lo= *p; 687 ln = (lo + 1) & BN_MASK2; 688 *p = ln; 689 } while (ln == 0); 690 } 691 } 692} 693 694/* n+tn is the word length 695 * t needs to be n*4 is size, as does r */ 696/* tnX may not be negative but less than n */ 697void 698bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna, 699 int tnb, BN_ULONG *t) 700{ 701 int i, j, n2 = n * 2; 702 int c1, c2, neg; 703 BN_ULONG ln, lo, *p; 704 705 if (n < 8) { 706 bn_mul_normal(r, a, n + tna, b, n + tnb); 707 return; 708 } 709 710 /* r=(a[0]-a[1])*(b[1]-b[0]) */ 711 c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna); 712 c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n); 713 neg = 0; 714 switch (c1 * 3 + c2) { 715 case -4: 716 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 717 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 718 break; 719 case -3: 720 /* break; */ 721 case -2: 722 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 723 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ 724 neg = 1; 725 break; 726 case -1: 727 case 0: 728 case 1: 729 /* break; */ 730 case 2: 731 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ 732 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 733 neg = 1; 734 break; 735 case 3: 736 /* break; */ 737 case 4: 738 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); 739 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); 740 break; 741 } 742 /* The zero case isn't yet implemented here. The speedup 743 would probably be negligible. */ 744# if 0 745 if (n == 4) { 746 bn_mul_comba4(&(t[n2]), t, &(t[n])); 747 bn_mul_comba4(r, a, b); 748 bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn); 749 memset(&(r[n2 + tn * 2]), 0, sizeof(BN_ULONG) * (n2 - tn * 2)); 750 } else 751# endif 752 if (n == 8) { 753 bn_mul_comba8(&(t[n2]), t, &(t[n])); 754 bn_mul_comba8(r, a, b); 755 bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb); 756 memset(&(r[n2 + tna + tnb]), 0, 757 sizeof(BN_ULONG) * (n2 - tna - tnb)); 758 } else { 759 p = &(t[n2*2]); 760 bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p); 761 bn_mul_recursive(r, a, b, n, 0, 0, p); 762 i = n / 2; 763 /* If there is only a bottom half to the number, 764 * just do it */ 765 if (tna > tnb) 766 j = tna - i; 767 else 768 j = tnb - i; 769 if (j == 0) { 770 bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), 771 i, tna - i, tnb - i, p); 772 memset(&(r[n2 + i * 2]), 0, 773 sizeof(BN_ULONG) * (n2 - i * 2)); 774 } 775 else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */ 776 { 777 bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]), 778 i, tna - i, tnb - i, p); 779 memset(&(r[n2 + tna + tnb]), 0, 780 sizeof(BN_ULONG) * (n2 - tna - tnb)); 781 } 782 else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ 783 { 784 memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2); 785 if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL && 786 tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) { 787 bn_mul_normal(&(r[n2]), &(a[n]), tna, 788 &(b[n]), tnb); 789 } else { 790 for (;;) { 791 i /= 2; 792 /* these simplified conditions work 793 * exclusively because difference 794 * between tna and tnb is 1 or 0 */ 795 if (i < tna || i < tnb) { 796 bn_mul_part_recursive(&(r[n2]), 797 &(a[n]), &(b[n]), i, 798 tna - i, tnb - i, p); 799 break; 800 } else if (i == tna || i == tnb) { 801 bn_mul_recursive(&(r[n2]), 802 &(a[n]), &(b[n]), i, 803 tna - i, tnb - i, p); 804 break; 805 } 806 } 807 } 808 } 809 } 810 811 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign 812 * r[10] holds (a[0]*b[0]) 813 * r[32] holds (b[1]*b[1]) 814 */ 815 816 c1 = (int)(bn_add_words(t, r,&(r[n2]), n2)); 817 818 if (neg) /* if t[32] is negative */ 819 { 820 c1 -= (int)(bn_sub_words(&(t[n2]), t,&(t[n2]), n2)); 821 } else { 822 /* Might have a carry */ 823 c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2)); 824 } 825 826 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) 827 * r[10] holds (a[0]*b[0]) 828 * r[32] holds (b[1]*b[1]) 829 * c1 holds the carry bits 830 */ 831 c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2)); 832 if (c1) { 833 p = &(r[n + n2]); 834 lo= *p; 835 ln = (lo + c1)&BN_MASK2; 836 *p = ln; 837 838 /* The overflow will stop before we over write 839 * words we should not overwrite */ 840 if (ln < (BN_ULONG)c1) { 841 do { 842 p++; 843 lo= *p; 844 ln = (lo + 1) & BN_MASK2; 845 *p = ln; 846 } while (ln == 0); 847 } 848 } 849} 850#endif /* BN_RECURSION */ 851 852#ifndef HAVE_BN_MUL 853#ifndef BN_RECURSION 854int 855bn_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, int rn, BN_CTX *ctx) 856{ 857 bn_mul_normal(r->d, a->d, a->top, b->d, b->top); 858 859 return 1; 860} 861 862#else /* BN_RECURSION */ 863int 864bn_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, int rn, BN_CTX *ctx) 865{ 866 BIGNUM *t = NULL; 867 int al, bl, i, k; 868 int j = 0; 869 int ret = 0; 870 871 BN_CTX_start(ctx); 872 873 al = a->top; 874 bl = b->top; 875 876 i = al - bl; 877 878 if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) { 879 if (i >= -1 && i <= 1) { 880 /* Find out the power of two lower or equal 881 to the longest of the two numbers */ 882 if (i >= 0) { 883 j = BN_num_bits_word((BN_ULONG)al); 884 } 885 if (i == -1) { 886 j = BN_num_bits_word((BN_ULONG)bl); 887 } 888 j = 1 << (j - 1); 889 assert(j <= al || j <= bl); 890 k = j + j; 891 if ((t = BN_CTX_get(ctx)) == NULL) 892 goto err; 893 if (al > j || bl > j) { 894 if (!bn_wexpand(t, k * 4)) 895 goto err; 896 if (!bn_wexpand(r, k * 4)) 897 goto err; 898 bn_mul_part_recursive(r->d, a->d, b->d, 899 j, al - j, bl - j, t->d); 900 } 901 else /* al <= j || bl <= j */ 902 { 903 if (!bn_wexpand(t, k * 2)) 904 goto err; 905 if (!bn_wexpand(r, k * 2)) 906 goto err; 907 bn_mul_recursive(r->d, a->d, b->d, 908 j, al - j, bl - j, t->d); 909 } 910 r->top = rn; 911 goto end; 912 } 913#if 0 914 if (i == 1 && !BN_get_flags(b, BN_FLG_STATIC_DATA)) { 915 BIGNUM *tmp_bn = (BIGNUM *)b; 916 if (!bn_wexpand(tmp_bn, al)) 917 goto err; 918 tmp_bn->d[bl] = 0; 919 bl++; 920 i--; 921 } else if (i == -1 && !BN_get_flags(a, BN_FLG_STATIC_DATA)) { 922 BIGNUM *tmp_bn = (BIGNUM *)a; 923 if (!bn_wexpand(tmp_bn, bl)) 924 goto err; 925 tmp_bn->d[al] = 0; 926 al++; 927 i++; 928 } 929 if (i == 0) { 930 /* symmetric and > 4 */ 931 /* 16 or larger */ 932 j = BN_num_bits_word((BN_ULONG)al); 933 j = 1 << (j - 1); 934 k = j + j; 935 if ((t = BN_CTX_get(ctx)) == NULL) 936 goto err; 937 if (al == j) /* exact multiple */ 938 { 939 if (!bn_wexpand(t, k * 2)) 940 goto err; 941 if (!bn_wexpand(r, k * 2)) 942 goto err; 943 bn_mul_recursive(r->d, a->d, b->d, al, t->d); 944 } else { 945 if (!bn_wexpand(t, k * 4)) 946 goto err; 947 if (!bn_wexpand(r, k * 4)) 948 goto err; 949 bn_mul_part_recursive(r->d, a->d, b->d, 950 al - j, j, t->d); 951 } 952 r->top = top; 953 goto end; 954 } 955#endif 956 } 957 958 bn_mul_normal(r->d, a->d, al, b->d, bl); 959 960 end: 961 ret = 1; 962 err: 963 BN_CTX_end(ctx); 964 965 return ret; 966} 967#endif /* BN_RECURSION */ 968#endif /* HAVE_BN_MUL */ 969 970int 971BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) 972{ 973 BIGNUM *rr; 974 int rn; 975 int ret = 0; 976 977 BN_CTX_start(ctx); 978 979 if (BN_is_zero(a) || BN_is_zero(b)) { 980 BN_zero(r); 981 goto done; 982 } 983 984 rr = r; 985 if (rr == a || rr == b) 986 rr = BN_CTX_get(ctx); 987 if (rr == NULL) 988 goto err; 989 990 rn = a->top + b->top; 991 if (rn < a->top) 992 goto err; 993 if (!bn_wexpand(rr, rn)) 994 goto err; 995 996 if (a->top == 4 && b->top == 4) { 997 bn_mul_comba4(rr->d, a->d, b->d); 998 } else if (a->top == 8 && b->top == 8) { 999 bn_mul_comba8(rr->d, a->d, b->d); 1000 } else { 1001 if (!bn_mul(rr, a, b, rn, ctx)) 1002 goto err; 1003 } 1004 1005 rr->top = rn; 1006 rr->neg = a->neg ^ b->neg; 1007 1008 bn_correct_top(rr); 1009 1010 if (r != rr) 1011 BN_copy(r, rr); 1012 done: 1013 ret = 1; 1014 err: 1015 BN_CTX_end(ctx); 1016 1017 return ret; 1018} 1019