bn_mul.c revision 1.26
1/* $OpenBSD: bn_mul.c,v 1.26 2023/01/20 10:00:51 jsing Exp $ */ 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3 * All rights reserved. 4 * 5 * This package is an SSL implementation written 6 * by Eric Young (eay@cryptsoft.com). 7 * The implementation was written so as to conform with Netscapes SSL. 8 * 9 * This library is free for commercial and non-commercial use as long as 10 * the following conditions are aheared to. The following conditions 11 * apply to all code found in this distribution, be it the RC4, RSA, 12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13 * included with this distribution is covered by the same copyright terms 14 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15 * 16 * Copyright remains Eric Young's, and as such any Copyright notices in 17 * the code are not to be removed. 18 * If this package is used in a product, Eric Young should be given attribution 19 * as the author of the parts of the library used. 20 * This can be in the form of a textual message at program startup or 21 * in documentation (online or textual) provided with the package. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the copyright 27 * notice, this list of conditions and the following disclaimer. 28 * 2. Redistributions in binary form must reproduce the above copyright 29 * notice, this list of conditions and the following disclaimer in the 30 * documentation and/or other materials provided with the distribution. 31 * 3. All advertising materials mentioning features or use of this software 32 * must display the following acknowledgement: 33 * "This product includes cryptographic software written by 34 * Eric Young (eay@cryptsoft.com)" 35 * The word 'cryptographic' can be left out if the rouines from the library 36 * being used are not cryptographic related :-). 37 * 4. If you include any Windows specific code (or a derivative thereof) from 38 * the apps directory (application code) you must include an acknowledgement: 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40 * 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 * The licence and distribution terms for any publically available version or 54 * derivative of this code cannot be changed. i.e. this code cannot simply be 55 * copied and put under another distribution licence 56 * [including the GNU Public Licence.] 57 */ 58 59#include <assert.h> 60#include <stdio.h> 61#include <string.h> 62 63#include <openssl/opensslconf.h> 64 65#include "bn_local.h" 66 67/* Here follows specialised variants of bn_add_words() and 68 bn_sub_words(). They have the property performing operations on 69 arrays of different sizes. The sizes of those arrays is expressed through 70 cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl, 71 which is the delta between the two lengths, calculated as len(a)-len(b). 72 All lengths are the number of BN_ULONGs... For the operations that require 73 a result array as parameter, it must have the length cl+abs(dl). 74 These functions should probably end up in bn_asm.c as soon as there are 75 assembler counterparts for the systems that use assembler files. */ 76 77BN_ULONG 78bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl, 79 int dl) 80{ 81 BN_ULONG c, l, t; 82 83 assert(cl >= 0); 84 c = bn_add_words(r, a, b, cl); 85 86 if (dl == 0) 87 return c; 88 89 r += cl; 90 a += cl; 91 b += cl; 92 93 if (dl < 0) { 94 int save_dl = dl; 95 while (c) { 96 l = (c + b[0]) & BN_MASK2; 97 c = (l < c); 98 r[0] = l; 99 if (++dl >= 0) 100 break; 101 102 l = (c + b[1]) & BN_MASK2; 103 c = (l < c); 104 r[1] = l; 105 if (++dl >= 0) 106 break; 107 108 l = (c + b[2]) & BN_MASK2; 109 c = (l < c); 110 r[2] = l; 111 if (++dl >= 0) 112 break; 113 114 l = (c + b[3]) & BN_MASK2; 115 c = (l < c); 116 r[3] = l; 117 if (++dl >= 0) 118 break; 119 120 save_dl = dl; 121 b += 4; 122 r += 4; 123 } 124 if (dl < 0) { 125 if (save_dl < dl) { 126 switch (dl - save_dl) { 127 case 1: 128 r[1] = b[1]; 129 if (++dl >= 0) 130 break; 131 case 2: 132 r[2] = b[2]; 133 if (++dl >= 0) 134 break; 135 case 3: 136 r[3] = b[3]; 137 if (++dl >= 0) 138 break; 139 } 140 b += 4; 141 r += 4; 142 } 143 } 144 if (dl < 0) { 145 for (;;) { 146 r[0] = b[0]; 147 if (++dl >= 0) 148 break; 149 r[1] = b[1]; 150 if (++dl >= 0) 151 break; 152 r[2] = b[2]; 153 if (++dl >= 0) 154 break; 155 r[3] = b[3]; 156 if (++dl >= 0) 157 break; 158 159 b += 4; 160 r += 4; 161 } 162 } 163 } else { 164 int save_dl = dl; 165 while (c) { 166 t = (a[0] + c) & BN_MASK2; 167 c = (t < c); 168 r[0] = t; 169 if (--dl <= 0) 170 break; 171 172 t = (a[1] + c) & BN_MASK2; 173 c = (t < c); 174 r[1] = t; 175 if (--dl <= 0) 176 break; 177 178 t = (a[2] + c) & BN_MASK2; 179 c = (t < c); 180 r[2] = t; 181 if (--dl <= 0) 182 break; 183 184 t = (a[3] + c) & BN_MASK2; 185 c = (t < c); 186 r[3] = t; 187 if (--dl <= 0) 188 break; 189 190 save_dl = dl; 191 a += 4; 192 r += 4; 193 } 194 if (dl > 0) { 195 if (save_dl > dl) { 196 switch (save_dl - dl) { 197 case 1: 198 r[1] = a[1]; 199 if (--dl <= 0) 200 break; 201 case 2: 202 r[2] = a[2]; 203 if (--dl <= 0) 204 break; 205 case 3: 206 r[3] = a[3]; 207 if (--dl <= 0) 208 break; 209 } 210 a += 4; 211 r += 4; 212 } 213 } 214 if (dl > 0) { 215 for (;;) { 216 r[0] = a[0]; 217 if (--dl <= 0) 218 break; 219 r[1] = a[1]; 220 if (--dl <= 0) 221 break; 222 r[2] = a[2]; 223 if (--dl <= 0) 224 break; 225 r[3] = a[3]; 226 if (--dl <= 0) 227 break; 228 229 a += 4; 230 r += 4; 231 } 232 } 233 } 234 return c; 235} 236 237#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) 238BN_ULONG 239bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl, 240 int dl) 241{ 242 BN_ULONG c, t; 243 244 assert(cl >= 0); 245 c = bn_sub_words(r, a, b, cl); 246 247 if (dl == 0) 248 return c; 249 250 r += cl; 251 a += cl; 252 b += cl; 253 254 if (dl < 0) { 255 for (;;) { 256 t = b[0]; 257 r[0] = (0 - t - c) & BN_MASK2; 258 if (t != 0) 259 c = 1; 260 if (++dl >= 0) 261 break; 262 263 t = b[1]; 264 r[1] = (0 - t - c) & BN_MASK2; 265 if (t != 0) 266 c = 1; 267 if (++dl >= 0) 268 break; 269 270 t = b[2]; 271 r[2] = (0 - t - c) & BN_MASK2; 272 if (t != 0) 273 c = 1; 274 if (++dl >= 0) 275 break; 276 277 t = b[3]; 278 r[3] = (0 - t - c) & BN_MASK2; 279 if (t != 0) 280 c = 1; 281 if (++dl >= 0) 282 break; 283 284 b += 4; 285 r += 4; 286 } 287 } else { 288 int save_dl = dl; 289 while (c) { 290 t = a[0]; 291 r[0] = (t - c) & BN_MASK2; 292 if (t != 0) 293 c = 0; 294 if (--dl <= 0) 295 break; 296 297 t = a[1]; 298 r[1] = (t - c) & BN_MASK2; 299 if (t != 0) 300 c = 0; 301 if (--dl <= 0) 302 break; 303 304 t = a[2]; 305 r[2] = (t - c) & BN_MASK2; 306 if (t != 0) 307 c = 0; 308 if (--dl <= 0) 309 break; 310 311 t = a[3]; 312 r[3] = (t - c) & BN_MASK2; 313 if (t != 0) 314 c = 0; 315 if (--dl <= 0) 316 break; 317 318 save_dl = dl; 319 a += 4; 320 r += 4; 321 } 322 if (dl > 0) { 323 if (save_dl > dl) { 324 switch (save_dl - dl) { 325 case 1: 326 r[1] = a[1]; 327 if (--dl <= 0) 328 break; 329 case 2: 330 r[2] = a[2]; 331 if (--dl <= 0) 332 break; 333 case 3: 334 r[3] = a[3]; 335 if (--dl <= 0) 336 break; 337 } 338 a += 4; 339 r += 4; 340 } 341 } 342 if (dl > 0) { 343 for (;;) { 344 r[0] = a[0]; 345 if (--dl <= 0) 346 break; 347 r[1] = a[1]; 348 if (--dl <= 0) 349 break; 350 r[2] = a[2]; 351 if (--dl <= 0) 352 break; 353 r[3] = a[3]; 354 if (--dl <= 0) 355 break; 356 357 a += 4; 358 r += 4; 359 } 360 } 361 } 362 return c; 363} 364#endif 365 366void 367bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) 368{ 369 BN_ULONG *rr; 370 371 372 if (na < nb) { 373 int itmp; 374 BN_ULONG *ltmp; 375 376 itmp = na; 377 na = nb; 378 nb = itmp; 379 ltmp = a; 380 a = b; 381 b = ltmp; 382 383 } 384 rr = &(r[na]); 385 if (nb <= 0) { 386 (void)bn_mul_words(r, a, na, 0); 387 return; 388 } else 389 rr[0] = bn_mul_words(r, a, na, b[0]); 390 391 for (;;) { 392 if (--nb <= 0) 393 return; 394 rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]); 395 if (--nb <= 0) 396 return; 397 rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]); 398 if (--nb <= 0) 399 return; 400 rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]); 401 if (--nb <= 0) 402 return; 403 rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]); 404 rr += 4; 405 r += 4; 406 b += 4; 407 } 408} 409 410void 411bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) 412{ 413 bn_mul_words(r, a, n, b[0]); 414 415 for (;;) { 416 if (--n <= 0) 417 return; 418 bn_mul_add_words(&(r[1]), a, n, b[1]); 419 if (--n <= 0) 420 return; 421 bn_mul_add_words(&(r[2]), a, n, b[2]); 422 if (--n <= 0) 423 return; 424 bn_mul_add_words(&(r[3]), a, n, b[3]); 425 if (--n <= 0) 426 return; 427 bn_mul_add_words(&(r[4]), a, n, b[4]); 428 r += 4; 429 b += 4; 430 } 431} 432 433#ifdef BN_RECURSION 434/* a and b must be the same size, which is n2. 435 * r needs to be n2 words and t needs to be n2*2 436 * l is the low words of the output. 437 * t needs to be n2*3 438 */ 439void 440bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, 441 BN_ULONG *t) 442{ 443 int i, n; 444 int c1, c2; 445 int neg, oneg, zero; 446 BN_ULONG ll, lc, *lp, *mp; 447 448 n = n2 / 2; 449 450 /* Calculate (al-ah)*(bh-bl) */ 451 neg = zero = 0; 452 c1 = bn_cmp_words(&(a[0]), &(a[n]), n); 453 c2 = bn_cmp_words(&(b[n]), &(b[0]), n); 454 switch (c1 * 3 + c2) { 455 case -4: 456 bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n); 457 bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n); 458 break; 459 case -3: 460 zero = 1; 461 break; 462 case -2: 463 bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n); 464 bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n); 465 neg = 1; 466 break; 467 case -1: 468 case 0: 469 case 1: 470 zero = 1; 471 break; 472 case 2: 473 bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n); 474 bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n); 475 neg = 1; 476 break; 477 case 3: 478 zero = 1; 479 break; 480 case 4: 481 bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n); 482 bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n); 483 break; 484 } 485 486 oneg = neg; 487 /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */ 488 /* r[10] = (a[1]*b[1]) */ 489# ifdef BN_MUL_COMBA 490 if (n == 8) { 491 bn_mul_comba8(&(t[0]), &(r[0]), &(r[n])); 492 bn_mul_comba8(r, &(a[n]), &(b[n])); 493 } else 494# endif 495 { 496 bn_mul_recursive(&(t[0]), &(r[0]), &(r[n]), n, 0, 0, &(t[n2])); 497 bn_mul_recursive(r, &(a[n]), &(b[n]), n, 0, 0, &(t[n2])); 498 } 499 500 /* s0 == low(al*bl) 501 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl) 502 * We know s0 and s1 so the only unknown is high(al*bl) 503 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl)) 504 * high(al*bl) == s1 - (r[0]+l[0]+t[0]) 505 */ 506 if (l != NULL) { 507 lp = &(t[n2 + n]); 508 c1 = (int)(bn_add_words(lp, &(r[0]), &(l[0]), n)); 509 } else { 510 c1 = 0; 511 lp = &(r[0]); 512 } 513 514 if (neg) 515 neg = (int)(bn_sub_words(&(t[n2]), lp, &(t[0]), n)); 516 else { 517 bn_add_words(&(t[n2]), lp, &(t[0]), n); 518 neg = 0; 519 } 520 521 if (l != NULL) { 522 bn_sub_words(&(t[n2 + n]), &(l[n]), &(t[n2]), n); 523 } else { 524 lp = &(t[n2 + n]); 525 mp = &(t[n2]); 526 for (i = 0; i < n; i++) 527 lp[i] = ((~mp[i]) + 1) & BN_MASK2; 528 } 529 530 /* s[0] = low(al*bl) 531 * t[3] = high(al*bl) 532 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign 533 * r[10] = (a[1]*b[1]) 534 */ 535 /* R[10] = al*bl 536 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0]) 537 * R[32] = ah*bh 538 */ 539 /* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow) 540 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow) 541 * R[3]=r[1]+(carry/borrow) 542 */ 543 if (l != NULL) { 544 lp = &(t[n2]); 545 c1 = (int)(bn_add_words(lp, &(t[n2 + n]), &(l[0]), n)); 546 } else { 547 lp = &(t[n2 + n]); 548 c1 = 0; 549 } 550 c1 += (int)(bn_add_words(&(t[n2]), lp, &(r[0]), n)); 551 if (oneg) 552 c1 -= (int)(bn_sub_words(&(t[n2]), &(t[n2]), &(t[0]), n)); 553 else 554 c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), &(t[0]), n)); 555 556 c2 = (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n2 + n]), n)); 557 c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(r[n]), n)); 558 if (oneg) 559 c2 -= (int)(bn_sub_words(&(r[0]), &(r[0]), &(t[n]), n)); 560 else 561 c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n]), n)); 562 563 if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */ 564 { 565 i = 0; 566 if (c1 > 0) { 567 lc = c1; 568 do { 569 ll = (r[i] + lc) & BN_MASK2; 570 r[i++] = ll; 571 lc = (lc > ll); 572 } while (lc); 573 } else { 574 lc = -c1; 575 do { 576 ll = r[i]; 577 r[i++] = (ll - lc) & BN_MASK2; 578 lc = (lc > ll); 579 } while (lc); 580 } 581 } 582 if (c2 != 0) /* Add starting at r[1] */ 583 { 584 i = n; 585 if (c2 > 0) { 586 lc = c2; 587 do { 588 ll = (r[i] + lc) & BN_MASK2; 589 r[i++] = ll; 590 lc = (lc > ll); 591 } while (lc); 592 } else { 593 lc = -c2; 594 do { 595 ll = r[i]; 596 r[i++] = (ll - lc) & BN_MASK2; 597 lc = (lc > ll); 598 } while (lc); 599 } 600 } 601} 602 603/* Karatsuba recursive multiplication algorithm 604 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */ 605 606/* r is 2*n2 words in size, 607 * a and b are both n2 words in size. 608 * n2 must be a power of 2. 609 * We multiply and return the result. 610 * t must be 2*n2 words in size 611 * We calculate 612 * a[0]*b[0] 613 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0]) 614 * a[1]*b[1] 615 */ 616/* dnX may not be positive, but n2/2+dnX has to be */ 617void 618bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna, 619 int dnb, BN_ULONG *t) 620{ 621 int n = n2 / 2, c1, c2; 622 int tna = n + dna, tnb = n + dnb; 623 unsigned int neg, zero; 624 BN_ULONG ln, lo, *p; 625 626# ifdef BN_MUL_COMBA 627# if 0 628 if (n2 == 4) { 629 bn_mul_comba4(r, a, b); 630 return; 631 } 632# endif 633 /* Only call bn_mul_comba 8 if n2 == 8 and the 634 * two arrays are complete [steve] 635 */ 636 if (n2 == 8 && dna == 0 && dnb == 0) { 637 bn_mul_comba8(r, a, b); 638 return; 639 } 640# endif /* BN_MUL_COMBA */ 641 /* Else do normal multiply */ 642 if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) { 643 bn_mul_normal(r, a, n2 + dna, b, n2 + dnb); 644 if ((dna + dnb) < 0) 645 memset(&r[2*n2 + dna + dnb], 0, 646 sizeof(BN_ULONG) * -(dna + dnb)); 647 return; 648 } 649 /* r=(a[0]-a[1])*(b[1]-b[0]) */ 650 c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna); 651 c2 = bn_cmp_part_words(&(b[n]), b,tnb, tnb - n); 652 zero = neg = 0; 653 switch (c1 * 3 + c2) { 654 case -4: 655 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 656 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 657 break; 658 case -3: 659 zero = 1; 660 break; 661 case -2: 662 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 663 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ 664 neg = 1; 665 break; 666 case -1: 667 case 0: 668 case 1: 669 zero = 1; 670 break; 671 case 2: 672 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ 673 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 674 neg = 1; 675 break; 676 case 3: 677 zero = 1; 678 break; 679 case 4: 680 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); 681 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); 682 break; 683 } 684 685# ifdef BN_MUL_COMBA 686 if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take 687 extra args to do this well */ 688 { 689 if (!zero) 690 bn_mul_comba4(&(t[n2]), t, &(t[n])); 691 else 692 memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG)); 693 694 bn_mul_comba4(r, a, b); 695 bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n])); 696 } else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could 697 take extra args to do this 698 well */ 699 { 700 if (!zero) 701 bn_mul_comba8(&(t[n2]), t, &(t[n])); 702 else 703 memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG)); 704 705 bn_mul_comba8(r, a, b); 706 bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n])); 707 } else 708# endif /* BN_MUL_COMBA */ 709 { 710 p = &(t[n2 * 2]); 711 if (!zero) 712 bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p); 713 else 714 memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG)); 715 bn_mul_recursive(r, a, b, n, 0, 0, p); 716 bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p); 717 } 718 719 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign 720 * r[10] holds (a[0]*b[0]) 721 * r[32] holds (b[1]*b[1]) 722 */ 723 724 c1 = (int)(bn_add_words(t, r, &(r[n2]), n2)); 725 726 if (neg) /* if t[32] is negative */ 727 { 728 c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2)); 729 } else { 730 /* Might have a carry */ 731 c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2)); 732 } 733 734 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) 735 * r[10] holds (a[0]*b[0]) 736 * r[32] holds (b[1]*b[1]) 737 * c1 holds the carry bits 738 */ 739 c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2)); 740 if (c1) { 741 p = &(r[n + n2]); 742 lo= *p; 743 ln = (lo + c1) & BN_MASK2; 744 *p = ln; 745 746 /* The overflow will stop before we over write 747 * words we should not overwrite */ 748 if (ln < (BN_ULONG)c1) { 749 do { 750 p++; 751 lo= *p; 752 ln = (lo + 1) & BN_MASK2; 753 *p = ln; 754 } while (ln == 0); 755 } 756 } 757} 758 759/* n+tn is the word length 760 * t needs to be n*4 is size, as does r */ 761/* tnX may not be negative but less than n */ 762void 763bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna, 764 int tnb, BN_ULONG *t) 765{ 766 int i, j, n2 = n * 2; 767 int c1, c2, neg; 768 BN_ULONG ln, lo, *p; 769 770 if (n < 8) { 771 bn_mul_normal(r, a, n + tna, b, n + tnb); 772 return; 773 } 774 775 /* r=(a[0]-a[1])*(b[1]-b[0]) */ 776 c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna); 777 c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n); 778 neg = 0; 779 switch (c1 * 3 + c2) { 780 case -4: 781 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 782 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 783 break; 784 case -3: 785 /* break; */ 786 case -2: 787 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 788 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ 789 neg = 1; 790 break; 791 case -1: 792 case 0: 793 case 1: 794 /* break; */ 795 case 2: 796 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ 797 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 798 neg = 1; 799 break; 800 case 3: 801 /* break; */ 802 case 4: 803 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); 804 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); 805 break; 806 } 807 /* The zero case isn't yet implemented here. The speedup 808 would probably be negligible. */ 809# if 0 810 if (n == 4) { 811 bn_mul_comba4(&(t[n2]), t, &(t[n])); 812 bn_mul_comba4(r, a, b); 813 bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn); 814 memset(&(r[n2 + tn * 2]), 0, sizeof(BN_ULONG) * (n2 - tn * 2)); 815 } else 816# endif 817 if (n == 8) { 818 bn_mul_comba8(&(t[n2]), t, &(t[n])); 819 bn_mul_comba8(r, a, b); 820 bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb); 821 memset(&(r[n2 + tna + tnb]), 0, 822 sizeof(BN_ULONG) * (n2 - tna - tnb)); 823 } else { 824 p = &(t[n2*2]); 825 bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p); 826 bn_mul_recursive(r, a, b, n, 0, 0, p); 827 i = n / 2; 828 /* If there is only a bottom half to the number, 829 * just do it */ 830 if (tna > tnb) 831 j = tna - i; 832 else 833 j = tnb - i; 834 if (j == 0) { 835 bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), 836 i, tna - i, tnb - i, p); 837 memset(&(r[n2 + i * 2]), 0, 838 sizeof(BN_ULONG) * (n2 - i * 2)); 839 } 840 else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */ 841 { 842 bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]), 843 i, tna - i, tnb - i, p); 844 memset(&(r[n2 + tna + tnb]), 0, 845 sizeof(BN_ULONG) * (n2 - tna - tnb)); 846 } 847 else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ 848 { 849 memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2); 850 if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL && 851 tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) { 852 bn_mul_normal(&(r[n2]), &(a[n]), tna, 853 &(b[n]), tnb); 854 } else { 855 for (;;) { 856 i /= 2; 857 /* these simplified conditions work 858 * exclusively because difference 859 * between tna and tnb is 1 or 0 */ 860 if (i < tna || i < tnb) { 861 bn_mul_part_recursive(&(r[n2]), 862 &(a[n]), &(b[n]), i, 863 tna - i, tnb - i, p); 864 break; 865 } else if (i == tna || i == tnb) { 866 bn_mul_recursive(&(r[n2]), 867 &(a[n]), &(b[n]), i, 868 tna - i, tnb - i, p); 869 break; 870 } 871 } 872 } 873 } 874 } 875 876 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign 877 * r[10] holds (a[0]*b[0]) 878 * r[32] holds (b[1]*b[1]) 879 */ 880 881 c1 = (int)(bn_add_words(t, r,&(r[n2]), n2)); 882 883 if (neg) /* if t[32] is negative */ 884 { 885 c1 -= (int)(bn_sub_words(&(t[n2]), t,&(t[n2]), n2)); 886 } else { 887 /* Might have a carry */ 888 c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2)); 889 } 890 891 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) 892 * r[10] holds (a[0]*b[0]) 893 * r[32] holds (b[1]*b[1]) 894 * c1 holds the carry bits 895 */ 896 c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2)); 897 if (c1) { 898 p = &(r[n + n2]); 899 lo= *p; 900 ln = (lo + c1)&BN_MASK2; 901 *p = ln; 902 903 /* The overflow will stop before we over write 904 * words we should not overwrite */ 905 if (ln < (BN_ULONG)c1) { 906 do { 907 p++; 908 lo= *p; 909 ln = (lo + 1) & BN_MASK2; 910 *p = ln; 911 } while (ln == 0); 912 } 913 } 914} 915 916/* a and b must be the same size, which is n2. 917 * r needs to be n2 words and t needs to be n2*2 918 */ 919void 920bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, BN_ULONG *t) 921{ 922 int n = n2 / 2; 923 924 925 bn_mul_recursive(r, a, b, n, 0, 0, &(t[0])); 926 if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) { 927 bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2])); 928 bn_add_words(&(r[n]), &(r[n]), &(t[0]), n); 929 bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2])); 930 bn_add_words(&(r[n]), &(r[n]), &(t[0]), n); 931 } else { 932 bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n); 933 bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n); 934 bn_add_words(&(r[n]), &(r[n]), &(t[0]), n); 935 bn_add_words(&(r[n]), &(r[n]), &(t[n]), n); 936 } 937} 938#endif /* BN_RECURSION */ 939 940int 941BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) 942{ 943 int ret = 0; 944 int top, al, bl; 945 BIGNUM *rr; 946#if defined(BN_MUL_COMBA) || defined(BN_RECURSION) 947 int i; 948#endif 949#ifdef BN_RECURSION 950 BIGNUM *t = NULL; 951 int j = 0, k; 952#endif 953 954 955 956 al = a->top; 957 bl = b->top; 958 959 if ((al == 0) || (bl == 0)) { 960 BN_zero(r); 961 return (1); 962 } 963 top = al + bl; 964 965 BN_CTX_start(ctx); 966 if ((r == a) || (r == b)) { 967 if ((rr = BN_CTX_get(ctx)) == NULL) 968 goto err; 969 } else 970 rr = r; 971 rr->neg = a->neg ^ b->neg; 972 973#if defined(BN_MUL_COMBA) || defined(BN_RECURSION) 974 i = al - bl; 975#endif 976#ifdef BN_MUL_COMBA 977 if (i == 0) { 978# if 0 979 if (al == 4) { 980 if (!bn_wexpand(rr, 8)) 981 goto err; 982 rr->top = 8; 983 bn_mul_comba4(rr->d, a->d, b->d); 984 goto end; 985 } 986# endif 987 if (al == 8) { 988 if (!bn_wexpand(rr, 16)) 989 goto err; 990 rr->top = 16; 991 bn_mul_comba8(rr->d, a->d, b->d); 992 goto end; 993 } 994 } 995#endif /* BN_MUL_COMBA */ 996#ifdef BN_RECURSION 997 if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) { 998 if (i >= -1 && i <= 1) { 999 /* Find out the power of two lower or equal 1000 to the longest of the two numbers */ 1001 if (i >= 0) { 1002 j = BN_num_bits_word((BN_ULONG)al); 1003 } 1004 if (i == -1) { 1005 j = BN_num_bits_word((BN_ULONG)bl); 1006 } 1007 j = 1 << (j - 1); 1008 assert(j <= al || j <= bl); 1009 k = j + j; 1010 if ((t = BN_CTX_get(ctx)) == NULL) 1011 goto err; 1012 if (al > j || bl > j) { 1013 if (!bn_wexpand(t, k * 4)) 1014 goto err; 1015 if (!bn_wexpand(rr, k * 4)) 1016 goto err; 1017 bn_mul_part_recursive(rr->d, a->d, b->d, 1018 j, al - j, bl - j, t->d); 1019 } 1020 else /* al <= j || bl <= j */ 1021 { 1022 if (!bn_wexpand(t, k * 2)) 1023 goto err; 1024 if (!bn_wexpand(rr, k * 2)) 1025 goto err; 1026 bn_mul_recursive(rr->d, a->d, b->d, 1027 j, al - j, bl - j, t->d); 1028 } 1029 rr->top = top; 1030 goto end; 1031 } 1032#if 0 1033 if (i == 1 && !BN_get_flags(b, BN_FLG_STATIC_DATA)) { 1034 BIGNUM *tmp_bn = (BIGNUM *)b; 1035 if (!bn_wexpand(tmp_bn, al)) 1036 goto err; 1037 tmp_bn->d[bl] = 0; 1038 bl++; 1039 i--; 1040 } else if (i == -1 && !BN_get_flags(a, BN_FLG_STATIC_DATA)) { 1041 BIGNUM *tmp_bn = (BIGNUM *)a; 1042 if (!bn_wexpand(tmp_bn, bl)) 1043 goto err; 1044 tmp_bn->d[al] = 0; 1045 al++; 1046 i++; 1047 } 1048 if (i == 0) { 1049 /* symmetric and > 4 */ 1050 /* 16 or larger */ 1051 j = BN_num_bits_word((BN_ULONG)al); 1052 j = 1 << (j - 1); 1053 k = j + j; 1054 if ((t = BN_CTX_get(ctx)) == NULL) 1055 goto err; 1056 if (al == j) /* exact multiple */ 1057 { 1058 if (!bn_wexpand(t, k * 2)) 1059 goto err; 1060 if (!bn_wexpand(rr, k * 2)) 1061 goto err; 1062 bn_mul_recursive(rr->d, a->d, b->d, al, t->d); 1063 } else { 1064 if (!bn_wexpand(t, k * 4)) 1065 goto err; 1066 if (!bn_wexpand(rr, k * 4)) 1067 goto err; 1068 bn_mul_part_recursive(rr->d, a->d, b->d, 1069 al - j, j, t->d); 1070 } 1071 rr->top = top; 1072 goto end; 1073 } 1074#endif 1075 } 1076#endif /* BN_RECURSION */ 1077 if (!bn_wexpand(rr, top)) 1078 goto err; 1079 rr->top = top; 1080 bn_mul_normal(rr->d, a->d, al, b->d, bl); 1081 1082#if defined(BN_MUL_COMBA) || defined(BN_RECURSION) 1083end: 1084#endif 1085 bn_correct_top(rr); 1086 if (r != rr) 1087 BN_copy(r, rr); 1088 ret = 1; 1089err: 1090 BN_CTX_end(ctx); 1091 return (ret); 1092} 1093