bn_mul.c revision 1.33
1/* $OpenBSD: bn_mul.c,v 1.33 2023/02/15 18:10:16 jsing Exp $ */ 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3 * All rights reserved. 4 * 5 * This package is an SSL implementation written 6 * by Eric Young (eay@cryptsoft.com). 7 * The implementation was written so as to conform with Netscapes SSL. 8 * 9 * This library is free for commercial and non-commercial use as long as 10 * the following conditions are aheared to. The following conditions 11 * apply to all code found in this distribution, be it the RC4, RSA, 12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13 * included with this distribution is covered by the same copyright terms 14 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15 * 16 * Copyright remains Eric Young's, and as such any Copyright notices in 17 * the code are not to be removed. 18 * If this package is used in a product, Eric Young should be given attribution 19 * as the author of the parts of the library used. 20 * This can be in the form of a textual message at program startup or 21 * in documentation (online or textual) provided with the package. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the copyright 27 * notice, this list of conditions and the following disclaimer. 28 * 2. Redistributions in binary form must reproduce the above copyright 29 * notice, this list of conditions and the following disclaimer in the 30 * documentation and/or other materials provided with the distribution. 31 * 3. All advertising materials mentioning features or use of this software 32 * must display the following acknowledgement: 33 * "This product includes cryptographic software written by 34 * Eric Young (eay@cryptsoft.com)" 35 * The word 'cryptographic' can be left out if the rouines from the library 36 * being used are not cryptographic related :-). 37 * 4. If you include any Windows specific code (or a derivative thereof) from 38 * the apps directory (application code) you must include an acknowledgement: 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40 * 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 * The licence and distribution terms for any publically available version or 54 * derivative of this code cannot be changed. i.e. this code cannot simply be 55 * copied and put under another distribution licence 56 * [including the GNU Public Licence.] 57 */ 58 59#include <assert.h> 60#include <stdio.h> 61#include <string.h> 62 63#include <openssl/opensslconf.h> 64 65#include "bn_arch.h" 66#include "bn_internal.h" 67#include "bn_local.h" 68 69/* 70 * bn_mul_comba4() computes r[] = a[] * b[] using Comba multiplication 71 * (https://everything2.com/title/Comba+multiplication), where a and b are both 72 * four word arrays, producing an eight word array result. 73 */ 74#ifndef HAVE_BN_MUL_COMBA4 75void 76bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 77{ 78 BN_ULONG c0, c1, c2; 79 80 bn_mulw_addtw(a[0], b[0], 0, 0, 0, &c2, &c1, &r[0]); 81 82 bn_mulw_addtw(a[0], b[1], 0, c2, c1, &c2, &c1, &c0); 83 bn_mulw_addtw(a[1], b[0], c2, c1, c0, &c2, &c1, &r[1]); 84 85 bn_mulw_addtw(a[2], b[0], 0, c2, c1, &c2, &c1, &c0); 86 bn_mulw_addtw(a[1], b[1], c2, c1, c0, &c2, &c1, &c0); 87 bn_mulw_addtw(a[0], b[2], c2, c1, c0, &c2, &c1, &r[2]); 88 89 bn_mulw_addtw(a[0], b[3], 0, c2, c1, &c2, &c1, &c0); 90 bn_mulw_addtw(a[1], b[2], c2, c1, c0, &c2, &c1, &c0); 91 bn_mulw_addtw(a[2], b[1], c2, c1, c0, &c2, &c1, &c0); 92 bn_mulw_addtw(a[3], b[0], c2, c1, c0, &c2, &c1, &r[3]); 93 94 bn_mulw_addtw(a[3], b[1], 0, c2, c1, &c2, &c1, &c0); 95 bn_mulw_addtw(a[2], b[2], c2, c1, c0, &c2, &c1, &c0); 96 bn_mulw_addtw(a[1], b[3], c2, c1, c0, &c2, &c1, &r[4]); 97 98 bn_mulw_addtw(a[2], b[3], 0, c2, c1, &c2, &c1, &c0); 99 bn_mulw_addtw(a[3], b[2], c2, c1, c0, &c2, &c1, &r[5]); 100 101 bn_mulw_addtw(a[3], b[3], 0, c2, c1, &c2, &r[7], &r[6]); 102} 103#endif 104 105/* 106 * bn_mul_comba8() computes r[] = a[] * b[] using Comba multiplication 107 * (https://everything2.com/title/Comba+multiplication), where a and b are both 108 * eight word arrays, producing a 16 word array result. 109 */ 110#ifndef HAVE_BN_MUL_COMBA8 111void 112bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 113{ 114 BN_ULONG c0, c1, c2; 115 116 bn_mulw_addtw(a[0], b[0], 0, 0, 0, &c2, &c1, &r[0]); 117 118 bn_mulw_addtw(a[0], b[1], 0, c2, c1, &c2, &c1, &c0); 119 bn_mulw_addtw(a[1], b[0], c2, c1, c0, &c2, &c1, &r[1]); 120 121 bn_mulw_addtw(a[2], b[0], 0, c2, c1, &c2, &c1, &c0); 122 bn_mulw_addtw(a[1], b[1], c2, c1, c0, &c2, &c1, &c0); 123 bn_mulw_addtw(a[0], b[2], c2, c1, c0, &c2, &c1, &r[2]); 124 125 bn_mulw_addtw(a[0], b[3], 0, c2, c1, &c2, &c1, &c0); 126 bn_mulw_addtw(a[1], b[2], c2, c1, c0, &c2, &c1, &c0); 127 bn_mulw_addtw(a[2], b[1], c2, c1, c0, &c2, &c1, &c0); 128 bn_mulw_addtw(a[3], b[0], c2, c1, c0, &c2, &c1, &r[3]); 129 130 bn_mulw_addtw(a[4], b[0], 0, c2, c1, &c2, &c1, &c0); 131 bn_mulw_addtw(a[3], b[1], c2, c1, c0, &c2, &c1, &c0); 132 bn_mulw_addtw(a[2], b[2], c2, c1, c0, &c2, &c1, &c0); 133 bn_mulw_addtw(a[1], b[3], c2, c1, c0, &c2, &c1, &c0); 134 bn_mulw_addtw(a[0], b[4], c2, c1, c0, &c2, &c1, &r[4]); 135 136 bn_mulw_addtw(a[0], b[5], 0, c2, c1, &c2, &c1, &c0); 137 bn_mulw_addtw(a[1], b[4], c2, c1, c0, &c2, &c1, &c0); 138 bn_mulw_addtw(a[2], b[3], c2, c1, c0, &c2, &c1, &c0); 139 bn_mulw_addtw(a[3], b[2], c2, c1, c0, &c2, &c1, &c0); 140 bn_mulw_addtw(a[4], b[1], c2, c1, c0, &c2, &c1, &c0); 141 bn_mulw_addtw(a[5], b[0], c2, c1, c0, &c2, &c1, &r[5]); 142 143 bn_mulw_addtw(a[6], b[0], 0, c2, c1, &c2, &c1, &c0); 144 bn_mulw_addtw(a[5], b[1], c2, c1, c0, &c2, &c1, &c0); 145 bn_mulw_addtw(a[4], b[2], c2, c1, c0, &c2, &c1, &c0); 146 bn_mulw_addtw(a[3], b[3], c2, c1, c0, &c2, &c1, &c0); 147 bn_mulw_addtw(a[2], b[4], c2, c1, c0, &c2, &c1, &c0); 148 bn_mulw_addtw(a[1], b[5], c2, c1, c0, &c2, &c1, &c0); 149 bn_mulw_addtw(a[0], b[6], c2, c1, c0, &c2, &c1, &r[6]); 150 151 bn_mulw_addtw(a[0], b[7], 0, c2, c1, &c2, &c1, &c0); 152 bn_mulw_addtw(a[1], b[6], c2, c1, c0, &c2, &c1, &c0); 153 bn_mulw_addtw(a[2], b[5], c2, c1, c0, &c2, &c1, &c0); 154 bn_mulw_addtw(a[3], b[4], c2, c1, c0, &c2, &c1, &c0); 155 bn_mulw_addtw(a[4], b[3], c2, c1, c0, &c2, &c1, &c0); 156 bn_mulw_addtw(a[5], b[2], c2, c1, c0, &c2, &c1, &c0); 157 bn_mulw_addtw(a[6], b[1], c2, c1, c0, &c2, &c1, &c0); 158 bn_mulw_addtw(a[7], b[0], c2, c1, c0, &c2, &c1, &r[7]); 159 160 bn_mulw_addtw(a[7], b[1], 0, c2, c1, &c2, &c1, &c0); 161 bn_mulw_addtw(a[6], b[2], c2, c1, c0, &c2, &c1, &c0); 162 bn_mulw_addtw(a[5], b[3], c2, c1, c0, &c2, &c1, &c0); 163 bn_mulw_addtw(a[4], b[4], c2, c1, c0, &c2, &c1, &c0); 164 bn_mulw_addtw(a[3], b[5], c2, c1, c0, &c2, &c1, &c0); 165 bn_mulw_addtw(a[2], b[6], c2, c1, c0, &c2, &c1, &c0); 166 bn_mulw_addtw(a[1], b[7], c2, c1, c0, &c2, &c1, &r[8]); 167 168 bn_mulw_addtw(a[2], b[7], 0, c2, c1, &c2, &c1, &c0); 169 bn_mulw_addtw(a[3], b[6], c2, c1, c0, &c2, &c1, &c0); 170 bn_mulw_addtw(a[4], b[5], c2, c1, c0, &c2, &c1, &c0); 171 bn_mulw_addtw(a[5], b[4], c2, c1, c0, &c2, &c1, &c0); 172 bn_mulw_addtw(a[6], b[3], c2, c1, c0, &c2, &c1, &c0); 173 bn_mulw_addtw(a[7], b[2], c2, c1, c0, &c2, &c1, &r[9]); 174 175 bn_mulw_addtw(a[7], b[3], 0, c2, c1, &c2, &c1, &c0); 176 bn_mulw_addtw(a[6], b[4], c2, c1, c0, &c2, &c1, &c0); 177 bn_mulw_addtw(a[5], b[5], c2, c1, c0, &c2, &c1, &c0); 178 bn_mulw_addtw(a[4], b[6], c2, c1, c0, &c2, &c1, &c0); 179 bn_mulw_addtw(a[3], b[7], c2, c1, c0, &c2, &c1, &r[10]); 180 181 bn_mulw_addtw(a[4], b[7], 0, c2, c1, &c2, &c1, &c0); 182 bn_mulw_addtw(a[5], b[6], c2, c1, c0, &c2, &c1, &c0); 183 bn_mulw_addtw(a[6], b[5], c2, c1, c0, &c2, &c1, &c0); 184 bn_mulw_addtw(a[7], b[4], c2, c1, c0, &c2, &c1, &r[11]); 185 186 bn_mulw_addtw(a[7], b[5], 0, c2, c1, &c2, &c1, &c0); 187 bn_mulw_addtw(a[6], b[6], c2, c1, c0, &c2, &c1, &c0); 188 bn_mulw_addtw(a[5], b[7], c2, c1, c0, &c2, &c1, &r[12]); 189 190 bn_mulw_addtw(a[6], b[7], 0, c2, c1, &c2, &c1, &c0); 191 bn_mulw_addtw(a[7], b[6], c2, c1, c0, &c2, &c1, &r[13]); 192 193 bn_mulw_addtw(a[7], b[7], 0, c2, c1, &c2, &r[15], &r[14]); 194} 195#endif 196 197/* 198 * bn_mul_words() computes (carry:r[i]) = a[i] * w + carry, where a is an array 199 * of words and w is a single word. This should really be called bn_mulw_words() 200 * since only one input is an array. This is used as a step in the multiplication 201 * of word arrays. 202 */ 203#ifndef HAVE_BN_MUL_WORDS 204BN_ULONG 205bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) 206{ 207 BN_ULONG carry = 0; 208 209 assert(num >= 0); 210 if (num <= 0) 211 return 0; 212 213#ifndef OPENSSL_SMALL_FOOTPRINT 214 while (num & ~3) { 215 bn_mulw_addw(a[0], w, carry, &carry, &r[0]); 216 bn_mulw_addw(a[1], w, carry, &carry, &r[1]); 217 bn_mulw_addw(a[2], w, carry, &carry, &r[2]); 218 bn_mulw_addw(a[3], w, carry, &carry, &r[3]); 219 a += 4; 220 r += 4; 221 num -= 4; 222 } 223#endif 224 while (num) { 225 bn_mulw_addw(a[0], w, carry, &carry, &r[0]); 226 a++; 227 r++; 228 num--; 229 } 230 return carry; 231} 232#endif 233 234/* 235 * bn_mul_add_words() computes (carry:r[i]) = a[i] * w + r[i] + carry, where 236 * a is an array of words and w is a single word. This should really be called 237 * bn_mulw_add_words() since only one input is an array. This is used as a step 238 * in the multiplication of word arrays. 239 */ 240#ifndef HAVE_BN_MUL_ADD_WORDS 241BN_ULONG 242bn_mul_add_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) 243{ 244 BN_ULONG carry = 0; 245 246 assert(num >= 0); 247 if (num <= 0) 248 return 0; 249 250#ifndef OPENSSL_SMALL_FOOTPRINT 251 while (num & ~3) { 252 bn_mulw_addw_addw(a[0], w, r[0], carry, &carry, &r[0]); 253 bn_mulw_addw_addw(a[1], w, r[1], carry, &carry, &r[1]); 254 bn_mulw_addw_addw(a[2], w, r[2], carry, &carry, &r[2]); 255 bn_mulw_addw_addw(a[3], w, r[3], carry, &carry, &r[3]); 256 a += 4; 257 r += 4; 258 num -= 4; 259 } 260#endif 261 while (num) { 262 bn_mulw_addw_addw(a[0], w, r[0], carry, &carry, &r[0]); 263 a++; 264 r++; 265 num--; 266 } 267 268 return carry; 269} 270#endif 271 272#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) 273/* 274 * Here follows a specialised variant of bn_sub_words(), which has the property 275 * performing operations on arrays of different sizes. The sizes of those arrays 276 * is expressed through cl, which is the common length (basically, 277 * min(len(a),len(b))), and dl, which is the delta between the two lengths, 278 * calculated as len(a)-len(b). All lengths are the number of BN_ULONGs. For the 279 * operations that require a result array as parameter, it must have the length 280 * cl+abs(dl). 281 */ 282BN_ULONG 283bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl, 284 int dl) 285{ 286 BN_ULONG c, t; 287 288 assert(cl >= 0); 289 c = bn_sub_words(r, a, b, cl); 290 291 if (dl == 0) 292 return c; 293 294 r += cl; 295 a += cl; 296 b += cl; 297 298 if (dl < 0) { 299 for (;;) { 300 t = b[0]; 301 r[0] = (0 - t - c) & BN_MASK2; 302 if (t != 0) 303 c = 1; 304 if (++dl >= 0) 305 break; 306 307 t = b[1]; 308 r[1] = (0 - t - c) & BN_MASK2; 309 if (t != 0) 310 c = 1; 311 if (++dl >= 0) 312 break; 313 314 t = b[2]; 315 r[2] = (0 - t - c) & BN_MASK2; 316 if (t != 0) 317 c = 1; 318 if (++dl >= 0) 319 break; 320 321 t = b[3]; 322 r[3] = (0 - t - c) & BN_MASK2; 323 if (t != 0) 324 c = 1; 325 if (++dl >= 0) 326 break; 327 328 b += 4; 329 r += 4; 330 } 331 } else { 332 int save_dl = dl; 333 while (c) { 334 t = a[0]; 335 r[0] = (t - c) & BN_MASK2; 336 if (t != 0) 337 c = 0; 338 if (--dl <= 0) 339 break; 340 341 t = a[1]; 342 r[1] = (t - c) & BN_MASK2; 343 if (t != 0) 344 c = 0; 345 if (--dl <= 0) 346 break; 347 348 t = a[2]; 349 r[2] = (t - c) & BN_MASK2; 350 if (t != 0) 351 c = 0; 352 if (--dl <= 0) 353 break; 354 355 t = a[3]; 356 r[3] = (t - c) & BN_MASK2; 357 if (t != 0) 358 c = 0; 359 if (--dl <= 0) 360 break; 361 362 save_dl = dl; 363 a += 4; 364 r += 4; 365 } 366 if (dl > 0) { 367 if (save_dl > dl) { 368 switch (save_dl - dl) { 369 case 1: 370 r[1] = a[1]; 371 if (--dl <= 0) 372 break; 373 case 2: 374 r[2] = a[2]; 375 if (--dl <= 0) 376 break; 377 case 3: 378 r[3] = a[3]; 379 if (--dl <= 0) 380 break; 381 } 382 a += 4; 383 r += 4; 384 } 385 } 386 if (dl > 0) { 387 for (;;) { 388 r[0] = a[0]; 389 if (--dl <= 0) 390 break; 391 r[1] = a[1]; 392 if (--dl <= 0) 393 break; 394 r[2] = a[2]; 395 if (--dl <= 0) 396 break; 397 r[3] = a[3]; 398 if (--dl <= 0) 399 break; 400 401 a += 4; 402 r += 4; 403 } 404 } 405 } 406 return c; 407} 408#endif 409 410void 411bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) 412{ 413 BN_ULONG *rr; 414 415 416 if (na < nb) { 417 int itmp; 418 BN_ULONG *ltmp; 419 420 itmp = na; 421 na = nb; 422 nb = itmp; 423 ltmp = a; 424 a = b; 425 b = ltmp; 426 427 } 428 rr = &(r[na]); 429 if (nb <= 0) { 430 (void)bn_mul_words(r, a, na, 0); 431 return; 432 } else 433 rr[0] = bn_mul_words(r, a, na, b[0]); 434 435 for (;;) { 436 if (--nb <= 0) 437 return; 438 rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]); 439 if (--nb <= 0) 440 return; 441 rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]); 442 if (--nb <= 0) 443 return; 444 rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]); 445 if (--nb <= 0) 446 return; 447 rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]); 448 rr += 4; 449 r += 4; 450 b += 4; 451 } 452} 453 454#ifdef BN_RECURSION 455/* Karatsuba recursive multiplication algorithm 456 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */ 457 458/* r is 2*n2 words in size, 459 * a and b are both n2 words in size. 460 * n2 must be a power of 2. 461 * We multiply and return the result. 462 * t must be 2*n2 words in size 463 * We calculate 464 * a[0]*b[0] 465 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0]) 466 * a[1]*b[1] 467 */ 468/* dnX may not be positive, but n2/2+dnX has to be */ 469void 470bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna, 471 int dnb, BN_ULONG *t) 472{ 473 int n = n2 / 2, c1, c2; 474 int tna = n + dna, tnb = n + dnb; 475 unsigned int neg, zero; 476 BN_ULONG ln, lo, *p; 477 478# ifdef BN_MUL_COMBA 479# if 0 480 if (n2 == 4) { 481 bn_mul_comba4(r, a, b); 482 return; 483 } 484# endif 485 /* Only call bn_mul_comba 8 if n2 == 8 and the 486 * two arrays are complete [steve] 487 */ 488 if (n2 == 8 && dna == 0 && dnb == 0) { 489 bn_mul_comba8(r, a, b); 490 return; 491 } 492# endif /* BN_MUL_COMBA */ 493 /* Else do normal multiply */ 494 if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) { 495 bn_mul_normal(r, a, n2 + dna, b, n2 + dnb); 496 if ((dna + dnb) < 0) 497 memset(&r[2*n2 + dna + dnb], 0, 498 sizeof(BN_ULONG) * -(dna + dnb)); 499 return; 500 } 501 /* r=(a[0]-a[1])*(b[1]-b[0]) */ 502 c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna); 503 c2 = bn_cmp_part_words(&(b[n]), b,tnb, tnb - n); 504 zero = neg = 0; 505 switch (c1 * 3 + c2) { 506 case -4: 507 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 508 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 509 break; 510 case -3: 511 zero = 1; 512 break; 513 case -2: 514 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 515 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ 516 neg = 1; 517 break; 518 case -1: 519 case 0: 520 case 1: 521 zero = 1; 522 break; 523 case 2: 524 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ 525 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 526 neg = 1; 527 break; 528 case 3: 529 zero = 1; 530 break; 531 case 4: 532 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); 533 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); 534 break; 535 } 536 537# ifdef BN_MUL_COMBA 538 if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take 539 extra args to do this well */ 540 { 541 if (!zero) 542 bn_mul_comba4(&(t[n2]), t, &(t[n])); 543 else 544 memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG)); 545 546 bn_mul_comba4(r, a, b); 547 bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n])); 548 } else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could 549 take extra args to do this 550 well */ 551 { 552 if (!zero) 553 bn_mul_comba8(&(t[n2]), t, &(t[n])); 554 else 555 memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG)); 556 557 bn_mul_comba8(r, a, b); 558 bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n])); 559 } else 560# endif /* BN_MUL_COMBA */ 561 { 562 p = &(t[n2 * 2]); 563 if (!zero) 564 bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p); 565 else 566 memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG)); 567 bn_mul_recursive(r, a, b, n, 0, 0, p); 568 bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p); 569 } 570 571 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign 572 * r[10] holds (a[0]*b[0]) 573 * r[32] holds (b[1]*b[1]) 574 */ 575 576 c1 = (int)(bn_add_words(t, r, &(r[n2]), n2)); 577 578 if (neg) /* if t[32] is negative */ 579 { 580 c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2)); 581 } else { 582 /* Might have a carry */ 583 c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2)); 584 } 585 586 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) 587 * r[10] holds (a[0]*b[0]) 588 * r[32] holds (b[1]*b[1]) 589 * c1 holds the carry bits 590 */ 591 c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2)); 592 if (c1) { 593 p = &(r[n + n2]); 594 lo= *p; 595 ln = (lo + c1) & BN_MASK2; 596 *p = ln; 597 598 /* The overflow will stop before we over write 599 * words we should not overwrite */ 600 if (ln < (BN_ULONG)c1) { 601 do { 602 p++; 603 lo= *p; 604 ln = (lo + 1) & BN_MASK2; 605 *p = ln; 606 } while (ln == 0); 607 } 608 } 609} 610 611/* n+tn is the word length 612 * t needs to be n*4 is size, as does r */ 613/* tnX may not be negative but less than n */ 614void 615bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna, 616 int tnb, BN_ULONG *t) 617{ 618 int i, j, n2 = n * 2; 619 int c1, c2, neg; 620 BN_ULONG ln, lo, *p; 621 622 if (n < 8) { 623 bn_mul_normal(r, a, n + tna, b, n + tnb); 624 return; 625 } 626 627 /* r=(a[0]-a[1])*(b[1]-b[0]) */ 628 c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna); 629 c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n); 630 neg = 0; 631 switch (c1 * 3 + c2) { 632 case -4: 633 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 634 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 635 break; 636 case -3: 637 /* break; */ 638 case -2: 639 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 640 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ 641 neg = 1; 642 break; 643 case -1: 644 case 0: 645 case 1: 646 /* break; */ 647 case 2: 648 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ 649 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 650 neg = 1; 651 break; 652 case 3: 653 /* break; */ 654 case 4: 655 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); 656 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); 657 break; 658 } 659 /* The zero case isn't yet implemented here. The speedup 660 would probably be negligible. */ 661# if 0 662 if (n == 4) { 663 bn_mul_comba4(&(t[n2]), t, &(t[n])); 664 bn_mul_comba4(r, a, b); 665 bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn); 666 memset(&(r[n2 + tn * 2]), 0, sizeof(BN_ULONG) * (n2 - tn * 2)); 667 } else 668# endif 669 if (n == 8) { 670 bn_mul_comba8(&(t[n2]), t, &(t[n])); 671 bn_mul_comba8(r, a, b); 672 bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb); 673 memset(&(r[n2 + tna + tnb]), 0, 674 sizeof(BN_ULONG) * (n2 - tna - tnb)); 675 } else { 676 p = &(t[n2*2]); 677 bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p); 678 bn_mul_recursive(r, a, b, n, 0, 0, p); 679 i = n / 2; 680 /* If there is only a bottom half to the number, 681 * just do it */ 682 if (tna > tnb) 683 j = tna - i; 684 else 685 j = tnb - i; 686 if (j == 0) { 687 bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), 688 i, tna - i, tnb - i, p); 689 memset(&(r[n2 + i * 2]), 0, 690 sizeof(BN_ULONG) * (n2 - i * 2)); 691 } 692 else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */ 693 { 694 bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]), 695 i, tna - i, tnb - i, p); 696 memset(&(r[n2 + tna + tnb]), 0, 697 sizeof(BN_ULONG) * (n2 - tna - tnb)); 698 } 699 else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ 700 { 701 memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2); 702 if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL && 703 tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) { 704 bn_mul_normal(&(r[n2]), &(a[n]), tna, 705 &(b[n]), tnb); 706 } else { 707 for (;;) { 708 i /= 2; 709 /* these simplified conditions work 710 * exclusively because difference 711 * between tna and tnb is 1 or 0 */ 712 if (i < tna || i < tnb) { 713 bn_mul_part_recursive(&(r[n2]), 714 &(a[n]), &(b[n]), i, 715 tna - i, tnb - i, p); 716 break; 717 } else if (i == tna || i == tnb) { 718 bn_mul_recursive(&(r[n2]), 719 &(a[n]), &(b[n]), i, 720 tna - i, tnb - i, p); 721 break; 722 } 723 } 724 } 725 } 726 } 727 728 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign 729 * r[10] holds (a[0]*b[0]) 730 * r[32] holds (b[1]*b[1]) 731 */ 732 733 c1 = (int)(bn_add_words(t, r,&(r[n2]), n2)); 734 735 if (neg) /* if t[32] is negative */ 736 { 737 c1 -= (int)(bn_sub_words(&(t[n2]), t,&(t[n2]), n2)); 738 } else { 739 /* Might have a carry */ 740 c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2)); 741 } 742 743 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) 744 * r[10] holds (a[0]*b[0]) 745 * r[32] holds (b[1]*b[1]) 746 * c1 holds the carry bits 747 */ 748 c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2)); 749 if (c1) { 750 p = &(r[n + n2]); 751 lo= *p; 752 ln = (lo + c1)&BN_MASK2; 753 *p = ln; 754 755 /* The overflow will stop before we over write 756 * words we should not overwrite */ 757 if (ln < (BN_ULONG)c1) { 758 do { 759 p++; 760 lo= *p; 761 ln = (lo + 1) & BN_MASK2; 762 *p = ln; 763 } while (ln == 0); 764 } 765 } 766} 767#endif /* BN_RECURSION */ 768 769#ifndef HAVE_BN_MUL 770#ifndef BN_RECURSION 771int 772bn_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, int rn, BN_CTX *ctx) 773{ 774 bn_mul_normal(r->d, a->d, a->top, b->d, b->top); 775 776 return 1; 777} 778 779#else /* BN_RECURSION */ 780int 781bn_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, int rn, BN_CTX *ctx) 782{ 783 BIGNUM *t = NULL; 784 int al, bl, i, k; 785 int j = 0; 786 int ret = 0; 787 788 BN_CTX_start(ctx); 789 790 al = a->top; 791 bl = b->top; 792 793 i = al - bl; 794 795 if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) { 796 if (i >= -1 && i <= 1) { 797 /* Find out the power of two lower or equal 798 to the longest of the two numbers */ 799 if (i >= 0) { 800 j = BN_num_bits_word((BN_ULONG)al); 801 } 802 if (i == -1) { 803 j = BN_num_bits_word((BN_ULONG)bl); 804 } 805 j = 1 << (j - 1); 806 assert(j <= al || j <= bl); 807 k = j + j; 808 if ((t = BN_CTX_get(ctx)) == NULL) 809 goto err; 810 if (al > j || bl > j) { 811 if (!bn_wexpand(t, k * 4)) 812 goto err; 813 if (!bn_wexpand(r, k * 4)) 814 goto err; 815 bn_mul_part_recursive(r->d, a->d, b->d, 816 j, al - j, bl - j, t->d); 817 } 818 else /* al <= j || bl <= j */ 819 { 820 if (!bn_wexpand(t, k * 2)) 821 goto err; 822 if (!bn_wexpand(r, k * 2)) 823 goto err; 824 bn_mul_recursive(r->d, a->d, b->d, 825 j, al - j, bl - j, t->d); 826 } 827 r->top = rn; 828 goto end; 829 } 830#if 0 831 if (i == 1 && !BN_get_flags(b, BN_FLG_STATIC_DATA)) { 832 BIGNUM *tmp_bn = (BIGNUM *)b; 833 if (!bn_wexpand(tmp_bn, al)) 834 goto err; 835 tmp_bn->d[bl] = 0; 836 bl++; 837 i--; 838 } else if (i == -1 && !BN_get_flags(a, BN_FLG_STATIC_DATA)) { 839 BIGNUM *tmp_bn = (BIGNUM *)a; 840 if (!bn_wexpand(tmp_bn, bl)) 841 goto err; 842 tmp_bn->d[al] = 0; 843 al++; 844 i++; 845 } 846 if (i == 0) { 847 /* symmetric and > 4 */ 848 /* 16 or larger */ 849 j = BN_num_bits_word((BN_ULONG)al); 850 j = 1 << (j - 1); 851 k = j + j; 852 if ((t = BN_CTX_get(ctx)) == NULL) 853 goto err; 854 if (al == j) /* exact multiple */ 855 { 856 if (!bn_wexpand(t, k * 2)) 857 goto err; 858 if (!bn_wexpand(r, k * 2)) 859 goto err; 860 bn_mul_recursive(r->d, a->d, b->d, al, t->d); 861 } else { 862 if (!bn_wexpand(t, k * 4)) 863 goto err; 864 if (!bn_wexpand(r, k * 4)) 865 goto err; 866 bn_mul_part_recursive(r->d, a->d, b->d, 867 al - j, j, t->d); 868 } 869 r->top = top; 870 goto end; 871 } 872#endif 873 } 874 875 bn_mul_normal(r->d, a->d, al, b->d, bl); 876 877 end: 878 ret = 1; 879 err: 880 BN_CTX_end(ctx); 881 882 return ret; 883} 884#endif /* BN_RECURSION */ 885#endif /* HAVE_BN_MUL */ 886 887int 888BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) 889{ 890 BIGNUM *rr; 891 int rn; 892 int ret = 0; 893 894 BN_CTX_start(ctx); 895 896 if (BN_is_zero(a) || BN_is_zero(b)) { 897 BN_zero(r); 898 goto done; 899 } 900 901 rr = r; 902 if (rr == a || rr == b) 903 rr = BN_CTX_get(ctx); 904 if (rr == NULL) 905 goto err; 906 907 rn = a->top + b->top; 908 if (rn < a->top) 909 goto err; 910 if (!bn_wexpand(rr, rn)) 911 goto err; 912 913 if (a->top == 4 && b->top == 4) { 914 bn_mul_comba4(rr->d, a->d, b->d); 915 } else if (a->top == 8 && b->top == 8) { 916 bn_mul_comba8(rr->d, a->d, b->d); 917 } else { 918 if (!bn_mul(rr, a, b, rn, ctx)) 919 goto err; 920 } 921 922 rr->top = rn; 923 bn_correct_top(rr); 924 925 BN_set_negative(rr, a->neg ^ b->neg); 926 927 if (r != rr) 928 BN_copy(r, rr); 929 done: 930 ret = 1; 931 err: 932 BN_CTX_end(ctx); 933 934 return ret; 935} 936