1331766Sken// SPDX-License-Identifier: GPL-2.0-or-later 2331766Sken/* mpihelp-mul.c - MPI helper functions 3331766Sken * Copyright (C) 1994, 1996, 1998, 1999, 4331766Sken * 2000 Free Software Foundation, Inc. 5331766Sken * 6331766Sken * This file is part of GnuPG. 7331766Sken * 8331766Sken * Note: This code is heavily based on the GNU MP Library. 9331766Sken * Actually it's the same code with only minor changes in the 10331766Sken * way the data is stored; this is to support the abstraction 11331766Sken * of an optional secure memory allocation which may be used 12331766Sken * to avoid revealing of sensitive data due to paging etc. 13331766Sken * The GNU MP Library itself is published under the LGPL; 14331766Sken * however I decided to publish this code under the plain GPL. 15331766Sken */ 16331766Sken 17331766Sken#include <linux/string.h> 18331766Sken#include "mpi-internal.h" 19331766Sken#include "longlong.h" 20331766Sken 21331766Sken#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ 22331766Sken do { \ 23331766Sken if ((size) < KARATSUBA_THRESHOLD) \ 24331766Sken mul_n_basecase(prodp, up, vp, size); \ 25331766Sken else \ 26331766Sken mul_n(prodp, up, vp, size, tspace); \ 27331766Sken } while (0); 28331766Sken 29331766Sken#define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \ 30331766Sken do { \ 31331766Sken if ((size) < KARATSUBA_THRESHOLD) \ 32331766Sken mpih_sqr_n_basecase(prodp, up, size); \ 33331766Sken else \ 34331766Sken mpih_sqr_n(prodp, up, size, tspace); \ 35331766Sken } while (0); 36331766Sken 37331766Sken/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP), 38331766Sken * both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are 39331766Sken * always stored. Return the most significant limb. 40331766Sken * 41331766Sken * Argument constraints: 42331766Sken * 1. PRODP != UP and PRODP != VP, i.e. the destination 43331766Sken * must be distinct from the multiplier and the multiplicand. 44331766Sken * 45331766Sken * 46331766Sken * Handle simple cases with traditional multiplication. 47331766Sken * 48331766Sken * This is the most critical code of multiplication. All multiplies rely 49331766Sken * on this, both small and huge. Small ones arrive here immediately. Huge 50331766Sken * ones arrive here as this is the base case for Karatsuba's recursive 51331766Sken * algorithm below. 52331766Sken */ 53331766Sken 54331766Skenstatic mpi_limb_t 55331766Skenmul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) 56331766Sken{ 57331766Sken mpi_size_t i; 58331766Sken mpi_limb_t cy; 59331766Sken mpi_limb_t v_limb; 60331766Sken 61331766Sken /* Multiply by the first limb in V separately, as the result can be 62331766Sken * stored (not added) to PROD. We also avoid a loop for zeroing. */ 63331766Sken v_limb = vp[0]; 64331766Sken if (v_limb <= 1) { 65331766Sken if (v_limb == 1) 66331766Sken MPN_COPY(prodp, up, size); 67331766Sken else 68331766Sken MPN_ZERO(prodp, size); 69331766Sken cy = 0; 70331766Sken } else 71331766Sken cy = mpihelp_mul_1(prodp, up, size, v_limb); 72331766Sken 73331766Sken prodp[size] = cy; 74331766Sken prodp++; 75331766Sken 76331766Sken /* For each iteration in the outer loop, multiply one limb from 77331766Sken * U with one limb from V, and add it to PROD. */ 78331766Sken for (i = 1; i < size; i++) { 79331766Sken v_limb = vp[i]; 80331766Sken if (v_limb <= 1) { 81331766Sken cy = 0; 82331766Sken if (v_limb == 1) 83331766Sken cy = mpihelp_add_n(prodp, prodp, up, size); 84331766Sken } else 85331766Sken cy = mpihelp_addmul_1(prodp, up, size, v_limb); 86331766Sken 87331766Sken prodp[size] = cy; 88331766Sken prodp++; 89331766Sken } 90331766Sken 91331766Sken return cy; 92331766Sken} 93331766Sken 94331766Skenstatic void 95331766Skenmul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, 96331766Sken mpi_size_t size, mpi_ptr_t tspace) 97331766Sken{ 98331766Sken if (size & 1) { 99331766Sken /* The size is odd, and the code below doesn't handle that. 100331766Sken * Multiply the least significant (size - 1) limbs with a recursive 101331766Sken * call, and handle the most significant limb of S1 and S2 102331766Sken * separately. 103331766Sken * A slightly faster way to do this would be to make the Karatsuba 104331766Sken * code below behave as if the size were even, and let it check for 105331766Sken * odd size in the end. I.e., in essence move this code to the end. 106331766Sken * Doing so would save us a recursive call, and potentially make the 107331766Sken * stack grow a lot less. 108331766Sken */ 109331766Sken mpi_size_t esize = size - 1; /* even size */ 110331766Sken mpi_limb_t cy_limb; 111331766Sken 112331766Sken MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace); 113331766Sken cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]); 114331766Sken prodp[esize + esize] = cy_limb; 115331766Sken cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]); 116331766Sken prodp[esize + size] = cy_limb; 117331766Sken } else { 118331766Sken /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm. 119331766Sken * 120331766Sken * Split U in two pieces, U1 and U0, such that 121331766Sken * U = U0 + U1*(B**n), 122331766Sken * and V in V1 and V0, such that 123331766Sken * V = V0 + V1*(B**n). 124331766Sken * 125331766Sken * UV is then computed recursively using the identity 126331766Sken * 127331766Sken * 2n n n n 128331766Sken * UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V 129331766Sken * 1 1 1 0 0 1 0 0 130331766Sken * 131331766Sken * Where B = 2**BITS_PER_MP_LIMB. 132331766Sken */ 133331766Sken mpi_size_t hsize = size >> 1; 134331766Sken mpi_limb_t cy; 135331766Sken int negflg; 136331766Sken 137331766Sken /* Product H. ________________ ________________ 138331766Sken * |_____U1 x V1____||____U0 x V0_____| 139331766Sken * Put result in upper part of PROD and pass low part of TSPACE 140331766Sken * as new TSPACE. 141331766Sken */ 142331766Sken MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, 143331766Sken tspace); 144331766Sken 145331766Sken /* Product M. ________________ 146331766Sken * |_(U1-U0)(V0-V1)_| 147331766Sken */ 148331766Sken if (mpihelp_cmp(up + hsize, up, hsize) >= 0) { 149331766Sken mpihelp_sub_n(prodp, up + hsize, up, hsize); 150331766Sken negflg = 0; 151331766Sken } else { 152331766Sken mpihelp_sub_n(prodp, up, up + hsize, hsize); 153331766Sken negflg = 1; 154331766Sken } 155331766Sken if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) { 156331766Sken mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize); 157331766Sken negflg ^= 1; 158331766Sken } else { 159331766Sken mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize); 160331766Sken /* No change of NEGFLG. */ 161331766Sken } 162331766Sken /* Read temporary operands from low part of PROD. 163331766Sken * Put result in low part of TSPACE using upper part of TSPACE 164331766Sken * as new TSPACE. 165331766Sken */ 166331766Sken MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, 167331766Sken tspace + size); 168331766Sken 169331766Sken /* Add/copy product H. */ 170331766Sken MPN_COPY(prodp + hsize, prodp + size, hsize); 171331766Sken cy = mpihelp_add_n(prodp + size, prodp + size, 172331766Sken prodp + size + hsize, hsize); 173331766Sken 174331766Sken /* Add product M (if NEGFLG M is a negative number) */ 175331766Sken if (negflg) 176331766Sken cy -= 177331766Sken mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, 178331766Sken size); 179331766Sken else 180331766Sken cy += 181331766Sken mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, 182331766Sken size); 183331766Sken 184331766Sken /* Product L. ________________ ________________ 185331766Sken * |________________||____U0 x V0_____| 186331766Sken * Read temporary operands from low part of PROD. 187331766Sken * Put result in low part of TSPACE using upper part of TSPACE 188331766Sken * as new TSPACE. 189331766Sken */ 190331766Sken MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size); 191331766Sken 192331766Sken /* Add/copy Product L (twice) */ 193331766Sken 194331766Sken cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size); 195331766Sken if (cy) 196331766Sken mpihelp_add_1(prodp + hsize + size, 197331766Sken prodp + hsize + size, hsize, cy); 198331766Sken 199331766Sken MPN_COPY(prodp, tspace, hsize); 200331766Sken cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, 201331766Sken hsize); 202331766Sken if (cy) 203331766Sken mpihelp_add_1(prodp + size, prodp + size, size, 1); 204331766Sken } 205331766Sken} 206331766Sken 207331766Skenvoid mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size) 208331766Sken{ 209331766Sken mpi_size_t i; 210331766Sken mpi_limb_t cy_limb; 211331766Sken mpi_limb_t v_limb; 212331766Sken 213331766Sken /* Multiply by the first limb in V separately, as the result can be 214331766Sken * stored (not added) to PROD. We also avoid a loop for zeroing. */ 215331766Sken v_limb = up[0]; 216331766Sken if (v_limb <= 1) { 217331766Sken if (v_limb == 1) 218331766Sken MPN_COPY(prodp, up, size); 219331766Sken else 220331766Sken MPN_ZERO(prodp, size); 221331766Sken cy_limb = 0; 222331766Sken } else 223331766Sken cy_limb = mpihelp_mul_1(prodp, up, size, v_limb); 224331766Sken 225331766Sken prodp[size] = cy_limb; 226331766Sken prodp++; 227331766Sken 228331766Sken /* For each iteration in the outer loop, multiply one limb from 229331766Sken * U with one limb from V, and add it to PROD. */ 230331766Sken for (i = 1; i < size; i++) { 231331766Sken v_limb = up[i]; 232331766Sken if (v_limb <= 1) { 233331766Sken cy_limb = 0; 234331766Sken if (v_limb == 1) 235331766Sken cy_limb = mpihelp_add_n(prodp, prodp, up, size); 236331766Sken } else 237331766Sken cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb); 238331766Sken 239331766Sken prodp[size] = cy_limb; 240331766Sken prodp++; 241331766Sken } 242331766Sken} 243331766Sken 244331766Skenvoid 245331766Skenmpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace) 246331766Sken{ 247331766Sken if (size & 1) { 248331766Sken /* The size is odd, and the code below doesn't handle that. 249331766Sken * Multiply the least significant (size - 1) limbs with a recursive 250331766Sken * call, and handle the most significant limb of S1 and S2 251331766Sken * separately. 252331766Sken * A slightly faster way to do this would be to make the Karatsuba 253331766Sken * code below behave as if the size were even, and let it check for 254331766Sken * odd size in the end. I.e., in essence move this code to the end. 255331766Sken * Doing so would save us a recursive call, and potentially make the 256331766Sken * stack grow a lot less. 257331766Sken */ 258331766Sken mpi_size_t esize = size - 1; /* even size */ 259331766Sken mpi_limb_t cy_limb; 260331766Sken 261331766Sken MPN_SQR_N_RECURSE(prodp, up, esize, tspace); 262331766Sken cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]); 263331766Sken prodp[esize + esize] = cy_limb; 264331766Sken cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]); 265331766Sken 266331766Sken prodp[esize + size] = cy_limb; 267331766Sken } else { 268331766Sken mpi_size_t hsize = size >> 1; 269331766Sken mpi_limb_t cy; 270331766Sken 271331766Sken /* Product H. ________________ ________________ 272331766Sken * |_____U1 x U1____||____U0 x U0_____| 273331766Sken * Put result in upper part of PROD and pass low part of TSPACE 274331766Sken * as new TSPACE. 275331766Sken */ 276331766Sken MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace); 277331766Sken 278331766Sken /* Product M. ________________ 279331766Sken * |_(U1-U0)(U0-U1)_| 280331766Sken */ 281331766Sken if (mpihelp_cmp(up + hsize, up, hsize) >= 0) 282331766Sken mpihelp_sub_n(prodp, up + hsize, up, hsize); 283331766Sken else 284331766Sken mpihelp_sub_n(prodp, up, up + hsize, hsize); 285331766Sken 286331766Sken /* Read temporary operands from low part of PROD. 287331766Sken * Put result in low part of TSPACE using upper part of TSPACE 288331766Sken * as new TSPACE. */ 289331766Sken MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size); 290331766Sken 291331766Sken /* Add/copy product H */ 292331766Sken MPN_COPY(prodp + hsize, prodp + size, hsize); 293331766Sken cy = mpihelp_add_n(prodp + size, prodp + size, 294331766Sken prodp + size + hsize, hsize); 295331766Sken 296331766Sken /* Add product M (if NEGFLG M is a negative number). */ 297331766Sken cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size); 298331766Sken 299331766Sken /* Product L. ________________ ________________ 300331766Sken * |________________||____U0 x U0_____| 301331766Sken * Read temporary operands from low part of PROD. 302331766Sken * Put result in low part of TSPACE using upper part of TSPACE 303331766Sken * as new TSPACE. */ 304331766Sken MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size); 305331766Sken 306331766Sken /* Add/copy Product L (twice). */ 307331766Sken cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size); 308331766Sken if (cy) 309331766Sken mpihelp_add_1(prodp + hsize + size, 310331766Sken prodp + hsize + size, hsize, cy); 311331766Sken 312331766Sken MPN_COPY(prodp, tspace, hsize); 313331766Sken cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, 314331766Sken hsize); 315331766Sken if (cy) 316331766Sken mpihelp_add_1(prodp + size, prodp + size, size, 1); 317331766Sken } 318331766Sken} 319331766Sken 320331766Sken 321331766Skenvoid mpihelp_mul_n(mpi_ptr_t prodp, 322331766Sken mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) 323331766Sken{ 324331766Sken if (up == vp) { 325331766Sken if (size < KARATSUBA_THRESHOLD) 326331766Sken mpih_sqr_n_basecase(prodp, up, size); 327331766Sken else { 328331766Sken mpi_ptr_t tspace; 329331766Sken tspace = mpi_alloc_limb_space(2 * size); 330331766Sken mpih_sqr_n(prodp, up, size, tspace); 331331766Sken mpi_free_limb_space(tspace); 332331766Sken } 333331766Sken } else { 334331766Sken if (size < KARATSUBA_THRESHOLD) 335331766Sken mul_n_basecase(prodp, up, vp, size); 336331766Sken else { 337331766Sken mpi_ptr_t tspace; 338331766Sken tspace = mpi_alloc_limb_space(2 * size); 339331766Sken mul_n(prodp, up, vp, size, tspace); 340331766Sken mpi_free_limb_space(tspace); 341331766Sken } 342331766Sken } 343331766Sken} 344331766Sken 345331766Skenint 346331766Skenmpihelp_mul_karatsuba_case(mpi_ptr_t prodp, 347331766Sken mpi_ptr_t up, mpi_size_t usize, 348331766Sken mpi_ptr_t vp, mpi_size_t vsize, 349331766Sken struct karatsuba_ctx *ctx) 350331766Sken{ 351331766Sken mpi_limb_t cy; 352331766Sken 353331766Sken if (!ctx->tspace || ctx->tspace_size < vsize) { 354331766Sken if (ctx->tspace) 355331766Sken mpi_free_limb_space(ctx->tspace); 356331766Sken ctx->tspace = mpi_alloc_limb_space(2 * vsize); 357331766Sken if (!ctx->tspace) 358331766Sken return -ENOMEM; 359331766Sken ctx->tspace_size = vsize; 360331766Sken } 361331766Sken 362331766Sken MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace); 363331766Sken 364331766Sken prodp += vsize; 365331766Sken up += vsize; 366331766Sken usize -= vsize; 367331766Sken if (usize >= vsize) { 368331766Sken if (!ctx->tp || ctx->tp_size < vsize) { 369331766Sken if (ctx->tp) 370331766Sken mpi_free_limb_space(ctx->tp); 371331766Sken ctx->tp = mpi_alloc_limb_space(2 * vsize); 372331766Sken if (!ctx->tp) { 373331766Sken if (ctx->tspace) 374331766Sken mpi_free_limb_space(ctx->tspace); 375331766Sken ctx->tspace = NULL; 376331766Sken return -ENOMEM; 377331766Sken } 378331766Sken ctx->tp_size = vsize; 379331766Sken } 380331766Sken 381331766Sken do { 382331766Sken MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace); 383331766Sken cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize); 384331766Sken mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize, 385331766Sken cy); 386331766Sken prodp += vsize; 387331766Sken up += vsize; 388331766Sken usize -= vsize; 389331766Sken } while (usize >= vsize); 390331766Sken } 391331766Sken 392331766Sken if (usize) { 393331766Sken if (usize < KARATSUBA_THRESHOLD) { 394331766Sken mpi_limb_t tmp; 395331766Sken if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp) 396331766Sken < 0) 397331766Sken return -ENOMEM; 398331766Sken } else { 399331766Sken if (!ctx->next) { 400331766Sken ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL); 401331766Sken if (!ctx->next) 402331766Sken return -ENOMEM; 403331766Sken } 404331766Sken if (mpihelp_mul_karatsuba_case(ctx->tspace, 405331766Sken vp, vsize, 406331766Sken up, usize, 407331766Sken ctx->next) < 0) 408331766Sken return -ENOMEM; 409331766Sken } 410331766Sken 411331766Sken cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize); 412331766Sken mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy); 413331766Sken } 414331766Sken 415331766Sken return 0; 416331766Sken} 417331766Sken 418331766Skenvoid mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx) 419331766Sken{ 420331766Sken struct karatsuba_ctx *ctx2; 421331766Sken 422331766Sken if (ctx->tp) 423331766Sken mpi_free_limb_space(ctx->tp); 424331766Sken if (ctx->tspace) 425331766Sken mpi_free_limb_space(ctx->tspace); 426331766Sken for (ctx = ctx->next; ctx; ctx = ctx2) { 427331766Sken ctx2 = ctx->next; 428331766Sken if (ctx->tp) 429331766Sken mpi_free_limb_space(ctx->tp); 430331766Sken if (ctx->tspace) 431331766Sken mpi_free_limb_space(ctx->tspace); 432331766Sken kfree(ctx); 433331766Sken } 434331766Sken} 435331766Sken 436331766Sken/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) 437331766Sken * and v (pointed to by VP, with VSIZE limbs), and store the result at 438331766Sken * PRODP. USIZE + VSIZE limbs are always stored, but if the input 439331766Sken * operands are normalized. Return the most significant limb of the 440331766Sken * result. 441331766Sken * 442331766Sken * NOTE: The space pointed to by PRODP is overwritten before finished 443331766Sken * with U and V, so overlap is an error. 444331766Sken * 445331766Sken * Argument constraints: 446331766Sken * 1. USIZE >= VSIZE. 447331766Sken * 2. PRODP != UP and PRODP != VP, i.e. the destination 448331766Sken * must be distinct from the multiplier and the multiplicand. 449331766Sken */ 450331766Sken 451331766Skenint 452331766Skenmpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, 453331766Sken mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result) 454331766Sken{ 455331766Sken mpi_ptr_t prod_endp = prodp + usize + vsize - 1; 456331766Sken mpi_limb_t cy; 457331766Sken struct karatsuba_ctx ctx; 458331766Sken 459331766Sken if (vsize < KARATSUBA_THRESHOLD) { 460331766Sken mpi_size_t i; 461331766Sken mpi_limb_t v_limb; 462331766Sken 463331766Sken if (!vsize) { 464331766Sken *_result = 0; 465331766Sken return 0; 466331766Sken } 467331766Sken 468331766Sken /* Multiply by the first limb in V separately, as the result can be 469331766Sken * stored (not added) to PROD. We also avoid a loop for zeroing. */ 470331766Sken v_limb = vp[0]; 471331766Sken if (v_limb <= 1) { 472331766Sken if (v_limb == 1) 473331766Sken MPN_COPY(prodp, up, usize); 474331766Sken else 475331766Sken MPN_ZERO(prodp, usize); 476331766Sken cy = 0; 477331766Sken } else 478331766Sken cy = mpihelp_mul_1(prodp, up, usize, v_limb); 479331766Sken 480331766Sken prodp[usize] = cy; 481331766Sken prodp++; 482331766Sken 483331766Sken /* For each iteration in the outer loop, multiply one limb from 484331766Sken * U with one limb from V, and add it to PROD. */ 485331766Sken for (i = 1; i < vsize; i++) { 486331766Sken v_limb = vp[i]; 487331766Sken if (v_limb <= 1) { 488331766Sken cy = 0; 489331766Sken if (v_limb == 1) 490331766Sken cy = mpihelp_add_n(prodp, prodp, up, 491331766Sken usize); 492331766Sken } else 493331766Sken cy = mpihelp_addmul_1(prodp, up, usize, v_limb); 494331766Sken 495331766Sken prodp[usize] = cy; 496331766Sken prodp++; 497331766Sken } 498331766Sken 499331766Sken *_result = cy; 500331766Sken return 0; 501331766Sken } 502331766Sken 503331766Sken memset(&ctx, 0, sizeof ctx); 504331766Sken if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0) 505331766Sken return -ENOMEM; 506331766Sken mpihelp_release_karatsuba_ctx(&ctx); 507331766Sken *_result = *prod_endp; 508331766Sken return 0; 509331766Sken} 510331766Sken