1/* mpn_powm -- Compute R = U^E mod M. 2 3 Contributed to the GNU project by Torbjorn Granlund. 4 5 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY 6 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9Copyright 2007, 2008, 2009 Free Software Foundation, Inc. 10 11This file is part of the GNU MP Library. 12 13The GNU MP Library is free software; you can redistribute it and/or modify 14it under the terms of the GNU Lesser General Public License as published by 15the Free Software Foundation; either version 3 of the License, or (at your 16option) any later version. 17 18The GNU MP Library is distributed in the hope that it will be useful, but 19WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 20or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 21License for more details. 22 23You should have received a copy of the GNU Lesser General Public License 24along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 25 26 27/* 28 BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd. 29 30 1. W <- U 31 32 2. T <- (B^n * U) mod M Convert to REDC form 33 34 3. Compute table U^1, U^3, U^5... of E-dependent size 35 36 4. While there are more bits in E 37 W <- power left-to-right base-k 38 39 40 TODO: 41 42 * Make getbits a macro, thereby allowing it to update the index operand. 43 That will simplify the code using getbits. (Perhaps make getbits' sibling 44 getbit then have similar form, for symmetry.) 45 46 * Write an itch function. Or perhaps get rid of tp parameter since the huge 47 pp area is allocated locally anyway? 48 49 * Choose window size without looping. (Superoptimize or think(tm).) 50 51 * Handle small bases with initial, reduction-free exponentiation. 52 53 * Call new division functions, not mpn_tdiv_qr. 54 55 * Consider special code for one-limb M. 56 57 * How should we handle the redc1/redc2/redc_n choice? 58 - redc1: T(binvert_1limb) + e * (n) * (T(mullo-1x1) + n*T(addmul_1)) 59 - redc2: T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2)) 60 - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n))) 61 This disregards the addmul_N constant term, but we could think of 62 that as part of the respective mullo. 63 64 * When U (the base) is small, we should start the exponentiation with plain 65 operations, then convert that partial result to REDC form. 66 67 * When U is just one limb, should it be handled without the k-ary tricks? 68 We could keep a factor of B^n in W, but use U' = BU as base. After 69 multiplying by this (pseudo two-limb) number, we need to multiply by 1/B 70 mod M. 71*/ 72 73#include "gmp.h" 74#include "gmp-impl.h" 75#include "longlong.h" 76 77#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 78#define WANT_REDC_2 1 79#endif 80 81#define getbit(p,bi) \ 82 ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1) 83 84static inline mp_limb_t 85getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits) 86{ 87 int nbits_in_r; 88 mp_limb_t r; 89 mp_size_t i; 90 91 if (bi < nbits) 92 { 93 return p[0] & (((mp_limb_t) 1 << bi) - 1); 94 } 95 else 96 { 97 bi -= nbits; /* bit index of low bit to extract */ 98 i = bi / GMP_NUMB_BITS; /* word index of low bit to extract */ 99 bi %= GMP_NUMB_BITS; /* bit index in low word */ 100 r = p[i] >> bi; /* extract (low) bits */ 101 nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */ 102 if (nbits_in_r < nbits) /* did we get enough bits? */ 103 r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */ 104 return r & (((mp_limb_t ) 1 << nbits) - 1); 105 } 106} 107 108static inline int 109win_size (mp_bitcnt_t eb) 110{ 111 int k; 112 static mp_bitcnt_t x[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0}; 113 for (k = 1; eb > x[k]; k++) 114 ; 115 return k; 116} 117 118/* Convert U to REDC form, U_r = B^n * U mod M */ 119static void 120redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n) 121{ 122 mp_ptr tp, qp; 123 TMP_DECL; 124 TMP_MARK; 125 126 tp = TMP_ALLOC_LIMBS (un + n); 127 qp = TMP_ALLOC_LIMBS (un + 1); /* FIXME: Put at tp+? */ 128 129 MPN_ZERO (tp, n); 130 MPN_COPY (tp + n, up, un); 131 mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n); 132 TMP_FREE; 133} 134 135/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0] 136 Requires that mp[n-1..0] is odd. 137 Requires that ep[en-1..0] is > 1. 138 Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs. */ 139void 140mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, 141 mp_srcptr ep, mp_size_t en, 142 mp_srcptr mp, mp_size_t n, mp_ptr tp) 143{ 144 mp_limb_t ip[2], *mip; 145 int cnt; 146 mp_bitcnt_t ebi; 147 int windowsize, this_windowsize; 148 mp_limb_t expbits; 149 mp_ptr pp, this_pp; 150 long i; 151 TMP_DECL; 152 153 ASSERT (en > 1 || (en == 1 && ep[0] > 1)); 154 ASSERT (n >= 1 && ((mp[0] & 1) != 0)); 155 156 TMP_MARK; 157 158 count_leading_zeros (cnt, ep[en - 1]); 159 ebi = (mp_bitcnt_t) en * GMP_LIMB_BITS - cnt; 160 161#if 0 162 if (bn < n) 163 { 164 /* Do the first few exponent bits without mod reductions, 165 until the result is greater than the mod argument. */ 166 for (;;) 167 { 168 mpn_sqr (tp, this_pp, tn); 169 tn = tn * 2 - 1, tn += tp[tn] != 0; 170 if (getbit (ep, ebi) != 0) 171 mpn_mul (..., tp, tn, bp, bn); 172 ebi--; 173 } 174 } 175#endif 176 177 windowsize = win_size (ebi); 178 179#if WANT_REDC_2 180 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) 181 { 182 mip = ip; 183 binvert_limb (mip[0], mp[0]); 184 mip[0] = -mip[0]; 185 } 186 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) 187 { 188 mip = ip; 189 mpn_binvert (mip, mp, 2, tp); 190 mip[0] = -mip[0]; mip[1] = ~mip[1]; 191 } 192#else 193 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) 194 { 195 mip = ip; 196 binvert_limb (mip[0], mp[0]); 197 mip[0] = -mip[0]; 198 } 199#endif 200 else 201 { 202 mip = TMP_ALLOC_LIMBS (n); 203 mpn_binvert (mip, mp, n, tp); 204 } 205 206 pp = TMP_ALLOC_LIMBS (n << (windowsize - 1)); 207 208 this_pp = pp; 209 redcify (this_pp, bp, bn, mp, n); 210 211 /* Store b^2 at rp. */ 212 mpn_sqr (tp, this_pp, n); 213#if WANT_REDC_2 214 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) 215 mpn_redc_1 (rp, tp, mp, n, mip[0]); 216 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) 217 mpn_redc_2 (rp, tp, mp, n, mip); 218#else 219 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) 220 mpn_redc_1 (rp, tp, mp, n, mip[0]); 221#endif 222 else 223 mpn_redc_n (rp, tp, mp, n, mip); 224 225 /* Precompute odd powers of b and put them in the temporary area at pp. */ 226 for (i = (1 << (windowsize - 1)) - 1; i > 0; i--) 227 { 228 mpn_mul_n (tp, this_pp, rp, n); 229 this_pp += n; 230#if WANT_REDC_2 231 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) 232 mpn_redc_1 (this_pp, tp, mp, n, mip[0]); 233 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) 234 mpn_redc_2 (this_pp, tp, mp, n, mip); 235#else 236 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) 237 mpn_redc_1 (this_pp, tp, mp, n, mip[0]); 238#endif 239 else 240 mpn_redc_n (this_pp, tp, mp, n, mip); 241 } 242 243 expbits = getbits (ep, ebi, windowsize); 244 if (ebi < windowsize) 245 ebi = 0; 246 else 247 ebi -= windowsize; 248 249 count_trailing_zeros (cnt, expbits); 250 ebi += cnt; 251 expbits >>= cnt; 252 253 MPN_COPY (rp, pp + n * (expbits >> 1), n); 254 255#define INNERLOOP \ 256 while (ebi != 0) \ 257 { \ 258 while (getbit (ep, ebi) == 0) \ 259 { \ 260 MPN_SQR (tp, rp, n); \ 261 MPN_REDUCE (rp, tp, mp, n, mip); \ 262 ebi--; \ 263 if (ebi == 0) \ 264 goto done; \ 265 } \ 266 \ 267 /* The next bit of the exponent is 1. Now extract the largest \ 268 block of bits <= windowsize, and such that the least \ 269 significant bit is 1. */ \ 270 \ 271 expbits = getbits (ep, ebi, windowsize); \ 272 this_windowsize = windowsize; \ 273 if (ebi < windowsize) \ 274 { \ 275 this_windowsize -= windowsize - ebi; \ 276 ebi = 0; \ 277 } \ 278 else \ 279 ebi -= windowsize; \ 280 \ 281 count_trailing_zeros (cnt, expbits); \ 282 this_windowsize -= cnt; \ 283 ebi += cnt; \ 284 expbits >>= cnt; \ 285 \ 286 do \ 287 { \ 288 MPN_SQR (tp, rp, n); \ 289 MPN_REDUCE (rp, tp, mp, n, mip); \ 290 this_windowsize--; \ 291 } \ 292 while (this_windowsize != 0); \ 293 \ 294 MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n); \ 295 MPN_REDUCE (rp, tp, mp, n, mip); \ 296 } 297 298 299#if WANT_REDC_2 300 if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD) 301 { 302 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) 303 { 304#undef MPN_MUL_N 305#undef MPN_SQR 306#undef MPN_REDUCE 307#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) 308#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) 309#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) 310 INNERLOOP; 311 } 312 else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) 313 { 314#undef MPN_MUL_N 315#undef MPN_SQR 316#undef MPN_REDUCE 317#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) 318#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) 319#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_2 (rp, tp, mp, n, mip) 320 INNERLOOP; 321 } 322 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) 323 { 324#undef MPN_MUL_N 325#undef MPN_SQR 326#undef MPN_REDUCE 327#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) 328#define MPN_SQR(r,a,n) mpn_sqr (r,a,n) 329#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_2 (rp, tp, mp, n, mip) 330 INNERLOOP; 331 } 332 else 333 { 334#undef MPN_MUL_N 335#undef MPN_SQR 336#undef MPN_REDUCE 337#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) 338#define MPN_SQR(r,a,n) mpn_sqr (r,a,n) 339#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) 340 INNERLOOP; 341 } 342 } 343 else 344 { 345 if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) 346 { 347#undef MPN_MUL_N 348#undef MPN_SQR 349#undef MPN_REDUCE 350#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) 351#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) 352#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) 353 INNERLOOP; 354 } 355 else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) 356 { 357#undef MPN_MUL_N 358#undef MPN_SQR 359#undef MPN_REDUCE 360#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) 361#define MPN_SQR(r,a,n) mpn_sqr (r,a,n) 362#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) 363 INNERLOOP; 364 } 365 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) 366 { 367#undef MPN_MUL_N 368#undef MPN_SQR 369#undef MPN_REDUCE 370#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) 371#define MPN_SQR(r,a,n) mpn_sqr (r,a,n) 372#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_2 (rp, tp, mp, n, mip) 373 INNERLOOP; 374 } 375 else 376 { 377#undef MPN_MUL_N 378#undef MPN_SQR 379#undef MPN_REDUCE 380#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) 381#define MPN_SQR(r,a,n) mpn_sqr (r,a,n) 382#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) 383 INNERLOOP; 384 } 385 } 386 387#else /* WANT_REDC_2 */ 388 389 if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD) 390 { 391 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) 392 { 393#undef MPN_MUL_N 394#undef MPN_SQR 395#undef MPN_REDUCE 396#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) 397#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) 398#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) 399 INNERLOOP; 400 } 401 else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) 402 { 403#undef MPN_MUL_N 404#undef MPN_SQR 405#undef MPN_REDUCE 406#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) 407#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) 408#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) 409 INNERLOOP; 410 } 411 else 412 { 413#undef MPN_MUL_N 414#undef MPN_SQR 415#undef MPN_REDUCE 416#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) 417#define MPN_SQR(r,a,n) mpn_sqr (r,a,n) 418#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) 419 INNERLOOP; 420 } 421 } 422 else 423 { 424 if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) 425 { 426#undef MPN_MUL_N 427#undef MPN_SQR 428#undef MPN_REDUCE 429#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) 430#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) 431#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) 432 INNERLOOP; 433 } 434 else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) 435 { 436#undef MPN_MUL_N 437#undef MPN_SQR 438#undef MPN_REDUCE 439#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) 440#define MPN_SQR(r,a,n) mpn_sqr (r,a,n) 441#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) 442 INNERLOOP; 443 } 444 else 445 { 446#undef MPN_MUL_N 447#undef MPN_SQR 448#undef MPN_REDUCE 449#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) 450#define MPN_SQR(r,a,n) mpn_sqr (r,a,n) 451#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) 452 INNERLOOP; 453 } 454 } 455#endif /* WANT_REDC_2 */ 456 457 done: 458 459 MPN_COPY (tp, rp, n); 460 MPN_ZERO (tp + n, n); 461 462#if WANT_REDC_2 463 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) 464 mpn_redc_1 (rp, tp, mp, n, mip[0]); 465 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) 466 mpn_redc_2 (rp, tp, mp, n, mip); 467#else 468 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) 469 mpn_redc_1 (rp, tp, mp, n, mip[0]); 470#endif 471 else 472 mpn_redc_n (rp, tp, mp, n, mip); 473 474 if (mpn_cmp (rp, mp, n) >= 0) 475 mpn_sub_n (rp, rp, mp, n); 476 477 TMP_FREE; 478} 479