1/* mpih-div.c - MPI helper functions 2 * Copyright (C) 1994, 1996, 1998, 2000, 3 * 2001, 2002 Free Software Foundation, Inc. 4 * 5 * This file is part of Libgcrypt. 6 * 7 * Libgcrypt is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU Lesser General Public License as 9 * published by the Free Software Foundation; either version 2.1 of 10 * the License, or (at your option) any later version. 11 * 12 * Libgcrypt is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA 20 * 21 * Note: This code is heavily based on the GNU MP Library. 22 * Actually it's the same code with only minor changes in the 23 * way the data is stored; this is to support the abstraction 24 * of an optional secure memory allocation which may be used 25 * to avoid revealing of sensitive data due to paging etc. 26 */ 27 28#include <config.h> 29#include <stdio.h> 30#include <stdlib.h> 31#include "mpi-internal.h" 32#include "longlong.h" 33 34#ifndef UMUL_TIME 35#define UMUL_TIME 1 36#endif 37#ifndef UDIV_TIME 38#define UDIV_TIME UMUL_TIME 39#endif 40 41/* FIXME: We should be using invert_limb (or invert_normalized_limb) 42 * here (not udiv_qrnnd). 43 */ 44 45mpi_limb_t 46_gcry_mpih_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, 47 mpi_limb_t divisor_limb) 48{ 49 mpi_size_t i; 50 mpi_limb_t n1, n0, r; 51 int dummy; 52 53 /* Botch: Should this be handled at all? Rely on callers? */ 54 if( !dividend_size ) 55 return 0; 56 57 /* If multiplication is much faster than division, and the 58 * dividend is large, pre-invert the divisor, and use 59 * only multiplications in the inner loop. 60 * 61 * This test should be read: 62 * Does it ever help to use udiv_qrnnd_preinv? 63 * && Does what we save compensate for the inversion overhead? 64 */ 65 if( UDIV_TIME > (2 * UMUL_TIME + 6) 66 && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME ) { 67 int normalization_steps; 68 69 count_leading_zeros( normalization_steps, divisor_limb ); 70 if( normalization_steps ) { 71 mpi_limb_t divisor_limb_inverted; 72 73 divisor_limb <<= normalization_steps; 74 75 /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The 76 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the 77 * most significant bit (with weight 2**N) implicit. 78 * 79 * Special case for DIVISOR_LIMB == 100...000. 80 */ 81 if( !(divisor_limb << 1) ) 82 divisor_limb_inverted = ~(mpi_limb_t)0; 83 else 84 udiv_qrnnd(divisor_limb_inverted, dummy, 85 -divisor_limb, 0, divisor_limb); 86 87 n1 = dividend_ptr[dividend_size - 1]; 88 r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps); 89 90 /* Possible optimization: 91 * if (r == 0 92 * && divisor_limb > ((n1 << normalization_steps) 93 * | (dividend_ptr[dividend_size - 2] >> ...))) 94 * ...one division less... 95 */ 96 for( i = dividend_size - 2; i >= 0; i--) { 97 n0 = dividend_ptr[i]; 98 UDIV_QRNND_PREINV(dummy, r, r, 99 ((n1 << normalization_steps) 100 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))), 101 divisor_limb, divisor_limb_inverted); 102 n1 = n0; 103 } 104 UDIV_QRNND_PREINV(dummy, r, r, 105 n1 << normalization_steps, 106 divisor_limb, divisor_limb_inverted); 107 return r >> normalization_steps; 108 } 109 else { 110 mpi_limb_t divisor_limb_inverted; 111 112 /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The 113 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the 114 * most significant bit (with weight 2**N) implicit. 115 * 116 * Special case for DIVISOR_LIMB == 100...000. 117 */ 118 if( !(divisor_limb << 1) ) 119 divisor_limb_inverted = ~(mpi_limb_t)0; 120 else 121 udiv_qrnnd(divisor_limb_inverted, dummy, 122 -divisor_limb, 0, divisor_limb); 123 124 i = dividend_size - 1; 125 r = dividend_ptr[i]; 126 127 if( r >= divisor_limb ) 128 r = 0; 129 else 130 i--; 131 132 for( ; i >= 0; i--) { 133 n0 = dividend_ptr[i]; 134 UDIV_QRNND_PREINV(dummy, r, r, 135 n0, divisor_limb, divisor_limb_inverted); 136 } 137 return r; 138 } 139 } 140 else { 141 if( UDIV_NEEDS_NORMALIZATION ) { 142 int normalization_steps; 143 144 count_leading_zeros(normalization_steps, divisor_limb); 145 if( normalization_steps ) { 146 divisor_limb <<= normalization_steps; 147 148 n1 = dividend_ptr[dividend_size - 1]; 149 r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps); 150 151 /* Possible optimization: 152 * if (r == 0 153 * && divisor_limb > ((n1 << normalization_steps) 154 * | (dividend_ptr[dividend_size - 2] >> ...))) 155 * ...one division less... 156 */ 157 for(i = dividend_size - 2; i >= 0; i--) { 158 n0 = dividend_ptr[i]; 159 udiv_qrnnd (dummy, r, r, 160 ((n1 << normalization_steps) 161 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))), 162 divisor_limb); 163 n1 = n0; 164 } 165 udiv_qrnnd (dummy, r, r, 166 n1 << normalization_steps, 167 divisor_limb); 168 return r >> normalization_steps; 169 } 170 } 171 /* No normalization needed, either because udiv_qrnnd doesn't require 172 * it, or because DIVISOR_LIMB is already normalized. */ 173 i = dividend_size - 1; 174 r = dividend_ptr[i]; 175 176 if(r >= divisor_limb) 177 r = 0; 178 else 179 i--; 180 181 for(; i >= 0; i--) { 182 n0 = dividend_ptr[i]; 183 udiv_qrnnd (dummy, r, r, n0, divisor_limb); 184 } 185 return r; 186 } 187} 188 189/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write 190 * the NSIZE-DSIZE least significant quotient limbs at QP 191 * and the DSIZE long remainder at NP. If QEXTRA_LIMBS is 192 * non-zero, generate that many fraction bits and append them after the 193 * other quotient limbs. 194 * Return the most significant limb of the quotient, this is always 0 or 1. 195 * 196 * Preconditions: 197 * 0. NSIZE >= DSIZE. 198 * 1. The most significant bit of the divisor must be set. 199 * 2. QP must either not overlap with the input operands at all, or 200 * QP + DSIZE >= NP must hold true. (This means that it's 201 * possible to put the quotient in the high part of NUM, right after the 202 * remainder in NUM. 203 * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero. 204 */ 205 206mpi_limb_t 207_gcry_mpih_divrem( mpi_ptr_t qp, mpi_size_t qextra_limbs, 208 mpi_ptr_t np, mpi_size_t nsize, 209 mpi_ptr_t dp, mpi_size_t dsize) 210{ 211 mpi_limb_t most_significant_q_limb = 0; 212 213 switch(dsize) { 214 case 0: 215 /* We are asked to divide by zero, so go ahead and do it! (To make 216 the compiler not remove this statement, return the value.) */ 217 return 1 / dsize; 218 219 case 1: 220 { 221 mpi_size_t i; 222 mpi_limb_t n1; 223 mpi_limb_t d; 224 225 d = dp[0]; 226 n1 = np[nsize - 1]; 227 228 if( n1 >= d ) { 229 n1 -= d; 230 most_significant_q_limb = 1; 231 } 232 233 qp += qextra_limbs; 234 for( i = nsize - 2; i >= 0; i--) 235 udiv_qrnnd( qp[i], n1, n1, np[i], d ); 236 qp -= qextra_limbs; 237 238 for( i = qextra_limbs - 1; i >= 0; i-- ) 239 udiv_qrnnd (qp[i], n1, n1, 0, d); 240 241 np[0] = n1; 242 } 243 break; 244 245 case 2: 246 { 247 mpi_size_t i; 248 mpi_limb_t n1, n0, n2; 249 mpi_limb_t d1, d0; 250 251 np += nsize - 2; 252 d1 = dp[1]; 253 d0 = dp[0]; 254 n1 = np[1]; 255 n0 = np[0]; 256 257 if( n1 >= d1 && (n1 > d1 || n0 >= d0) ) { 258 sub_ddmmss (n1, n0, n1, n0, d1, d0); 259 most_significant_q_limb = 1; 260 } 261 262 for( i = qextra_limbs + nsize - 2 - 1; i >= 0; i-- ) { 263 mpi_limb_t q; 264 mpi_limb_t r; 265 266 if( i >= qextra_limbs ) 267 np--; 268 else 269 np[0] = 0; 270 271 if( n1 == d1 ) { 272 /* Q should be either 111..111 or 111..110. Need special 273 * treatment of this rare case as normal division would 274 * give overflow. */ 275 q = ~(mpi_limb_t)0; 276 277 r = n0 + d1; 278 if( r < d1 ) { /* Carry in the addition? */ 279 add_ssaaaa( n1, n0, r - d0, np[0], 0, d0 ); 280 qp[i] = q; 281 continue; 282 } 283 n1 = d0 - (d0 != 0?1:0); 284 n0 = -d0; 285 } 286 else { 287 udiv_qrnnd (q, r, n1, n0, d1); 288 umul_ppmm (n1, n0, d0, q); 289 } 290 291 n2 = np[0]; 292 q_test: 293 if( n1 > r || (n1 == r && n0 > n2) ) { 294 /* The estimated Q was too large. */ 295 q--; 296 sub_ddmmss (n1, n0, n1, n0, 0, d0); 297 r += d1; 298 if( r >= d1 ) /* If not carry, test Q again. */ 299 goto q_test; 300 } 301 302 qp[i] = q; 303 sub_ddmmss (n1, n0, r, n2, n1, n0); 304 } 305 np[1] = n1; 306 np[0] = n0; 307 } 308 break; 309 310 default: 311 { 312 mpi_size_t i; 313 mpi_limb_t dX, d1, n0; 314 315 np += nsize - dsize; 316 dX = dp[dsize - 1]; 317 d1 = dp[dsize - 2]; 318 n0 = np[dsize - 1]; 319 320 if( n0 >= dX ) { 321 if(n0 > dX || _gcry_mpih_cmp(np, dp, dsize - 1) >= 0 ) { 322 _gcry_mpih_sub_n(np, np, dp, dsize); 323 n0 = np[dsize - 1]; 324 most_significant_q_limb = 1; 325 } 326 } 327 328 for( i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) { 329 mpi_limb_t q; 330 mpi_limb_t n1, n2; 331 mpi_limb_t cy_limb; 332 333 if( i >= qextra_limbs ) { 334 np--; 335 n2 = np[dsize]; 336 } 337 else { 338 n2 = np[dsize - 1]; 339 MPN_COPY_DECR (np + 1, np, dsize - 1); 340 np[0] = 0; 341 } 342 343 if( n0 == dX ) { 344 /* This might over-estimate q, but it's probably not worth 345 * the extra code here to find out. */ 346 q = ~(mpi_limb_t)0; 347 } 348 else { 349 mpi_limb_t r; 350 351 udiv_qrnnd(q, r, n0, np[dsize - 1], dX); 352 umul_ppmm(n1, n0, d1, q); 353 354 while( n1 > r || (n1 == r && n0 > np[dsize - 2])) { 355 q--; 356 r += dX; 357 if( r < dX ) /* I.e. "carry in previous addition?" */ 358 break; 359 n1 -= n0 < d1; 360 n0 -= d1; 361 } 362 } 363 364 /* Possible optimization: We already have (q * n0) and (1 * n1) 365 * after the calculation of q. Taking advantage of that, we 366 * could make this loop make two iterations less. */ 367 cy_limb = _gcry_mpih_submul_1(np, dp, dsize, q); 368 369 if( n2 != cy_limb ) { 370 _gcry_mpih_add_n(np, np, dp, dsize); 371 q--; 372 } 373 374 qp[i] = q; 375 n0 = np[dsize - 1]; 376 } 377 } 378 } 379 380 return most_significant_q_limb; 381} 382 383 384/**************** 385 * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. 386 * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR. 387 * Return the single-limb remainder. 388 * There are no constraints on the value of the divisor. 389 * 390 * QUOT_PTR and DIVIDEND_PTR might point to the same limb. 391 */ 392 393mpi_limb_t 394_gcry_mpih_divmod_1( mpi_ptr_t quot_ptr, 395 mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, 396 mpi_limb_t divisor_limb) 397{ 398 mpi_size_t i; 399 mpi_limb_t n1, n0, r; 400 int dummy; 401 402 if( !dividend_size ) 403 return 0; 404 405 /* If multiplication is much faster than division, and the 406 * dividend is large, pre-invert the divisor, and use 407 * only multiplications in the inner loop. 408 * 409 * This test should be read: 410 * Does it ever help to use udiv_qrnnd_preinv? 411 * && Does what we save compensate for the inversion overhead? 412 */ 413 if( UDIV_TIME > (2 * UMUL_TIME + 6) 414 && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME ) { 415 int normalization_steps; 416 417 count_leading_zeros( normalization_steps, divisor_limb ); 418 if( normalization_steps ) { 419 mpi_limb_t divisor_limb_inverted; 420 421 divisor_limb <<= normalization_steps; 422 423 /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The 424 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the 425 * most significant bit (with weight 2**N) implicit. 426 */ 427 /* Special case for DIVISOR_LIMB == 100...000. */ 428 if( !(divisor_limb << 1) ) 429 divisor_limb_inverted = ~(mpi_limb_t)0; 430 else 431 udiv_qrnnd(divisor_limb_inverted, dummy, 432 -divisor_limb, 0, divisor_limb); 433 434 n1 = dividend_ptr[dividend_size - 1]; 435 r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps); 436 437 /* Possible optimization: 438 * if (r == 0 439 * && divisor_limb > ((n1 << normalization_steps) 440 * | (dividend_ptr[dividend_size - 2] >> ...))) 441 * ...one division less... 442 */ 443 for( i = dividend_size - 2; i >= 0; i--) { 444 n0 = dividend_ptr[i]; 445 UDIV_QRNND_PREINV( quot_ptr[i + 1], r, r, 446 ((n1 << normalization_steps) 447 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))), 448 divisor_limb, divisor_limb_inverted); 449 n1 = n0; 450 } 451 UDIV_QRNND_PREINV( quot_ptr[0], r, r, 452 n1 << normalization_steps, 453 divisor_limb, divisor_limb_inverted); 454 return r >> normalization_steps; 455 } 456 else { 457 mpi_limb_t divisor_limb_inverted; 458 459 /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The 460 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the 461 * most significant bit (with weight 2**N) implicit. 462 */ 463 /* Special case for DIVISOR_LIMB == 100...000. */ 464 if( !(divisor_limb << 1) ) 465 divisor_limb_inverted = ~(mpi_limb_t) 0; 466 else 467 udiv_qrnnd(divisor_limb_inverted, dummy, 468 -divisor_limb, 0, divisor_limb); 469 470 i = dividend_size - 1; 471 r = dividend_ptr[i]; 472 473 if( r >= divisor_limb ) 474 r = 0; 475 else 476 quot_ptr[i--] = 0; 477 478 for( ; i >= 0; i-- ) { 479 n0 = dividend_ptr[i]; 480 UDIV_QRNND_PREINV( quot_ptr[i], r, r, 481 n0, divisor_limb, divisor_limb_inverted); 482 } 483 return r; 484 } 485 } 486 else { 487 if(UDIV_NEEDS_NORMALIZATION) { 488 int normalization_steps; 489 490 count_leading_zeros (normalization_steps, divisor_limb); 491 if( normalization_steps ) { 492 divisor_limb <<= normalization_steps; 493 494 n1 = dividend_ptr[dividend_size - 1]; 495 r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps); 496 497 /* Possible optimization: 498 * if (r == 0 499 * && divisor_limb > ((n1 << normalization_steps) 500 * | (dividend_ptr[dividend_size - 2] >> ...))) 501 * ...one division less... 502 */ 503 for( i = dividend_size - 2; i >= 0; i--) { 504 n0 = dividend_ptr[i]; 505 udiv_qrnnd (quot_ptr[i + 1], r, r, 506 ((n1 << normalization_steps) 507 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))), 508 divisor_limb); 509 n1 = n0; 510 } 511 udiv_qrnnd (quot_ptr[0], r, r, 512 n1 << normalization_steps, 513 divisor_limb); 514 return r >> normalization_steps; 515 } 516 } 517 /* No normalization needed, either because udiv_qrnnd doesn't require 518 * it, or because DIVISOR_LIMB is already normalized. */ 519 i = dividend_size - 1; 520 r = dividend_ptr[i]; 521 522 if(r >= divisor_limb) 523 r = 0; 524 else 525 quot_ptr[i--] = 0; 526 527 for(; i >= 0; i--) { 528 n0 = dividend_ptr[i]; 529 udiv_qrnnd( quot_ptr[i], r, r, n0, divisor_limb ); 530 } 531 return r; 532 } 533} 534