1/************************************************************** 2 * 3 * giants.c 4 * 5 * Library for large-integer arithmetic. 6 * 7 * The large-gcd implementation is due to J. P. Buhler. 8 * Special mod routines use ideas of R. McIntosh. 9 * Contributions from G. Woltman, A. Powell acknowledged. 10 * 11 * Updates: 12 * 18 Jul 99 REC Added routine fer_mod(), for use when Fermat 13 giant itself is available. 14 * 17 Jul 99 REC Fixed sign bug in fermatmod() 15 * 17 Apr 99 REC Fixed various comment/line wraps 16 * 25 Mar 99 REC G. Woltman/A. Powell fixes Karat. routines 17 * 05 Mar 99 REC Moved invaux, binvaux giants to stack 18 * 05 Mar 99 REC Moved gread/gwrite giants to stack 19 * 05 Mar 99 REC No static on cur_den, cur_recip (A. Powell) 20 * 28 Feb 99 REC Error detection added atop newgiant(). 21 * 27 Feb 99 REC Reduced wasted work in addsignal(). 22 * 27 Feb 99 REC Reduced wasted work in FFTmulg(). 23 * 19 Feb 99 REC Generalized iaddg() per R. Mcintosh. 24 * 2 Feb 99 REC Fixed comments. 25 * 6 Dec 98 REC Fixed yet another Karatsuba glitch. 26 * 1 Dec 98 REC Fixed errant case of addg(). 27 * 28 Nov 98 REC Installed A. Powell's (correct) variant of 28 Karatsuba multiply. 29 * 15 May 98 REC Modified gwrite() to handle huge integers. 30 * 13 May 98 REC Changed to static stack declarations 31 * 11 May 98 REC Installed Karatsuba multiply, to handle 32 * medregion 'twixt grammar- and FFT-multiply. 33 * 1 May 98 JF gshifts now handle bits < 0 correctly. 34 * 30 Apr 98 JF 68k assembler code removed, 35 * stack giant size now invariant and based 36 * on first call of newgiant(), 37 * stack memory leaks fixed. 38 * 29 Apr 98 JF function prototyes cleaned up, 39 * GCD no longer uses personal stack, 40 * optimized shifts for bits%16 == 0. 41 * 27 Apr 98 JF scratch giants now replaced with stack 42 * 20 Apr 98 JF grammarsquareg fixed for asize == 0. 43 * scratch giants now static. 44 * 29 Jan 98 JF Corrected out-of-range errors in 45 * mersennemod and fermatmod. 46 * 23 Dec 97 REC Sped up divide routines via split-shift. 47 * 18 Nov 97 JF Improved mersennemod, fermatmod. 48 * 9 Nov 97 JF Sped up grammarsquareg. 49 * 20 May 97 RDW Fixed Win32 compiler warnings. 50 * 18 May 97 REC Installed new, fast divide. 51 * 17 May 97 REC Reduced memory for FFT multiply. 52 * 26 Apr 97 REC Creation. 53 * 54 * c. 1997,1998 Perfectly Scientific, Inc. 55 * All Rights Reserved. 56 * 57 **************************************************************/ 58 59 60/* Include Files. */ 61 62#include <stdio.h> 63#include <stdlib.h> 64#include <string.h> 65#include <math.h> 66#include "giants.h" 67 68 69/* Compiler options. */ 70 71#ifdef _WIN32 72#pragma warning( disable : 4127 4706 ) /* disable conditional is constant warning */ 73#endif 74 75 76/* Global variables. */ 77 78int error = 0; 79int mulmode = AUTO_MUL; 80int cur_prec = 0; 81int cur_shift = 0; 82static int cur_stack_size = 0; 83static int cur_stack_elem = 0; 84static int stack_glen = 0; 85static giant *stack; 86giant cur_den = NULL, 87 cur_recip = NULL; 88int current_max_size = 0, 89 cur_run = 0; 90double * sinCos=NULL; 91int checkFFTerror = 0; 92double maxFFTerror; 93static giant u0=NULL, u1=NULL, v0=NULL, v1=NULL; 94static double *z = NULL, 95 *z2 = NULL; 96 97/* stack handling functions. */ 98static giant popg(void); 99static void pushg(int); 100 101 102/* Private function prototypes. */ 103 104int gerr(void); 105double gfloor(double); 106int radixdiv(int, int, giant); 107void columnwrite(FILE *, short *, char *, short, int); 108 109void normal_addg(giant, giant); 110void normal_subg(giant, giant); 111void reverse_subg(giant, giant); 112int binvaux(giant, giant); 113int invaux(giant, giant); 114int allzeros(int, int, giant); 115void auxmulg(giant a, giant b); 116void karatmulg(giant a, giant b); 117void karatsquareg(giant b); 118void grammarmulg(giant a, giant b); 119void grammarsquareg(giant b); 120 121int lpt(int, int *); 122void addsignal(giant, double *, int); 123void FFTsquareg(giant x); 124void FFTmulg(giant y, giant x); 125void scramble_real(); 126void fft_real_to_hermitian(double *z, int n); 127void fftinv_hermitian_to_real(double *z, int n); 128void mul_hermitian(double *a, double *b, int n); 129void square_hermitian(double *b, int n); 130void giant_to_double(giant x, int sizex, double *z, int L); 131void gswap(giant *, giant *); 132void onestep(giant, giant, gmatrix); 133void mulvM(gmatrix, giant, giant); 134void mulmM(gmatrix, gmatrix); 135void writeM(gmatrix); 136static void punch(giant, gmatrix); 137static void dotproduct(giant, giant, giant, giant); 138void fix(giant *, giant *); 139void hgcd(int, giant, giant, gmatrix); 140void shgcd(int, int, gmatrix); 141 142 143 144/************************************************************** 145 * 146 * Functions 147 * 148 **************************************************************/ 149 150 151/************************************************************** 152 * 153 * Initialization and utility functions 154 * 155 **************************************************************/ 156 157double 158gfloor( 159 double f 160) 161{ 162 return floor(f); 163} 164 165 166void 167init_sinCos( 168 int n 169) 170{ 171 int j; 172 double e = TWOPI/n; 173 174 if (n<=cur_run) 175 return; 176 cur_run = n; 177 if (sinCos) 178 free(sinCos); 179 sinCos = (double *)malloc(sizeof(double)*(1+(n>>2))); 180 for (j=0;j<=(n>>2);j++) 181 { 182 sinCos[j] = sin(e*j); 183 } 184} 185 186 187double 188s_sin( 189 int n 190) 191{ 192 int seg = n/(cur_run>>2); 193 194 switch (seg) 195 { 196 case 0: return(sinCos[n]); 197 case 1: return(sinCos[(cur_run>>1)-n]); 198 case 2: return(-sinCos[n-(cur_run>>1)]); 199 case 3: return(-sinCos[cur_run-n]); 200 } 201 return 0; 202} 203 204 205double 206s_cos( 207 int n 208) 209{ 210 int quart = (cur_run>>2); 211 212 if (n < quart) 213 return(s_sin(n+quart)); 214 return(-s_sin(n-quart)); 215} 216 217 218int 219gerr(void) 220{ 221 return(error); 222} 223 224 225giant 226popg ( 227 void 228) 229{ 230 int i; 231 232 if (current_max_size <= 0) current_max_size = MAX_SHORTS; 233 234 if (cur_stack_size == 0) { 235/* Initialize the stack if we're just starting out. 236 * Note that all stack giants will be whatever current_max_size is 237 * when newgiant() is first called. */ 238 cur_stack_size = STACK_GROW; 239 stack = (giant *) malloc (cur_stack_size * sizeof(giant)); 240 for(i = 0; i < STACK_GROW; i++) 241 stack[i] = NULL; 242 if (stack_glen == 0) stack_glen = current_max_size; 243 } else if (cur_stack_elem >= cur_stack_size) { 244/* Expand the stack if we need to. */ 245 i = cur_stack_size; 246 cur_stack_size += STACK_GROW; 247 stack = (giant *) realloc (stack,cur_stack_size * sizeof(giant)); 248 for (; i < cur_stack_size; i++) 249 stack[i] = NULL; 250 } else if (cur_stack_elem < cur_stack_size - 2*STACK_GROW) { 251/* Prune the stack if it's too big. Disabled, so the stack can only expand */ 252 /* cur_stack_size -= STACK_GROW; 253 for (i = cur_stack_size - STACK_GROW; i < cur_stack_size; i++) 254 free(stack[i]); 255 stack = (giant *) realloc (stack,cur_stack_size * sizeof(giant)); */ 256 } 257 258/* Malloc our giant. */ 259 if (stack[cur_stack_elem] == NULL) 260 stack[cur_stack_elem] = malloc(stack_glen*sizeof(short)+sizeof(int)); 261 stack[cur_stack_elem]->sign = 0; 262 263 return(stack[cur_stack_elem++]); 264} 265 266 267void 268pushg ( 269 int a 270) 271{ 272 if (a < 0) return; 273 cur_stack_elem -= a; 274 if (cur_stack_elem < 0) cur_stack_elem = 0; 275} 276 277 278giant 279newgiant( 280 int numshorts 281) 282{ 283 int size; 284 giant thegiant; 285 286 if (numshorts > MAX_SHORTS) { 287 fprintf(stderr, "Requested giant too big.\n"); 288 fflush(stderr); 289 } 290 if (numshorts<=0) 291 numshorts = MAX_SHORTS; 292 size = numshorts*sizeof(short)+sizeof(int); 293 thegiant = (giant)malloc(size); 294 thegiant->sign = 0; 295 296 if (newmin(2*numshorts,MAX_SHORTS) > current_max_size) 297 current_max_size = newmin(2*numshorts,MAX_SHORTS); 298 299/* If newgiant() is being called for the first time, set the 300 * size of the stack giants. */ 301 if (stack_glen == 0) stack_glen = current_max_size; 302 303 return(thegiant); 304} 305 306 307gmatrix 308newgmatrix( 309 void 310) 311/* Allocates space for a gmatrix struct, but does not 312 * allocate space for the giants. */ 313{ 314 return((gmatrix) malloc (4*sizeof(giant))); 315} 316 317int 318bitlen( 319 giant n 320) 321{ 322 int b = 16, c = 1<<15, w; 323 324 if (isZero(n)) 325 return(0); 326 w = n->n[abs(n->sign) - 1]; 327 while ((w&c) == 0) 328 { 329 b--; 330 c >>= 1; 331 } 332 return (16*(abs(n->sign)-1) + b); 333} 334 335 336int 337bitval( 338 giant n, 339 int pos 340) 341{ 342 int i = abs(pos)>>4, c = 1 << (pos&15); 343 344 return ((n->n[i]) & c); 345} 346 347 348int 349isone( 350 giant g 351) 352{ 353 return((g->sign==1)&&(g->n[0]==1)); 354} 355 356 357int isZero( 358 giant thegiant 359) 360/* Returns TR if thegiant == 0. */ 361{ 362 register int count; 363 int length = abs(thegiant->sign); 364 register unsigned short * numpointer = thegiant->n; 365 366 if (length) 367 { 368 for(count = 0; count<length; ++count,++numpointer) 369 { 370 if (*numpointer != 0 ) 371 return(FA); 372 } 373 } 374 return(TR); 375} 376 377 378void 379gtog( 380 giant srcgiant, 381 giant destgiant 382) 383/* destgiant becomes equal to srcgiant. */ 384{ 385 int numbytes = sizeof(int) + abs(srcgiant->sign)*sizeof(short); 386 387 memcpy((char *)destgiant,(char *)srcgiant,numbytes); 388} 389 390 391void 392itog( 393 int i, 394 giant g 395) 396/* The giant g becomes set to the integer value i. */ 397{ 398 unsigned int j = abs(i); 399 400 if (i==0) 401 { 402 g->sign = 0; 403 g->n[0] = 0; 404 return; 405 } 406 g->n[0] = (unsigned short)(j & 0xFFFF); 407 j >>= 16; 408 if (j) 409 { 410 g->n[1] = (unsigned short)j; 411 g->sign = 2; 412 } 413 else 414 { 415 g->sign = 1; 416 } 417 if (i<0) 418 g->sign = -(g->sign); 419} 420 421 422signed int 423gtoi( 424 giant x 425) 426/* Calculate the value of an int-sized giant NOT exceeding 31 bits. */ 427{ 428 register int size = abs(x->sign); 429 register int sign = (x->sign < 0) ? -1 : 1; 430 431 switch(size) 432 { 433 case 0: 434 break; 435 case 1: 436 return sign * x->n[0]; 437 case 2: 438 return sign * (x->n[0]+((x->n[1])<<16)); 439 default: 440 fprintf(stderr,"Giant too large for gtoi\n"); 441 break; 442 } 443 return 0; 444} 445 446 447int 448gsign( 449 giant g 450) 451/* Returns the sign of g. */ 452{ 453 if (isZero(g)) 454 return(0); 455 if (g->sign >0) 456 return(1); 457 return(-1); 458} 459 460 461#if 0 462int gcompg(a,b) 463/* Returns -1,0,1 if a<b, a=b, a>b, respectively. */ 464 giant a,b; 465{ 466 int size = abs(a->sign); 467 468 if(isZero(a)) size = 0; 469 if (size == 0) { 470 if (isZero(b)) return(0); else return(-gsign(b)); 471 } 472 473 if (b->sign == 0) return(gsign(a)); 474 if (gsign(a)!=gsign(b)) return(gsign(a)); 475 if (size>abs(b->sign)) return(gsign(a)); 476 if (size<abs(b->sign)) return(-gsign(a)); 477 478 do { 479 --size; 480 if (a->n[size] > b->n[size]) return(gsign(a)); 481 if (a->n[size] < b->n[size]) return(-gsign(a)); 482 } while(size>0); 483 484 return(0); 485} 486#else 487 488int 489gcompg( 490 giant a, 491 giant b 492) 493/* Returns -1,0,1 if a<b, a=b, a>b, respectively. */ 494{ 495 int sa = a->sign, j, sb = b->sign, va, vb, sgn; 496 497 if(sa > sb) 498 return(1); 499 if(sa < sb) 500 return(-1); 501 if(sa < 0) 502 { 503 sa = -sa; /* Take absolute value of sa. */ 504 sgn = -1; 505 } 506 else 507 { 508 sgn = 1; 509 } 510 for(j = sa-1; j >= 0; j--) 511 { 512 va = a->n[j]; 513 vb = b->n[j]; 514 if (va > vb) 515 return(sgn); 516 if (va < vb) 517 return(-sgn); 518 } 519 return(0); 520} 521#endif 522 523 524void 525setmulmode( 526 int mode 527) 528{ 529 mulmode = mode; 530} 531 532 533/************************************************************** 534 * 535 * Private I/O Functions 536 * 537 **************************************************************/ 538 539 540int 541radixdiv( 542 int base, 543 int divisor, 544 giant thegiant 545) 546/* Divides giant of arbitrary base by divisor. 547 * Returns remainder. Used by idivg and gread. */ 548{ 549 int first = TR; 550 int finalsize = abs(thegiant->sign); 551 int j = finalsize-1; 552 unsigned short *digitpointer=&thegiant->n[j]; 553 unsigned int num,rem=0; 554 555 if (divisor == 0) 556 { 557 error = DIVIDEBYZERO; 558 exit(error); 559 } 560 561 while (j>=0) 562 { 563 num=rem*base + *digitpointer; 564 *digitpointer = (unsigned short)(num/divisor); 565 if (first) 566 { 567 if (*digitpointer == 0) 568 --finalsize; 569 else 570 first = FA; 571 } 572 rem = num % divisor; 573 --digitpointer; 574 --j; 575 } 576 577 if ((divisor<0) ^ (thegiant->sign<0)) 578 finalsize=-finalsize; 579 thegiant->sign=finalsize; 580 return(rem); 581} 582 583 584void 585columnwrite( 586 FILE *filepointer, 587 short *column, 588 char *format, 589 short arg, 590 int newlines 591) 592/* Used by gwriteln. */ 593{ 594 char outstring[10]; 595 short i; 596 597 sprintf(outstring,format,arg); 598 for (i=0; outstring[i]!=0; ++i) 599 { 600 if (newlines) 601 { 602 if (*column >= COLUMNWIDTH) 603 { 604 fputs("\\\n",filepointer); 605 *column = 0; 606 } 607 } 608 fputc(outstring[i],filepointer); 609 ++*column; 610 } 611} 612 613 614void 615gwrite( 616 giant thegiant, 617 FILE *filepointer, 618 int newlines 619) 620/* Outputs thegiant to filepointer. Output is terminated by a newline. */ 621{ 622 short column; 623 unsigned int i; 624 unsigned short *numpointer; 625 giant garbagegiant, basetengrand; 626 627 basetengrand = popg(); 628 garbagegiant = popg(); 629 630 if (isZero(thegiant)) 631 { 632 fputs("0",filepointer); 633 } 634 else 635 { 636 numpointer = basetengrand->n; 637 gtog(thegiant,garbagegiant); 638 639 basetengrand->sign = 0; 640 do 641 { 642 *numpointer = (unsigned short)idivg(10000,garbagegiant); 643 ++numpointer; 644 if (++basetengrand->sign >= current_max_size) 645 { 646 error = OVFLOW; 647 exit(error); 648 } 649 } while (!isZero(garbagegiant)); 650 651 if (!error) 652 { 653 i = basetengrand->sign-1; 654 column = 0; 655 if (thegiant->sign<0 && basetengrand->n[i]!=0) 656 columnwrite(filepointer,&column,"-",0, newlines); 657 columnwrite(filepointer,&column,"%d",basetengrand->n[i],newlines); 658 for( ; i>0; ) 659 { 660 --i; 661 columnwrite(filepointer,&column,"%04d",basetengrand->n[i],newlines); 662 } 663 } 664 } 665 pushg(2); 666} 667 668 669void 670gwriteln( 671 giant theg, 672 FILE *filepointer 673) 674{ 675 gwrite(theg, filepointer, 1); 676 fputc('\n',filepointer); 677} 678 679 680void 681gread( 682 giant theg, 683 FILE *filepointer 684) 685{ 686 char currentchar; 687 int isneg,size,backslash=FA,numdigits=0; 688 unsigned short *numpointer; 689 giant basetenthousand; 690 static char *inbuf = NULL; 691 692 basetenthousand = popg(); 693 if (inbuf == NULL) 694 inbuf = (char*)malloc(MAX_DIGITS); 695 696 currentchar = (char)fgetc(filepointer); 697 if (currentchar=='-') 698 { 699 isneg=TR; 700 } 701 else 702 { 703 isneg=FA; 704 if (currentchar!='+') 705 ungetc(currentchar,filepointer); 706 } 707 708 do 709 { 710 currentchar = (char)fgetc(filepointer); 711 if ((currentchar>='0') && (currentchar<='9')) 712 { 713 inbuf[numdigits]=currentchar; 714 if(++numdigits==MAX_DIGITS) 715 break; 716 backslash=FA; 717 } 718 else 719 { 720 if (currentchar=='\\') 721 backslash=TR; 722 } 723 } while(((currentchar!=' ') && (currentchar!='\n') && 724 (currentchar!='\t')) || (backslash) ); 725 if (numdigits) 726 { 727 size = 0; 728 do 729 { 730 inbuf[numdigits] = 0; 731 numdigits-=4; 732 if (numdigits<0) 733 numdigits=0; 734 basetenthousand->n[size] = (unsigned short)strtol(&inbuf[numdigits],NULL,10); 735 ++size; 736 } while (numdigits>0); 737 738 basetenthousand->sign = size; 739 theg->sign = 0; 740 numpointer = theg->n; 741 do 742 { 743 *numpointer = (unsigned short) 744 radixdiv(10000,1<<(8*sizeof(short)),basetenthousand); 745 ++numpointer; 746 if (++theg->sign >= current_max_size) 747 { 748 error = OVFLOW; 749 exit(error); 750 } 751 } while (!isZero(basetenthousand)); 752 753 if (isneg) 754 theg->sign = -theg->sign; 755 } 756 pushg(1); 757} 758 759 760 761/************************************************************** 762 * 763 * Private Math Functions 764 * 765 **************************************************************/ 766 767 768void 769negg( 770 giant g 771) 772/* g becomes -g. */ 773{ 774 g->sign = -g->sign; 775} 776 777 778void 779absg( 780 giant g 781) 782{ 783 /* g becomes the absolute value of g. */ 784 if (g->sign <0) 785 g->sign=-g->sign; 786} 787 788 789void 790iaddg( 791 int i, 792 giant g 793) 794/* Giant g becomes g + (int)i. */ 795{ 796 int w,j=0,carry = 0, size = abs(g->sign); 797 giant tmp; 798 799 if (isZero(g)) 800 { 801 itog(i,g); 802 } 803 else if(g->sign < 0) { 804 tmp = popg(); 805 itog(i, tmp); 806 addg(tmp, g); 807 pushg(1); 808 return; 809 } 810 else 811 { 812 w = g->n[0]+i; 813 do 814 { 815 g->n[j] = (unsigned short) (w & 65535L); 816 carry = w >> 16; 817 w = g->n[++j]+carry; 818 } while ((carry!=0) && (j<size)); 819 } 820 if (carry) 821 { 822 ++g->sign; 823 g->n[size] = (unsigned short)carry; 824 } 825} 826 827 828/* New subtract routines. 829 The basic subtract "subg()" uses the following logic table: 830 831 a b if(b > a) if(a > b) 832 833 + + b := b - a b := -(a - b) 834 - + b := b + (-a) N.A. 835 + - N.A. b := -((-b) + a) 836 - - b := (-a) - (-b) b := -((-b) - (-a)) 837 838 The basic addition routine "addg()" uses: 839 840 a b if(b > -a) if(-a > b) 841 842 + + b := b + a N.A. 843 - + b := b - (-a) b := -((-a) - b) 844 + - b := a - (-b) b := -((-b) - a) 845 - - N.A. b := -((-b) + (-a)) 846 847 In this way, internal routines "normal_addg," "normal_subg," 848 and "reverse_subg;" each of which assumes non-negative 849 operands and a non-negative result, are now used for greater 850 efficiency. 851 */ 852 853void 854normal_addg( 855 giant a, 856 giant b 857) 858/* b := a + b, both a,b assumed non-negative. */ 859{ 860 int carry = 0; 861 int asize = a->sign, bsize = b->sign; 862 long k; 863 int j=0; 864 unsigned short *aptr = a->n, *bptr = b->n; 865 866 if (asize < bsize) 867 { 868 for (j=0; j<asize; j++) 869 { 870 k = *aptr++ + *bptr + carry; 871 carry = 0; 872 if (k >= 65536L) 873 { 874 k -= 65536L; 875 ++carry; 876 } 877 *bptr++ = (unsigned short)k; 878 } 879 for (j=asize; j<bsize; j++) 880 { 881 k = *bptr + carry; 882 carry = 0; 883 if (k >= 65536L) 884 { 885 k -= 65536L; 886 ++carry; 887 } 888 *bptr++ = (unsigned short)k; 889 } 890 } 891 else 892 { 893 for (j=0; j<bsize; j++) 894 { 895 k = *aptr++ + *bptr + carry; 896 carry = 0; 897 if (k >= 65536L) 898 { 899 k -= 65536L; 900 ++carry; 901 } 902 *bptr++ = (unsigned short)k; 903 } 904 for (j=bsize; j<asize; j++) 905 { 906 k = *aptr++ + carry; 907 carry = 0; 908 if (k >= 65536L) 909 { 910 k -= 65536L; 911 ++carry; 912 } 913 *bptr++ = (unsigned short)k; 914 } 915 } 916 if (carry) 917 { 918 *bptr = 1; ++j; 919 } 920 b->sign = j; 921} 922 923 924void 925normal_subg( 926 giant a, 927 giant b 928) 929/* b := b - a; requires b, a non-negative and b >= a. */ 930{ 931 int j, size = b->sign; 932 unsigned int k; 933 934 if (a->sign == 0) 935 return; 936 937 k = 0; 938 for (j=0; j<a->sign; ++j) 939 { 940 k += 0xffff - a->n[j] + b->n[j]; 941 b->n[j] = (unsigned short)(k & 0xffff); 942 k >>= 16; 943 } 944 for (j=a->sign; j<size; ++j) 945 { 946 k += 0xffff + b->n[j]; 947 b->n[j] = (unsigned short)(k & 0xffff); 948 k >>= 16; 949 } 950 951 if (b->n[0] == 0xffff) 952 iaddg(1,b); 953 else 954 ++b->n[0]; 955 956 while ((size-- > 0) && (b->n[size]==0)); 957 958 b->sign = (b->n[size]==0) ? 0 : size+1; 959} 960 961 962void 963reverse_subg( 964 giant a, 965 giant b 966) 967/* b := a - b; requires b, a non-negative and a >= b. */ 968{ 969 int j, size = a->sign; 970 unsigned int k; 971 972 k = 0; 973 for (j=0; j<b->sign; ++j) 974 { 975 k += 0xffff - b->n[j] + a->n[j]; 976 b->n[j] = (unsigned short)(k & 0xffff); 977 k >>= 16; 978 } 979 for (j=b->sign; j<size; ++j) 980 { 981 k += 0xffff + a->n[j]; 982 b->n[j] = (unsigned short)(k & 0xffff); 983 k >>= 16; 984 } 985 986 b->sign = size; /* REC, 21 Apr 1996. */ 987 if (b->n[0] == 0xffff) 988 iaddg(1,b); 989 else 990 ++b->n[0]; 991 992 while (!b->n[--size]); 993 994 b->sign = size+1; 995} 996 997void 998addg( 999 giant a, 1000 giant b 1001) 1002/* b := b + a, any signs any result. */ 1003{ 1004 int asgn = a->sign, bsgn = b->sign; 1005 1006 if (asgn == 0) 1007 return; 1008 if (bsgn == 0) 1009 { 1010 gtog(a,b); 1011 return; 1012 } 1013 if ((asgn < 0) == (bsgn < 0)) 1014 { 1015 if (bsgn > 0) 1016 { 1017 normal_addg(a,b); 1018 return; 1019 } 1020 absg(b); 1021 if(a != b) absg(a); 1022 normal_addg(a,b); 1023 negg(b); 1024 if(a != b) negg(a); 1025 return; 1026 } 1027 if(bsgn > 0) 1028 { 1029 negg(a); 1030 if (gcompg(b,a) >= 0) 1031 { 1032 normal_subg(a,b); 1033 negg(a); 1034 return; 1035 } 1036 reverse_subg(a,b); 1037 negg(a); 1038 negg(b); 1039 return; 1040 } 1041 negg(b); 1042 if(gcompg(b,a) < 0) 1043 { 1044 reverse_subg(a,b); 1045 return; 1046 } 1047 normal_subg(a,b); 1048 negg(b); 1049 return; 1050} 1051 1052void 1053subg( 1054 giant a, 1055 giant b 1056) 1057/* b := b - a, any signs, any result. */ 1058{ 1059 int asgn = a->sign, bsgn = b->sign; 1060 1061 if (asgn == 0) 1062 return; 1063 if (bsgn == 0) 1064 { 1065 gtog(a,b); 1066 negg(b); 1067 return; 1068 } 1069 if ((asgn < 0) != (bsgn < 0)) 1070 { 1071 if (bsgn > 0) 1072 { 1073 negg(a); 1074 normal_addg(a,b); 1075 negg(a); 1076 return; 1077 } 1078 negg(b); 1079 normal_addg(a,b); 1080 negg(b); 1081 return; 1082 } 1083 if (bsgn > 0) 1084 { 1085 if (gcompg(b,a) >= 0) 1086 { 1087 normal_subg(a,b); 1088 return; 1089 } 1090 reverse_subg(a,b); 1091 negg(b); 1092 return; 1093 } 1094 negg(a); 1095 negg(b); 1096 if (gcompg(b,a) >= 0) 1097 { 1098 normal_subg(a,b); 1099 negg(a); 1100 negg(b); 1101 return; 1102 } 1103 reverse_subg(a,b); 1104 negg(a); 1105 return; 1106} 1107 1108 1109int 1110numtrailzeros( 1111 giant g 1112) 1113/* Returns the number of trailing zero bits in g. */ 1114{ 1115 register int numshorts = abs(g->sign), j, bcount=0; 1116 register unsigned short gshort, c; 1117 1118 for (j=0;j<numshorts;j++) 1119 { 1120 gshort = g->n[j]; 1121 c = 1; 1122 for (bcount=0;bcount<16; bcount++) 1123 { 1124 if (c & gshort) 1125 break; 1126 c <<= 1; 1127 } 1128 if (bcount<16) 1129 break; 1130 } 1131 return(bcount + 16*j); 1132} 1133 1134 1135void 1136bdivg( 1137 giant v, 1138 giant u 1139) 1140/* u becomes greatest power of two not exceeding u/v. */ 1141{ 1142 int diff = bitlen(u) - bitlen(v); 1143 giant scratch7; 1144 1145 if (diff<0) 1146 { 1147 itog(0,u); 1148 return; 1149 } 1150 scratch7 = popg(); 1151 gtog(v, scratch7); 1152 gshiftleft(diff,scratch7); 1153 if (gcompg(u,scratch7) < 0) 1154 diff--; 1155 if (diff<0) 1156 { 1157 itog(0,u); 1158 pushg(1); 1159 return; 1160 } 1161 itog(1,u); 1162 gshiftleft(diff,u); 1163 1164 pushg(1); 1165} 1166 1167 1168int 1169binvaux( 1170 giant p, 1171 giant x 1172) 1173/* Binary inverse method. Returns zero if no inverse exists, 1174 * in which case x becomes GCD(x,p). */ 1175{ 1176 1177 giant scratch7, u0, u1, v0, v1; 1178 1179 if (isone(x)) 1180 return(1); 1181 u0 = popg(); 1182 u1 = popg(); 1183 v0 = popg(); 1184 v1 = popg(); 1185 itog(1, v0); 1186 gtog(x, v1); 1187 itog(0,x); 1188 gtog(p, u1); 1189 1190 scratch7 = popg(); 1191 while(!isZero(v1)) 1192 { 1193 gtog(u1, u0); 1194 bdivg(v1, u0); 1195 gtog(x, scratch7); 1196 gtog(v0, x); 1197 mulg(u0, v0); 1198 subg(v0,scratch7); 1199 gtog(scratch7, v0); 1200 1201 gtog(u1, scratch7); 1202 gtog(v1, u1); 1203 mulg(u0, v1); 1204 subg(v1,scratch7); 1205 gtog(scratch7, v1); 1206 } 1207 1208 pushg(1); 1209 1210 if (!isone(u1)) 1211 { 1212 gtog(u1,x); 1213 if(x->sign<0) addg(p, x); 1214 pushg(4); 1215 return(0); 1216 } 1217 if(x->sign<0) 1218 addg(p, x); 1219 pushg(4); 1220 return(1); 1221} 1222 1223 1224int 1225binvg( 1226 giant p, 1227 giant x 1228) 1229{ 1230 modg(p, x); 1231 return(binvaux(p,x)); 1232} 1233 1234 1235int 1236invg( 1237 giant p, 1238 giant x 1239) 1240{ 1241 modg(p, x); 1242 return(invaux(p,x)); 1243} 1244 1245int 1246invaux( 1247 giant p, 1248 giant x 1249) 1250/* Returns zero if no inverse exists, in which case x becomes 1251 * GCD(x,p). */ 1252{ 1253 1254 giant scratch7, u0, u1, v0, v1; 1255 1256 if ((x->sign==1)&&(x->n[0]==1)) 1257 return(1); 1258 1259 u0 = popg(); 1260 u1 = popg(); 1261 v0 = popg(); 1262 v1 = popg(); 1263 1264 itog(1,u1); 1265 gtog(p, v0); 1266 gtog(x, v1); 1267 itog(0,x); 1268 1269 scratch7 = popg(); 1270 while (!isZero(v1)) 1271 { 1272 gtog(v0, u0); 1273 divg(v1, u0); 1274 gtog(u0, scratch7); 1275 mulg(v1, scratch7); 1276 subg(v0, scratch7); 1277 negg(scratch7); 1278 gtog(v1, v0); 1279 gtog(scratch7, v1); 1280 gtog(u1, scratch7); 1281 mulg(u0, scratch7); 1282 subg(x, scratch7); 1283 negg(scratch7); 1284 gtog(u1,x); 1285 gtog(scratch7, u1); 1286 } 1287 pushg(1); 1288 1289 if ((v0->sign!=1)||(v0->n[0]!=1)) 1290 { 1291 gtog(v0,x); 1292 pushg(4); 1293 return(0); 1294 } 1295 if(x->sign<0) 1296 addg(p, x); 1297 pushg(4); 1298 return(1); 1299} 1300 1301 1302int 1303mersenneinvg( 1304 int q, 1305 giant x 1306) 1307{ 1308 int k; 1309 giant u0, u1, v1; 1310 1311 u0 = popg(); 1312 u1 = popg(); 1313 v1 = popg(); 1314 1315 itog(1, u0); 1316 itog(0, u1); 1317 itog(1, v1); 1318 gshiftleft(q, v1); 1319 subg(u0, v1); 1320 mersennemod(q, x); 1321 while (1) 1322 { 1323 k = -1; 1324 if (isZero(x)) 1325 { 1326 gtog(v1, x); 1327 pushg(3); 1328 return(0); 1329 } 1330 while (bitval(x, ++k) == 0); 1331 1332 gshiftright(k, x); 1333 if (k) 1334 { 1335 gshiftleft(q-k, u0); 1336 mersennemod(q, u0); 1337 } 1338 if (isone(x)) 1339 break; 1340 addg(u1, u0); 1341 mersennemod(q, u0); 1342 negg(u1); 1343 addg(u0, u1); 1344 mersennemod(q, u1); 1345 if (!gcompg(v1,x)) { 1346 pushg(3); 1347 return(0); 1348 } 1349 addg(v1, x); 1350 negg(v1); 1351 addg(x, v1); 1352 mersennemod(q, v1); 1353 } 1354 gtog(u0, x); 1355 mersennemod(q,x); 1356 pushg(3); 1357 return(1); 1358} 1359 1360 1361void 1362cgcdg( 1363 giant a, 1364 giant v 1365) 1366/* Classical Euclid GCD. v becomes gcd(a, v). */ 1367{ 1368 giant u, r; 1369 1370 v->sign = abs(v->sign); 1371 if (isZero(a)) 1372 return; 1373 1374 u = popg(); 1375 r = popg(); 1376 gtog(a, u); 1377 u->sign = abs(u->sign); 1378 while (!isZero(v)) 1379 { 1380 gtog(u, r); 1381 modg(v, r); 1382 gtog(v, u); 1383 gtog(r, v); 1384 } 1385 gtog(u,v); 1386 pushg(2); 1387} 1388 1389 1390void 1391gcdg( 1392 giant x, 1393 giant y 1394) 1395{ 1396 if (bitlen(y)<= GCDLIMIT) 1397 bgcdg(x,y); 1398 else 1399 ggcd(x,y); 1400} 1401 1402void 1403bgcdg( 1404 giant a, 1405 giant b 1406) 1407/* Binary form of the gcd. b becomes the gcd of a,b. */ 1408{ 1409 int k = isZero(b), m = isZero(a); 1410 giant u, v, t; 1411 1412 if (k || m) 1413 { 1414 if (m) 1415 { 1416 if (k) 1417 itog(1,b); 1418 return; 1419 } 1420 if (k) 1421 { 1422 if (m) 1423 itog(1,b); 1424 else 1425 gtog(a,b); 1426 return; 1427 } 1428 } 1429 1430 u = popg(); 1431 v = popg(); 1432 t = popg(); 1433 1434 /* Now neither a nor b is zero. */ 1435 gtog(a, u); 1436 u->sign = abs(a->sign); 1437 gtog(b, v); 1438 v->sign = abs(b->sign); 1439 k = numtrailzeros(u); 1440 m = numtrailzeros(v); 1441 if (k>m) 1442 k = m; 1443 gshiftright(k,u); 1444 gshiftright(k,v); 1445 if (u->n[0] & 1) 1446 { 1447 gtog(v, t); 1448 negg(t); 1449 } 1450 else 1451 { 1452 gtog(u,t); 1453 } 1454 1455 while (!isZero(t)) 1456 { 1457 m = numtrailzeros(t); 1458 gshiftright(m, t); 1459 if (t->sign > 0) 1460 { 1461 gtog(t, u); 1462 subg(v,t); 1463 } 1464 else 1465 { 1466 gtog(t, v); 1467 negg(v); 1468 addg(u,t); 1469 } 1470 } 1471 gtog(u,b); 1472 gshiftleft(k, b); 1473 pushg(3); 1474} 1475 1476 1477void 1478powerg( 1479 int m, 1480 int n, 1481 giant g 1482) 1483/* g becomes m^n, NO mod performed. */ 1484{ 1485 giant scratch2 = popg(); 1486 1487 itog(1, g); 1488 itog(m, scratch2); 1489 while (n) 1490 { 1491 if (n & 1) 1492 mulg(scratch2, g); 1493 n >>= 1; 1494 if (n) 1495 squareg(scratch2); 1496 } 1497 pushg(1); 1498} 1499 1500#if 0 1501void 1502jtest( 1503 giant n 1504) 1505{ 1506 if (n->sign) 1507 { 1508 if (n->n[n->sign-1] == 0) 1509 { 1510 fprintf(stderr,"%d %d tilt",n->sign, (int)(n->n[n->sign-1])); 1511 exit(7); 1512 } 1513 } 1514} 1515#endif 1516 1517 1518void 1519make_recip( 1520 giant d, 1521 giant r 1522) 1523/* r becomes the steady-state reciprocal 1524 * 2^(2b)/d, where b = bit-length of d-1. */ 1525{ 1526 int b; 1527 giant tmp, tmp2; 1528 1529 if (isZero(d) || (d->sign < 0)) 1530 { 1531 exit(SIGN); 1532 } 1533 tmp = popg(); 1534 tmp2 = popg(); 1535 itog(1, r); 1536 subg(r, d); 1537 b = bitlen(d); 1538 addg(r, d); 1539 gshiftleft(b, r); 1540 gtog(r, tmp2); 1541 while (1) 1542 { 1543 gtog(r, tmp); 1544 squareg(tmp); 1545 gshiftright(b, tmp); 1546 mulg(d, tmp); 1547 gshiftright(b, tmp); 1548 addg(r, r); 1549 subg(tmp, r); 1550 if (gcompg(r, tmp2) <= 0) 1551 break; 1552 gtog(r, tmp2); 1553 } 1554 itog(1, tmp); 1555 gshiftleft(2*b, tmp); 1556 gtog(r, tmp2); 1557 mulg(d, tmp2); 1558 subg(tmp2, tmp); 1559 itog(1, tmp2); 1560 while (tmp->sign < 0) 1561 { 1562 subg(tmp2, r); 1563 addg(d, tmp); 1564 } 1565 pushg(2); 1566} 1567 1568void 1569divg_via_recip( 1570 giant d, 1571 giant r, 1572 giant n 1573) 1574/* n := n/d, where r is the precalculated 1575 * steady-state reciprocal of d. */ 1576{ 1577 int s = 2*(bitlen(r)-1), sign = gsign(n); 1578 giant tmp, tmp2; 1579 1580 if (isZero(d) || (d->sign < 0)) 1581 { 1582 exit(SIGN); 1583 } 1584 1585 tmp = popg(); 1586 tmp2 = popg(); 1587 1588 n->sign = abs(n->sign); 1589 itog(0, tmp2); 1590 while (1) 1591 { 1592 gtog(n, tmp); 1593 mulg(r, tmp); 1594 gshiftright(s, tmp); 1595 addg(tmp, tmp2); 1596 mulg(d, tmp); 1597 subg(tmp, n); 1598 if (gcompg(n,d) >= 0) 1599 { 1600 subg(d,n); 1601 iaddg(1, tmp2); 1602 } 1603 if (gcompg(n,d) < 0) 1604 break; 1605 } 1606 gtog(tmp2, n); 1607 n->sign *= sign; 1608 pushg(2); 1609} 1610 1611#if 1 1612void 1613modg_via_recip( 1614 giant d, 1615 giant r, 1616 giant n 1617) 1618/* This is the fastest mod of the present collection. 1619 * n := n % d, where r is the precalculated 1620 * steady-state reciprocal of d. */ 1621 1622{ 1623 int s = (bitlen(r)-1), sign = n->sign; 1624 giant tmp, tmp2; 1625 1626 if (isZero(d) || (d->sign < 0)) 1627 { 1628 exit(SIGN); 1629 } 1630 1631 tmp = popg(); 1632 tmp2 = popg(); 1633 1634 n->sign = abs(n->sign); 1635 while (1) 1636 { 1637 gtog(n, tmp); gshiftright(s-1, tmp); 1638 mulg(r, tmp); 1639 gshiftright(s+1, tmp); 1640 mulg(d, tmp); 1641 subg(tmp, n); 1642 if (gcompg(n,d) >= 0) 1643 subg(d,n); 1644 if (gcompg(n,d) < 0) 1645 break; 1646 } 1647 if (sign >= 0) 1648 goto done; 1649 if (isZero(n)) 1650 goto done; 1651 negg(n); 1652 addg(d,n); 1653done: 1654 pushg(2); 1655 return; 1656} 1657 1658#else 1659void 1660modg_via_recip( 1661 giant d, 1662 giant r, 1663 giant n 1664) 1665{ 1666 int s = 2*(bitlen(r)-1), sign = n->sign; 1667 giant tmp, tmp2; 1668 1669 if (isZero(d) || (d->sign < 0)) 1670 { 1671 exit(SIGN); 1672 } 1673 1674 tmp = popg(); 1675 tmp2 = popg() 1676 1677 n->sign = abs(n->sign); 1678 while (1) 1679 { 1680 gtog(n, tmp); 1681 mulg(r, tmp); 1682 gshiftright(s, tmp); 1683 mulg(d, tmp); 1684 subg(tmp, n); 1685 if (gcompg(n,d) >= 0) 1686 subg(d,n); 1687 if (gcompg(n,d) < 0) 1688 break; 1689 } 1690 if (sign >= 0) 1691 goto done; 1692 if (isZero(n)) 1693 goto done; 1694 negg(n); 1695 addg(d,n); 1696done: 1697 pushg(2); 1698 return; 1699} 1700#endif 1701 1702void 1703modg( 1704 giant d, 1705 giant n 1706) 1707/* n becomes n%d. n is arbitrary, but the denominator d must be positive! */ 1708{ 1709 if (cur_recip == NULL) { 1710 cur_recip = newgiant(current_max_size); 1711 cur_den = newgiant(current_max_size); 1712 gtog(d, cur_den); 1713 make_recip(d, cur_recip); 1714 } else if (gcompg(d, cur_den)) { 1715 gtog(d, cur_den); 1716 make_recip(d, cur_recip); 1717 } 1718 modg_via_recip(d, cur_recip, n); 1719} 1720 1721 1722#if 0 1723int 1724feemulmod ( 1725 giant a, 1726 giant b, 1727 int q, 1728 int k 1729) 1730/* a becomes (a*b) (mod 2^q-k) where q % 16 == 0 and k is "small" (0 < k < 65535). 1731 * Returns 0 if unsuccessful, otherwise 1. */ 1732{ 1733 giant carry, kk, scratch; 1734 int i, j; 1735 int asize = abs(a->sign), bsize = abs(b->sign); 1736 unsigned short *aptr,*bptr,*destptr; 1737 unsigned int words; 1738 int kpower, curk; 1739 1740 if ((q % 16) || (k <= 0) || (k >= 65535)) { 1741 return (0); 1742 } 1743 1744 carry = popg(); 1745 kk = popg(); 1746 scratch = popg(); 1747 1748 for (i=0; i<asize+bsize; i++) scratch->n[i]=0; 1749 1750 words = q >> 4; 1751 1752 bptr = b->n; 1753 for (i = 0; i < bsize; i++) { 1754 mult = *bptr++; 1755 if (mult) { 1756 kpower = i/words; 1757 1758 if (kpower >= 1) itog (kpower,kk); 1759 for (j = 1; j < kpower; k++) smulg(kpower,kk); 1760 1761 itog(0,carry); 1762 1763 aptr = a->n; 1764 for (j = 0; j < bsize; b++) { 1765 gtog(kk,scratch); 1766 smulg(*aptr++,scratch); 1767 smulg(mult,scratch); 1768 iaddg(*destptr,scratch); 1769 addg(carry,scratch); 1770 *destptr++ = scratch->n[0]; 1771 gshiftright(scratch,16); 1772 gtog(scratch,carry); 1773 if (destptr - scratch->n >= words) { 1774 smulg(k, carry); 1775 smulg(k, kk); 1776 destptr -= words; 1777 } 1778 } 1779 } 1780 } 1781 1782 int i,j,m; 1783 unsigned int prod,carry=0; 1784 int asize = abs(a->sign), bsize = abs(b->sign); 1785 unsigned short *aptr,*bptr,*destptr; 1786 unsigned short mult; 1787 int words, excess; 1788 int temp; 1789 giant scratch = popg(), scratch2 = popg(), scratch3 = popg(); 1790 short *carryptr = scratch->n; 1791 int kpower,kpowerlimit, curk; 1792 1793 if ((q % 16) || (k <= 0) || (k >= 65535)) { 1794 return (0); 1795 } 1796 1797 scratch 1798 1799 for (i=0; i<asize+bsize; i++) scratch->n[i]=0; 1800 1801 words = q >> 4; 1802 1803 bptr = b->n; 1804 for (i=0; i<bsize; ++i) 1805 { 1806 mult = *bptr++; 1807 if (mult) 1808 { 1809 kpower = i/words; 1810 aptr = a->n; 1811 destptr = scratch->n + i; 1812 1813 if (kpower == 0) { 1814 carry = 0; 1815 } else if (kpower <= kpowerlimit) { 1816 carry = 0; 1817 curk = k; 1818 for (j = 1; j < kpower; j++) curk *= k; 1819 } else { 1820 itog (k,scratch); 1821 for (j = 1; j < kpower; j++) smulg(k,scratch); 1822 itog(0,scratch2); 1823 } 1824 1825 for (j = 0; j < asize; j++) { 1826 if(kpower == 0) { 1827 prod = *aptr++ * mult + *destptr + carry; 1828 *destptr++ = (unsigned short)(prod & 0xFFFF); 1829 carry = prod >> 16; 1830 } else if (kpower < kpowerlimit) { 1831 prod = kcur * *aptr++; 1832 temp = prod >> 16; 1833 prod &= 0xFFFF; 1834 temp *= mult; 1835 prod *= mult; 1836 temp += prod >> 16; 1837 prod &= 0xFFFF; 1838 prod += *destptr + carry; 1839 carry = prod >> 16 + temp; 1840 *destptr++ = (unsigned short)(prod & 0xFFFF); 1841 } else { 1842 gtog(scratch,scratch3); 1843 smulg(*aptr++,scratch3); 1844 smulg(mult,scratch3); 1845 iaddg(*destptr,scratch3); 1846 addg(scratch3,scratch2); 1847 *destptr++ = scratch2->n[0]; 1848 memmove(scratch2->n,scratch2->n+1,2*(scratch2->size-1)); 1849 scratch2->sign--; 1850 } 1851 if (destptr - scratch->n > words) { 1852 if (kpower == 0) { 1853 curk = k; 1854 carry *= k; 1855 } else if (kpower < kpowerlimit) { 1856 curk *= k; 1857 carry *= curk; 1858 } else if (kpower == kpowerlimit) { 1859 itog (k,scratch); 1860 for (j = 1; j < kpower; j++) smulg(k,scratch); 1861 itog(carry,scratch2); 1862 smulg(k,scratch2); 1863 } else { 1864 smulg(k,scratch); 1865 smulg(k,scratch2); 1866 } 1867 kpower++; 1868 destptr -= words; 1869 } 1870 } 1871 1872 /* Next, deal with the carry term. Needs to be improved to 1873 handle overflow carry cases. */ 1874 if (kpower <= kpowerlimit) { 1875 iaddg(carry,scratch); 1876 } else { 1877 addg(scratch2,scratch); 1878 } 1879 while(scratch->sign > q) 1880 gtog(scratch,scratch2) 1881 } 1882 } 1883 scratch->sign = destptr - scratch->n; 1884 if (!carry) 1885 --(scratch->sign); 1886 scratch->sign *= gsign(a)*gsign(b); 1887 gtog(scratch,a); 1888 pushg(3); 1889 return (1); 1890} 1891#endif 1892 1893int 1894idivg( 1895 int divisor, 1896 giant theg 1897) 1898{ 1899 /* theg becomes theg/divisor. Returns remainder. */ 1900 int n; 1901 int base = 1<<(8*sizeof(short)); 1902 1903 n = radixdiv(base,divisor,theg); 1904 return(n); 1905} 1906 1907 1908void 1909divg( 1910 giant d, 1911 giant n 1912) 1913/* n becomes n/d. n is arbitrary, but the denominator d must be positive! */ 1914{ 1915 if (cur_recip == NULL) { 1916 cur_recip = newgiant(current_max_size); 1917 cur_den = newgiant(current_max_size); 1918 gtog(d, cur_den); 1919 make_recip(d, cur_recip); 1920 } else if (gcompg(d, cur_den)) { 1921 gtog(d, cur_den); 1922 make_recip(d, cur_recip); 1923 } 1924 divg_via_recip(d, cur_recip, n); 1925} 1926 1927 1928void 1929powermod( 1930 giant x, 1931 int n, 1932 giant g 1933) 1934/* x becomes x^n (mod g). */ 1935{ 1936 giant scratch2 = popg(); 1937 gtog(x, scratch2); 1938 itog(1, x); 1939 while (n) 1940 { 1941 if (n & 1) 1942 { 1943 mulg(scratch2, x); 1944 modg(g, x); 1945 } 1946 n >>= 1; 1947 if (n) 1948 { 1949 squareg(scratch2); 1950 modg(g, scratch2); 1951 } 1952 } 1953 pushg(1); 1954} 1955 1956 1957void 1958powermodg( 1959 giant x, 1960 giant n, 1961 giant g 1962) 1963/* x becomes x^n (mod g). */ 1964{ 1965 int len, pos; 1966 giant scratch2 = popg(); 1967 1968 gtog(x, scratch2); 1969 itog(1, x); 1970 len = bitlen(n); 1971 pos = 0; 1972 while (1) 1973 { 1974 if (bitval(n, pos++)) 1975 { 1976 mulg(scratch2, x); 1977 modg(g, x); 1978 } 1979 if (pos>=len) 1980 break; 1981 squareg(scratch2); 1982 modg(g, scratch2); 1983 } 1984 pushg(1); 1985} 1986 1987 1988void 1989fermatpowermod( 1990 giant x, 1991 int n, 1992 int q 1993) 1994/* x becomes x^n (mod 2^q+1). */ 1995{ 1996 giant scratch2 = popg(); 1997 1998 gtog(x, scratch2); 1999 itog(1, x); 2000 while (n) 2001 { 2002 if (n & 1) 2003 { 2004 mulg(scratch2, x); 2005 fermatmod(q, x); 2006 } 2007 n >>= 1; 2008 if (n) 2009 { 2010 squareg(scratch2); 2011 fermatmod(q, scratch2); 2012 } 2013 } 2014 pushg(1); 2015} 2016 2017 2018void 2019fermatpowermodg( 2020 giant x, 2021 giant n, 2022 int q 2023) 2024/* x becomes x^n (mod 2^q+1). */ 2025{ 2026 int len, pos; 2027 giant scratch2 = popg(); 2028 2029 gtog(x, scratch2); 2030 itog(1, x); 2031 len = bitlen(n); 2032 pos = 0; 2033 while (1) 2034 { 2035 if (bitval(n, pos++)) 2036 { 2037 mulg(scratch2, x); 2038 fermatmod(q, x); 2039 } 2040 if (pos>=len) 2041 break; 2042 squareg(scratch2); 2043 fermatmod(q, scratch2); 2044 } 2045 pushg(1); 2046} 2047 2048 2049void 2050mersennepowermod( 2051 giant x, 2052 int n, 2053 int q 2054) 2055/* x becomes x^n (mod 2^q-1). */ 2056{ 2057 giant scratch2 = popg(); 2058 2059 gtog(x, scratch2); 2060 itog(1, x); 2061 while (n) 2062 { 2063 if (n & 1) 2064 { 2065 mulg(scratch2, x); 2066 mersennemod(q, x); 2067 } 2068 n >>= 1; 2069 if (n) 2070 { 2071 squareg(scratch2); 2072 mersennemod(q, scratch2); 2073 } 2074 } 2075 pushg(1); 2076} 2077 2078 2079void 2080mersennepowermodg( 2081 giant x, 2082 giant n, 2083 int q 2084) 2085/* x becomes x^n (mod 2^q-1). */ 2086{ 2087 int len, pos; 2088 giant scratch2 = popg(); 2089 2090 gtog(x, scratch2); 2091 itog(1, x); 2092 len = bitlen(n); 2093 pos = 0; 2094 while (1) 2095 { 2096 if (bitval(n, pos++)) 2097 { 2098 mulg(scratch2, x); 2099 mersennemod(q, x); 2100 } 2101 if (pos>=len) 2102 break; 2103 squareg(scratch2); 2104 mersennemod(q, scratch2); 2105 } 2106 pushg(1); 2107} 2108 2109 2110void 2111gshiftleft( 2112 int bits, 2113 giant g 2114) 2115/* shift g left bits bits. Equivalent to g = g*2^bits. */ 2116{ 2117 int rem = bits&15, crem = 16-rem, words = bits>>4; 2118 int size = abs(g->sign), j, k, sign = gsign(g); 2119 unsigned short carry, dat; 2120 2121 if (!bits) 2122 return; 2123 if (!size) 2124 return; 2125 if (bits < 0) { 2126 gshiftright(-bits,g); 2127 return; 2128 } 2129 if (size+words+1 > current_max_size) { 2130 error = OVFLOW; 2131 exit(error); 2132 } 2133 if (rem == 0) { 2134 memmove(g->n + words, g->n, size * sizeof(short)); 2135 for (j = 0; j < words; j++) g->n[j] = 0; 2136 g->sign += (g->sign < 0)?(-words):(words); 2137 } else { 2138 k = size+words; 2139 carry = 0; 2140 for (j=size-1; j>=0; j--) { 2141 dat = g->n[j]; 2142 g->n[k--] = (unsigned short)((dat >> crem) | carry); 2143 carry = (unsigned short)(dat << rem); 2144 } 2145 do { 2146 g->n[k--] = carry; 2147 carry = 0; 2148 } while(k>=0); 2149 2150 k = size+words; 2151 if (g->n[k] == 0) 2152 --k; 2153 g->sign = sign*(k+1); 2154 } 2155} 2156 2157 2158void 2159gshiftright( 2160 int bits, 2161 giant g 2162) 2163/* shift g right bits bits. Equivalent to g = g/2^bits. */ 2164{ 2165 register int j,size=abs(g->sign); 2166 register unsigned int carry; 2167 int words = bits >> 4; 2168 int remain = bits & 15, cremain = (16-remain); 2169 2170 if (bits==0) 2171 return; 2172 if (isZero(g)) 2173 return; 2174 if (bits < 0) { 2175 gshiftleft(-bits,g); 2176 return; 2177 } 2178 if (words >= size) { 2179 g->sign = 0; 2180 return; 2181 } 2182 if (remain == 0) { 2183 memmove(g->n,g->n + words,(size - words) * sizeof(short)); 2184 g->sign += (g->sign < 0)?(words):(-words); 2185 } else { 2186 size -= words; 2187 2188 if (size) 2189 { 2190 for(j=0;j<size-1;++j) 2191 { 2192 carry = g->n[j+words+1] << cremain; 2193 g->n[j] = (unsigned short)((g->n[j+words] >> remain ) | carry); 2194 } 2195 g->n[size-1] = (unsigned short)(g->n[size-1+words] >> remain); 2196 } 2197 2198 if (g->n[size-1] == 0) 2199 --size; 2200 2201 if (g->sign > 0) 2202 g->sign = size; 2203 else 2204 g->sign = -size; 2205 } 2206} 2207 2208 2209void 2210extractbits( 2211 int n, 2212 giant src, 2213 giant dest 2214) 2215/* dest becomes lowermost n bits of src. Equivalent to dest = src % 2^n. */ 2216{ 2217 register int words = n >> 4, numbytes = words*sizeof(short); 2218 register int bits = n & 15; 2219 2220 if (n<=0) 2221 return; 2222 if (words >= abs(src->sign)) 2223 gtog(src,dest); 2224 else 2225 { 2226 memcpy((char *)(dest->n), (char *)(src->n), numbytes); 2227 if (bits) 2228 { 2229 dest->n[words] = (unsigned short)(src->n[words] & ((1<<bits)-1)); 2230 ++words; 2231 } 2232 while ((dest->n[words-1] == 0) && (words > 0)) 2233 { 2234 --words; 2235 } 2236 if (src->sign<0) 2237 dest->sign = -words; 2238 else 2239 dest->sign = words; 2240 } 2241} 2242 2243 2244int 2245allzeros( 2246 int shorts, 2247 int bits, 2248 giant g 2249) 2250{ 2251 int i=shorts; 2252 2253 while (i>0) 2254 { 2255 if (g->n[--i]) 2256 return(0); 2257 } 2258 return((int)(!(g->n[shorts] & ((1<<bits)-1)))); 2259} 2260 2261 2262void 2263fermatnegate( 2264 int n, 2265 giant g 2266) 2267/* negate g. g is mod 2^n+1. */ 2268{ 2269 register int shorts = n>>4, 2270 bits = n & 15, 2271 i = shorts, 2272 mask = 1<<bits; 2273 register unsigned carry,temp; 2274 2275 for (temp=(unsigned)shorts; (int)temp>g->sign-1; --temp) 2276 { 2277 g->n[temp] = 0; 2278 } 2279 if (g->n[shorts] & mask) 2280 { /* if high bit is set, -g = 1. */ 2281 g->sign = 1; 2282 g->n[0] = 1; 2283 return; 2284 } 2285 if (allzeros(shorts,bits,g)) 2286 return; /* if g=0, -g = 0. */ 2287 2288 while (i>0) 2289 { --i; 2290 g->n[i] = (unsigned short)(~(g->n[i+1])); 2291 } 2292 g->n[shorts] ^= mask-1; 2293 2294 carry = 2; 2295 i = 0; 2296 while (carry) 2297 { 2298 temp = g->n[i]+carry; 2299 g->n[i++] = (unsigned short)(temp & 0xffff); 2300 carry = temp>>16; 2301 } 2302 while(!g->n[shorts]) 2303 { 2304 --shorts; 2305 } 2306 g->sign = shorts+1; 2307} 2308 2309 2310void 2311mersennemod ( 2312 int n, 2313 giant g 2314) 2315/* g := g (mod 2^n - 1) */ 2316{ 2317 int the_sign, s; 2318 giant scratch3 = popg(), scratch4 = popg(); 2319 2320 if ((the_sign = gsign(g)) < 0) absg(g); 2321 while (bitlen(g) > n) { 2322 gtog(g,scratch3); 2323 gshiftright(n,scratch3); 2324 addg(scratch3,g); 2325 gshiftleft(n,scratch3); 2326 subg(scratch3,g); 2327 } 2328 if(!isZero(g)) { 2329 if ((s = gsign(g)) < 0) absg(g); 2330 itog(1,scratch3); 2331 gshiftleft(n,scratch3); 2332 itog(1,scratch4); 2333 subg(scratch4,scratch3); 2334 if(gcompg(g,scratch3) >= 0) subg(scratch3,g); 2335 if (s < 0) { 2336 g->sign = -g->sign; 2337 addg(scratch3,g); 2338 } 2339 if (the_sign < 0) { 2340 g->sign = -g->sign; 2341 addg(scratch3,g); 2342 } 2343 } 2344 pushg(2); 2345} 2346 2347void 2348fermatmod ( 2349 int n, 2350 giant g 2351) 2352/* g := g (mod 2^n + 1), */ 2353{ 2354 int the_sign, s; 2355 giant scratch3 = popg(); 2356 2357 if ((the_sign = gsign(g)) < 0) absg(g); 2358 while (bitlen(g) > n) { 2359 gtog(g,scratch3); 2360 gshiftright(n,scratch3); 2361 subg(scratch3,g); 2362 gshiftleft(n,scratch3); 2363 subg(scratch3,g); 2364 } 2365 if((bitlen(g) < n) && (the_sign * (g->sign) >= 0)) goto leave; 2366 if(!isZero(g)) { 2367 if ((s = gsign(g)) < 0) absg(g); 2368 itog(1,scratch3); 2369 gshiftleft(n,scratch3); 2370 iaddg(1,scratch3); 2371 if(gcompg(g,scratch3) >= 0) subg(scratch3,g); 2372 if (s * the_sign < 0) { 2373 g->sign = -g->sign; 2374 addg(scratch3,g); 2375 } 2376 } 2377leave: 2378 pushg(1); 2379 2380} 2381 2382void 2383fer_mod ( 2384 int n, 2385 giant g, 2386 giant modulus 2387) 2388/* Same as fermatmod(), except modulus = 2^n should be passed 2389if available (i.e. if already allocated and set). */ 2390{ 2391 int the_sign, s; 2392 giant scratch3 = popg(); 2393 2394 if ((the_sign = gsign(g)) < 0) absg(g); 2395 while (bitlen(g) > n) { 2396 gtog(g,scratch3); 2397 gshiftright(n,scratch3); 2398 subg(scratch3,g); 2399 gshiftleft(n,scratch3); 2400 subg(scratch3,g); 2401 } 2402 if((bitlen(g) < n) && (the_sign * (g->sign) >= 0)) goto leave; 2403 if(!isZero(g)) { 2404 if ((s = gsign(g)) < 0) absg(g); 2405 if(gcompg(g,modulus) >= 0) subg(modulus,g); 2406 if (s * the_sign < 0) { 2407 g->sign = -g->sign; 2408 addg(modulus,g); 2409 } 2410 } 2411leave: 2412 pushg(1); 2413} 2414 2415 2416void 2417smulg( 2418 unsigned short i, 2419 giant g 2420) 2421/* g becomes g * i. */ 2422{ 2423 unsigned short carry = 0; 2424 int size = abs(g->sign); 2425 register int j,k,mul = abs(i); 2426 unsigned short *digit = g->n; 2427 2428 for (j=0; j<size; ++j) 2429 { 2430 k = *digit * mul + carry; 2431 carry = (unsigned short)(k>>16); 2432 *digit = (unsigned short)(k & 0xffff); 2433 ++digit; 2434 } 2435 if (carry) 2436 { 2437 if (++j >= current_max_size) 2438 { 2439 error = OVFLOW; 2440 exit(error); 2441 } 2442 *digit = carry; 2443 } 2444 2445 if ((g->sign>0) ^ (i>0)) 2446 g->sign = -j; 2447 else 2448 g->sign = j; 2449} 2450 2451 2452void 2453squareg( 2454 giant b 2455) 2456/* b becomes b^2. */ 2457{ 2458 auxmulg(b,b); 2459} 2460 2461 2462void 2463mulg( 2464 giant a, 2465 giant b 2466) 2467/* b becomes a*b. */ 2468{ 2469 auxmulg(a,b); 2470} 2471 2472 2473void 2474auxmulg( 2475 giant a, 2476 giant b 2477) 2478/* Optimized general multiply, b becomes a*b. Modes are: 2479 * AUTO_MUL: switch according to empirical speed criteria. 2480 * GRAMMAR_MUL: force grammar-school algorithm. 2481 * KARAT_MUL: force Karatsuba divide-conquer method. 2482 * FFT_MUL: force floating point FFT method. */ 2483{ 2484 float grammartime; 2485 int square = (a==b); 2486 int sizea, sizeb; 2487 2488 switch (mulmode) 2489 { 2490 case GRAMMAR_MUL: 2491 if (square) grammarsquareg(b); 2492 else grammarmulg(a,b); 2493 break; 2494 case FFT_MUL: 2495 if (square) 2496 FFTsquareg(b); 2497 else 2498 FFTmulg(a,b); 2499 break; 2500 case KARAT_MUL: 2501 if (square) karatsquareg(b); 2502 else karatmulg(a,b); 2503 break; 2504 case AUTO_MUL: 2505 sizea = abs(a->sign); 2506 sizeb = abs(b->sign); 2507 if((sizea > KARAT_BREAK) && (sizea <= FFT_BREAK) && 2508 (sizeb > KARAT_BREAK) && (sizeb <= FFT_BREAK)){ 2509 if(square) karatsquareg(b); 2510 else karatmulg(a,b); 2511 2512 } else { 2513 grammartime = (float)sizea; 2514 grammartime *= (float)sizeb; 2515 if (grammartime < FFT_BREAK * FFT_BREAK) 2516 { 2517 if (square) grammarsquareg(b); 2518 else grammarmulg(a,b); 2519 } 2520 else 2521 { 2522 if (square) FFTsquareg(b); 2523 else FFTmulg(a,b); 2524 } 2525 } 2526 break; 2527 } 2528} 2529 2530void 2531justg(giant x) { 2532 int s = x->sign, sg = 1; 2533 2534 if(s<0) { 2535 sg = -1; 2536 s = -s; 2537 } 2538 --s; 2539 while(x->n[s] == 0) { 2540 --s; 2541 if(s < 0) break; 2542 } 2543 x->sign = sg*(s+1); 2544} 2545 2546/* Next, improved Karatsuba routines from A. Powell, 2547 improvements by G. Woltman. */ 2548 2549void 2550karatmulg(giant x, giant y) 2551/* y becomes x*y. */ 2552{ 2553 int s = abs(x->sign), t = abs(y->sign), w, bits, 2554 sg = gsign(x)*gsign(y); 2555 giant a, b, c, d, e, f; 2556 2557 if((s <= KARAT_BREAK) || (t <= KARAT_BREAK)) { 2558 grammarmulg(x,y); 2559 return; 2560 } 2561 w = (s + t + 2)/4; bits = 16*w; 2562 a = popg(); b = popg(); c = popg(); 2563 d = popg(); e = popg(); f = popg(); 2564 gtog(x,a); absg(a); if (w <= s) {a->sign = w; justg(a);} 2565 gtog(x,b); absg(b); 2566 gshiftright(bits, b); 2567 gtog(y,c); absg(c); if (w <= t) {c->sign = w; justg(c);} 2568 gtog(y,d); absg(d); 2569 gshiftright(bits,d); 2570 gtog(a,e); normal_addg(b,e); /* e := (a + b) */ 2571 gtog(c,f); normal_addg(d,f); /* f := (c + d) */ 2572 karatmulg(e,f); /* f := (a + b)(c + d) */ 2573 karatmulg(c,a); /* a := a c */ 2574 karatmulg(d,b); /* b := b d */ 2575 normal_subg(a,f); 2576 /* f := (a + b)(c + d) - a c */ 2577 normal_subg(b,f); 2578 /* f := (a + b)(c + d) - a c - b d */ 2579 gshiftleft(bits, b); 2580 normal_addg(f, b); 2581 gshiftleft(bits, b); 2582 normal_addg(a, b); 2583 gtog(b, y); y->sign *= sg; 2584 pushg(6); 2585 2586 return; 2587} 2588 2589void 2590karatsquareg(giant x) 2591/* x becomes x^2. */ 2592{ 2593 int s = abs(x->sign), w, bits; 2594 giant a, b, c; 2595 2596 if(s <= KARAT_BREAK) { 2597 grammarsquareg(x); 2598 return; 2599 } 2600 w = (s+1)/2; bits = 16*w; 2601 a = popg(); b = popg(); c = popg(); 2602 gtog(x, a); a->sign = w; justg(a); 2603 gtog(x, b); absg(b); 2604 gshiftright(bits, b); 2605 gtog(a,c); normal_addg(b,c); 2606 karatsquareg(c); 2607 karatsquareg(a); 2608 karatsquareg(b); 2609 normal_subg(b, c); 2610 normal_subg(a, c); 2611 gshiftleft(bits, b); 2612 normal_addg(c,b); 2613 gshiftleft(bits, b); 2614 normal_addg(a, b); 2615 gtog(b, x); 2616 pushg(3); 2617 2618 return; 2619} 2620 2621void 2622grammarmulg( 2623 giant a, 2624 giant b 2625) 2626/* b becomes a*b. */ 2627{ 2628 int i,j; 2629 unsigned int prod,carry=0; 2630 int asize = abs(a->sign), bsize = abs(b->sign); 2631 unsigned short *aptr,*bptr,*destptr; 2632 unsigned short mult; 2633 giant scratch = popg(); 2634 2635 for (i=0; i<asize+bsize; ++i) 2636 { 2637 scratch->n[i]=0; 2638 } 2639 2640 bptr = &(b->n[0]); 2641 for (i=0; i<bsize; ++i) 2642 { 2643 mult = *(bptr++); 2644 if (mult) 2645 { 2646 carry = 0; 2647 aptr = &(a->n[0]); 2648 destptr = &(scratch->n[i]); 2649 for (j=0; j<asize; ++j) 2650 { 2651 prod = *(aptr++) * mult + *destptr + carry; 2652 *(destptr++) = (unsigned short)(prod & 0xffff); 2653 carry = prod >> 16; 2654 } 2655 *destptr = (unsigned short)carry; 2656 } 2657 } 2658 bsize+=asize; 2659 if (!carry) 2660 --bsize; 2661 scratch->sign = gsign(a)*gsign(b)*bsize; 2662 gtog(scratch,b); 2663 pushg(1); 2664} 2665 2666 2667void 2668grammarsquareg ( 2669 giant a 2670) 2671/* a := a^2. */ 2672{ 2673 unsigned int cur_term; 2674 unsigned int prod, carry=0, temp; 2675 int asize = abs(a->sign), max = asize * 2 - 1; 2676 unsigned short *ptr = a->n, *ptr1, *ptr2; 2677 giant scratch; 2678 2679 if(asize == 0) { 2680 itog(0,a); 2681 return; 2682 } 2683 2684 scratch = popg(); 2685 2686 asize--; 2687 2688 temp = *ptr; 2689 temp *= temp; 2690 scratch->n[0] = temp; 2691 carry = temp >> 16; 2692 2693 for (cur_term = 1; cur_term < max; cur_term++) { 2694 ptr1 = ptr2 = ptr; 2695 if (cur_term <= asize) { 2696 ptr2 += cur_term; 2697 } else { 2698 ptr1 += cur_term - asize; 2699 ptr2 += asize; 2700 } 2701 prod = carry & 0xFFFF; 2702 carry >>= 16; 2703 while(ptr1 < ptr2) { 2704 temp = *ptr1++ * *ptr2--; 2705 prod += (temp << 1) & 0xFFFF; 2706 carry += (temp >> 15); 2707 } 2708 if (ptr1 == ptr2) { 2709 temp = *ptr1; 2710 temp *= temp; 2711 prod += temp & 0xFFFF; 2712 carry += (temp >> 16); 2713 } 2714 carry += prod >> 16; 2715 scratch->n[cur_term] = (unsigned short) (prod); 2716 } 2717 if (carry) { 2718 scratch->n[cur_term] = carry; 2719 scratch->sign = cur_term+1; 2720 } else scratch->sign = cur_term; 2721 2722 gtog(scratch,a); 2723 pushg(1); 2724} 2725 2726 2727/************************************************************** 2728 * 2729 * FFT multiply Functions 2730 * 2731 **************************************************************/ 2732 2733int initL = 0; 2734 2735int 2736lpt( 2737 int n, 2738 int *lambda 2739) 2740/* Returns least power of two greater than n. */ 2741{ 2742 register int i = 1; 2743 2744 *lambda = 0; 2745 while (i<n) 2746 { 2747 i<<=1; 2748 ++(*lambda); 2749 } 2750 return(i); 2751} 2752 2753 2754void 2755addsignal( 2756 giant x, 2757 double *z, 2758 int n 2759) 2760{ 2761 register int j, k, m, car, last; 2762 register double f, g,err; 2763 2764 maxFFTerror = 0; 2765 last = 0; 2766 for (j=0;j<n;j++) 2767 { 2768 f = gfloor(z[j]+0.5); 2769 if(f != 0.0) last = j; 2770 if (checkFFTerror) 2771 { 2772 err = fabs(f - z[j]); 2773 if (err > maxFFTerror) 2774 maxFFTerror = err; 2775 } 2776 z[j] =0; 2777 k = 0; 2778 do 2779 { 2780 g = gfloor(f*TWOM16); 2781 z[j+k] += f-g*TWO16; 2782 ++k; 2783 f=g; 2784 } while(f != 0.0); 2785 } 2786 car = 0; 2787 for(j=0;j < last + 1;j++) 2788 { 2789 m = (int)(z[j]+car); 2790 x->n[j] = (unsigned short)(m & 0xffff); 2791 car = (m>>16); 2792 } 2793 if (car) 2794 x->n[j] = (unsigned short)car; 2795 else 2796 --j; 2797 2798 while(!(x->n[j])) --j; 2799 2800 x->sign = j+1; 2801} 2802 2803 2804void 2805FFTsquareg( 2806 giant x 2807) 2808{ 2809 int j,size = abs(x->sign); 2810 register int L; 2811 2812 if (size<4) 2813 { 2814 grammarmulg(x,x); 2815 return; 2816 } 2817 L = lpt(size+size, &j); 2818 if(!z) z = (double *)malloc(MAX_SHORTS * sizeof(double)); 2819 giant_to_double(x, size, z, L); 2820 fft_real_to_hermitian(z, L); 2821 square_hermitian(z, L); 2822 fftinv_hermitian_to_real(z, L); 2823 addsignal(x,z,L); 2824 x->sign = abs(x->sign); 2825} 2826 2827 2828void 2829FFTmulg( 2830 giant y, 2831 giant x 2832) 2833{ 2834 /* x becomes y*x. */ 2835 int lambda, sizex = abs(x->sign), sizey = abs(y->sign); 2836 int finalsign = gsign(x)*gsign(y); 2837 register int L; 2838 2839 if ((sizex<=4)||(sizey<=4)) 2840 { 2841 grammarmulg(y,x); 2842 return; 2843 } 2844 L = lpt(sizex+sizey, &lambda); 2845 if(!z) z = (double *)malloc(MAX_SHORTS * sizeof(double)); 2846 if(!z2) z2 = (double *)malloc(MAX_SHORTS * sizeof(double)); 2847 2848 giant_to_double(x, sizex, z, L); 2849 giant_to_double(y, sizey, z2, L); 2850 fft_real_to_hermitian(z, L); 2851 fft_real_to_hermitian(z2, L); 2852 mul_hermitian(z2, z, L); 2853 fftinv_hermitian_to_real(z, L); 2854 addsignal(x,z,L); 2855 x->sign = finalsign*abs(x->sign); 2856} 2857 2858 2859void 2860scramble_real( 2861 double *x, 2862 int n 2863) 2864{ 2865 register int i,j,k; 2866 register double tmp; 2867 2868 for (i=0,j=0;i<n-1;i++) 2869 { 2870 if (i<j) 2871 { 2872 tmp = x[j]; 2873 x[j]=x[i]; 2874 x[i]=tmp; 2875 } 2876 k = n/2; 2877 while (k<=j) 2878 { 2879 j -= k; 2880 k>>=1; 2881 } 2882 j += k; 2883 } 2884} 2885 2886 2887void 2888fft_real_to_hermitian( 2889 double *z, 2890 int n 2891) 2892/* Output is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]). 2893 * This is a decimation-in-time, split-radix algorithm. 2894 */ 2895{ 2896 register double cc1, ss1, cc3, ss3; 2897 register int is, id, i0, i1, i2, i3, i4, i5, i6, i7, i8, 2898 a, a3, b, b3, nminus = n-1, dil, expand; 2899 register double *x, e; 2900 int nn = n>>1; 2901 double t1, t2, t3, t4, t5, t6; 2902 register int n2, n4, n8, i, j; 2903 2904 init_sinCos(n); 2905 expand = cur_run/n; 2906 scramble_real(z, n); 2907 x = z-1; /* FORTRAN compatibility. */ 2908 is = 1; 2909 id = 4; 2910 do 2911 { 2912 for (i0=is;i0<=n;i0+=id) 2913 { 2914 i1 = i0+1; 2915 e = x[i0]; 2916 x[i0] = e + x[i1]; 2917 x[i1] = e - x[i1]; 2918 } 2919 is = (id<<1)-1; 2920 id <<= 2; 2921 } while(is<n); 2922 2923 n2 = 2; 2924 while(nn>>=1) 2925 { 2926 n2 <<= 1; 2927 n4 = n2>>2; 2928 n8 = n2>>3; 2929 is = 0; 2930 id = n2<<1; 2931 do 2932 { 2933 for (i=is;i<n;i+=id) 2934 { 2935 i1 = i+1; 2936 i2 = i1 + n4; 2937 i3 = i2 + n4; 2938 i4 = i3 + n4; 2939 t1 = x[i4]+x[i3]; 2940 x[i4] -= x[i3]; 2941 x[i3] = x[i1] - t1; 2942 x[i1] += t1; 2943 if (n4==1) 2944 continue; 2945 i1 += n8; 2946 i2 += n8; 2947 i3 += n8; 2948 i4 += n8; 2949 t1 = (x[i3]+x[i4])*SQRTHALF; 2950 t2 = (x[i3]-x[i4])*SQRTHALF; 2951 x[i4] = x[i2] - t1; 2952 x[i3] = -x[i2] - t1; 2953 x[i2] = x[i1] - t2; 2954 x[i1] += t2; 2955 } 2956 is = (id<<1) - n2; 2957 id <<= 2; 2958 } while(is<n); 2959 dil = n/n2; 2960 a = dil; 2961 for (j=2;j<=n8;j++) 2962 { 2963 a3 = (a+(a<<1))&nminus; 2964 b = a*expand; 2965 b3 = a3*expand; 2966 cc1 = s_cos(b); 2967 ss1 = s_sin(b); 2968 cc3 = s_cos(b3); 2969 ss3 = s_sin(b3); 2970 a = (a+dil)&nminus; 2971 is = 0; 2972 id = n2<<1; 2973 do 2974 { 2975 for(i=is;i<n;i+=id) 2976 { 2977 i1 = i+j; 2978 i2 = i1 + n4; 2979 i3 = i2 + n4; 2980 i4 = i3 + n4; 2981 i5 = i + n4 - j + 2; 2982 i6 = i5 + n4; 2983 i7 = i6 + n4; 2984 i8 = i7 + n4; 2985 t1 = x[i3]*cc1 + x[i7]*ss1; 2986 t2 = x[i7]*cc1 - x[i3]*ss1; 2987 t3 = x[i4]*cc3 + x[i8]*ss3; 2988 t4 = x[i8]*cc3 - x[i4]*ss3; 2989 t5 = t1 + t3; 2990 t6 = t2 + t4; 2991 t3 = t1 - t3; 2992 t4 = t2 - t4; 2993 t2 = x[i6] + t6; 2994 x[i3] = t6 - x[i6]; 2995 x[i8] = t2; 2996 t2 = x[i2] - t3; 2997 x[i7] = -x[i2] - t3; 2998 x[i4] = t2; 2999 t1 = x[i1] + t5; 3000 x[i6] = x[i1] - t5; 3001 x[i1] = t1; 3002 t1 = x[i5] + t4; 3003 x[i5] -= t4; 3004 x[i2] = t1; 3005 } 3006 is = (id<<1) - n2; 3007 id <<= 2; 3008 } while(is<n); 3009 } 3010 } 3011} 3012 3013 3014void 3015fftinv_hermitian_to_real( 3016 double *z, 3017 int n 3018) 3019/* Input is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]). 3020 * This is a decimation-in-frequency, split-radix algorithm. 3021 */ 3022{ 3023 register double cc1, ss1, cc3, ss3; 3024 register int is, id, i0, i1, i2, i3, i4, i5, i6, i7, i8, 3025 a, a3, b, b3, nminus = n-1, dil, expand; 3026 register double *x, e; 3027 int nn = n>>1; 3028 double t1, t2, t3, t4, t5; 3029 int n2, n4, n8, i, j; 3030 3031 init_sinCos(n); 3032 expand = cur_run/n; 3033 x = z-1; 3034 n2 = n<<1; 3035 while(nn >>= 1) 3036 { 3037 is = 0; 3038 id = n2; 3039 n2 >>= 1; 3040 n4 = n2>>2; 3041 n8 = n4>>1; 3042 do 3043 { 3044 for(i=is;i<n;i+=id) 3045 { 3046 i1 = i+1; 3047 i2 = i1 + n4; 3048 i3 = i2 + n4; 3049 i4 = i3 + n4; 3050 t1 = x[i1] - x[i3]; 3051 x[i1] += x[i3]; 3052 x[i2] += x[i2]; 3053 x[i3] = t1 - 2.0*x[i4]; 3054 x[i4] = t1 + 2.0*x[i4]; 3055 if (n4==1) 3056 continue; 3057 i1 += n8; 3058 i2 += n8; 3059 i3 += n8; 3060 i4 += n8; 3061 t1 = (x[i2]-x[i1])*SQRTHALF; 3062 t2 = (x[i4]+x[i3])*SQRTHALF; 3063 x[i1] += x[i2]; 3064 x[i2] = x[i4]-x[i3]; 3065 x[i3] = -2.0*(t2+t1); 3066 x[i4] = 2.0*(t1-t2); 3067 } 3068 is = (id<<1) - n2; 3069 id <<= 2; 3070 } while (is<n-1); 3071 dil = n/n2; 3072 a = dil; 3073 for (j=2;j<=n8;j++) 3074 { 3075 a3 = (a+(a<<1))&nminus; 3076 b = a*expand; 3077 b3 = a3*expand; 3078 cc1 = s_cos(b); 3079 ss1 = s_sin(b); 3080 cc3 = s_cos(b3); 3081 ss3 = s_sin(b3); 3082 a = (a+dil)&nminus; 3083 is = 0; 3084 id = n2<<1; 3085 do 3086 { 3087 for(i=is;i<n;i+=id) 3088 { 3089 i1 = i+j; 3090 i2 = i1+n4; 3091 i3 = i2+n4; 3092 i4 = i3+n4; 3093 i5 = i+n4-j+2; 3094 i6 = i5+n4; 3095 i7 = i6+n4; 3096 i8 = i7+n4; 3097 t1 = x[i1] - x[i6]; 3098 x[i1] += x[i6]; 3099 t2 = x[i5] - x[i2]; 3100 x[i5] += x[i2]; 3101 t3 = x[i8] + x[i3]; 3102 x[i6] = x[i8] - x[i3]; 3103 t4 = x[i4] + x[i7]; 3104 x[i2] = x[i4] - x[i7]; 3105 t5 = t1 - t4; 3106 t1 += t4; 3107 t4 = t2 - t3; 3108 t2 += t3; 3109 x[i3] = t5*cc1 + t4*ss1; 3110 x[i7] = -t4*cc1 + t5*ss1; 3111 x[i4] = t1*cc3 - t2*ss3; 3112 x[i8] = t2*cc3 + t1*ss3; 3113 } 3114 is = (id<<1) - n2; 3115 id <<= 2; 3116 } while(is<n-1); 3117 } 3118 } 3119 is = 1; 3120 id = 4; 3121 do 3122 { 3123 for (i0=is;i0<=n;i0+=id) 3124 { 3125 i1 = i0+1; 3126 e = x[i0]; 3127 x[i0] = e + x[i1]; 3128 x[i1] = e - x[i1]; 3129 } 3130 is = (id<<1) - 1; 3131 id <<= 2; 3132 } while(is<n); 3133 scramble_real(z, n); 3134 e = 1/(double)n; 3135 for (i=0;i<n;i++) 3136 { 3137 z[i] *= e; 3138 } 3139} 3140 3141 3142void 3143mul_hermitian( 3144 double *a, 3145 double *b, 3146 int n 3147) 3148{ 3149 register int k, half = n>>1; 3150 register double aa, bb, am, bm; 3151 3152 b[0] *= a[0]; 3153 b[half] *= a[half]; 3154 for (k=1;k<half;k++) 3155 { 3156 aa = a[k]; 3157 bb = b[k]; 3158 am = a[n-k]; 3159 bm = b[n-k]; 3160 b[k] = aa*bb - am*bm; 3161 b[n-k] = aa*bm + am*bb; 3162 } 3163} 3164 3165 3166void 3167square_hermitian( 3168 double *b, 3169 int n 3170) 3171{ 3172 register int k, half = n>>1; 3173 register double c, d; 3174 3175 b[0] *= b[0]; 3176 b[half] *= b[half]; 3177 for (k=1;k<half;k++) 3178 { 3179 c = b[k]; 3180 d = b[n-k]; 3181 b[n-k] = 2.0*c*d; 3182 b[k] = (c+d)*(c-d); 3183 } 3184} 3185 3186 3187void 3188giant_to_double 3189( 3190 giant x, 3191 int sizex, 3192 double *z, 3193 int L 3194) 3195{ 3196 register int j; 3197 3198 for (j=sizex;j<L;j++) 3199 { 3200 z[j]=0.0; 3201 } 3202 for (j=0;j<sizex;j++) 3203 { 3204 z[j] = x->n[j]; 3205 } 3206} 3207 3208 3209void 3210gswap( 3211 giant *p, 3212 giant *q 3213) 3214{ 3215 giant t; 3216 3217 t = *p; 3218 *p = *q; 3219 *q = t; 3220} 3221 3222 3223void 3224onestep( 3225 giant x, 3226 giant y, 3227 gmatrix A 3228) 3229/* Do one step of the euclidean algorithm and modify 3230 * the matrix A accordingly. */ 3231{ 3232 giant s4 = popg(); 3233 3234 gtog(x,s4); 3235 gtog(y,x); 3236 gtog(s4,y); 3237 divg(x,s4); 3238 punch(s4,A); 3239 mulg(x,s4); 3240 subg(s4,y); 3241 3242 pushg(1); 3243} 3244 3245 3246void 3247mulvM( 3248 gmatrix A, 3249 giant x, 3250 giant y 3251) 3252/* Multiply vector by Matrix; changes x,y. */ 3253{ 3254 giant s0 = popg(), s1 = popg(); 3255 3256 gtog(A->ur,s0); 3257 gtog( A->lr,s1); 3258 dotproduct(x,y,A->ul,s0); 3259 dotproduct(x,y,A->ll,s1); 3260 gtog(s0,x); 3261 gtog(s1,y); 3262 3263 pushg(2); 3264} 3265 3266 3267void 3268mulmM( 3269 gmatrix A, 3270 gmatrix B 3271) 3272/* Multiply matrix by Matrix; changes second matrix. */ 3273{ 3274 giant s0 = popg(); 3275 giant s1 = popg(); 3276 giant s2 = popg(); 3277 giant s3 = popg(); 3278 3279 gtog(B->ul,s0); 3280 gtog(B->ur,s1); 3281 gtog(B->ll,s2); 3282 gtog(B->lr,s3); 3283 dotproduct(A->ur,A->ul,B->ll,s0); 3284 dotproduct(A->ur,A->ul,B->lr,s1); 3285 dotproduct(A->ll,A->lr,B->ul,s2); 3286 dotproduct(A->ll,A->lr,B->ur,s3); 3287 gtog(s0,B->ul); 3288 gtog(s1,B->ur); 3289 gtog(s2,B->ll); 3290 gtog(s3,B->lr); 3291 3292 pushg(4); 3293} 3294 3295 3296void 3297writeM( 3298 gmatrix A 3299) 3300{ 3301 printf(" ul:"); 3302 gout(A->ul); 3303 printf(" ur:"); 3304 gout(A->ur); 3305 printf(" ll:"); 3306 gout(A->ll); 3307 printf(" lr:"); 3308 gout(A->lr); 3309} 3310 3311 3312void 3313punch( 3314 giant q, 3315 gmatrix A 3316) 3317/* Multiply the matrix A on the left by [0,1,1,-q]. */ 3318{ 3319 giant s0 = popg(); 3320 3321 gtog(A->ll,s0); 3322 mulg(q,A->ll); 3323 gswap(&A->ul,&A->ll); 3324 subg(A->ul,A->ll); 3325 gtog(s0,A->ul); 3326 gtog(A->lr,s0); 3327 mulg(q,A->lr); 3328 gswap(&A->ur,&A->lr); 3329 subg(A->ur,A->lr); 3330 gtog(s0,A->ur); 3331 3332 pushg(1); 3333} 3334 3335 3336static void 3337dotproduct( 3338 giant a, 3339 giant b, 3340 giant c, 3341 giant d 3342) 3343/* Replace last argument with the dot product of two 2-vectors. */ 3344{ 3345 giant s4 = popg(); 3346 3347 gtog(c,s4); 3348 mulg(a, s4); 3349 mulg(b,d); 3350 addg(s4,d); 3351 3352 pushg(1); 3353} 3354 3355 3356void 3357ggcd( 3358 giant xx, 3359 giant yy 3360) 3361/* A giant gcd. Modifies its arguments. */ 3362{ 3363 giant x = popg(), y = popg(); 3364 gmatrix A = newgmatrix(); 3365 3366 gtog(xx,x); gtog(yy,y); 3367 for(;;) 3368 { 3369 fix(&x,&y); 3370 if (bitlen(y) <= GCDLIMIT ) 3371 break; 3372 A->ul = popg(); 3373 A->ur = popg(); 3374 A->ll = popg(); 3375 A->lr = popg(); 3376 itog(1,A->ul); 3377 itog(0,A->ur); 3378 itog(0,A->ll); 3379 itog(1,A->lr); 3380 hgcd(0,x,y,A); 3381 mulvM(A,x,y); 3382 pushg(4); 3383 fix(&x,&y); 3384 if (bitlen(y) <= GCDLIMIT ) 3385 break; 3386 modg(y,x); 3387 gswap(&x,&y); 3388 } 3389 bgcdg(x,y); 3390 gtog(y,yy); 3391 pushg(2); 3392 free(A); 3393} 3394 3395 3396void 3397fix( 3398 giant *p, 3399 giant *q 3400) 3401/* Insure that x > y >= 0. */ 3402{ 3403 if( gsign(*p) < 0 ) 3404 negg(*p); 3405 if( gsign(*q) < 0 ) 3406 negg(*q); 3407 if( gcompg(*p,*q) < 0 ) 3408 gswap(p,q); 3409} 3410 3411 3412void 3413hgcd( 3414 int n, 3415 giant xx, 3416 giant yy, 3417 gmatrix A 3418) 3419/* hgcd(n,x,y,A) chops n bits off x and y and computes th 3420 * 2 by 2 matrix A such that A[x y] is the pair of terms 3421 * in the remainder sequence starting with x,y that is 3422 * half the size of x. Note that the argument A is modified 3423 * but that the arguments xx and yy are left unchanged. 3424 */ 3425{ 3426 giant x, y; 3427 3428 if (isZero(yy)) 3429 return; 3430 3431 x = popg(); 3432 y = popg(); 3433 gtog(xx,x); 3434 gtog(yy,y); 3435 gshiftright(n,x); 3436 gshiftright(n,y); 3437 if (bitlen(x) <= INTLIMIT ) 3438 { 3439 shgcd(gtoi(x),gtoi(y),A); 3440 } 3441 else 3442 { 3443 gmatrix B = newgmatrix(); 3444 int m = bitlen(x)/2; 3445 3446 hgcd(m,x,y,A); 3447 mulvM(A,x,y); 3448 if (gsign(x) < 0) 3449 { 3450 negg(x); negg(A->ul); negg(A->ur); 3451 } 3452 if (gsign(y) < 0) 3453 { 3454 negg(y); negg(A->ll); negg(A->lr); 3455 } 3456 if (gcompg(x,y) < 0) 3457 { 3458 gswap(&x,&y); 3459 gswap(&A->ul,&A->ll); 3460 gswap(&A->ur,&A->lr); 3461 } 3462 if (!isZero(y)) 3463 { 3464 onestep(x,y,A); 3465 m /= 2; 3466 B->ul = popg(); 3467 B->ur = popg(); 3468 B->ll = popg(); 3469 B->lr = popg(); 3470 itog(1,B->ul); 3471 itog(0,B->ur); 3472 itog(0,B->ll); 3473 itog(1,B->lr); 3474 hgcd(m,x,y,B); 3475 mulmM(B,A); 3476 pushg(4); 3477 } 3478 free(B); 3479 } 3480 pushg(2); 3481} 3482 3483 3484void 3485shgcd( 3486 register int x, 3487 register int y, 3488 gmatrix A 3489) 3490/* 3491 * Do a half gcd on the integers a and b, putting the result in A 3492 * It is fairly easy to use the 2 by 2 matrix description of the 3493 * extended Euclidean algorithm to prove that the quantity q*t 3494 * never overflows. 3495 */ 3496{ 3497 register int q, t, start = x; 3498 int Aul = 1, Aur = 0, All = 0, Alr = 1; 3499 3500 while (y != 0 && y > start/y) 3501 { 3502 q = x/y; 3503 t = y; 3504 y = x%y; 3505 x = t; 3506 t = All; 3507 All = Aul-q*t; 3508 Aul = t; 3509 t = Alr; 3510 Alr = Aur-q*t; 3511 Aur = t; 3512 } 3513 itog(Aul,A->ul); 3514 itog(Aur,A->ur); 3515 itog(All,A->ll); 3516 itog(Alr,A->lr); 3517} 3518