1/* Copyright (c) 1998 Apple Computer, Inc. All rights reserved. 2 * 3 * NOTICE: USE OF THE MATERIALS ACCOMPANYING THIS NOTICE IS SUBJECT 4 * TO THE TERMS OF THE SIGNED "FAST ELLIPTIC ENCRYPTION (FEE) REFERENCE 5 * SOURCE CODE EVALUATION AGREEMENT" BETWEEN APPLE COMPUTER, INC. AND THE 6 * ORIGINAL LICENSEE THAT OBTAINED THESE MATERIALS FROM APPLE COMPUTER, 7 * INC. ANY USE OF THESE MATERIALS NOT PERMITTED BY SUCH AGREEMENT WILL 8 * EXPOSE YOU TO LIABILITY. 9 *************************************************************************** 10 11 giantFFT.c 12 Library for large-integer arithmetic via FFT. Currently unused 13 in CryptKit. 14 15 R. E. Crandall, Scientific Computation Group, NeXT Computer, Inc. 16 17 Revision History 18 ---------------- 19 19 Jan 1998 Doug Mitchell at Apple 20 Split off from NSGiantIntegers.c. 21 22*/ 23 24/* 25 * FIXME - make sure platform-specific math lib has floor(), fmod(), 26 * sin(), pow() 27 */ 28#include <math.h> 29#include "NSGiantIntegers.h" 30 31#define AUTO_MUL 0 32#define GRAMMAR_MUL 1 33#define FFT_MUL 2 34 35#define TWOPI (double)(2*3.1415926535897932384626433) 36#define SQRT2 (double)(1.414213562373095048801688724209) 37#define SQRTHALF (double)(0.707106781186547524400844362104) 38#define TWO16 (double)(65536.0) 39#define TWOM16 (double)(0.0000152587890625) 40#define BREAK_SHORTS 400 // Number of shorts at which FFT breaks over. 41 42static int lpt(int n, int *lambda); 43static void mul_hermitian(double *a, double *b, int n) ; 44static void square_hermitian(double *b, int n); 45static void addsignal(giant x, double *zs, int n); 46static void scramble_real(double *x, int n); 47static void fft_real_to_hermitian(double *zs, int n); 48static void fftinv_hermitian_to_real(double *zs, int n); 49static void GiantFFTSquare(giant gx); 50static void GiantFFTMul(giant,giant); 51static void giant_to_double(giant x, int sizex, double *zs, int L); 52 53static int mulmode = AUTO_MUL; 54 55void mulg(giant a, giant b) { /* b becomes a*b. */ 56 PROF_START; 57 INCR_MULGS; 58 GiantAuxMul(a,b); 59 #if FEE_DEBUG 60 (void)bitlen(b); // XXX 61 #endif FEE_DEBUG 62 PROF_END(mulgTime); 63 PROF_INCR(numMulg); 64} 65 66static void GiantAuxMul(giant a, giant b) { 67/* Optimized general multiply, b becomes a*b. Modes are: 68 AUTO_MUL: switch according to empirical speed criteria. 69 GRAMMAR_MUL: force grammar-school algorithm. 70 FFT_MUL: force floating point FFT method. 71*/ 72 int square = (a==b); 73 74 if (isZero(b)) return; 75 if (isZero(a)) { 76 gtog(a, b); 77 return; 78 } 79 switch(mulmode) { 80 case GRAMMAR_MUL: 81 GiantGrammarMul(a,b); 82 break; 83 case FFT_MUL: 84 if (square) { 85 GiantFFTSquare(b); 86 } 87 else { 88 GiantFFTMul(a,b); 89 } 90 break; 91 case AUTO_MUL: { 92 int sizea, sizeb; 93 float grammartime; 94 sizea = abs(a->sign); 95 sizeb = abs(b->sign); 96 grammartime = sizea; grammartime *= sizeb; 97 if(grammartime < BREAK_SHORTS*BREAK_SHORTS) { 98 GiantGrammarMul(a,b); 99 } 100 else { 101 if (square) GiantFFTSquare(b); 102 else GiantFFTMul(a,b); 103 } 104 break; 105 } 106 } 107} 108 109/***************** Commence FFT multiply routines ****************/ 110 111static int CurrentRun = 0; 112double *sincos = NULL; 113static void init_sincos(int n) { 114 int j; 115 double e = TWOPI/n; 116 117 if (n <= CurrentRun) return; 118 CurrentRun = n; 119 if (sincos) free(sincos); 120 sincos = (double *)malloc(sizeof(double)*(1+(n>>2))); 121 for(j=0;j<=(n>>2);j++) { 122 sincos[j] = sin(e*j); 123 } 124} 125 126static double s_sin(int n) { 127 int seg = n/(CurrentRun>>2); 128 129 switch(seg) { 130 case 0: return(sincos[n]); 131 case 1: return(sincos[(CurrentRun>>1)-n]); 132 case 2: return(-sincos[n-(CurrentRun>>1)]); 133 case 3: 134 default: return(-sincos[CurrentRun-n]); 135 } 136} 137 138static double s_cos(int n) { 139 int quart = (CurrentRun>>2); 140 141 if (n < quart) return(s_sin(n+quart)); 142 return(-s_sin(n-quart)); 143} 144 145 146static int lpt(int n, int *lambda) { 147/* returns least power of two greater than n */ 148 register int i = 1; 149 150 *lambda = 0; 151 while(i<n) { 152 i<<=1; 153 ++(*lambda); 154 } 155 return(i); 156} 157 158static void addsignal(giant x, double *zs, int n) { 159 register int j, k, m, car; 160 register double f, g; 161 /*double err, maxerr = 0.0;*/ 162 163 for(j=0;j<n;j++) { 164 f = floor(zs[j]+0.5); 165 166 /* err = fabs(zs[j]-f); 167 if(err>maxerr) maxerr = err; 168 */ 169 170 zs[j] =0; 171 k = 0; 172 do{ 173 g = floor(f*TWOM16); 174 zs[j+k] += f-g*TWO16; 175 ++k; 176 f=g; 177 } while(f != 0.0); 178 } 179 car = 0; 180 for(j=0;j<n;j++) { 181 m = zs[j]+car; 182 x->n[j] = m & 0xffff; 183 car = (m>>16); 184 } 185 if(car) x->n[j] = car; 186 else --j; 187 while(!(x->n[j])) --j; 188 x->sign = j+1; 189 if (abs(x->sign) > x->capacity) NSGiantRaise("addsignal overflow"); 190} 191 192static void GiantFFTSquare(giant gx) { 193 int j,size = abs(gx->sign); 194 register int L; 195 196 if(size<4) { GiantGrammarMul(gx,gx); return; } 197 L = lpt(size+size, &j); 198 { 199 //was...double doubles[L]; 200 //is... 201 double *doubles = malloc(sizeof(double) * L); 202 // end 203 giant_to_double(gx, size, doubles, L); 204 fft_real_to_hermitian(doubles, L); 205 square_hermitian(doubles, L); 206 fftinv_hermitian_to_real(doubles, L); 207 addsignal(gx, doubles, L); 208 // new 209 free(doubles); 210 } 211 gx->sign = abs(gx->sign); 212 bitlen(gx); // XXX 213 if (abs(gx->sign) > gx->capacity) NSGiantRaise("GiantFFTSquare overflow"); 214} 215 216static void GiantFFTMul(giant y, giant x) { /* x becomes y*x. */ 217 int lambda, size, sizex = abs(x->sign), sizey = abs(y->sign); 218 int finalsign = gsign(x)*gsign(y); 219 register int L; 220 221 if((sizex<=4)||(sizey<=4)) { GiantGrammarMul(y,x); return; } 222 size = sizex; if(size<sizey) size=sizey; 223 L = lpt(size+size, &lambda); 224 { 225 //double doubles1[L]; 226 //double doubles2[L]; 227 double *doubles1 = malloc(sizeof(double) * L); 228 double *doubles2 = malloc(sizeof(double) * L); 229 230 giant_to_double(x, sizex, doubles1, L); 231 giant_to_double(y, sizey, doubles2, L); 232 fft_real_to_hermitian(doubles1, L); 233 fft_real_to_hermitian(doubles2, L); 234 mul_hermitian(doubles2, doubles1, L); 235 fftinv_hermitian_to_real(doubles1, L); 236 addsignal(x, doubles1, L); 237 238 free(doubles1); 239 free(doubles2); 240 } 241 x->sign = finalsign*abs(x->sign); 242 bitlen(x); // XXX 243 if (abs(x->sign) > x->capacity) NSGiantRaise("GiantFFTMul overflow"); 244} 245 246static void scramble_real(double *x, int n) { 247 register int i,j,k; 248 register double tmp; 249 250 for(i=0,j=0;i<n-1;i++) { 251 if(i<j) { 252 tmp = x[j]; 253 x[j]=x[i]; 254 x[i]=tmp; 255 } 256 k = n/2; 257 while(k<=j) { 258 j -= k; 259 k>>=1; 260 } 261 j += k; 262 } 263} 264 265static void fft_real_to_hermitian(double *zs, int n) { 266/* Output is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]). 267 This is a decimation-in-time, split-radix algorithm. 268 */ 269 register double cc1, ss1, cc3, ss3; 270 register int is, iD, i0, i1, i2, i3, i4, i5, i6, i7, i8, 271 a, a3, b, b3, nminus = n-1, dil, expand; 272 register double *x, e; 273 int nn = n>>1; 274 double t1, t2, t3, t4, t5, t6; 275 register int n2, n4, n8, i, j; 276 277 init_sincos(n); 278 expand = CurrentRun/n; 279 scramble_real(zs, n); 280 x = zs-1; /* FORTRAN compatibility. */ 281 is = 1; 282 iD = 4; 283 do{ 284 for(i0=is;i0<=n;i0+=iD) { 285 i1 = i0+1; 286 e = x[i0]; 287 x[i0] = e + x[i1]; 288 x[i1] = e - x[i1]; 289 } 290 is = (iD<<1)-1; 291 iD <<= 2; 292 } while(is<n); 293 n2 = 2; 294 while(nn>>=1) { 295 n2 <<= 1; 296 n4 = n2>>2; 297 n8 = n2>>3; 298 is = 0; 299 iD = n2<<1; 300 do { 301 for(i=is;i<n;i+=iD) { 302 i1 = i+1; 303 i2 = i1 + n4; 304 i3 = i2 + n4; 305 i4 = i3 + n4; 306 t1 = x[i4]+x[i3]; 307 x[i4] -= x[i3]; 308 x[i3] = x[i1] - t1; 309 x[i1] += t1; 310 if(n4==1) continue; 311 i1 += n8; 312 i2 += n8; 313 i3 += n8; 314 i4 += n8; 315 t1 = (x[i3]+x[i4])*SQRTHALF; 316 t2 = (x[i3]-x[i4])*SQRTHALF; 317 x[i4] = x[i2] - t1; 318 x[i3] = -x[i2] - t1; 319 x[i2] = x[i1] - t2; 320 x[i1] += t2; 321 } 322 is = (iD<<1) - n2; 323 iD <<= 2; 324 } while(is<n); 325 dil = n/n2; 326 a = dil; 327 for(j=2;j<=n8;j++) { 328 a3 = (a+(a<<1))&nminus; 329 b = a*expand; 330 b3 = a3*expand; 331 cc1 = s_cos(b); 332 ss1 = s_sin(b); 333 cc3 = s_cos(b3); 334 ss3 = s_sin(b3); 335 a = (a+dil)&nminus; 336 is = 0; 337 iD = n2<<1; 338 do { 339 for(i=is;i<n;i+=iD) { 340 i1 = i+j; 341 i2 = i1 + n4; 342 i3 = i2 + n4; 343 i4 = i3 + n4; 344 i5 = i + n4 - j + 2; 345 i6 = i5 + n4; 346 i7 = i6 + n4; 347 i8 = i7 + n4; 348 t1 = x[i3]*cc1 + x[i7]*ss1; 349 t2 = x[i7]*cc1 - x[i3]*ss1; 350 t3 = x[i4]*cc3 + x[i8]*ss3; 351 t4 = x[i8]*cc3 - x[i4]*ss3; 352 t5 = t1 + t3; 353 t6 = t2 + t4; 354 t3 = t1 - t3; 355 t4 = t2 - t4; 356 t2 = x[i6] + t6; 357 x[i3] = t6 - x[i6]; 358 x[i8] = t2; 359 t2 = x[i2] - t3; 360 x[i7] = -x[i2] - t3; 361 x[i4] = t2; 362 t1 = x[i1] + t5; 363 x[i6] = x[i1] - t5; 364 x[i1] = t1; 365 t1 = x[i5] + t4; 366 x[i5] -= t4; 367 x[i2] = t1; 368 } 369 is = (iD<<1) - n2; 370 iD <<= 2; 371 } while(is<n); 372 } 373 } 374} 375 376static void fftinv_hermitian_to_real(double *zs, int n) { 377/* Input is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]). 378 This is a decimation-in-frequency, split-radix algorithm. 379 */ 380 register double cc1, ss1, cc3, ss3; 381 register int is, iD, i0, i1, i2, i3, i4, i5, i6, i7, i8, 382 a, a3, b, b3, nminus = n-1, dil, expand; 383 register double *x, e; 384 int nn = n>>1; 385 double t1, t2, t3, t4, t5; 386 int n2, n4, n8, i, j; 387 388 init_sincos(n); 389 expand = CurrentRun/n; 390 x = zs-1; 391 n2 = n<<1; 392 while(nn >>= 1) { 393 is = 0; 394 iD = n2; 395 n2 >>= 1; 396 n4 = n2>>2; 397 n8 = n4>>1; 398 do { 399 for(i=is;i<n;i+=iD) { 400 i1 = i+1; 401 i2 = i1 + n4; 402 i3 = i2 + n4; 403 i4 = i3 + n4; 404 t1 = x[i1] - x[i3]; 405 x[i1] += x[i3]; 406 x[i2] += x[i2]; 407 x[i3] = t1 - 2.0*x[i4]; 408 x[i4] = t1 + 2.0*x[i4]; 409 if(n4==1) continue; 410 i1 += n8; 411 i2 += n8; 412 i3 += n8; 413 i4 += n8; 414 t1 = (x[i2]-x[i1])*SQRTHALF; 415 t2 = (x[i4]+x[i3])*SQRTHALF; 416 x[i1] += x[i2]; 417 x[i2] = x[i4]-x[i3]; 418 x[i3] = -2.0*(t2+t1); 419 x[i4] = 2.0*(t1-t2); 420 } 421 is = (iD<<1) - n2; 422 iD <<= 2; 423 } while(is<n-1); 424 dil = n/n2; 425 a = dil; 426 for(j=2;j<=n8;j++) { 427 a3 = (a+(a<<1))&nminus; 428 b = a*expand; 429 b3 = a3*expand; 430 cc1 = s_cos(b); 431 ss1 = s_sin(b); 432 cc3 = s_cos(b3); 433 ss3 = s_sin(b3); 434 a = (a+dil)&nminus; 435 is = 0; 436 iD = n2<<1; 437 do { 438 for(i=is;i<n;i+=iD) { 439 i1 = i+j; 440 i2 = i1+n4; 441 i3 = i2+n4; 442 i4 = i3+n4; 443 i5 = i+n4-j+2; 444 i6 = i5+n4; 445 i7 = i6+n4; 446 i8 = i7+n4; 447 t1 = x[i1] - x[i6]; 448 x[i1] += x[i6]; 449 t2 = x[i5] - x[i2]; 450 x[i5] += x[i2]; 451 t3 = x[i8] + x[i3]; 452 x[i6] = x[i8] - x[i3]; 453 t4 = x[i4] + x[i7]; 454 x[i2] = x[i4] - x[i7]; 455 t5 = t1 - t4; 456 t1 += t4; 457 t4 = t2 - t3; 458 t2 += t3; 459 x[i3] = t5*cc1 + t4*ss1; 460 x[i7] = -t4*cc1 + t5*ss1; 461 x[i4] = t1*cc3 - t2*ss3; 462 x[i8] = t2*cc3 + t1*ss3; 463 } 464 is = (iD<<1) - n2; 465 iD <<= 2; 466 } while(is<n-1); 467 } 468 } 469 is = 1; 470 iD = 4; 471 do { 472 for(i0=is;i0<=n;i0+=iD){ 473 i1 = i0+1; 474 e = x[i0]; 475 x[i0] = e + x[i1]; 476 x[i1] = e - x[i1]; 477 } 478 is = (iD<<1) - 1; 479 iD <<= 2; 480 } while(is<n); 481 scramble_real(zs, n); 482 e = 1/(double)n; 483 for(i=0;i<n;i++) zs[i] *= e; 484} 485 486 487static void mul_hermitian(double *a, double *b, int n) { 488 register int k, half = n>>1; 489 register double aa, bb, am, bm; 490 491 b[0] *= a[0]; 492 b[half] *= a[half]; 493 for(k=1;k<half;k++) { 494 aa = a[k]; bb = b[k]; 495 am = a[n-k]; bm = b[n-k]; 496 b[k] = aa*bb - am*bm; 497 b[n-k] = aa*bm + am*bb; 498 } 499} 500 501static void square_hermitian(double *b, int n) { 502 register int k, half = n>>1; 503 register double c, d; 504 505 b[0] *= b[0]; 506 b[half] *= b[half]; 507 for(k=1;k<half;k++) { 508 c = b[k]; d = b[n-k]; 509 b[n-k] = 2.0*c*d; 510 b[k] = (c+d)*(c-d); 511 } 512} 513 514static void giant_to_double(giant x, int sizex, double *zs, int L) { 515 register int j; 516 for(j=sizex;j<L;j++) zs[j]=0.0; 517 for(j=0;j<sizex;j++) { 518 zs[j] = x->n[j]; 519 } 520} 521