1/* Copyright (c) 1998,2011,2014 Apple Inc. All Rights Reserved. 2 * 3 * NOTICE: USE OF THE MATERIALS ACCOMPANYING THIS NOTICE IS SUBJECT 4 * TO THE TERMS OF THE SIGNED "FAST ELLIPTIC ENCRYPTION (FEE) REFERENCE 5 * SOURCE CODE EVALUATION AGREEMENT" BETWEEN APPLE, INC. AND THE 6 * ORIGINAL LICENSEE THAT OBTAINED THESE MATERIALS FROM APPLE, 7 * INC. ANY USE OF THESE MATERIALS NOT PERMITTED BY SUCH AGREEMENT WILL 8 * EXPOSE YOU TO LIABILITY. 9 *************************************************************************** 10 11 giantFFT.c 12 Library for large-integer arithmetic via FFT. Currently unused 13 in CryptKit. 14 15 16 Revision History 17 ---------------- 18 19 Jan 1998 at Apple 19 Split off from NSGiantIntegers.c. 20 21*/ 22 23/* 24 * FIXME - make sure platform-specific math lib has floor(), fmod(), 25 * sin(), pow() 26 */ 27#include <math.h> 28#include "NSGiantIntegers.h" 29 30#define AUTO_MUL 0 31#define GRAMMAR_MUL 1 32#define FFT_MUL 2 33 34#define TWOPI (double)(2*3.1415926535897932384626433) 35#define SQRT2 (double)(1.414213562373095048801688724209) 36#define SQRTHALF (double)(0.707106781186547524400844362104) 37#define TWO16 (double)(65536.0) 38#define TWOM16 (double)(0.0000152587890625) 39#define BREAK_SHORTS 400 // Number of shorts at which FFT breaks over. 40 41static int lpt(int n, int *lambda); 42static void mul_hermitian(double *a, double *b, int n) ; 43static void square_hermitian(double *b, int n); 44static void addsignal(giant x, double *zs, int n); 45static void scramble_real(double *x, int n); 46static void fft_real_to_hermitian(double *zs, int n); 47static void fftinv_hermitian_to_real(double *zs, int n); 48static void GiantFFTSquare(giant gx); 49static void GiantFFTMul(giant,giant); 50static void giant_to_double(giant x, int sizex, double *zs, int L); 51 52static int mulmode = AUTO_MUL; 53 54void mulg(giant a, giant b) { /* b becomes a*b. */ 55 PROF_START; 56 INCR_MULGS; 57 GiantAuxMul(a,b); 58 #if FEE_DEBUG 59 (void)bitlen(b); // XXX 60 #endif FEE_DEBUG 61 PROF_END(mulgTime); 62 PROF_INCR(numMulg); 63} 64 65static void GiantAuxMul(giant a, giant b) { 66/* Optimized general multiply, b becomes a*b. Modes are: 67 AUTO_MUL: switch according to empirical speed criteria. 68 GRAMMAR_MUL: force grammar-school algorithm. 69 FFT_MUL: force floating point FFT method. 70*/ 71 int square = (a==b); 72 73 if (isZero(b)) return; 74 if (isZero(a)) { 75 gtog(a, b); 76 return; 77 } 78 switch(mulmode) { 79 case GRAMMAR_MUL: 80 GiantGrammarMul(a,b); 81 break; 82 case FFT_MUL: 83 if (square) { 84 GiantFFTSquare(b); 85 } 86 else { 87 GiantFFTMul(a,b); 88 } 89 break; 90 case AUTO_MUL: { 91 int sizea, sizeb; 92 float grammartime; 93 sizea = abs(a->sign); 94 sizeb = abs(b->sign); 95 grammartime = sizea; grammartime *= sizeb; 96 if(grammartime < BREAK_SHORTS*BREAK_SHORTS) { 97 GiantGrammarMul(a,b); 98 } 99 else { 100 if (square) GiantFFTSquare(b); 101 else GiantFFTMul(a,b); 102 } 103 break; 104 } 105 } 106} 107 108/***************** Commence FFT multiply routines ****************/ 109 110static int CurrentRun = 0; 111double *sincos = NULL; 112static void init_sincos(int n) { 113 int j; 114 double e = TWOPI/n; 115 116 if (n <= CurrentRun) return; 117 CurrentRun = n; 118 if (sincos) free(sincos); 119 sincos = (double *)malloc(sizeof(double)*(1+(n>>2))); 120 for(j=0;j<=(n>>2);j++) { 121 sincos[j] = sin(e*j); 122 } 123} 124 125static double s_sin(int n) { 126 int seg = n/(CurrentRun>>2); 127 128 switch(seg) { 129 case 0: return(sincos[n]); 130 case 1: return(sincos[(CurrentRun>>1)-n]); 131 case 2: return(-sincos[n-(CurrentRun>>1)]); 132 case 3: 133 default: return(-sincos[CurrentRun-n]); 134 } 135} 136 137static double s_cos(int n) { 138 int quart = (CurrentRun>>2); 139 140 if (n < quart) return(s_sin(n+quart)); 141 return(-s_sin(n-quart)); 142} 143 144 145static int lpt(int n, int *lambda) { 146/* returns least power of two greater than n */ 147 register int i = 1; 148 149 *lambda = 0; 150 while(i<n) { 151 i<<=1; 152 ++(*lambda); 153 } 154 return(i); 155} 156 157static void addsignal(giant x, double *zs, int n) { 158 register int j, k, m, car; 159 register double f, g; 160 /*double err, maxerr = 0.0;*/ 161 162 for(j=0;j<n;j++) { 163 f = floor(zs[j]+0.5); 164 165 /* err = fabs(zs[j]-f); 166 if(err>maxerr) maxerr = err; 167 */ 168 169 zs[j] =0; 170 k = 0; 171 do{ 172 g = floor(f*TWOM16); 173 zs[j+k] += f-g*TWO16; 174 ++k; 175 f=g; 176 } while(f != 0.0); 177 } 178 car = 0; 179 for(j=0;j<n;j++) { 180 m = zs[j]+car; 181 x->n[j] = m & 0xffff; 182 car = (m>>16); 183 } 184 if(car) x->n[j] = car; 185 else --j; 186 while(!(x->n[j])) --j; 187 x->sign = j+1; 188 if (abs(x->sign) > x->capacity) NSGiantRaise("addsignal overflow"); 189} 190 191static void GiantFFTSquare(giant gx) { 192 int j,size = abs(gx->sign); 193 register int L; 194 195 if(size<4) { GiantGrammarMul(gx,gx); return; } 196 L = lpt(size+size, &j); 197 { 198 //was...double doubles[L]; 199 //is... 200 double *doubles = malloc(sizeof(double) * L); 201 // end 202 giant_to_double(gx, size, doubles, L); 203 fft_real_to_hermitian(doubles, L); 204 square_hermitian(doubles, L); 205 fftinv_hermitian_to_real(doubles, L); 206 addsignal(gx, doubles, L); 207 // new 208 free(doubles); 209 } 210 gx->sign = abs(gx->sign); 211 bitlen(gx); // XXX 212 if (abs(gx->sign) > gx->capacity) NSGiantRaise("GiantFFTSquare overflow"); 213} 214 215static void GiantFFTMul(giant y, giant x) { /* x becomes y*x. */ 216 int lambda, size, sizex = abs(x->sign), sizey = abs(y->sign); 217 int finalsign = gsign(x)*gsign(y); 218 register int L; 219 220 if((sizex<=4)||(sizey<=4)) { GiantGrammarMul(y,x); return; } 221 size = sizex; if(size<sizey) size=sizey; 222 L = lpt(size+size, &lambda); 223 { 224 //double doubles1[L]; 225 //double doubles2[L]; 226 double *doubles1 = malloc(sizeof(double) * L); 227 double *doubles2 = malloc(sizeof(double) * L); 228 229 giant_to_double(x, sizex, doubles1, L); 230 giant_to_double(y, sizey, doubles2, L); 231 fft_real_to_hermitian(doubles1, L); 232 fft_real_to_hermitian(doubles2, L); 233 mul_hermitian(doubles2, doubles1, L); 234 fftinv_hermitian_to_real(doubles1, L); 235 addsignal(x, doubles1, L); 236 237 free(doubles1); 238 free(doubles2); 239 } 240 x->sign = finalsign*abs(x->sign); 241 bitlen(x); // XXX 242 if (abs(x->sign) > x->capacity) NSGiantRaise("GiantFFTMul overflow"); 243} 244 245static void scramble_real(double *x, int n) { 246 register int i,j,k; 247 register double tmp; 248 249 for(i=0,j=0;i<n-1;i++) { 250 if(i<j) { 251 tmp = x[j]; 252 x[j]=x[i]; 253 x[i]=tmp; 254 } 255 k = n/2; 256 while(k<=j) { 257 j -= k; 258 k>>=1; 259 } 260 j += k; 261 } 262} 263 264static void fft_real_to_hermitian(double *zs, int n) { 265/* Output is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]). 266 This is a decimation-in-time, split-radix algorithm. 267 */ 268 register double cc1, ss1, cc3, ss3; 269 register int is, iD, i0, i1, i2, i3, i4, i5, i6, i7, i8, 270 a, a3, b, b3, nminus = n-1, dil, expand; 271 register double *x, e; 272 int nn = n>>1; 273 double t1, t2, t3, t4, t5, t6; 274 register int n2, n4, n8, i, j; 275 276 init_sincos(n); 277 expand = CurrentRun/n; 278 scramble_real(zs, n); 279 x = zs-1; /* FORTRAN compatibility. */ 280 is = 1; 281 iD = 4; 282 do{ 283 for(i0=is;i0<=n;i0+=iD) { 284 i1 = i0+1; 285 e = x[i0]; 286 x[i0] = e + x[i1]; 287 x[i1] = e - x[i1]; 288 } 289 is = (iD<<1)-1; 290 iD <<= 2; 291 } while(is<n); 292 n2 = 2; 293 while(nn>>=1) { 294 n2 <<= 1; 295 n4 = n2>>2; 296 n8 = n2>>3; 297 is = 0; 298 iD = n2<<1; 299 do { 300 for(i=is;i<n;i+=iD) { 301 i1 = i+1; 302 i2 = i1 + n4; 303 i3 = i2 + n4; 304 i4 = i3 + n4; 305 t1 = x[i4]+x[i3]; 306 x[i4] -= x[i3]; 307 x[i3] = x[i1] - t1; 308 x[i1] += t1; 309 if(n4==1) continue; 310 i1 += n8; 311 i2 += n8; 312 i3 += n8; 313 i4 += n8; 314 t1 = (x[i3]+x[i4])*SQRTHALF; 315 t2 = (x[i3]-x[i4])*SQRTHALF; 316 x[i4] = x[i2] - t1; 317 x[i3] = -x[i2] - t1; 318 x[i2] = x[i1] - t2; 319 x[i1] += t2; 320 } 321 is = (iD<<1) - n2; 322 iD <<= 2; 323 } while(is<n); 324 dil = n/n2; 325 a = dil; 326 for(j=2;j<=n8;j++) { 327 a3 = (a+(a<<1))&nminus; 328 b = a*expand; 329 b3 = a3*expand; 330 cc1 = s_cos(b); 331 ss1 = s_sin(b); 332 cc3 = s_cos(b3); 333 ss3 = s_sin(b3); 334 a = (a+dil)&nminus; 335 is = 0; 336 iD = n2<<1; 337 do { 338 for(i=is;i<n;i+=iD) { 339 i1 = i+j; 340 i2 = i1 + n4; 341 i3 = i2 + n4; 342 i4 = i3 + n4; 343 i5 = i + n4 - j + 2; 344 i6 = i5 + n4; 345 i7 = i6 + n4; 346 i8 = i7 + n4; 347 t1 = x[i3]*cc1 + x[i7]*ss1; 348 t2 = x[i7]*cc1 - x[i3]*ss1; 349 t3 = x[i4]*cc3 + x[i8]*ss3; 350 t4 = x[i8]*cc3 - x[i4]*ss3; 351 t5 = t1 + t3; 352 t6 = t2 + t4; 353 t3 = t1 - t3; 354 t4 = t2 - t4; 355 t2 = x[i6] + t6; 356 x[i3] = t6 - x[i6]; 357 x[i8] = t2; 358 t2 = x[i2] - t3; 359 x[i7] = -x[i2] - t3; 360 x[i4] = t2; 361 t1 = x[i1] + t5; 362 x[i6] = x[i1] - t5; 363 x[i1] = t1; 364 t1 = x[i5] + t4; 365 x[i5] -= t4; 366 x[i2] = t1; 367 } 368 is = (iD<<1) - n2; 369 iD <<= 2; 370 } while(is<n); 371 } 372 } 373} 374 375static void fftinv_hermitian_to_real(double *zs, int n) { 376/* Input is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]). 377 This is a decimation-in-frequency, split-radix algorithm. 378 */ 379 register double cc1, ss1, cc3, ss3; 380 register int is, iD, i0, i1, i2, i3, i4, i5, i6, i7, i8, 381 a, a3, b, b3, nminus = n-1, dil, expand; 382 register double *x, e; 383 int nn = n>>1; 384 double t1, t2, t3, t4, t5; 385 int n2, n4, n8, i, j; 386 387 init_sincos(n); 388 expand = CurrentRun/n; 389 x = zs-1; 390 n2 = n<<1; 391 while(nn >>= 1) { 392 is = 0; 393 iD = n2; 394 n2 >>= 1; 395 n4 = n2>>2; 396 n8 = n4>>1; 397 do { 398 for(i=is;i<n;i+=iD) { 399 i1 = i+1; 400 i2 = i1 + n4; 401 i3 = i2 + n4; 402 i4 = i3 + n4; 403 t1 = x[i1] - x[i3]; 404 x[i1] += x[i3]; 405 x[i2] += x[i2]; 406 x[i3] = t1 - 2.0*x[i4]; 407 x[i4] = t1 + 2.0*x[i4]; 408 if(n4==1) continue; 409 i1 += n8; 410 i2 += n8; 411 i3 += n8; 412 i4 += n8; 413 t1 = (x[i2]-x[i1])*SQRTHALF; 414 t2 = (x[i4]+x[i3])*SQRTHALF; 415 x[i1] += x[i2]; 416 x[i2] = x[i4]-x[i3]; 417 x[i3] = -2.0*(t2+t1); 418 x[i4] = 2.0*(t1-t2); 419 } 420 is = (iD<<1) - n2; 421 iD <<= 2; 422 } while(is<n-1); 423 dil = n/n2; 424 a = dil; 425 for(j=2;j<=n8;j++) { 426 a3 = (a+(a<<1))&nminus; 427 b = a*expand; 428 b3 = a3*expand; 429 cc1 = s_cos(b); 430 ss1 = s_sin(b); 431 cc3 = s_cos(b3); 432 ss3 = s_sin(b3); 433 a = (a+dil)&nminus; 434 is = 0; 435 iD = n2<<1; 436 do { 437 for(i=is;i<n;i+=iD) { 438 i1 = i+j; 439 i2 = i1+n4; 440 i3 = i2+n4; 441 i4 = i3+n4; 442 i5 = i+n4-j+2; 443 i6 = i5+n4; 444 i7 = i6+n4; 445 i8 = i7+n4; 446 t1 = x[i1] - x[i6]; 447 x[i1] += x[i6]; 448 t2 = x[i5] - x[i2]; 449 x[i5] += x[i2]; 450 t3 = x[i8] + x[i3]; 451 x[i6] = x[i8] - x[i3]; 452 t4 = x[i4] + x[i7]; 453 x[i2] = x[i4] - x[i7]; 454 t5 = t1 - t4; 455 t1 += t4; 456 t4 = t2 - t3; 457 t2 += t3; 458 x[i3] = t5*cc1 + t4*ss1; 459 x[i7] = -t4*cc1 + t5*ss1; 460 x[i4] = t1*cc3 - t2*ss3; 461 x[i8] = t2*cc3 + t1*ss3; 462 } 463 is = (iD<<1) - n2; 464 iD <<= 2; 465 } while(is<n-1); 466 } 467 } 468 is = 1; 469 iD = 4; 470 do { 471 for(i0=is;i0<=n;i0+=iD){ 472 i1 = i0+1; 473 e = x[i0]; 474 x[i0] = e + x[i1]; 475 x[i1] = e - x[i1]; 476 } 477 is = (iD<<1) - 1; 478 iD <<= 2; 479 } while(is<n); 480 scramble_real(zs, n); 481 e = 1/(double)n; 482 for(i=0;i<n;i++) zs[i] *= e; 483} 484 485 486static void mul_hermitian(double *a, double *b, int n) { 487 register int k, half = n>>1; 488 register double aa, bb, am, bm; 489 490 b[0] *= a[0]; 491 b[half] *= a[half]; 492 for(k=1;k<half;k++) { 493 aa = a[k]; bb = b[k]; 494 am = a[n-k]; bm = b[n-k]; 495 b[k] = aa*bb - am*bm; 496 b[n-k] = aa*bm + am*bb; 497 } 498} 499 500static void square_hermitian(double *b, int n) { 501 register int k, half = n>>1; 502 register double c, d; 503 504 b[0] *= b[0]; 505 b[half] *= b[half]; 506 for(k=1;k<half;k++) { 507 c = b[k]; d = b[n-k]; 508 b[n-k] = 2.0*c*d; 509 b[k] = (c+d)*(c-d); 510 } 511} 512 513static void giant_to_double(giant x, int sizex, double *zs, int L) { 514 register int j; 515 for(j=sizex;j<L;j++) zs[j]=0.0; 516 for(j=0;j<sizex;j++) { 517 zs[j] = x->n[j]; 518 } 519} 520