1
2/*
3 *  M_APM  -  mapm_fft.c
4 *
5 *  This FFT (Fast Fourier Transform) is from Takuya OOURA
6 *
7 *  Copyright(C) 1996-1999 Takuya OOURA
8 *  email: ooura@mmm.t.u-tokyo.ac.jp
9 *
10 *  See full FFT documentation below ...  (MCR)
11 *
12 *  This software is provided "as is" without express or implied warranty.
13 */
14
15/*
16 *      $Id: mapm_fft.c,v 1.15 2007/12/03 01:37:42 mike Exp $
17 *
18 *      This file contains the FFT based FAST MULTIPLICATION function
19 *      as well as its support functions.
20 *
21 *      $Log: mapm_fft.c,v $
22 *      Revision 1.15  2007/12/03 01:37:42  mike
23 *      no changes
24 *
25 *      Revision 1.14  2003/07/28 19:39:01  mike
26 *      change 16 bit constant
27 *
28 *      Revision 1.13  2003/07/21 20:11:55  mike
29 *      Modify error messages to be in a consistent format.
30 *
31 *      Revision 1.12  2003/05/01 21:55:36  mike
32 *      remove math.h
33 *
34 *      Revision 1.11  2003/03/31 22:10:09  mike
35 *      call generic error handling function
36 *
37 *      Revision 1.10  2002/11/03 22:11:48  mike
38 *      Updated function parameters to use the modern style
39 *
40 *      Revision 1.9  2001/07/16 19:16:15  mike
41 *      add function M_free_all_fft
42 *
43 *      Revision 1.8  2000/08/01 22:23:24  mike
44 *      use sizeof(int) from function call to stop
45 *      some compilers from complaining.
46 *
47 *      Revision 1.7  2000/07/30 22:39:21  mike
48 *      lower 16 bit malloc size
49 *
50 *      Revision 1.6  2000/07/10 22:54:26  mike
51 *      malloc the local data arrays
52 *
53 *      Revision 1.5  2000/07/10 00:09:02  mike
54 *      use local static arrays for smaller numbers
55 *
56 *      Revision 1.4  2000/07/08 18:24:23  mike
57 *      minor optimization tweak
58 *
59 *      Revision 1.3  2000/07/08 17:52:49  mike
60 *      do the FFT in base 10000 instead of MAPM numbers base 100
61 *      this runs faster and uses 1/2 the RAM
62 *
63 *      Revision 1.2  2000/07/06 21:04:34  mike
64 *      added more comments
65 *
66 *      Revision 1.1  2000/07/06 20:42:05  mike
67 *      Initial revision
68 */
69
70#include "m_apm_lc.h"
71
72#ifndef MM_PI_2
73#define MM_PI_2      1.570796326794896619231321691639751442098584699687
74#endif
75
76#ifndef WR5000       /* cos(MM_PI_2*0.5000) */
77#define WR5000       0.707106781186547524400844362104849039284835937688
78#endif
79
80#ifndef RDFT_LOOP_DIV     /* control of the RDFT's speed & tolerance */
81#define RDFT_LOOP_DIV 64
82#endif
83
84extern void   M_fast_mul_fft(UCHAR *, UCHAR *, UCHAR *, int);
85
86extern void   M_rdft(int, int, double *);
87extern void   M_bitrv2(int, double *);
88extern void   M_cftfsub(int, double *);
89extern void   M_cftbsub(int, double *);
90extern void   M_rftfsub(int, double *);
91extern void   M_rftbsub(int, double *);
92extern void   M_cft1st(int, double *);
93extern void   M_cftmdl(int, int, double *);
94
95static double *M_aa_array, *M_bb_array;
96static int    M_size = -1;
97
98static char   *M_fft_error_msg = "\'M_fast_mul_fft\', Out of memory";
99
100/****************************************************************************/
101void	M_free_all_fft()
102{
103if (M_size > 0)
104  {
105   MAPM_FREE(M_aa_array);
106   MAPM_FREE(M_bb_array);
107   M_size = -1;
108  }
109}
110/****************************************************************************/
111/*
112 *      multiply 'uu' by 'vv' with nbytes each
113 *      yielding a 2*nbytes result in 'ww'.
114 *      each byte contains a base 100 'digit',
115 *      i.e.: range from 0-99.
116 *
117 *             MSB              LSB
118 *
119 *   uu,vv     [0] [1] [2] ... [N-1]
120 *   ww        [0] [1] [2] ... [2N-1]
121 */
122
123void	M_fast_mul_fft(UCHAR *ww, UCHAR *uu, UCHAR *vv, int nbytes)
124{
125int             mflag, i, j, nn2, nn;
126double          carry, nnr, dtemp, *a, *b;
127UCHAR           *w0;
128unsigned long   ul;
129
130if (M_size < 0)                  /* if first time in, setup working arrays */
131  {
132   if (M_get_sizeof_int() == 2)  /* if still using 16 bit compilers */
133     M_size = 516;
134   else
135     M_size = 8200;
136
137   M_aa_array = (double *)MAPM_MALLOC(M_size * sizeof(double));
138   M_bb_array = (double *)MAPM_MALLOC(M_size * sizeof(double));
139
140   if ((M_aa_array == NULL) || (M_bb_array == NULL))
141     {
142      /* fatal, this does not return */
143
144      M_apm_log_error_msg(M_APM_FATAL, M_fft_error_msg);
145     }
146  }
147
148nn  = nbytes;
149nn2 = nbytes >> 1;
150
151if (nn > M_size)
152  {
153   mflag = TRUE;
154
155   a = (double *)MAPM_MALLOC((nn + 8) * sizeof(double));
156   b = (double *)MAPM_MALLOC((nn + 8) * sizeof(double));
157
158   if ((a == NULL) || (b == NULL))
159     {
160      /* fatal, this does not return */
161
162      M_apm_log_error_msg(M_APM_FATAL, M_fft_error_msg);
163     }
164  }
165else
166  {
167   mflag = FALSE;
168
169   a = M_aa_array;
170   b = M_bb_array;
171  }
172
173/*
174 *   convert normal base 100 MAPM numbers to base 10000
175 *   for the FFT operation.
176 */
177
178i = 0;
179for (j=0; j < nn2; j++)
180  {
181   a[j] = (double)((int)uu[i] * 100 + uu[i+1]);
182   b[j] = (double)((int)vv[i] * 100 + vv[i+1]);
183   i += 2;
184  }
185
186/* zero fill the second half of the arrays */
187
188for (j=nn2; j < nn; j++)
189  {
190   a[j] = 0.0;
191   b[j] = 0.0;
192  }
193
194/* perform the forward Fourier transforms for both numbers */
195
196M_rdft(nn, 1, a);
197M_rdft(nn, 1, b);
198
199/* perform the convolution ... */
200
201b[0] *= a[0];
202b[1] *= a[1];
203
204for (j=3; j <= nn; j += 2)
205  {
206   dtemp  = b[j-1];
207   b[j-1] = dtemp * a[j-1] - b[j] * a[j];
208   b[j]   = dtemp * a[j] + b[j] * a[j-1];
209  }
210
211/* perform the inverse transform on the result */
212
213M_rdft(nn, -1, b);
214
215/* perform a final pass to release all the carries */
216/* we are still in base 10000 at this point        */
217
218carry = 0.0;
219j     = nn;
220nnr   = 2.0 / (double)nn;
221
222while (1)
223  {
224   dtemp = b[--j] * nnr + carry + 0.5;
225   ul    = (unsigned long)(dtemp * 1.0E-4);
226   carry = (double)ul;
227   b[j]  = dtemp - carry * 10000.0;
228
229   if (j == 0)
230     break;
231  }
232
233/* copy result to our destination after converting back to base 100 */
234
235w0 = ww;
236M_get_div_rem((int)ul, w0, (w0 + 1));
237
238for (j=0; j <= (nn - 2); j++)
239  {
240   w0 += 2;
241   M_get_div_rem((int)b[j], w0, (w0 + 1));
242  }
243
244if (mflag)
245  {
246   MAPM_FREE(b);
247   MAPM_FREE(a);
248  }
249}
250/****************************************************************************/
251
252/*
253 *    The following info is from Takuya OOURA's documentation :
254 *
255 *    NOTE : MAPM only uses the 'RDFT' function (as well as the
256 *           functions RDFT calls). All the code from here down
257 *           in this file is from Takuya OOURA. The only change I
258 *           made was to add 'M_' in front of all the functions
259 *           I used. This was to guard against any possible
260 *           name collisions in the future.
261 *
262 *    MCR  06 July 2000
263 *
264 *
265 *    General Purpose FFT (Fast Fourier/Cosine/Sine Transform) Package
266 *
267 *    Description:
268 *        A package to calculate Discrete Fourier/Cosine/Sine Transforms of
269 *        1-dimensional sequences of length 2^N.
270 *
271 *        fft4g_h.c  : FFT Package in C       - Simple Version I   (radix 4,2)
272 *
273 *        rdft: Real Discrete Fourier Transform
274 *
275 *    Method:
276 *        -------- rdft --------
277 *        A method with a following butterfly operation appended to "cdft".
278 *        In forward transform :
279 *            A[k] = sum_j=0^n-1 a[j]*W(n)^(j*k), 0<=k<=n/2,
280 *                W(n) = exp(2*pi*i/n),
281 *        this routine makes an array x[] :
282 *            x[j] = a[2*j] + i*a[2*j+1], 0<=j<n/2
283 *        and calls "cdft" of length n/2 :
284 *            X[k] = sum_j=0^n/2-1 x[j] * W(n/2)^(j*k), 0<=k<n.
285 *        The result A[k] are :
286 *            A[k]     = X[k]     - (1+i*W(n)^k)/2 * (X[k]-conjg(X[n/2-k])),
287 *            A[n/2-k] = X[n/2-k] +
288 *                            conjg((1+i*W(n)^k)/2 * (X[k]-conjg(X[n/2-k]))),
289 *                0<=k<=n/2
290 *            (notes: conjg() is a complex conjugate, X[n/2]=X[0]).
291 *        ----------------------
292 *
293 *    Reference:
294 *        * Masatake MORI, Makoto NATORI, Tatuo TORII: Suchikeisan,
295 *          Iwanamikouzajyouhoukagaku18, Iwanami, 1982 (Japanese)
296 *        * Henri J. Nussbaumer: Fast Fourier Transform and Convolution
297 *          Algorithms, Springer Verlag, 1982
298 *        * C. S. Burrus, Notes on the FFT (with large FFT paper list)
299 *          http://www-dsp.rice.edu/research/fft/fftnote.asc
300 *
301 *    Copyright:
302 *        Copyright(C) 1996-1999 Takuya OOURA
303 *        email: ooura@mmm.t.u-tokyo.ac.jp
304 *        download: http://momonga.t.u-tokyo.ac.jp/~ooura/fft.html
305 *        You may use, copy, modify this code for any purpose and
306 *        without fee. You may distribute this ORIGINAL package.
307 */
308
309/*
310
311functions
312    rdft: Real Discrete Fourier Transform
313
314function prototypes
315    void rdft(int, int, double *);
316
317-------- Real DFT / Inverse of Real DFT --------
318    [definition]
319        <case1> RDFT
320            R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
321            I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
322        <case2> IRDFT (excluding scale)
323            a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
324                   sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
325                   sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
326    [usage]
327        <case1>
328            rdft(n, 1, a);
329        <case2>
330            rdft(n, -1, a);
331    [parameters]
332        n              :data length (int)
333                        n >= 2, n = power of 2
334        a[0...n-1]     :input/output data (double *)
335                        <case1>
336                            output data
337                                a[2*k] = R[k], 0<=k<n/2
338                                a[2*k+1] = I[k], 0<k<n/2
339                                a[1] = R[n/2]
340                        <case2>
341                            input data
342                                a[2*j] = R[j], 0<=j<n/2
343                                a[2*j+1] = I[j], 0<j<n/2
344                                a[1] = R[n/2]
345    [remark]
346        Inverse of
347            rdft(n, 1, a);
348        is
349            rdft(n, -1, a);
350            for (j = 0; j <= n - 1; j++) {
351                a[j] *= 2.0 / n;
352            }
353*/
354
355
356void	M_rdft(int n, int isgn, double *a)
357{
358    double xi;
359
360    if (isgn >= 0) {
361        if (n > 4) {
362            M_bitrv2(n, a);
363            M_cftfsub(n, a);
364            M_rftfsub(n, a);
365        } else if (n == 4) {
366            M_cftfsub(n, a);
367        }
368        xi = a[0] - a[1];
369        a[0] += a[1];
370        a[1] = xi;
371    } else {
372        a[1] = 0.5 * (a[0] - a[1]);
373        a[0] -= a[1];
374        if (n > 4) {
375            M_rftbsub(n, a);
376            M_bitrv2(n, a);
377            M_cftbsub(n, a);
378        } else if (n == 4) {
379            M_cftfsub(n, a);
380        }
381    }
382}
383
384
385
386void    M_bitrv2(int n, double *a)
387{
388    int j0, k0, j1, k1, l, m, i, j, k;
389    double xr, xi, yr, yi;
390
391    l = n >> 2;
392    m = 2;
393    while (m < l) {
394        l >>= 1;
395        m <<= 1;
396    }
397    if (m == l) {
398        j0 = 0;
399        for (k0 = 0; k0 < m; k0 += 2) {
400            k = k0;
401            for (j = j0; j < j0 + k0; j += 2) {
402                xr = a[j];
403                xi = a[j + 1];
404                yr = a[k];
405                yi = a[k + 1];
406                a[j] = yr;
407                a[j + 1] = yi;
408                a[k] = xr;
409                a[k + 1] = xi;
410                j1 = j + m;
411                k1 = k + 2 * m;
412                xr = a[j1];
413                xi = a[j1 + 1];
414                yr = a[k1];
415                yi = a[k1 + 1];
416                a[j1] = yr;
417                a[j1 + 1] = yi;
418                a[k1] = xr;
419                a[k1 + 1] = xi;
420                j1 += m;
421                k1 -= m;
422                xr = a[j1];
423                xi = a[j1 + 1];
424                yr = a[k1];
425                yi = a[k1 + 1];
426                a[j1] = yr;
427                a[j1 + 1] = yi;
428                a[k1] = xr;
429                a[k1 + 1] = xi;
430                j1 += m;
431                k1 += 2 * m;
432                xr = a[j1];
433                xi = a[j1 + 1];
434                yr = a[k1];
435                yi = a[k1 + 1];
436                a[j1] = yr;
437                a[j1 + 1] = yi;
438                a[k1] = xr;
439                a[k1 + 1] = xi;
440                for (i = n >> 1; i > (k ^= i); i >>= 1);
441            }
442            j1 = j0 + k0 + m;
443            k1 = j1 + m;
444            xr = a[j1];
445            xi = a[j1 + 1];
446            yr = a[k1];
447            yi = a[k1 + 1];
448            a[j1] = yr;
449            a[j1 + 1] = yi;
450            a[k1] = xr;
451            a[k1 + 1] = xi;
452            for (i = n >> 1; i > (j0 ^= i); i >>= 1);
453        }
454    } else {
455        j0 = 0;
456        for (k0 = 2; k0 < m; k0 += 2) {
457            for (i = n >> 1; i > (j0 ^= i); i >>= 1);
458            k = k0;
459            for (j = j0; j < j0 + k0; j += 2) {
460                xr = a[j];
461                xi = a[j + 1];
462                yr = a[k];
463                yi = a[k + 1];
464                a[j] = yr;
465                a[j + 1] = yi;
466                a[k] = xr;
467                a[k + 1] = xi;
468                j1 = j + m;
469                k1 = k + m;
470                xr = a[j1];
471                xi = a[j1 + 1];
472                yr = a[k1];
473                yi = a[k1 + 1];
474                a[j1] = yr;
475                a[j1 + 1] = yi;
476                a[k1] = xr;
477                a[k1 + 1] = xi;
478                for (i = n >> 1; i > (k ^= i); i >>= 1);
479            }
480        }
481    }
482}
483
484
485
486void    M_cftfsub(int n, double *a)
487{
488    int j, j1, j2, j3, l;
489    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
490
491    l = 2;
492    if (n > 8) {
493        M_cft1st(n, a);
494        l = 8;
495        while ((l << 2) < n) {
496            M_cftmdl(n, l, a);
497            l <<= 2;
498        }
499    }
500    if ((l << 2) == n) {
501        for (j = 0; j < l; j += 2) {
502            j1 = j + l;
503            j2 = j1 + l;
504            j3 = j2 + l;
505            x0r = a[j] + a[j1];
506            x0i = a[j + 1] + a[j1 + 1];
507            x1r = a[j] - a[j1];
508            x1i = a[j + 1] - a[j1 + 1];
509            x2r = a[j2] + a[j3];
510            x2i = a[j2 + 1] + a[j3 + 1];
511            x3r = a[j2] - a[j3];
512            x3i = a[j2 + 1] - a[j3 + 1];
513            a[j] = x0r + x2r;
514            a[j + 1] = x0i + x2i;
515            a[j2] = x0r - x2r;
516            a[j2 + 1] = x0i - x2i;
517            a[j1] = x1r - x3i;
518            a[j1 + 1] = x1i + x3r;
519            a[j3] = x1r + x3i;
520            a[j3 + 1] = x1i - x3r;
521        }
522    } else {
523        for (j = 0; j < l; j += 2) {
524            j1 = j + l;
525            x0r = a[j] - a[j1];
526            x0i = a[j + 1] - a[j1 + 1];
527            a[j] += a[j1];
528            a[j + 1] += a[j1 + 1];
529            a[j1] = x0r;
530            a[j1 + 1] = x0i;
531        }
532    }
533}
534
535
536
537void 	M_cftbsub(int n, double *a)
538{
539    int j, j1, j2, j3, l;
540    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
541
542    l = 2;
543    if (n > 8) {
544        M_cft1st(n, a);
545        l = 8;
546        while ((l << 2) < n) {
547            M_cftmdl(n, l, a);
548            l <<= 2;
549        }
550    }
551    if ((l << 2) == n) {
552        for (j = 0; j < l; j += 2) {
553            j1 = j + l;
554            j2 = j1 + l;
555            j3 = j2 + l;
556            x0r = a[j] + a[j1];
557            x0i = -a[j + 1] - a[j1 + 1];
558            x1r = a[j] - a[j1];
559            x1i = -a[j + 1] + a[j1 + 1];
560            x2r = a[j2] + a[j3];
561            x2i = a[j2 + 1] + a[j3 + 1];
562            x3r = a[j2] - a[j3];
563            x3i = a[j2 + 1] - a[j3 + 1];
564            a[j] = x0r + x2r;
565            a[j + 1] = x0i - x2i;
566            a[j2] = x0r - x2r;
567            a[j2 + 1] = x0i + x2i;
568            a[j1] = x1r - x3i;
569            a[j1 + 1] = x1i - x3r;
570            a[j3] = x1r + x3i;
571            a[j3 + 1] = x1i + x3r;
572        }
573    } else {
574        for (j = 0; j < l; j += 2) {
575            j1 = j + l;
576            x0r = a[j] - a[j1];
577            x0i = -a[j + 1] + a[j1 + 1];
578            a[j] += a[j1];
579            a[j + 1] = -a[j + 1] - a[j1 + 1];
580            a[j1] = x0r;
581            a[j1 + 1] = x0i;
582        }
583    }
584}
585
586
587
588void 	M_cft1st(int n, double *a)
589{
590    int j, kj, kr;
591    double ew, wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
592    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
593
594    x0r = a[0] + a[2];
595    x0i = a[1] + a[3];
596    x1r = a[0] - a[2];
597    x1i = a[1] - a[3];
598    x2r = a[4] + a[6];
599    x2i = a[5] + a[7];
600    x3r = a[4] - a[6];
601    x3i = a[5] - a[7];
602    a[0] = x0r + x2r;
603    a[1] = x0i + x2i;
604    a[4] = x0r - x2r;
605    a[5] = x0i - x2i;
606    a[2] = x1r - x3i;
607    a[3] = x1i + x3r;
608    a[6] = x1r + x3i;
609    a[7] = x1i - x3r;
610    wn4r = WR5000;
611    x0r = a[8] + a[10];
612    x0i = a[9] + a[11];
613    x1r = a[8] - a[10];
614    x1i = a[9] - a[11];
615    x2r = a[12] + a[14];
616    x2i = a[13] + a[15];
617    x3r = a[12] - a[14];
618    x3i = a[13] - a[15];
619    a[8] = x0r + x2r;
620    a[9] = x0i + x2i;
621    a[12] = x2i - x0i;
622    a[13] = x0r - x2r;
623    x0r = x1r - x3i;
624    x0i = x1i + x3r;
625    a[10] = wn4r * (x0r - x0i);
626    a[11] = wn4r * (x0r + x0i);
627    x0r = x3i + x1r;
628    x0i = x3r - x1i;
629    a[14] = wn4r * (x0i - x0r);
630    a[15] = wn4r * (x0i + x0r);
631    ew = MM_PI_2 / n;
632    kr = 0;
633    for (j = 16; j < n; j += 16) {
634        for (kj = n >> 2; kj > (kr ^= kj); kj >>= 1);
635        wk1r = cos(ew * kr);
636        wk1i = sin(ew * kr);
637        wk2r = 1 - 2 * wk1i * wk1i;
638        wk2i = 2 * wk1i * wk1r;
639        wk3r = wk1r - 2 * wk2i * wk1i;
640        wk3i = 2 * wk2i * wk1r - wk1i;
641        x0r = a[j] + a[j + 2];
642        x0i = a[j + 1] + a[j + 3];
643        x1r = a[j] - a[j + 2];
644        x1i = a[j + 1] - a[j + 3];
645        x2r = a[j + 4] + a[j + 6];
646        x2i = a[j + 5] + a[j + 7];
647        x3r = a[j + 4] - a[j + 6];
648        x3i = a[j + 5] - a[j + 7];
649        a[j] = x0r + x2r;
650        a[j + 1] = x0i + x2i;
651        x0r -= x2r;
652        x0i -= x2i;
653        a[j + 4] = wk2r * x0r - wk2i * x0i;
654        a[j + 5] = wk2r * x0i + wk2i * x0r;
655        x0r = x1r - x3i;
656        x0i = x1i + x3r;
657        a[j + 2] = wk1r * x0r - wk1i * x0i;
658        a[j + 3] = wk1r * x0i + wk1i * x0r;
659        x0r = x1r + x3i;
660        x0i = x1i - x3r;
661        a[j + 6] = wk3r * x0r - wk3i * x0i;
662        a[j + 7] = wk3r * x0i + wk3i * x0r;
663        x0r = wn4r * (wk1r - wk1i);
664        wk1i = wn4r * (wk1r + wk1i);
665        wk1r = x0r;
666        wk3r = wk1r - 2 * wk2r * wk1i;
667        wk3i = 2 * wk2r * wk1r - wk1i;
668        x0r = a[j + 8] + a[j + 10];
669        x0i = a[j + 9] + a[j + 11];
670        x1r = a[j + 8] - a[j + 10];
671        x1i = a[j + 9] - a[j + 11];
672        x2r = a[j + 12] + a[j + 14];
673        x2i = a[j + 13] + a[j + 15];
674        x3r = a[j + 12] - a[j + 14];
675        x3i = a[j + 13] - a[j + 15];
676        a[j + 8] = x0r + x2r;
677        a[j + 9] = x0i + x2i;
678        x0r -= x2r;
679        x0i -= x2i;
680        a[j + 12] = -wk2i * x0r - wk2r * x0i;
681        a[j + 13] = -wk2i * x0i + wk2r * x0r;
682        x0r = x1r - x3i;
683        x0i = x1i + x3r;
684        a[j + 10] = wk1r * x0r - wk1i * x0i;
685        a[j + 11] = wk1r * x0i + wk1i * x0r;
686        x0r = x1r + x3i;
687        x0i = x1i - x3r;
688        a[j + 14] = wk3r * x0r - wk3i * x0i;
689        a[j + 15] = wk3r * x0i + wk3i * x0r;
690    }
691}
692
693
694
695void 	M_cftmdl(int n, int l, double *a)
696{
697    int j, j1, j2, j3, k, kj, kr, m, m2;
698    double ew, wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
699    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
700
701    m = l << 2;
702    for (j = 0; j < l; j += 2) {
703        j1 = j + l;
704        j2 = j1 + l;
705        j3 = j2 + l;
706        x0r = a[j] + a[j1];
707        x0i = a[j + 1] + a[j1 + 1];
708        x1r = a[j] - a[j1];
709        x1i = a[j + 1] - a[j1 + 1];
710        x2r = a[j2] + a[j3];
711        x2i = a[j2 + 1] + a[j3 + 1];
712        x3r = a[j2] - a[j3];
713        x3i = a[j2 + 1] - a[j3 + 1];
714        a[j] = x0r + x2r;
715        a[j + 1] = x0i + x2i;
716        a[j2] = x0r - x2r;
717        a[j2 + 1] = x0i - x2i;
718        a[j1] = x1r - x3i;
719        a[j1 + 1] = x1i + x3r;
720        a[j3] = x1r + x3i;
721        a[j3 + 1] = x1i - x3r;
722    }
723    wn4r = WR5000;
724    for (j = m; j < l + m; j += 2) {
725        j1 = j + l;
726        j2 = j1 + l;
727        j3 = j2 + l;
728        x0r = a[j] + a[j1];
729        x0i = a[j + 1] + a[j1 + 1];
730        x1r = a[j] - a[j1];
731        x1i = a[j + 1] - a[j1 + 1];
732        x2r = a[j2] + a[j3];
733        x2i = a[j2 + 1] + a[j3 + 1];
734        x3r = a[j2] - a[j3];
735        x3i = a[j2 + 1] - a[j3 + 1];
736        a[j] = x0r + x2r;
737        a[j + 1] = x0i + x2i;
738        a[j2] = x2i - x0i;
739        a[j2 + 1] = x0r - x2r;
740        x0r = x1r - x3i;
741        x0i = x1i + x3r;
742        a[j1] = wn4r * (x0r - x0i);
743        a[j1 + 1] = wn4r * (x0r + x0i);
744        x0r = x3i + x1r;
745        x0i = x3r - x1i;
746        a[j3] = wn4r * (x0i - x0r);
747        a[j3 + 1] = wn4r * (x0i + x0r);
748    }
749    ew = MM_PI_2 / n;
750    kr = 0;
751    m2 = 2 * m;
752    for (k = m2; k < n; k += m2) {
753        for (kj = n >> 2; kj > (kr ^= kj); kj >>= 1);
754        wk1r = cos(ew * kr);
755        wk1i = sin(ew * kr);
756        wk2r = 1 - 2 * wk1i * wk1i;
757        wk2i = 2 * wk1i * wk1r;
758        wk3r = wk1r - 2 * wk2i * wk1i;
759        wk3i = 2 * wk2i * wk1r - wk1i;
760        for (j = k; j < l + k; j += 2) {
761            j1 = j + l;
762            j2 = j1 + l;
763            j3 = j2 + l;
764            x0r = a[j] + a[j1];
765            x0i = a[j + 1] + a[j1 + 1];
766            x1r = a[j] - a[j1];
767            x1i = a[j + 1] - a[j1 + 1];
768            x2r = a[j2] + a[j3];
769            x2i = a[j2 + 1] + a[j3 + 1];
770            x3r = a[j2] - a[j3];
771            x3i = a[j2 + 1] - a[j3 + 1];
772            a[j] = x0r + x2r;
773            a[j + 1] = x0i + x2i;
774            x0r -= x2r;
775            x0i -= x2i;
776            a[j2] = wk2r * x0r - wk2i * x0i;
777            a[j2 + 1] = wk2r * x0i + wk2i * x0r;
778            x0r = x1r - x3i;
779            x0i = x1i + x3r;
780            a[j1] = wk1r * x0r - wk1i * x0i;
781            a[j1 + 1] = wk1r * x0i + wk1i * x0r;
782            x0r = x1r + x3i;
783            x0i = x1i - x3r;
784            a[j3] = wk3r * x0r - wk3i * x0i;
785            a[j3 + 1] = wk3r * x0i + wk3i * x0r;
786        }
787        x0r = wn4r * (wk1r - wk1i);
788        wk1i = wn4r * (wk1r + wk1i);
789        wk1r = x0r;
790        wk3r = wk1r - 2 * wk2r * wk1i;
791        wk3i = 2 * wk2r * wk1r - wk1i;
792        for (j = k + m; j < l + (k + m); j += 2) {
793            j1 = j + l;
794            j2 = j1 + l;
795            j3 = j2 + l;
796            x0r = a[j] + a[j1];
797            x0i = a[j + 1] + a[j1 + 1];
798            x1r = a[j] - a[j1];
799            x1i = a[j + 1] - a[j1 + 1];
800            x2r = a[j2] + a[j3];
801            x2i = a[j2 + 1] + a[j3 + 1];
802            x3r = a[j2] - a[j3];
803            x3i = a[j2 + 1] - a[j3 + 1];
804            a[j] = x0r + x2r;
805            a[j + 1] = x0i + x2i;
806            x0r -= x2r;
807            x0i -= x2i;
808            a[j2] = -wk2i * x0r - wk2r * x0i;
809            a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
810            x0r = x1r - x3i;
811            x0i = x1i + x3r;
812            a[j1] = wk1r * x0r - wk1i * x0i;
813            a[j1 + 1] = wk1r * x0i + wk1i * x0r;
814            x0r = x1r + x3i;
815            x0i = x1i - x3r;
816            a[j3] = wk3r * x0r - wk3i * x0i;
817            a[j3 + 1] = wk3r * x0i + wk3i * x0r;
818        }
819    }
820}
821
822
823
824void 	M_rftfsub(int n, double *a)
825{
826    int i, i0, j, k;
827    double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
828
829    ec = 2 * MM_PI_2 / n;
830    wkr = 0;
831    wki = 0;
832    wdi = cos(ec);
833    wdr = sin(ec);
834    wdi *= wdr;
835    wdr *= wdr;
836    w1r = 1 - 2 * wdr;
837    w1i = 2 * wdi;
838    ss = 2 * w1i;
839    i = n >> 1;
840    while (1) {
841        i0 = i - 4 * RDFT_LOOP_DIV;
842        if (i0 < 4) {
843            i0 = 4;
844        }
845        for (j = i - 4; j >= i0; j -= 4) {
846            k = n - j;
847            xr = a[j + 2] - a[k - 2];
848            xi = a[j + 3] + a[k - 1];
849            yr = wdr * xr - wdi * xi;
850            yi = wdr * xi + wdi * xr;
851            a[j + 2] -= yr;
852            a[j + 3] -= yi;
853            a[k - 2] += yr;
854            a[k - 1] -= yi;
855            wkr += ss * wdi;
856            wki += ss * (0.5 - wdr);
857            xr = a[j] - a[k];
858            xi = a[j + 1] + a[k + 1];
859            yr = wkr * xr - wki * xi;
860            yi = wkr * xi + wki * xr;
861            a[j] -= yr;
862            a[j + 1] -= yi;
863            a[k] += yr;
864            a[k + 1] -= yi;
865            wdr += ss * wki;
866            wdi += ss * (0.5 - wkr);
867        }
868        if (i0 == 4) {
869            break;
870        }
871        wkr = 0.5 * sin(ec * i0);
872        wki = 0.5 * cos(ec * i0);
873        wdr = 0.5 - (wkr * w1r - wki * w1i);
874        wdi = wkr * w1i + wki * w1r;
875        wkr = 0.5 - wkr;
876        i = i0;
877    }
878    xr = a[2] - a[n - 2];
879    xi = a[3] + a[n - 1];
880    yr = wdr * xr - wdi * xi;
881    yi = wdr * xi + wdi * xr;
882    a[2] -= yr;
883    a[3] -= yi;
884    a[n - 2] += yr;
885    a[n - 1] -= yi;
886}
887
888
889
890void 	M_rftbsub(int n, double *a)
891{
892    int i, i0, j, k;
893    double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
894
895    ec = 2 * MM_PI_2 / n;
896    wkr = 0;
897    wki = 0;
898    wdi = cos(ec);
899    wdr = sin(ec);
900    wdi *= wdr;
901    wdr *= wdr;
902    w1r = 1 - 2 * wdr;
903    w1i = 2 * wdi;
904    ss = 2 * w1i;
905    i = n >> 1;
906    a[i + 1] = -a[i + 1];
907    while (1) {
908        i0 = i - 4 * RDFT_LOOP_DIV;
909        if (i0 < 4) {
910            i0 = 4;
911        }
912        for (j = i - 4; j >= i0; j -= 4) {
913            k = n - j;
914            xr = a[j + 2] - a[k - 2];
915            xi = a[j + 3] + a[k - 1];
916            yr = wdr * xr + wdi * xi;
917            yi = wdr * xi - wdi * xr;
918            a[j + 2] -= yr;
919            a[j + 3] = yi - a[j + 3];
920            a[k - 2] += yr;
921            a[k - 1] = yi - a[k - 1];
922            wkr += ss * wdi;
923            wki += ss * (0.5 - wdr);
924            xr = a[j] - a[k];
925            xi = a[j + 1] + a[k + 1];
926            yr = wkr * xr + wki * xi;
927            yi = wkr * xi - wki * xr;
928            a[j] -= yr;
929            a[j + 1] = yi - a[j + 1];
930            a[k] += yr;
931            a[k + 1] = yi - a[k + 1];
932            wdr += ss * wki;
933            wdi += ss * (0.5 - wkr);
934        }
935        if (i0 == 4) {
936            break;
937        }
938        wkr = 0.5 * sin(ec * i0);
939        wki = 0.5 * cos(ec * i0);
940        wdr = 0.5 - (wkr * w1r - wki * w1i);
941        wdi = wkr * w1i + wki * w1r;
942        wkr = 0.5 - wkr;
943        i = i0;
944    }
945    xr = a[2] - a[n - 2];
946    xi = a[3] + a[n - 1];
947    yr = wdr * xr + wdi * xi;
948    yi = wdr * xi - wdi * xr;
949    a[2] -= yr;
950    a[3] = yi - a[3];
951    a[n - 2] += yr;
952    a[n - 1] = yi - a[n - 1];
953    a[1] = -a[1];
954}
955
956