1/*
2 * principal component analysis (PCA)
3 * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
23 * @file libavutil/pca.c
24 * principal component analysis (PCA)
25 */
26
27#include "common.h"
28#include "pca.h"
29
30typedef struct PCA{
31    int count;
32    int n;
33    double *covariance;
34    double *mean;
35}PCA;
36
37PCA *ff_pca_init(int n){
38    PCA *pca;
39    if(n<=0)
40        return NULL;
41
42    pca= av_mallocz(sizeof(PCA));
43    pca->n= n;
44    pca->count=0;
45    pca->covariance= av_mallocz(sizeof(double)*n*n);
46    pca->mean= av_mallocz(sizeof(double)*n);
47
48    return pca;
49}
50
51void ff_pca_free(PCA *pca){
52    av_freep(&pca->covariance);
53    av_freep(&pca->mean);
54    av_free(pca);
55}
56
57void ff_pca_add(PCA *pca, double *v){
58    int i, j;
59    const int n= pca->n;
60
61    for(i=0; i<n; i++){
62        pca->mean[i] += v[i];
63        for(j=i; j<n; j++)
64            pca->covariance[j + i*n] += v[i]*v[j];
65    }
66    pca->count++;
67}
68
69int ff_pca(PCA *pca, double *eigenvector, double *eigenvalue){
70    int i, j, pass;
71    int k=0;
72    const int n= pca->n;
73    double z[n];
74
75    memset(eigenvector, 0, sizeof(double)*n*n);
76
77    for(j=0; j<n; j++){
78        pca->mean[j] /= pca->count;
79        eigenvector[j + j*n] = 1.0;
80        for(i=0; i<=j; i++){
81            pca->covariance[j + i*n] /= pca->count;
82            pca->covariance[j + i*n] -= pca->mean[i] * pca->mean[j];
83            pca->covariance[i + j*n] = pca->covariance[j + i*n];
84        }
85        eigenvalue[j]= pca->covariance[j + j*n];
86        z[j]= 0;
87    }
88
89    for(pass=0; pass < 50; pass++){
90        double sum=0;
91
92        for(i=0; i<n; i++)
93            for(j=i+1; j<n; j++)
94                sum += fabs(pca->covariance[j + i*n]);
95
96        if(sum == 0){
97            for(i=0; i<n; i++){
98                double maxvalue= -1;
99                for(j=i; j<n; j++){
100                    if(eigenvalue[j] > maxvalue){
101                        maxvalue= eigenvalue[j];
102                        k= j;
103                    }
104                }
105                eigenvalue[k]= eigenvalue[i];
106                eigenvalue[i]= maxvalue;
107                for(j=0; j<n; j++){
108                    double tmp= eigenvector[k + j*n];
109                    eigenvector[k + j*n]= eigenvector[i + j*n];
110                    eigenvector[i + j*n]= tmp;
111                }
112            }
113            return pass;
114        }
115
116        for(i=0; i<n; i++){
117            for(j=i+1; j<n; j++){
118                double covar= pca->covariance[j + i*n];
119                double t,c,s,tau,theta, h;
120
121                if(pass < 3 && fabs(covar) < sum / (5*n*n)) //FIXME why pass < 3
122                    continue;
123                if(fabs(covar) == 0.0) //FIXME should not be needed
124                    continue;
125                if(pass >=3 && fabs((eigenvalue[j]+z[j])/covar) > (1LL<<32) && fabs((eigenvalue[i]+z[i])/covar) > (1LL<<32)){
126                    pca->covariance[j + i*n]=0.0;
127                    continue;
128                }
129
130                h= (eigenvalue[j]+z[j]) - (eigenvalue[i]+z[i]);
131                theta=0.5*h/covar;
132                t=1.0/(fabs(theta)+sqrt(1.0+theta*theta));
133                if(theta < 0.0) t = -t;
134
135                c=1.0/sqrt(1+t*t);
136                s=t*c;
137                tau=s/(1.0+c);
138                z[i] -= t*covar;
139                z[j] += t*covar;
140
141#define ROTATE(a,i,j,k,l) {\
142    double g=a[j + i*n];\
143    double h=a[l + k*n];\
144    a[j + i*n]=g-s*(h+g*tau);\
145    a[l + k*n]=h+s*(g-h*tau); }
146                for(k=0; k<n; k++) {
147                    if(k!=i && k!=j){
148                        ROTATE(pca->covariance,FFMIN(k,i),FFMAX(k,i),FFMIN(k,j),FFMAX(k,j))
149                    }
150                    ROTATE(eigenvector,k,i,k,j)
151                }
152                pca->covariance[j + i*n]=0.0;
153            }
154        }
155        for (i=0; i<n; i++) {
156            eigenvalue[i] += z[i];
157            z[i]=0.0;
158        }
159    }
160
161    return -1;
162}
163
164#ifdef TEST
165
166#undef printf
167#undef random
168#include <stdio.h>
169#include <stdlib.h>
170
171int main(void){
172    PCA *pca;
173    int i, j, k;
174#define LEN 8
175    double eigenvector[LEN*LEN];
176    double eigenvalue[LEN];
177
178    pca= ff_pca_init(LEN);
179
180    for(i=0; i<9000000; i++){
181        double v[2*LEN+100];
182        double sum=0;
183        int pos= random()%LEN;
184        int v2= (random()%101) - 50;
185        v[0]= (random()%101) - 50;
186        for(j=1; j<8; j++){
187            if(j<=pos) v[j]= v[0];
188            else       v[j]= v2;
189            sum += v[j];
190        }
191/*        for(j=0; j<LEN; j++){
192            v[j] -= v[pos];
193        }*/
194//        sum += random()%10;
195/*        for(j=0; j<LEN; j++){
196            v[j] -= sum/LEN;
197        }*/
198//        lbt1(v+100,v+100,LEN);
199        ff_pca_add(pca, v);
200    }
201
202
203    ff_pca(pca, eigenvector, eigenvalue);
204    for(i=0; i<LEN; i++){
205        pca->count= 1;
206        pca->mean[i]= 0;
207
208//        (0.5^|x|)^2 = 0.5^2|x| = 0.25^|x|
209
210
211//        pca.covariance[i + i*LEN]= pow(0.5, fabs
212        for(j=i; j<LEN; j++){
213            printf("%f ", pca->covariance[i + j*LEN]);
214        }
215        printf("\n");
216    }
217
218#if 1
219    for(i=0; i<LEN; i++){
220        double v[LEN];
221        double error=0;
222        memset(v, 0, sizeof(v));
223        for(j=0; j<LEN; j++){
224            for(k=0; k<LEN; k++){
225                v[j] += pca->covariance[FFMIN(k,j) + FFMAX(k,j)*LEN] * eigenvector[i + k*LEN];
226            }
227            v[j] /= eigenvalue[i];
228            error += fabs(v[j] - eigenvector[i + j*LEN]);
229        }
230        printf("%f ", error);
231    }
232    printf("\n");
233#endif
234    for(i=0; i<LEN; i++){
235        for(j=0; j<LEN; j++){
236            printf("%9.6f ", eigenvector[i + j*LEN]);
237        }
238        printf("  %9.1f %f\n", eigenvalue[i], eigenvalue[i]/eigenvalue[0]);
239    }
240
241    return 0;
242}
243#endif
244