1/********************************************************************
2 *                                                                  *
3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7 *                                                                  *
8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009             *
9 * by the Xiph.Org Foundation http://www.xiph.org/                  *
10 *                                                                  *
11 ********************************************************************
12
13 function: psychoacoustics not including preecho
14 last mod: $Id: psy.c 16227 2009-07-08 06:58:46Z xiphmont $
15
16 ********************************************************************/
17
18#include <stdlib.h>
19#include <math.h>
20#include <string.h>
21#include "vorbis/codec.h"
22#include "codec_internal.h"
23
24#include "masking.h"
25#include "psy.h"
26#include "os.h"
27#include "lpc.h"
28#include "smallft.h"
29#include "scales.h"
30#include "misc.h"
31
32#define NEGINF -9999.f
33static const double stereo_threshholds[]={0.0, .5, 1.0, 1.5, 2.5, 4.5, 8.5, 16.5, 9e10};
34static const double stereo_threshholds_limited[]={0.0, .5, 1.0, 1.5, 2.0, 2.5, 4.5, 8.5, 9e10};
35
36vorbis_look_psy_global *_vp_global_look(vorbis_info *vi){
37  codec_setup_info *ci=vi->codec_setup;
38  vorbis_info_psy_global *gi=&ci->psy_g_param;
39  vorbis_look_psy_global *look=_ogg_calloc(1,sizeof(*look));
40
41  look->channels=vi->channels;
42
43  look->ampmax=-9999.;
44  look->gi=gi;
45  return(look);
46}
47
48void _vp_global_free(vorbis_look_psy_global *look){
49  if(look){
50    memset(look,0,sizeof(*look));
51    _ogg_free(look);
52  }
53}
54
55void _vi_gpsy_free(vorbis_info_psy_global *i){
56  if(i){
57    memset(i,0,sizeof(*i));
58    _ogg_free(i);
59  }
60}
61
62void _vi_psy_free(vorbis_info_psy *i){
63  if(i){
64    memset(i,0,sizeof(*i));
65    _ogg_free(i);
66  }
67}
68
69static void min_curve(float *c,
70                       float *c2){
71  int i;
72  for(i=0;i<EHMER_MAX;i++)if(c2[i]<c[i])c[i]=c2[i];
73}
74static void max_curve(float *c,
75                       float *c2){
76  int i;
77  for(i=0;i<EHMER_MAX;i++)if(c2[i]>c[i])c[i]=c2[i];
78}
79
80static void attenuate_curve(float *c,float att){
81  int i;
82  for(i=0;i<EHMER_MAX;i++)
83    c[i]+=att;
84}
85
86static float ***setup_tone_curves(float curveatt_dB[P_BANDS],float binHz,int n,
87                                  float center_boost, float center_decay_rate){
88  int i,j,k,m;
89  float ath[EHMER_MAX];
90  float workc[P_BANDS][P_LEVELS][EHMER_MAX];
91  float athc[P_LEVELS][EHMER_MAX];
92  float *brute_buffer=alloca(n*sizeof(*brute_buffer));
93
94  float ***ret=_ogg_malloc(sizeof(*ret)*P_BANDS);
95
96  memset(workc,0,sizeof(workc));
97
98  for(i=0;i<P_BANDS;i++){
99    /* we add back in the ATH to avoid low level curves falling off to
100       -infinity and unnecessarily cutting off high level curves in the
101       curve limiting (last step). */
102
103    /* A half-band's settings must be valid over the whole band, and
104       it's better to mask too little than too much */
105    int ath_offset=i*4;
106    for(j=0;j<EHMER_MAX;j++){
107      float min=999.;
108      for(k=0;k<4;k++)
109        if(j+k+ath_offset<MAX_ATH){
110          if(min>ATH[j+k+ath_offset])min=ATH[j+k+ath_offset];
111        }else{
112          if(min>ATH[MAX_ATH-1])min=ATH[MAX_ATH-1];
113        }
114      ath[j]=min;
115    }
116
117    /* copy curves into working space, replicate the 50dB curve to 30
118       and 40, replicate the 100dB curve to 110 */
119    for(j=0;j<6;j++)
120      memcpy(workc[i][j+2],tonemasks[i][j],EHMER_MAX*sizeof(*tonemasks[i][j]));
121    memcpy(workc[i][0],tonemasks[i][0],EHMER_MAX*sizeof(*tonemasks[i][0]));
122    memcpy(workc[i][1],tonemasks[i][0],EHMER_MAX*sizeof(*tonemasks[i][0]));
123
124    /* apply centered curve boost/decay */
125    for(j=0;j<P_LEVELS;j++){
126      for(k=0;k<EHMER_MAX;k++){
127        float adj=center_boost+abs(EHMER_OFFSET-k)*center_decay_rate;
128        if(adj<0. && center_boost>0)adj=0.;
129        if(adj>0. && center_boost<0)adj=0.;
130        workc[i][j][k]+=adj;
131      }
132    }
133
134    /* normalize curves so the driving amplitude is 0dB */
135    /* make temp curves with the ATH overlayed */
136    for(j=0;j<P_LEVELS;j++){
137      attenuate_curve(workc[i][j],curveatt_dB[i]+100.-(j<2?2:j)*10.-P_LEVEL_0);
138      memcpy(athc[j],ath,EHMER_MAX*sizeof(**athc));
139      attenuate_curve(athc[j],+100.-j*10.f-P_LEVEL_0);
140      max_curve(athc[j],workc[i][j]);
141    }
142
143    /* Now limit the louder curves.
144
145       the idea is this: We don't know what the playback attenuation
146       will be; 0dB SL moves every time the user twiddles the volume
147       knob. So that means we have to use a single 'most pessimal' curve
148       for all masking amplitudes, right?  Wrong.  The *loudest* sound
149       can be in (we assume) a range of ...+100dB] SL.  However, sounds
150       20dB down will be in a range ...+80], 40dB down is from ...+60],
151       etc... */
152
153    for(j=1;j<P_LEVELS;j++){
154      min_curve(athc[j],athc[j-1]);
155      min_curve(workc[i][j],athc[j]);
156    }
157  }
158
159  for(i=0;i<P_BANDS;i++){
160    int hi_curve,lo_curve,bin;
161    ret[i]=_ogg_malloc(sizeof(**ret)*P_LEVELS);
162
163    /* low frequency curves are measured with greater resolution than
164       the MDCT/FFT will actually give us; we want the curve applied
165       to the tone data to be pessimistic and thus apply the minimum
166       masking possible for a given bin.  That means that a single bin
167       could span more than one octave and that the curve will be a
168       composite of multiple octaves.  It also may mean that a single
169       bin may span > an eighth of an octave and that the eighth
170       octave values may also be composited. */
171
172    /* which octave curves will we be compositing? */
173    bin=floor(fromOC(i*.5)/binHz);
174    lo_curve=  ceil(toOC(bin*binHz+1)*2);
175    hi_curve=  floor(toOC((bin+1)*binHz)*2);
176    if(lo_curve>i)lo_curve=i;
177    if(lo_curve<0)lo_curve=0;
178    if(hi_curve>=P_BANDS)hi_curve=P_BANDS-1;
179
180    for(m=0;m<P_LEVELS;m++){
181      ret[i][m]=_ogg_malloc(sizeof(***ret)*(EHMER_MAX+2));
182
183      for(j=0;j<n;j++)brute_buffer[j]=999.;
184
185      /* render the curve into bins, then pull values back into curve.
186         The point is that any inherent subsampling aliasing results in
187         a safe minimum */
188      for(k=lo_curve;k<=hi_curve;k++){
189        int l=0;
190
191        for(j=0;j<EHMER_MAX;j++){
192          int lo_bin= fromOC(j*.125+k*.5-2.0625)/binHz;
193          int hi_bin= fromOC(j*.125+k*.5-1.9375)/binHz+1;
194
195          if(lo_bin<0)lo_bin=0;
196          if(lo_bin>n)lo_bin=n;
197          if(lo_bin<l)l=lo_bin;
198          if(hi_bin<0)hi_bin=0;
199          if(hi_bin>n)hi_bin=n;
200
201          for(;l<hi_bin && l<n;l++)
202            if(brute_buffer[l]>workc[k][m][j])
203              brute_buffer[l]=workc[k][m][j];
204        }
205
206        for(;l<n;l++)
207          if(brute_buffer[l]>workc[k][m][EHMER_MAX-1])
208            brute_buffer[l]=workc[k][m][EHMER_MAX-1];
209
210      }
211
212      /* be equally paranoid about being valid up to next half ocatve */
213      if(i+1<P_BANDS){
214        int l=0;
215        k=i+1;
216        for(j=0;j<EHMER_MAX;j++){
217          int lo_bin= fromOC(j*.125+i*.5-2.0625)/binHz;
218          int hi_bin= fromOC(j*.125+i*.5-1.9375)/binHz+1;
219
220          if(lo_bin<0)lo_bin=0;
221          if(lo_bin>n)lo_bin=n;
222          if(lo_bin<l)l=lo_bin;
223          if(hi_bin<0)hi_bin=0;
224          if(hi_bin>n)hi_bin=n;
225
226          for(;l<hi_bin && l<n;l++)
227            if(brute_buffer[l]>workc[k][m][j])
228              brute_buffer[l]=workc[k][m][j];
229        }
230
231        for(;l<n;l++)
232          if(brute_buffer[l]>workc[k][m][EHMER_MAX-1])
233            brute_buffer[l]=workc[k][m][EHMER_MAX-1];
234
235      }
236
237
238      for(j=0;j<EHMER_MAX;j++){
239        int bin=fromOC(j*.125+i*.5-2.)/binHz;
240        if(bin<0){
241          ret[i][m][j+2]=-999.;
242        }else{
243          if(bin>=n){
244            ret[i][m][j+2]=-999.;
245          }else{
246            ret[i][m][j+2]=brute_buffer[bin];
247          }
248        }
249      }
250
251      /* add fenceposts */
252      for(j=0;j<EHMER_OFFSET;j++)
253        if(ret[i][m][j+2]>-200.f)break;
254      ret[i][m][0]=j;
255
256      for(j=EHMER_MAX-1;j>EHMER_OFFSET+1;j--)
257        if(ret[i][m][j+2]>-200.f)
258          break;
259      ret[i][m][1]=j;
260
261    }
262  }
263
264  return(ret);
265}
266
267void _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,
268                  vorbis_info_psy_global *gi,int n,long rate){
269  long i,j,lo=-99,hi=1;
270  long maxoc;
271  memset(p,0,sizeof(*p));
272
273  p->eighth_octave_lines=gi->eighth_octave_lines;
274  p->shiftoc=rint(log(gi->eighth_octave_lines*8.f)/log(2.f))-1;
275
276  p->firstoc=toOC(.25f*rate*.5/n)*(1<<(p->shiftoc+1))-gi->eighth_octave_lines;
277  maxoc=toOC((n+.25f)*rate*.5/n)*(1<<(p->shiftoc+1))+.5f;
278  p->total_octave_lines=maxoc-p->firstoc+1;
279  p->ath=_ogg_malloc(n*sizeof(*p->ath));
280
281  p->octave=_ogg_malloc(n*sizeof(*p->octave));
282  p->bark=_ogg_malloc(n*sizeof(*p->bark));
283  p->vi=vi;
284  p->n=n;
285  p->rate=rate;
286
287  /* AoTuV HF weighting */
288  p->m_val = 1.;
289  if(rate < 26000) p->m_val = 0;
290  else if(rate < 38000) p->m_val = .94;   /* 32kHz */
291  else if(rate > 46000) p->m_val = 1.275; /* 48kHz */
292
293  /* set up the lookups for a given blocksize and sample rate */
294
295  for(i=0,j=0;i<MAX_ATH-1;i++){
296    int endpos=rint(fromOC((i+1)*.125-2.)*2*n/rate);
297    float base=ATH[i];
298    if(j<endpos){
299      float delta=(ATH[i+1]-base)/(endpos-j);
300      for(;j<endpos && j<n;j++){
301        p->ath[j]=base+100.;
302        base+=delta;
303      }
304    }
305  }
306
307  for(;j<n;j++){
308    p->ath[j]=p->ath[j-1];
309  }
310
311  for(i=0;i<n;i++){
312    float bark=toBARK(rate/(2*n)*i);
313
314    for(;lo+vi->noisewindowlomin<i &&
315          toBARK(rate/(2*n)*lo)<(bark-vi->noisewindowlo);lo++);
316
317    for(;hi<=n && (hi<i+vi->noisewindowhimin ||
318          toBARK(rate/(2*n)*hi)<(bark+vi->noisewindowhi));hi++);
319
320    p->bark[i]=((lo-1)<<16)+(hi-1);
321
322  }
323
324  for(i=0;i<n;i++)
325    p->octave[i]=toOC((i+.25f)*.5*rate/n)*(1<<(p->shiftoc+1))+.5f;
326
327  p->tonecurves=setup_tone_curves(vi->toneatt,rate*.5/n,n,
328                                  vi->tone_centerboost,vi->tone_decay);
329
330  /* set up rolling noise median */
331  p->noiseoffset=_ogg_malloc(P_NOISECURVES*sizeof(*p->noiseoffset));
332  for(i=0;i<P_NOISECURVES;i++)
333    p->noiseoffset[i]=_ogg_malloc(n*sizeof(**p->noiseoffset));
334
335  for(i=0;i<n;i++){
336    float halfoc=toOC((i+.5)*rate/(2.*n))*2.;
337    int inthalfoc;
338    float del;
339
340    if(halfoc<0)halfoc=0;
341    if(halfoc>=P_BANDS-1)halfoc=P_BANDS-1;
342    inthalfoc=(int)halfoc;
343    del=halfoc-inthalfoc;
344
345    for(j=0;j<P_NOISECURVES;j++)
346      p->noiseoffset[j][i]=
347        p->vi->noiseoff[j][inthalfoc]*(1.-del) +
348        p->vi->noiseoff[j][inthalfoc+1]*del;
349
350  }
351#if 0
352  {
353    static int ls=0;
354    _analysis_output_always("noiseoff0",ls,p->noiseoffset[0],n,1,0,0);
355    _analysis_output_always("noiseoff1",ls,p->noiseoffset[1],n,1,0,0);
356    _analysis_output_always("noiseoff2",ls++,p->noiseoffset[2],n,1,0,0);
357  }
358#endif
359}
360
361void _vp_psy_clear(vorbis_look_psy *p){
362  int i,j;
363  if(p){
364    if(p->ath)_ogg_free(p->ath);
365    if(p->octave)_ogg_free(p->octave);
366    if(p->bark)_ogg_free(p->bark);
367    if(p->tonecurves){
368      for(i=0;i<P_BANDS;i++){
369        for(j=0;j<P_LEVELS;j++){
370          _ogg_free(p->tonecurves[i][j]);
371        }
372        _ogg_free(p->tonecurves[i]);
373      }
374      _ogg_free(p->tonecurves);
375    }
376    if(p->noiseoffset){
377      for(i=0;i<P_NOISECURVES;i++){
378        _ogg_free(p->noiseoffset[i]);
379      }
380      _ogg_free(p->noiseoffset);
381    }
382    memset(p,0,sizeof(*p));
383  }
384}
385
386/* octave/(8*eighth_octave_lines) x scale and dB y scale */
387static void seed_curve(float *seed,
388                       const float **curves,
389                       float amp,
390                       int oc, int n,
391                       int linesper,float dBoffset){
392  int i,post1;
393  int seedptr;
394  const float *posts,*curve;
395
396  int choice=(int)((amp+dBoffset-P_LEVEL_0)*.1f);
397  choice=max(choice,0);
398  choice=min(choice,P_LEVELS-1);
399  posts=curves[choice];
400  curve=posts+2;
401  post1=(int)posts[1];
402  seedptr=oc+(posts[0]-EHMER_OFFSET)*linesper-(linesper>>1);
403
404  for(i=posts[0];i<post1;i++){
405    if(seedptr>0){
406      float lin=amp+curve[i];
407      if(seed[seedptr]<lin)seed[seedptr]=lin;
408    }
409    seedptr+=linesper;
410    if(seedptr>=n)break;
411  }
412}
413
414static void seed_loop(vorbis_look_psy *p,
415                      const float ***curves,
416                      const float *f,
417                      const float *flr,
418                      float *seed,
419                      float specmax){
420  vorbis_info_psy *vi=p->vi;
421  long n=p->n,i;
422  float dBoffset=vi->max_curve_dB-specmax;
423
424  /* prime the working vector with peak values */
425
426  for(i=0;i<n;i++){
427    float max=f[i];
428    long oc=p->octave[i];
429    while(i+1<n && p->octave[i+1]==oc){
430      i++;
431      if(f[i]>max)max=f[i];
432    }
433
434    if(max+6.f>flr[i]){
435      oc=oc>>p->shiftoc;
436
437      if(oc>=P_BANDS)oc=P_BANDS-1;
438      if(oc<0)oc=0;
439
440      seed_curve(seed,
441                 curves[oc],
442                 max,
443                 p->octave[i]-p->firstoc,
444                 p->total_octave_lines,
445                 p->eighth_octave_lines,
446                 dBoffset);
447    }
448  }
449}
450
451static void seed_chase(float *seeds, int linesper, long n){
452  long  *posstack=alloca(n*sizeof(*posstack));
453  float *ampstack=alloca(n*sizeof(*ampstack));
454  long   stack=0;
455  long   pos=0;
456  long   i;
457
458  for(i=0;i<n;i++){
459    if(stack<2){
460      posstack[stack]=i;
461      ampstack[stack++]=seeds[i];
462    }else{
463      while(1){
464        if(seeds[i]<ampstack[stack-1]){
465          posstack[stack]=i;
466          ampstack[stack++]=seeds[i];
467          break;
468        }else{
469          if(i<posstack[stack-1]+linesper){
470            if(stack>1 && ampstack[stack-1]<=ampstack[stack-2] &&
471               i<posstack[stack-2]+linesper){
472              /* we completely overlap, making stack-1 irrelevant.  pop it */
473              stack--;
474              continue;
475            }
476          }
477          posstack[stack]=i;
478          ampstack[stack++]=seeds[i];
479          break;
480
481        }
482      }
483    }
484  }
485
486  /* the stack now contains only the positions that are relevant. Scan
487     'em straight through */
488
489  for(i=0;i<stack;i++){
490    long endpos;
491    if(i<stack-1 && ampstack[i+1]>ampstack[i]){
492      endpos=posstack[i+1];
493    }else{
494      endpos=posstack[i]+linesper+1; /* +1 is important, else bin 0 is
495                                        discarded in short frames */
496    }
497    if(endpos>n)endpos=n;
498    for(;pos<endpos;pos++)
499      seeds[pos]=ampstack[i];
500  }
501
502  /* there.  Linear time.  I now remember this was on a problem set I
503     had in Grad Skool... I didn't solve it at the time ;-) */
504
505}
506
507/* bleaugh, this is more complicated than it needs to be */
508#include<stdio.h>
509static void max_seeds(vorbis_look_psy *p,
510                      float *seed,
511                      float *flr){
512  long   n=p->total_octave_lines;
513  int    linesper=p->eighth_octave_lines;
514  long   linpos=0;
515  long   pos;
516
517  seed_chase(seed,linesper,n); /* for masking */
518
519  pos=p->octave[0]-p->firstoc-(linesper>>1);
520
521  while(linpos+1<p->n){
522    float minV=seed[pos];
523    long end=((p->octave[linpos]+p->octave[linpos+1])>>1)-p->firstoc;
524    if(minV>p->vi->tone_abs_limit)minV=p->vi->tone_abs_limit;
525    while(pos+1<=end){
526      pos++;
527      if((seed[pos]>NEGINF && seed[pos]<minV) || minV==NEGINF)
528        minV=seed[pos];
529    }
530
531    end=pos+p->firstoc;
532    for(;linpos<p->n && p->octave[linpos]<=end;linpos++)
533      if(flr[linpos]<minV)flr[linpos]=minV;
534  }
535
536  {
537    float minV=seed[p->total_octave_lines-1];
538    for(;linpos<p->n;linpos++)
539      if(flr[linpos]<minV)flr[linpos]=minV;
540  }
541
542}
543
544static void bark_noise_hybridmp(int n,const long *b,
545                                const float *f,
546                                float *noise,
547                                const float offset,
548                                const int fixed){
549
550  float *N=alloca(n*sizeof(*N));
551  float *X=alloca(n*sizeof(*N));
552  float *XX=alloca(n*sizeof(*N));
553  float *Y=alloca(n*sizeof(*N));
554  float *XY=alloca(n*sizeof(*N));
555
556  float tN, tX, tXX, tY, tXY;
557  int i;
558
559  int lo, hi;
560  float R=0.f;
561  float A=0.f;
562  float B=0.f;
563  float D=1.f;
564  float w, x, y;
565
566  tN = tX = tXX = tY = tXY = 0.f;
567
568  y = f[0] + offset;
569  if (y < 1.f) y = 1.f;
570
571  w = y * y * .5;
572
573  tN += w;
574  tX += w;
575  tY += w * y;
576
577  N[0] = tN;
578  X[0] = tX;
579  XX[0] = tXX;
580  Y[0] = tY;
581  XY[0] = tXY;
582
583  for (i = 1, x = 1.f; i < n; i++, x += 1.f) {
584
585    y = f[i] + offset;
586    if (y < 1.f) y = 1.f;
587
588    w = y * y;
589
590    tN += w;
591    tX += w * x;
592    tXX += w * x * x;
593    tY += w * y;
594    tXY += w * x * y;
595
596    N[i] = tN;
597    X[i] = tX;
598    XX[i] = tXX;
599    Y[i] = tY;
600    XY[i] = tXY;
601  }
602
603  for (i = 0, x = 0.f;; i++, x += 1.f) {
604
605    lo = b[i] >> 16;
606    if( lo>=0 ) break;
607    hi = b[i] & 0xffff;
608
609    tN = N[hi] + N[-lo];
610    tX = X[hi] - X[-lo];
611    tXX = XX[hi] + XX[-lo];
612    tY = Y[hi] + Y[-lo];
613    tXY = XY[hi] - XY[-lo];
614
615    A = tY * tXX - tX * tXY;
616    B = tN * tXY - tX * tY;
617    D = tN * tXX - tX * tX;
618    R = (A + x * B) / D;
619    if (R < 0.f)
620      R = 0.f;
621
622    noise[i] = R - offset;
623  }
624
625  for ( ;; i++, x += 1.f) {
626
627    lo = b[i] >> 16;
628    hi = b[i] & 0xffff;
629    if(hi>=n)break;
630
631    tN = N[hi] - N[lo];
632    tX = X[hi] - X[lo];
633    tXX = XX[hi] - XX[lo];
634    tY = Y[hi] - Y[lo];
635    tXY = XY[hi] - XY[lo];
636
637    A = tY * tXX - tX * tXY;
638    B = tN * tXY - tX * tY;
639    D = tN * tXX - tX * tX;
640    R = (A + x * B) / D;
641    if (R < 0.f) R = 0.f;
642
643    noise[i] = R - offset;
644  }
645  for ( ; i < n; i++, x += 1.f) {
646
647    R = (A + x * B) / D;
648    if (R < 0.f) R = 0.f;
649
650    noise[i] = R - offset;
651  }
652
653  if (fixed <= 0) return;
654
655  for (i = 0, x = 0.f;; i++, x += 1.f) {
656    hi = i + fixed / 2;
657    lo = hi - fixed;
658    if(lo>=0)break;
659
660    tN = N[hi] + N[-lo];
661    tX = X[hi] - X[-lo];
662    tXX = XX[hi] + XX[-lo];
663    tY = Y[hi] + Y[-lo];
664    tXY = XY[hi] - XY[-lo];
665
666
667    A = tY * tXX - tX * tXY;
668    B = tN * tXY - tX * tY;
669    D = tN * tXX - tX * tX;
670    R = (A + x * B) / D;
671
672    if (R - offset < noise[i]) noise[i] = R - offset;
673  }
674  for ( ;; i++, x += 1.f) {
675
676    hi = i + fixed / 2;
677    lo = hi - fixed;
678    if(hi>=n)break;
679
680    tN = N[hi] - N[lo];
681    tX = X[hi] - X[lo];
682    tXX = XX[hi] - XX[lo];
683    tY = Y[hi] - Y[lo];
684    tXY = XY[hi] - XY[lo];
685
686    A = tY * tXX - tX * tXY;
687    B = tN * tXY - tX * tY;
688    D = tN * tXX - tX * tX;
689    R = (A + x * B) / D;
690
691    if (R - offset < noise[i]) noise[i] = R - offset;
692  }
693  for ( ; i < n; i++, x += 1.f) {
694    R = (A + x * B) / D;
695    if (R - offset < noise[i]) noise[i] = R - offset;
696  }
697}
698
699static const float FLOOR1_fromdB_INV_LOOKUP[256]={
700  0.F, 8.81683e+06F, 8.27882e+06F, 7.77365e+06F,
701  7.29930e+06F, 6.85389e+06F, 6.43567e+06F, 6.04296e+06F,
702  5.67422e+06F, 5.32798e+06F, 5.00286e+06F, 4.69759e+06F,
703  4.41094e+06F, 4.14178e+06F, 3.88905e+06F, 3.65174e+06F,
704  3.42891e+06F, 3.21968e+06F, 3.02321e+06F, 2.83873e+06F,
705  2.66551e+06F, 2.50286e+06F, 2.35014e+06F, 2.20673e+06F,
706  2.07208e+06F, 1.94564e+06F, 1.82692e+06F, 1.71544e+06F,
707  1.61076e+06F, 1.51247e+06F, 1.42018e+06F, 1.33352e+06F,
708  1.25215e+06F, 1.17574e+06F, 1.10400e+06F, 1.03663e+06F,
709  973377.F, 913981.F, 858210.F, 805842.F,
710  756669.F, 710497.F, 667142.F, 626433.F,
711  588208.F, 552316.F, 518613.F, 486967.F,
712  457252.F, 429351.F, 403152.F, 378551.F,
713  355452.F, 333762.F, 313396.F, 294273.F,
714  276316.F, 259455.F, 243623.F, 228757.F,
715  214798.F, 201691.F, 189384.F, 177828.F,
716  166977.F, 156788.F, 147221.F, 138237.F,
717  129802.F, 121881.F, 114444.F, 107461.F,
718  100903.F, 94746.3F, 88964.9F, 83536.2F,
719  78438.8F, 73652.5F, 69158.2F, 64938.1F,
720  60975.6F, 57254.9F, 53761.2F, 50480.6F,
721  47400.3F, 44507.9F, 41792.0F, 39241.9F,
722  36847.3F, 34598.9F, 32487.7F, 30505.3F,
723  28643.8F, 26896.0F, 25254.8F, 23713.7F,
724  22266.7F, 20908.0F, 19632.2F, 18434.2F,
725  17309.4F, 16253.1F, 15261.4F, 14330.1F,
726  13455.7F, 12634.6F, 11863.7F, 11139.7F,
727  10460.0F, 9821.72F, 9222.39F, 8659.64F,
728  8131.23F, 7635.06F, 7169.17F, 6731.70F,
729  6320.93F, 5935.23F, 5573.06F, 5232.99F,
730  4913.67F, 4613.84F, 4332.30F, 4067.94F,
731  3819.72F, 3586.64F, 3367.78F, 3162.28F,
732  2969.31F, 2788.13F, 2617.99F, 2458.24F,
733  2308.24F, 2167.39F, 2035.14F, 1910.95F,
734  1794.35F, 1684.85F, 1582.04F, 1485.51F,
735  1394.86F, 1309.75F, 1229.83F, 1154.78F,
736  1084.32F, 1018.15F, 956.024F, 897.687F,
737  842.910F, 791.475F, 743.179F, 697.830F,
738  655.249F, 615.265F, 577.722F, 542.469F,
739  509.367F, 478.286F, 449.101F, 421.696F,
740  395.964F, 371.803F, 349.115F, 327.812F,
741  307.809F, 289.026F, 271.390F, 254.830F,
742  239.280F, 224.679F, 210.969F, 198.096F,
743  186.008F, 174.658F, 164.000F, 153.993F,
744  144.596F, 135.773F, 127.488F, 119.708F,
745  112.404F, 105.545F, 99.1046F, 93.0572F,
746  87.3788F, 82.0469F, 77.0404F, 72.3394F,
747  67.9252F, 63.7804F, 59.8885F, 56.2341F,
748  52.8027F, 49.5807F, 46.5553F, 43.7144F,
749  41.0470F, 38.5423F, 36.1904F, 33.9821F,
750  31.9085F, 29.9614F, 28.1332F, 26.4165F,
751  24.8045F, 23.2910F, 21.8697F, 20.5352F,
752  19.2822F, 18.1056F, 17.0008F, 15.9634F,
753  14.9893F, 14.0746F, 13.2158F, 12.4094F,
754  11.6522F, 10.9411F, 10.2735F, 9.64662F,
755  9.05798F, 8.50526F, 7.98626F, 7.49894F,
756  7.04135F, 6.61169F, 6.20824F, 5.82941F,
757  5.47370F, 5.13970F, 4.82607F, 4.53158F,
758  4.25507F, 3.99542F, 3.75162F, 3.52269F,
759  3.30774F, 3.10590F, 2.91638F, 2.73842F,
760  2.57132F, 2.41442F, 2.26709F, 2.12875F,
761  1.99885F, 1.87688F, 1.76236F, 1.65482F,
762  1.55384F, 1.45902F, 1.36999F, 1.28640F,
763  1.20790F, 1.13419F, 1.06499F, 1.F
764};
765
766void _vp_remove_floor(vorbis_look_psy *p,
767                      float *mdct,
768                      int *codedflr,
769                      float *residue,
770                      int sliding_lowpass){
771
772  int i,n=p->n;
773
774  if(sliding_lowpass>n)sliding_lowpass=n;
775
776  for(i=0;i<sliding_lowpass;i++){
777    residue[i]=
778      mdct[i]*FLOOR1_fromdB_INV_LOOKUP[codedflr[i]];
779  }
780
781  for(;i<n;i++)
782    residue[i]=0.;
783}
784
785void _vp_noisemask(vorbis_look_psy *p,
786                   float *logmdct,
787                   float *logmask){
788
789  int i,n=p->n;
790  float *work=alloca(n*sizeof(*work));
791
792  bark_noise_hybridmp(n,p->bark,logmdct,logmask,
793                      140.,-1);
794
795  for(i=0;i<n;i++)work[i]=logmdct[i]-logmask[i];
796
797  bark_noise_hybridmp(n,p->bark,work,logmask,0.,
798                      p->vi->noisewindowfixed);
799
800  for(i=0;i<n;i++)work[i]=logmdct[i]-work[i];
801
802#if 0
803  {
804    static int seq=0;
805
806    float work2[n];
807    for(i=0;i<n;i++){
808      work2[i]=logmask[i]+work[i];
809    }
810
811    if(seq&1)
812      _analysis_output("median2R",seq/2,work,n,1,0,0);
813    else
814      _analysis_output("median2L",seq/2,work,n,1,0,0);
815
816    if(seq&1)
817      _analysis_output("envelope2R",seq/2,work2,n,1,0,0);
818    else
819      _analysis_output("envelope2L",seq/2,work2,n,1,0,0);
820    seq++;
821  }
822#endif
823
824  for(i=0;i<n;i++){
825    int dB=logmask[i]+.5;
826    if(dB>=NOISE_COMPAND_LEVELS)dB=NOISE_COMPAND_LEVELS-1;
827    if(dB<0)dB=0;
828    logmask[i]= work[i]+p->vi->noisecompand[dB];
829  }
830
831}
832
833void _vp_tonemask(vorbis_look_psy *p,
834                  float *logfft,
835                  float *logmask,
836                  float global_specmax,
837                  float local_specmax){
838
839  int i,n=p->n;
840
841  float *seed=alloca(sizeof(*seed)*p->total_octave_lines);
842  float att=local_specmax+p->vi->ath_adjatt;
843  for(i=0;i<p->total_octave_lines;i++)seed[i]=NEGINF;
844
845  /* set the ATH (floating below localmax, not global max by a
846     specified att) */
847  if(att<p->vi->ath_maxatt)att=p->vi->ath_maxatt;
848
849  for(i=0;i<n;i++)
850    logmask[i]=p->ath[i]+att;
851
852  /* tone masking */
853  seed_loop(p,(const float ***)p->tonecurves,logfft,logmask,seed,global_specmax);
854  max_seeds(p,seed,logmask);
855
856}
857
858void _vp_offset_and_mix(vorbis_look_psy *p,
859                        float *noise,
860                        float *tone,
861                        int offset_select,
862                        float *logmask,
863                        float *mdct,
864                        float *logmdct){
865  int i,n=p->n;
866  float de, coeffi, cx;/* AoTuV */
867  float toneatt=p->vi->tone_masteratt[offset_select];
868
869  cx = p->m_val;
870
871  for(i=0;i<n;i++){
872    float val= noise[i]+p->noiseoffset[offset_select][i];
873    if(val>p->vi->noisemaxsupp)val=p->vi->noisemaxsupp;
874    logmask[i]=max(val,tone[i]+toneatt);
875
876
877    /* AoTuV */
878    /** @ M1 **
879        The following codes improve a noise problem.
880        A fundamental idea uses the value of masking and carries out
881        the relative compensation of the MDCT.
882        However, this code is not perfect and all noise problems cannot be solved.
883        by Aoyumi @ 2004/04/18
884    */
885
886    if(offset_select == 1) {
887      coeffi = -17.2;       /* coeffi is a -17.2dB threshold */
888      val = val - logmdct[i];  /* val == mdct line value relative to floor in dB */
889
890      if(val > coeffi){
891        /* mdct value is > -17.2 dB below floor */
892
893        de = 1.0-((val-coeffi)*0.005*cx);
894        /* pro-rated attenuation:
895           -0.00 dB boost if mdct value is -17.2dB (relative to floor)
896           -0.77 dB boost if mdct value is 0dB (relative to floor)
897           -1.64 dB boost if mdct value is +17.2dB (relative to floor)
898           etc... */
899
900        if(de < 0) de = 0.0001;
901      }else
902        /* mdct value is <= -17.2 dB below floor */
903
904        de = 1.0-((val-coeffi)*0.0003*cx);
905      /* pro-rated attenuation:
906         +0.00 dB atten if mdct value is -17.2dB (relative to floor)
907         +0.45 dB atten if mdct value is -34.4dB (relative to floor)
908         etc... */
909
910      mdct[i] *= de;
911
912    }
913  }
914}
915
916float _vp_ampmax_decay(float amp,vorbis_dsp_state *vd){
917  vorbis_info *vi=vd->vi;
918  codec_setup_info *ci=vi->codec_setup;
919  vorbis_info_psy_global *gi=&ci->psy_g_param;
920
921  int n=ci->blocksizes[vd->W]/2;
922  float secs=(float)n/vi->rate;
923
924  amp+=secs*gi->ampmax_att_per_sec;
925  if(amp<-9999)amp=-9999;
926  return(amp);
927}
928
929static void couple_lossless(float A, float B,
930                            float *qA, float *qB){
931  int test1=fabs(*qA)>fabs(*qB);
932  test1-= fabs(*qA)<fabs(*qB);
933
934  if(!test1)test1=((fabs(A)>fabs(B))<<1)-1;
935  if(test1==1){
936    *qB=(*qA>0.f?*qA-*qB:*qB-*qA);
937  }else{
938    float temp=*qB;
939    *qB=(*qB>0.f?*qA-*qB:*qB-*qA);
940    *qA=temp;
941  }
942
943  if(*qB>fabs(*qA)*1.9999f){
944    *qB= -fabs(*qA)*2.f;
945    *qA= -*qA;
946  }
947}
948
949static const float hypot_lookup[32]={
950  -0.009935, -0.011245, -0.012726, -0.014397,
951  -0.016282, -0.018407, -0.020800, -0.023494,
952  -0.026522, -0.029923, -0.033737, -0.038010,
953  -0.042787, -0.048121, -0.054064, -0.060671,
954  -0.068000, -0.076109, -0.085054, -0.094892,
955  -0.105675, -0.117451, -0.130260, -0.144134,
956  -0.159093, -0.175146, -0.192286, -0.210490,
957  -0.229718, -0.249913, -0.271001, -0.292893};
958
959static void precomputed_couple_point(float premag,
960                                     int floorA,int floorB,
961                                     float *mag, float *ang){
962
963  int test=(floorA>floorB)-1;
964  int offset=31-abs(floorA-floorB);
965  float floormag=hypot_lookup[((offset<0)-1)&offset]+1.f;
966
967  floormag*=FLOOR1_fromdB_INV_LOOKUP[(floorB&test)|(floorA&(~test))];
968
969  *mag=premag*floormag;
970  *ang=0.f;
971}
972
973/* just like below, this is currently set up to only do
974   single-step-depth coupling.  Otherwise, we'd have to do more
975   copying (which will be inevitable later) */
976
977/* doing the real circular magnitude calculation is audibly superior
978   to (A+B)/sqrt(2) */
979static float dipole_hypot(float a, float b){
980  if(a>0.){
981    if(b>0.)return sqrt(a*a+b*b);
982    if(a>-b)return sqrt(a*a-b*b);
983    return -sqrt(b*b-a*a);
984  }
985  if(b<0.)return -sqrt(a*a+b*b);
986  if(-a>b)return -sqrt(a*a-b*b);
987  return sqrt(b*b-a*a);
988}
989static float round_hypot(float a, float b){
990  if(a>0.){
991    if(b>0.)return sqrt(a*a+b*b);
992    if(a>-b)return sqrt(a*a+b*b);
993    return -sqrt(b*b+a*a);
994  }
995  if(b<0.)return -sqrt(a*a+b*b);
996  if(-a>b)return -sqrt(a*a+b*b);
997  return sqrt(b*b+a*a);
998}
999
1000/* revert to round hypot for now */
1001float **_vp_quantize_couple_memo(vorbis_block *vb,
1002                                 vorbis_info_psy_global *g,
1003                                 vorbis_look_psy *p,
1004                                 vorbis_info_mapping0 *vi,
1005                                 float **mdct){
1006
1007  int i,j,n=p->n;
1008  float **ret=_vorbis_block_alloc(vb,vi->coupling_steps*sizeof(*ret));
1009  int limit=g->coupling_pointlimit[p->vi->blockflag][PACKETBLOBS/2];
1010
1011  for(i=0;i<vi->coupling_steps;i++){
1012    float *mdctM=mdct[vi->coupling_mag[i]];
1013    float *mdctA=mdct[vi->coupling_ang[i]];
1014    ret[i]=_vorbis_block_alloc(vb,n*sizeof(**ret));
1015    for(j=0;j<limit;j++)
1016      ret[i][j]=dipole_hypot(mdctM[j],mdctA[j]);
1017    for(;j<n;j++)
1018      ret[i][j]=round_hypot(mdctM[j],mdctA[j]);
1019  }
1020
1021  return(ret);
1022}
1023
1024/* this is for per-channel noise normalization */
1025static int apsort(const void *a, const void *b){
1026  float f1=fabs(**(float**)a);
1027  float f2=fabs(**(float**)b);
1028  return (f1<f2)-(f1>f2);
1029}
1030
1031int **_vp_quantize_couple_sort(vorbis_block *vb,
1032                               vorbis_look_psy *p,
1033                               vorbis_info_mapping0 *vi,
1034                               float **mags){
1035
1036
1037  if(p->vi->normal_point_p){
1038    int i,j,k,n=p->n;
1039    int **ret=_vorbis_block_alloc(vb,vi->coupling_steps*sizeof(*ret));
1040    int partition=p->vi->normal_partition;
1041    float **work=alloca(sizeof(*work)*partition);
1042
1043    for(i=0;i<vi->coupling_steps;i++){
1044      ret[i]=_vorbis_block_alloc(vb,n*sizeof(**ret));
1045
1046      for(j=0;j<n;j+=partition){
1047        for(k=0;k<partition;k++)work[k]=mags[i]+k+j;
1048        qsort(work,partition,sizeof(*work),apsort);
1049        for(k=0;k<partition;k++)ret[i][k+j]=work[k]-mags[i];
1050      }
1051    }
1052    return(ret);
1053  }
1054  return(NULL);
1055}
1056
1057void _vp_noise_normalize_sort(vorbis_look_psy *p,
1058                              float *magnitudes,int *sortedindex){
1059  int i,j,n=p->n;
1060  vorbis_info_psy *vi=p->vi;
1061  int partition=vi->normal_partition;
1062  float **work=alloca(sizeof(*work)*partition);
1063  int start=vi->normal_start;
1064
1065  for(j=start;j<n;j+=partition){
1066    if(j+partition>n)partition=n-j;
1067    for(i=0;i<partition;i++)work[i]=magnitudes+i+j;
1068    qsort(work,partition,sizeof(*work),apsort);
1069    for(i=0;i<partition;i++){
1070      sortedindex[i+j-start]=work[i]-magnitudes;
1071    }
1072  }
1073}
1074
1075void _vp_noise_normalize(vorbis_look_psy *p,
1076                         float *in,float *out,int *sortedindex){
1077  int flag=0,i,j=0,n=p->n;
1078  vorbis_info_psy *vi=p->vi;
1079  int partition=vi->normal_partition;
1080  int start=vi->normal_start;
1081
1082  if(start>n)start=n;
1083
1084  if(vi->normal_channel_p){
1085    for(;j<start;j++)
1086      out[j]=rint(in[j]);
1087
1088    for(;j+partition<=n;j+=partition){
1089      float acc=0.;
1090      int k;
1091
1092      for(i=j;i<j+partition;i++)
1093        acc+=in[i]*in[i];
1094
1095      for(i=0;i<partition;i++){
1096        k=sortedindex[i+j-start];
1097
1098        if(in[k]*in[k]>=.25f){
1099          out[k]=rint(in[k]);
1100          acc-=in[k]*in[k];
1101          flag=1;
1102        }else{
1103          if(acc<vi->normal_thresh)break;
1104          out[k]=unitnorm(in[k]);
1105          acc-=1.;
1106        }
1107      }
1108
1109      for(;i<partition;i++){
1110        k=sortedindex[i+j-start];
1111        out[k]=0.;
1112      }
1113    }
1114  }
1115
1116  for(;j<n;j++)
1117    out[j]=rint(in[j]);
1118
1119}
1120
1121void _vp_couple(int blobno,
1122                vorbis_info_psy_global *g,
1123                vorbis_look_psy *p,
1124                vorbis_info_mapping0 *vi,
1125                float **res,
1126                float **mag_memo,
1127                int   **mag_sort,
1128                int   **ifloor,
1129                int   *nonzero,
1130                int  sliding_lowpass){
1131
1132  int i,j,k,n=p->n;
1133
1134  /* perform any requested channel coupling */
1135  /* point stereo can only be used in a first stage (in this encoder)
1136     because of the dependency on floor lookups */
1137  for(i=0;i<vi->coupling_steps;i++){
1138
1139    /* once we're doing multistage coupling in which a channel goes
1140       through more than one coupling step, the floor vector
1141       magnitudes will also have to be recalculated an propogated
1142       along with PCM.  Right now, we're not (that will wait until 5.1
1143       most likely), so the code isn't here yet. The memory management
1144       here is all assuming single depth couplings anyway. */
1145
1146    /* make sure coupling a zero and a nonzero channel results in two
1147       nonzero channels. */
1148    if(nonzero[vi->coupling_mag[i]] ||
1149       nonzero[vi->coupling_ang[i]]){
1150
1151
1152      float *rM=res[vi->coupling_mag[i]];
1153      float *rA=res[vi->coupling_ang[i]];
1154      float *qM=rM+n;
1155      float *qA=rA+n;
1156      int *floorM=ifloor[vi->coupling_mag[i]];
1157      int *floorA=ifloor[vi->coupling_ang[i]];
1158      float prepoint=stereo_threshholds[g->coupling_prepointamp[blobno]];
1159      float postpoint=stereo_threshholds[g->coupling_postpointamp[blobno]];
1160      int partition=(p->vi->normal_point_p?p->vi->normal_partition:p->n);
1161      int limit=g->coupling_pointlimit[p->vi->blockflag][blobno];
1162      int pointlimit=limit;
1163
1164      nonzero[vi->coupling_mag[i]]=1;
1165      nonzero[vi->coupling_ang[i]]=1;
1166
1167       /* The threshold of a stereo is changed with the size of n */
1168       if(n > 1000)
1169         postpoint=stereo_threshholds_limited[g->coupling_postpointamp[blobno]];
1170
1171      for(j=0;j<p->n;j+=partition){
1172        float acc=0.f;
1173
1174        for(k=0;k<partition;k++){
1175          int l=k+j;
1176
1177          if(l<sliding_lowpass){
1178            if((l>=limit && fabs(rM[l])<postpoint && fabs(rA[l])<postpoint) ||
1179               (fabs(rM[l])<prepoint && fabs(rA[l])<prepoint)){
1180
1181
1182              precomputed_couple_point(mag_memo[i][l],
1183                                       floorM[l],floorA[l],
1184                                       qM+l,qA+l);
1185
1186              if(rint(qM[l])==0.f)acc+=qM[l]*qM[l];
1187            }else{
1188              couple_lossless(rM[l],rA[l],qM+l,qA+l);
1189            }
1190          }else{
1191            qM[l]=0.;
1192            qA[l]=0.;
1193          }
1194        }
1195
1196        if(p->vi->normal_point_p){
1197          for(k=0;k<partition && acc>=p->vi->normal_thresh;k++){
1198            int l=mag_sort[i][j+k];
1199            if(l<sliding_lowpass && l>=pointlimit && rint(qM[l])==0.f){
1200              qM[l]=unitnorm(qM[l]);
1201              acc-=1.f;
1202            }
1203          }
1204        }
1205      }
1206    }
1207  }
1208}
1209
1210/* AoTuV */
1211/** @ M2 **
1212   The boost problem by the combination of noise normalization and point stereo is eased.
1213   However, this is a temporary patch.
1214   by Aoyumi @ 2004/04/18
1215*/
1216
1217void hf_reduction(vorbis_info_psy_global *g,
1218                      vorbis_look_psy *p,
1219                      vorbis_info_mapping0 *vi,
1220                      float **mdct){
1221
1222  int i,j,n=p->n, de=0.3*p->m_val;
1223  int limit=g->coupling_pointlimit[p->vi->blockflag][PACKETBLOBS/2];
1224
1225  for(i=0; i<vi->coupling_steps; i++){
1226    /* for(j=start; j<limit; j++){} // ???*/
1227    for(j=limit; j<n; j++)
1228      mdct[i][j] *= (1.0 - de*((float)(j-limit) / (float)(n-limit)));
1229  }
1230}
1231