1/*
2 * This software has been licensed to the Centre of Speech Technology, KTH
3 * by Microsoft Corp. with the terms in the accompanying file BSD.txt,
4 * which is a BSD style license.
5 *
6 *    "Copyright (c) 1990-1996 Entropic Research Laboratory, Inc.
7 *                   All rights reserved"
8 *
9 * Written by:  David Talkin
10 * Checked by:
11 * Revised by:  Derek Lin, David Talkin
12 *
13 * Brief description:
14 *     A collection of pretty generic signal-processing routines.
15 *
16 *
17 */
18
19#include <math.h>
20#include <stdlib.h>
21#include <stdio.h>
22#ifndef TRUE
23# define TRUE 1
24# define FALSE 0
25#endif
26#include "jkGetF0.h"
27#include "snack.h"
28
29/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
30/* Return a time-weighting window of type type and length n in dout.
31 * Dout is assumed to be at least n elements long.  Type is decoded in
32 * the switch statement below.
33 */
34int xget_window(dout, n, type)
35     register float *dout;
36     register int n, type;
37{
38  static float *din = NULL;
39  static int n0 = 0;
40  float preemp = 0.0;
41
42  if(n > n0) {
43    register float *p;
44    register int i;
45
46    if(din) ckfree((void *)din);
47    din = NULL;
48    if(!(din = (float*)ckalloc(sizeof(float)*n))) {
49      Fprintf(stderr,"Allocation problems in xget_window()\n");
50      return(FALSE);
51    }
52    for(i=0, p=din; i++ < n; ) *p++ = 1;
53    n0 = n;
54  }
55  return(window(din, dout, n, preemp, type));
56}
57
58/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
59/* Apply a rectangular window (i.e. none).  Optionally, preemphasize. */
60void xrwindow(din, dout, n, preemp)
61     register float *din;
62     register float *dout, preemp;
63     register int n;
64{
65  register float *p;
66
67/* If preemphasis is to be performed,  this assumes that there are n+1 valid
68   samples in the input buffer (din). */
69  if(preemp != 0.0) {
70    for( p=din+1; n-- > 0; )
71      *dout++ = (float)((*p++) - (preemp * *din++));
72  } else {
73    for( ; n-- > 0; )
74      *dout++ =  *din++;
75  }
76}
77
78/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
79/* Generate a cos^4 window, if one does not already exist. */
80void xcwindow(din, dout, n, preemp)
81     register float *din;
82     register float *dout, preemp;
83     register int n;
84{
85  register int i;
86  register float *p;
87  static int wsize = 0;
88  static float *wind=NULL;
89  register float *q, co;
90
91  if(wsize != n) {		/* Need to create a new cos**4 window? */
92    register double arg, half=0.5;
93
94    if(wind) wind = (float*)ckrealloc((void *)wind,n*sizeof(float));
95    else wind = (float*)ckalloc(n*sizeof(float));
96    wsize = n;
97    for(i=0, arg=3.1415927*2.0/(wsize), q=wind; i < n; ) {
98      co = (float) (half*(1.0 - cos((half + (double)i++) * arg)));
99      *q++ = co * co * co * co;
100    }
101  }
102/* If preemphasis is to be performed,  this assumes that there are n+1 valid
103   samples in the input buffer (din). */
104  if(preemp != 0.0) {
105    for(i=n, p=din+1, q=wind; i--; )
106      *dout++ = (float) (*q++ * ((float)(*p++) - (preemp * *din++)));
107  } else {
108    for(i=n, q=wind; i--; )
109      *dout++ = *q++ * *din++;
110  }
111}
112
113/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
114/* Generate a Hamming window, if one does not already exist. */
115void xhwindow(din, dout, n, preemp)
116     register float *din;
117     register float *dout, preemp;
118     register int n;
119{
120  register int i;
121  register float *p;
122  static int wsize = 0;
123  static float *wind=NULL;
124  register float *q;
125
126  if(wsize != n) {		/* Need to create a new Hamming window? */
127    register double arg, half=0.5;
128
129    if(wind) wind = (float*)ckrealloc((void *)wind,n*sizeof(float));
130    else wind = (float*)ckalloc(n*sizeof(float));
131    wsize = n;
132    for(i=0, arg=3.1415927*2.0/(wsize), q=wind; i < n; )
133      *q++ = (float) (.54 - .46 * cos((half + (double)i++) * arg));
134  }
135/* If preemphasis is to be performed,  this assumes that there are n+1 valid
136   samples in the input buffer (din). */
137  if(preemp != 0.0) {
138    for(i=n, p=din+1, q=wind; i--; )
139      *dout++ = (float) (*q++ * ((float)(*p++) - (preemp * *din++)));
140  } else {
141    for(i=n, q=wind; i--; )
142      *dout++ = *q++ * *din++;
143  }
144}
145
146/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
147/* Generate a Hanning window, if one does not already exist. */
148void xhnwindow(din, dout, n, preemp)
149     register float *din;
150     register float *dout, preemp;
151     register int n;
152{
153  register int i;
154  register float *p;
155  static int wsize = 0;
156  static float *wind=NULL;
157  register float *q;
158
159  if(wsize != n) {		/* Need to create a new Hanning window? */
160    register double arg, half=0.5;
161
162    if(wind) wind = (float*)ckrealloc((void *)wind,n*sizeof(float));
163    else wind = (float*)ckalloc(n*sizeof(float));
164    wsize = n;
165    for(i=0, arg=3.1415927*2.0/(wsize), q=wind; i < n; )
166      *q++ = (float) (half - half * cos((half + (double)i++) * arg));
167  }
168/* If preemphasis is to be performed,  this assumes that there are n+1 valid
169   samples in the input buffer (din). */
170  if(preemp != 0.0) {
171    for(i=n, p=din+1, q=wind; i--; )
172      *dout++ = (float) (*q++ * ((float)(*p++) - (preemp * *din++)));
173  } else {
174    for(i=n, q=wind; i--; )
175      *dout++ = *q++ * *din++;
176  }
177}
178
179/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
180/* Apply a window of type type to the short PCM sequence of length n
181 * in din.  Return the floating-point result sequence in dout.  If preemp
182 * is non-zero, apply preemphasis to tha data as it is windowed.
183 */
184int window(din, dout, n, preemp, type)
185     register float *din;
186     register float *dout, preemp;
187     register int n;
188     int type;
189{
190  switch(type) {
191  case 0:			/* rectangular */
192    xrwindow(din, dout, n, preemp);
193    break;
194  case 1:			/* Hamming */
195    xhwindow(din, dout, n, preemp);
196    break;
197  case 2:			/* cos^4 */
198    xcwindow(din, dout, n, preemp);
199    break;
200  case 3:			/* Hanning */
201    xhnwindow(din, dout, n, preemp);
202    break;
203  default:
204    Fprintf(stderr,"Unknown window type (%d) requested in window()\n",type);
205    return(FALSE);
206  }
207  return(TRUE);
208}
209
210/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
211/* Compute the pp+1 autocorrelation lags of the windowsize samples in s.
212 * Return the normalized autocorrelation coefficients in r.
213 * The rms is returned in e.
214 */
215void xautoc( windowsize, s, p, r, e )
216     register int p, windowsize;
217     register float *s, *e;
218     register float *r;
219{
220  register int i, j;
221  register float *q, *t, sum, sum0;
222
223  for( i=windowsize, q=s, sum0=0.0; i--;) {
224    sum = *q++;
225    sum0 += sum*sum;
226  }
227  *r = 1.;			/* r[0] will always =1. */
228  if(sum0 == 0.0) {		/* No energy: fake low-energy white noise. */
229    *e = 1.;			/* Arbitrarily assign 1 to rms. */
230    /* Now fake autocorrelation of white noise. */
231    for ( i=1; i<=p; i++){
232      r[i] = 0.;
233    }
234    return;
235  }
236  *e = (float) sqrt((double)(sum0/windowsize));
237  sum0 = (float) (1.0/sum0);
238  for( i=1; i <= p; i++){
239    for( sum=0.0, j=windowsize-i, q=s, t=s+i; j--; )
240      sum += (*q++) * (*t++);
241    *(++r) = sum*sum0;
242  }
243}
244
245/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
246/* Using Durbin's recursion, convert the autocorrelation sequence in r
247 * to reflection coefficients in k and predictor coefficients in a.
248 * The prediction error energy (gain) is left in *ex.
249 * Note: durbin returns the coefficients in normal sign format.
250 *	(i.e. a[0] is assumed to be = +1.)
251 */
252void xdurbin ( r, k, a, p, ex)
253     register int p;			/* analysis order */
254     register float *r, *k, *a, *ex;
255{
256  float  bb[BIGSORD];
257  register int i, j;
258  register float e, s, *b = bb;
259
260  e = *r;
261  *k = -r[1]/e;
262  *a = *k;
263  e *= (float) (1. - (*k) * (*k));
264  for ( i=1; i < p; i++){
265    s = 0;
266    for ( j=0; j<i; j++){
267      s -= a[j] * r[i-j];
268    }
269    k[i] = ( s - r[i+1] )/e;
270    a[i] = k[i];
271    for ( j=0; j<=i; j++){
272      b[j] = a[j];
273    }
274    for ( j=0; j<i; j++){
275      a[j] += k[i] * b[i-j-1];
276    }
277    e *= (float) ( 1. - (k[i] * k[i]) );
278  }
279  *ex = e;
280}
281
282/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
283/*  Compute the autocorrelations of the p LP coefficients in a.
284 *  (a[0] is assumed to be = 1 and not explicitely accessed.)
285 *  The magnitude of a is returned in c.
286 *  2* the other autocorrelation coefficients are returned in b.
287 */
288void xa_to_aca ( a, b, c, p )
289float *a, *b, *c;
290register int p;
291{
292  register float  s, *ap, *a0;
293  register int  i, j;
294
295  for ( s=1., ap=a, i = p; i--; ap++ )
296    s += *ap * *ap;
297
298  *c = s;
299  for ( i = 1; i <= p; i++){
300    s = a[i-1];
301    for (a0 = a, ap = a+i, j = p-i; j--; )
302      s += (*a0++ * *ap++);
303    *b++ = (float) (2. * s);
304  }
305
306}
307
308/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
309/* Compute the Itakura LPC distance between the model represented
310 * by the signal autocorrelation (r) and its residual (gain) and
311 * the model represented by an LPC autocorrelation (c, b).
312 * Both models are of order p.
313 * r is assumed normalized and r[0]=1 is not explicitely accessed.
314 * Values returned by the function are >= 1.
315 */
316float xitakura ( p, b, c, r, gain )
317     register float *b, *c, *r, *gain;
318     register int p;
319{
320  register float s;
321
322  for( s= *c; p--; )
323    s += *r++ * *b++;
324
325  return (s/ *gain);
326}
327
328/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
329/* Compute the time-weighted RMS of a size segment of data.  The data
330 * is weighted by a window of type w_type before RMS computation.  w_type
331 * is decoded above in window().
332 */
333float wind_energy(data,size,w_type)
334     register float *data;	/* input PCM data */
335     register int size,		/* size of window */
336       w_type;			/* window type */
337{
338  static int nwind = 0;
339  static float *dwind = NULL;
340  register float *dp, sum, f;
341  register int i;
342
343  if(nwind < size) {
344    if(dwind) dwind = (float*)ckrealloc((void *)dwind,size*sizeof(float));
345    else dwind = (float*)ckalloc(size*sizeof(float));
346    if(!dwind) {
347      Fprintf(stderr,"Can't allocate scratch memory in wind_energy()\n");
348      return(0.0);
349    }
350  }
351  if(nwind != size) {
352    xget_window(dwind, size, w_type);
353    nwind = size;
354  }
355  for(i=size, dp = dwind, sum = 0.0; i-- > 0; ) {
356    f = *dp++ * (float)(*data++);
357    sum += f*f;
358  }
359  return((float)sqrt((double)(sum/size)));
360}
361
362/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
363/* Generic autocorrelation LPC analysis of the short-integer data
364 * sequence in data.
365 */
366int xlpc(lpc_ord,lpc_stabl,wsize,data,lpca,ar,lpck,normerr,rms,preemp,type)
367     int lpc_ord,		/* Analysis order */
368       wsize,			/* window size in points */
369       type;		/* window type (decoded in window() above) */
370     float lpc_stabl,	/* Stability factor to prevent numerical problems. */
371       *lpca,		/* if non-NULL, return vvector for predictors */
372       *ar,		/* if non-NULL, return vector for normalized autoc. */
373       *lpck,		/* if non-NULL, return vector for PARCOR's */
374       *normerr,		/* return scaler for normalized error */
375       *rms,		/* return scaler for energy in preemphasized window */
376       preemp;
377     float *data;	/* input data sequence; assumed to be wsize+1 long */
378{
379  static float *dwind=NULL;
380  static int nwind=0;
381  float rho[BIGSORD+1], k[BIGSORD], a[BIGSORD+1],*r,*kp,*ap,en,er,wfact=1.0;
382
383  if((wsize <= 0) || (!data) || (lpc_ord > BIGSORD)) return(FALSE);
384
385  if(nwind != wsize) {
386    if(dwind) dwind = (float*)ckrealloc((void *)dwind,wsize*sizeof(float));
387    else dwind = (float*)ckalloc(wsize*sizeof(float));
388    if(!dwind) {
389      Fprintf(stderr,"Can't allocate scratch memory in lpc()\n");
390      return(FALSE);
391    }
392    nwind = wsize;
393  }
394
395  window(data, dwind, wsize, preemp, type);
396  if(!(r = ar)) r = rho;	/* Permit optional return of the various */
397  if(!(kp = lpck)) kp = k;	/* coefficients and intermediate results. */
398  if(!(ap = lpca)) ap = a;
399  xautoc( wsize, dwind, lpc_ord, r, &en );
400  if(lpc_stabl > 1.0) {	/* add a little to the diagonal for stability */
401    int i;
402    float ffact;
403    ffact = (float) (1.0/(1.0 + exp((-lpc_stabl/20.0) * log(10.0))));
404    for(i=1; i <= lpc_ord; i++) rho[i] = ffact * r[i];
405    *rho = *r;
406    r = rho;
407    if(ar)
408      for(i=0;i<=lpc_ord; i++) ar[i] = r[i];
409  }
410  xdurbin ( r, kp, &ap[1], lpc_ord, &er);
411  switch(type) {		/* rms correction for window */
412  case 0:
413    wfact = 1.0;		/* rectangular */
414    break;
415  case 1:
416    wfact = .630397f;		/* Hamming */
417    break;
418  case 2:
419    wfact = .443149f;		/* (.5 - .5*cos)^4 */
420    break;
421  case 3:
422    wfact = .612372f;		/* Hanning */
423    break;
424  }
425  *ap = 1.0;
426  if(rms) *rms = en/wfact;
427  if(normerr) *normerr = er;
428  return(TRUE);
429}
430
431
432/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
433/* Return a sequence based on the normalized crosscorrelation of the signal
434   in data.
435 *
436  data is the input speech array
437  size is the number of samples in each correlation
438  start is the first lag to compute (governed by the highest expected F0)
439  nlags is the number of cross correlations to compute (set by lowest F0)
440  engref is the energy computed at lag=0 (i.e. energy in ref. window)
441  maxloc is the lag at which the maximum in the correlation was found
442  maxval is the value of the maximum in the CCF over the requested lag interval
443  correl is the array of nlags cross-correlation coefficients (-1.0 to 1.0)
444 *
445 */
446void crossf(data, size, start, nlags, engref, maxloc, maxval, correl)
447     int *maxloc;
448     float *engref, *maxval, *data, *correl;
449     int size, start, nlags;
450{
451  static float *dbdata=NULL;
452  static int dbsize = 0;
453  register float *dp, *ds, sum, st;
454  register int j;
455  register  float *dq, t, *p, engr, *dds, amax;
456  register  double engc;
457  int i, iloc, total;
458  int sizei, sizeo, maxsize;
459
460  /* Compute mean in reference window and subtract this from the
461     entire sequence.  This doesn't do too much damage to the data
462     sequenced for the purposes of F0 estimation and removes the need for
463     more principled (and costly) low-cut filtering. */
464  if((total = size+start+nlags) > dbsize) {
465    if(dbdata)
466      ckfree((void *)dbdata);
467    dbdata = NULL;
468    dbsize = 0;
469    if(!(dbdata = (float*)ckalloc(sizeof(float)*total))) {
470      Fprintf(stderr,"Allocation failure in crossf()\n");
471      return;/*exit(-1);*/
472    }
473    dbsize = total;
474  }
475  for(engr=0.0, j=size, p=data; j--; ) engr += *p++;
476  engr /= size;
477  for(j=size+nlags+start, dq = dbdata, p=data; j--; )  *dq++ = *p++ - engr;
478
479  maxsize = start + nlags;
480  sizei = size + start + nlags + 1;
481  sizeo = nlags + 1;
482
483  /* Compute energy in reference window. */
484  for(j=size, dp=dbdata, sum=0.0; j--; ) {
485    st = *dp++;
486    sum += st * st;
487  }
488
489  *engref = engr = sum;
490  if(engr > 0.0) {    /* If there is any signal energy to work with... */
491    /* Compute energy at the first requested lag. */
492    for(j=size, dp=dbdata+start, sum=0.0; j--; ) {
493      st = *dp++;
494      sum += st * st;
495    }
496    engc = sum;
497
498    /* COMPUTE CORRELATIONS AT ALL OTHER REQUESTED LAGS. */
499    for(i=0, dq=correl, amax=0.0, iloc = -1; i < nlags; i++) {
500      for(j=size, sum=0.0, dp=dbdata, dds = ds = dbdata+i+start; j--; )
501	sum += *dp++ * *ds++;
502      *dq++ = t = (float) (sum/sqrt((double)(engc*engr))); /* output norm. CC */
503      engc -= (double)(*dds * *dds); /* adjust norm. energy for next lag */
504      if((engc += (double)(*ds * *ds)) < 1.0)
505	engc = 1.0;		/* (hack: in case of roundoff error) */
506      if(t > amax) {		/* Find abs. max. as we go. */
507	amax = t;
508	iloc = i+start;
509      }
510    }
511    *maxloc = iloc;
512    *maxval = amax;
513  } else {	/* No energy in signal; fake reasonable return vals. */
514    *maxloc = 0;
515    *maxval = 0.0;
516    for(p=correl,i=nlags; i-- > 0; )
517      *p++ = 0.0;
518  }
519}
520
521/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
522/* Return a sequence based on the normalized crosscorrelation of the
523   signal in data.  This is similar to crossf(), but is designed to
524   compute only small patches of the correlation sequence.  The length of
525   each patch is determined by nlags; the number of patches by nlocs, and
526   the locations of the patches is specified by the array locs.  Regions
527   of the CCF that are not computed are set to 0.
528 *
529  data is the input speech array
530  size is the number of samples in each correlation
531  start0 is the first (virtual) lag to compute (governed by highest F0)
532  nlags0 is the number of lags (virtual+actual) in the correlation sequence
533  nlags is the number of cross correlations to compute at each location
534  engref is the energy computed at lag=0 (i.e. energy in ref. window)
535  maxloc is the lag at which the maximum in the correlation was found
536  maxval is the value of the maximum in the CCF over the requested lag interval
537  correl is the array of nlags cross-correlation coefficients (-1.0 to 1.0)
538  locs is an array of indices pointing to the center of a patches where the
539       cross correlation is to be computed.
540  nlocs is the number of correlation patches to compute.
541 *
542 */
543void crossfi(data, size, start0, nlags0, nlags, engref, maxloc, maxval, correl, locs, nlocs)
544     int *maxloc;
545     float *engref, *maxval, *data, *correl;
546     int size, start0, nlags0, nlags, *locs, nlocs;
547{
548  static float *dbdata=NULL;
549  static int dbsize = 0;
550  register float *dp, *ds, sum, st;
551  register int j;
552  register  float *dq, t, *p, engr, *dds, amax;
553  register  double engc;
554  int i, iloc, start, total;
555
556  /* Compute mean in reference window and subtract this from the
557     entire sequence. */
558  if((total = size+start0+nlags0) > dbsize) {
559    if(dbdata)
560      ckfree((void *)dbdata);
561    dbdata = NULL;
562    dbsize = 0;
563    if(!(dbdata = (float*)ckalloc(sizeof(float)*total))) {
564      Fprintf(stderr,"Allocation failure in crossfi()\n");
565      return;/*exit(-1);*/
566    }
567    dbsize = total;
568  }
569  for(engr=0.0, j=size, p=data; j--; ) engr += *p++;
570  engr /= size;
571/*  for(j=size+nlags0+start0, t = -2.1, amax = 2.1, dq = dbdata, p=data; j--; ) {
572    if(((smax = *p++ - engr) > t) && (smax < amax))
573      smax = 0.0;
574    *dq++ = smax;
575  } */
576  for(j=size+nlags0+start0, dq = dbdata, p=data; j--; ) {
577    *dq++ = *p++ - engr;
578  }
579
580  /* Zero the correlation output array to avoid confusing the peak
581     picker (since all lags will not be computed). */
582  for(p=correl,i=nlags0; i-- > 0; )
583    *p++ = 0.0;
584
585  /* compute energy in reference window */
586  for(j=size, dp=dbdata, sum=0.0; j--; ) {
587    st = *dp++;
588    sum += st * st;
589  }
590
591  *engref = engr = sum;
592   amax=0.0;
593  iloc = -1;
594  if(engr > 0.0) {
595    for( ; nlocs > 0; nlocs--, locs++ ) {
596      start = *locs - (nlags>>1);
597      if(start < start0)
598	start = start0;
599      dq = correl + start - start0;
600      /* compute energy at first requested lag */
601      for(j=size, dp=dbdata+start, sum=0.0; j--; ) {
602	st = *dp++;
603	sum += st * st;
604      }
605      engc = sum;
606
607      /* COMPUTE CORRELATIONS AT ALL REQUESTED LAGS */
608      for(i=0; i < nlags; i++) {
609	for(j=size, sum=0.0, dp=dbdata, dds = ds = dbdata+i+start; j--; )
610	  sum += *dp++ * *ds++;
611	if(engc < 1.0)
612	  engc = 1.0;		/* in case of roundoff error */
613	*dq++ = t = (float) (sum/sqrt((double)(10000.0 + (engc*engr))));
614	engc -= (double)(*dds * *dds);
615	engc += (double)(*ds * *ds);
616	if(t > amax) {
617	  amax = t;
618	  iloc = i+start;
619	}
620      }
621    }
622    *maxloc = iloc;
623    *maxval = amax;
624  } else {
625    *maxloc = 0;
626    *maxval = 0.0;
627  }
628}
629