1/*
2 * This software has been licensed to the Centre of Speech Technology, KTH
3 * by Microsoft Corp. with the terms in the accompanying file BSD.txt,
4 * which is a BSD style license.
5 *
6 *    "Copyright (c) 1990-1996 Entropic Research Laboratory, Inc.
7 *                   All rights reserved"
8 *
9 * Written by:  Derek Lin
10 * Checked by:
11 * Revised by:  David Talkin
12 *
13 * Brief description:  Estimates F0 using normalized cross correlation and
14 *   dynamic programming.
15 *
16 */
17
18
19#include "snack.h"
20#include <math.h>
21#include <stdlib.h>
22#include <string.h>
23#include <stdio.h>
24#include <limits.h>
25#ifndef TRUE
26# define TRUE 1
27# define FALSE 0
28#endif
29#ifndef FLT_MAX
30# define FLT_MAX (3.40282347E+38f)
31#endif
32#ifndef M_PI
33# define M_PI (3.1415926536f)
34#endif
35#include "jkGetF0.h"
36
37int	    debug_level = 0;
38
39void free_dp_f0();
40static int check_f0_params();
41
42int
43Get_f0(Sound *sound, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[])
44{
45  float *fdata;
46  int done;
47  long buff_size, actsize;
48  double sf, start_time;
49  F0_params *par, *read_f0_params();
50  float *f0p, *vuvp, *rms_speech, *acpkp;
51  int i, vecsize;
52  int init_dp_f0(), dp_f0();
53  static int framestep = -1;
54  long sdstep = 0, total_samps;
55  int ndone = 0;
56  Tcl_Obj *list;
57  double framestep2 = 0.0, wind_dur;
58
59  int arg, startpos = 0, endpos = -1, fmax, fmin;
60  static CONST84 char *subOptionStrings[] = {
61    "-start", "-end", "-maxpitch", "-minpitch", "-progress",
62    "-framelength", "-method", "-windowlength", NULL
63  };
64  enum subOptions {
65    START, END, F0MAX, F0MIN, PROGRESS, FRAME, METHOD, WINLEN
66  };
67
68  if (sound->cmdPtr != NULL) {
69    Tcl_DecrRefCount(sound->cmdPtr);
70    sound->cmdPtr = NULL;
71  }
72
73  par = (F0_params *) ckalloc(sizeof(F0_params));
74  par->cand_thresh = 0.3f;
75  par->lag_weight = 0.3f;
76  par->freq_weight = 0.02f;
77  par->trans_cost = 0.005f;
78  par->trans_amp = 0.5f;
79  par->trans_spec = 0.5f;
80  par->voice_bias = 0.0f;
81  par->double_cost = 0.35f;
82  par->min_f0 = 50;
83  par->max_f0 = 550;
84  par->frame_step = 0.01f;
85  par->wind_dur = 0.0075f;
86  par->n_cands = 20;
87  par->mean_f0 = 200;     /* unused */
88  par->mean_f0_weight = 0.0f;  /* unused */
89  par->conditioning = 0;    /*unused */
90
91  for (arg = 2; arg < objc; arg += 2) {
92    int index;
93
94    if (Tcl_GetIndexFromObj(interp, objv[arg], subOptionStrings,
95			    "option", 0, &index) != TCL_OK) {
96      return TCL_ERROR;
97    }
98
99    if (arg + 1 == objc) {
100      Tcl_AppendResult(interp, "No argument given for ",
101		       subOptionStrings[index], " option", (char *) NULL);
102      return TCL_ERROR;
103    }
104
105    switch ((enum subOptions) index) {
106    case START:
107      {
108	if (Tcl_GetIntFromObj(interp, objv[arg+1], &startpos) != TCL_OK)
109	  return TCL_ERROR;
110	break;
111      }
112    case END:
113      {
114	if (Tcl_GetIntFromObj(interp, objv[arg+1], &endpos) != TCL_OK)
115	  return TCL_ERROR;
116	break;
117      }
118    case F0MAX:
119      {
120	if (Tcl_GetIntFromObj(interp, objv[arg+1], &fmax) != TCL_OK)
121	  return TCL_ERROR;
122	par->max_f0 = (float) fmax;
123	break;
124      }
125    case F0MIN:
126      {
127	if (Tcl_GetIntFromObj(interp, objv[arg+1], &fmin) != TCL_OK)
128	  return TCL_ERROR;
129	par->min_f0 = (float) fmin;
130	break;
131      }
132    case PROGRESS:
133      {
134	char *str = Tcl_GetStringFromObj(objv[arg+1], NULL);
135
136	if (strlen(str) > 0) {
137	  Tcl_IncrRefCount(objv[arg+1]);
138	  sound->cmdPtr = objv[arg+1];
139	}
140	break;
141      }
142    case FRAME:
143      {
144	if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &framestep2)
145	    != TCL_OK)
146	  return TCL_ERROR;
147	par->frame_step = (float) framestep2;
148	break;
149      }
150    case METHOD:
151      {
152	break;
153      }
154    case WINLEN:
155      {
156	if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &wind_dur)
157	    != TCL_OK)
158	  return TCL_ERROR;
159	par->wind_dur = (float) wind_dur;
160	break;
161      }
162    }
163  }
164  if (startpos < 0) startpos = 0;
165  if (endpos >= (sound->length - 1) || endpos == -1)
166    endpos = sound->length - 1;
167  if (startpos > endpos) return TCL_OK;
168
169  sf = (double) sound->samprate;
170
171  if (framestep > 0)  /* If a value was specified with -S, use it. */
172    par->frame_step = (float) (framestep / sf);
173  start_time = 0.0f;
174  if(check_f0_params(interp, par, sf)){
175    Tcl_AppendResult(interp, "invalid/inconsistent parameters -- exiting.", NULL);
176    return TCL_ERROR;
177  }
178
179  total_samps = endpos - startpos + 1;
180  if(total_samps < ((par->frame_step * 2.0) + par->wind_dur) * sf) {
181    Tcl_AppendResult(interp, "input range too small for analysis by get_f0.", NULL);
182    return TCL_ERROR;
183  }
184  /* Initialize variables in get_f0.c; allocate data structures;
185   * determine length and overlap of input frames to read.
186   */
187  if (init_dp_f0(sf, par, &buff_size, &sdstep)
188      || buff_size > INT_MAX || sdstep > INT_MAX)
189  {
190    Tcl_AppendResult(interp, "problem in init_dp_f0().", NULL);
191    return TCL_ERROR;
192  }
193
194  if (debug_level)
195    Fprintf(stderr, "init_dp_f0 returned buff_size %ld, sdstep %ld.\n",buff_size, sdstep);
196
197  if (buff_size > total_samps)
198    buff_size = total_samps;
199
200  actsize = min(buff_size, sound->length);
201  fdata = (float *) ckalloc(sizeof(float) * max(buff_size, sdstep));
202  list = Tcl_NewListObj(0, NULL);
203  Snack_ProgressCallback(sound->cmdPtr, interp, "Computing pitch", 0.0);
204  ndone = startpos;
205
206  while (TRUE) {
207    done = (actsize < buff_size) || (total_samps == buff_size);
208    Snack_GetSoundData(sound, ndone, fdata, actsize);
209    /*if (sound->debug > 0) Snack_WriteLog("dp_f0...\n");*/
210    if (dp_f0(fdata, (int) actsize, (int) sdstep, sf, par,
211	      &f0p, &vuvp, &rms_speech, &acpkp, &vecsize, done)) {
212      Tcl_AppendResult(interp, "problem in dp_f0().", NULL);
213      return TCL_ERROR;
214    }
215    /*if (sound->debug > 0) Snack_WriteLogInt("done dp_f0",vecsize);*/
216    for (i = vecsize - 1; i >= 0; i--) {
217      Tcl_Obj *frameList;
218      frameList = Tcl_NewListObj(0, NULL);
219      Tcl_ListObjAppendElement(interp, list, frameList);
220      Tcl_ListObjAppendElement(interp, frameList,
221			       Tcl_NewDoubleObj((double)f0p[i]));
222      Tcl_ListObjAppendElement(interp, frameList,
223			       Tcl_NewDoubleObj((double)vuvp[i]));
224      Tcl_ListObjAppendElement(interp, frameList,
225			       Tcl_NewDoubleObj((double)rms_speech[i]));
226      Tcl_ListObjAppendElement(interp, frameList,
227			       Tcl_NewDoubleObj((double)acpkp[i]));
228    }
229
230    if (done) break;
231
232    ndone += sdstep;
233    actsize = min(buff_size, sound->length - ndone);
234    total_samps -= sdstep;
235
236    if (actsize > total_samps)
237      actsize = total_samps;
238
239    if (1) {
240      int res = Snack_ProgressCallback(sound->cmdPtr, interp, "Computing pitch", (double) ndone / sound->length);
241      if (res != TCL_OK) {
242	return TCL_ERROR;
243      }
244    }
245  }
246
247  Snack_ProgressCallback(sound->cmdPtr, interp, "Computing pitch", 1.0);
248
249  ckfree((void *)fdata);
250
251  ckfree((void *)par);
252
253  free_dp_f0();
254
255  Tcl_SetObjResult(interp, list);
256
257  return TCL_OK;
258}
259
260
261/*
262 * Some consistency checks on parameter values.
263 * Return a positive integer if any errors detected, 0 if none.
264 */
265
266static int
267check_f0_params(Tcl_Interp *interp, F0_params *par, double sample_freq)
268{
269  int	  error = 0;
270  double  dstep;
271
272  if((par->cand_thresh < 0.01) || (par->cand_thresh > 0.99)) {
273    Tcl_AppendResult(interp,
274	  "ERROR: cand_thresh parameter must be between [0.01, 0.99].",NULL);
275    error++;
276  }
277  if((par->wind_dur > .1) || (par->wind_dur < .0001)) {
278    Tcl_AppendResult(interp,"ERROR: wind_dur parameter must be between [0.0001, 0.1].",NULL);
279    error++;
280  }
281  if((par->n_cands > 100) || (par->n_cands < 3)){
282    Tcl_AppendResult(interp,
283	    "ERROR: n_cands parameter must be between [3,100].",NULL);
284    error++;
285  }
286  if((par->max_f0 <= par->min_f0) || (par->max_f0 >= (sample_freq/2.0)) ||
287     (par->min_f0 < (sample_freq/10000.0))){
288    Tcl_AppendResult(interp,
289	    "ERROR: min(max)_f0 parameter inconsistent with sampling frequency.",NULL);
290    error++;
291  }
292  dstep = ((double)((int)(0.5 + (sample_freq * par->frame_step))))/sample_freq;
293  if(dstep != par->frame_step) {
294    if(debug_level)
295      Tcl_AppendResult(interp,
296	      "Frame step set to exactly match signal sample rate.",NULL);
297    par->frame_step = (float) dstep;
298  }
299  if((par->frame_step > 0.1) || (par->frame_step < (1.0/sample_freq))){
300    Tcl_AppendResult(interp,
301	    "ERROR: frame_step parameter must be between [1/sampling rate, 0.1].",NULL);
302    error++;
303  }
304
305  return(error);
306}
307
308static void get_cand(), peak(), do_ffir();
309static int lc_lin_fir(), downsamp();
310
311/* ----------------------------------------------------------------------- */
312void get_fast_cands(fdata, fdsdata, ind, step, size, dec, start, nlags, engref, maxloc, maxval, cp, peaks, locs, ncand, par)
313     float *fdata, *fdsdata, *engref, *maxval, *peaks;
314     int size, start, nlags, *maxloc, *locs, *ncand, ind, step, dec;
315     Cross *cp;
316     F0_params *par;
317{
318  int decind, decstart, decnlags, decsize, i, j, *lp;
319  float *corp, xp, yp, lag_wt;
320  register float *pe;
321
322  lag_wt = par->lag_weight/nlags;
323  decnlags = 1 + (nlags/dec);
324  if((decstart = start/dec) < 1) decstart = 1;
325  decind = (ind * step)/dec;
326  decsize = 1 + (size/dec);
327  corp = cp->correl;
328
329  crossf(fdsdata + decind, decsize, decstart, decnlags, engref, maxloc,
330	maxval, corp);
331  cp->maxloc = *maxloc;	/* location of maximum in correlation */
332  cp->maxval = *maxval;	/* max. correlation value (found at maxloc) */
333  cp->rms = (float) sqrt(*engref/size); /* rms in reference window */
334  cp->firstlag = decstart;
335
336  get_cand(cp,peaks,locs,decnlags,ncand,par->cand_thresh); /* return high peaks in xcorr */
337
338  /* Interpolate to estimate peak locations and values at high sample rate. */
339  for(i = *ncand, lp = locs, pe = peaks; i--; pe++, lp++) {
340    j = *lp - decstart - 1;
341    peak(&corp[j],&xp,&yp);
342    *lp = (*lp * dec) + (int)(0.5+(xp*dec)); /* refined lag */
343    *pe = yp*(1.0f - (lag_wt* *lp)); /* refined amplitude */
344  }
345
346  if(*ncand >= par->n_cands) {	/* need to prune candidates? */
347    register int *loc, *locm, lt;
348    register float smaxval, *pem;
349    register int outer, inner, lim;
350    for(outer=0, lim = par->n_cands-1; outer < lim; outer++)
351      for(inner = *ncand - 1 - outer,
352	  pe = peaks + (*ncand) -1, pem = pe-1,
353	  loc = locs + (*ncand) - 1, locm = loc-1;
354	  inner--;
355	  pe--,pem--,loc--,locm--)
356	if((smaxval = *pe) > *pem) {
357	  *pe = *pem;
358	  *pem = smaxval;
359	  lt = *loc;
360	  *loc = *locm;
361	  *locm = lt;
362	}
363    *ncand = par->n_cands-1;  /* leave room for the unvoiced hypothesis */
364  }
365  crossfi(fdata + (ind * step), size, start, nlags, 7, engref, maxloc,
366	  maxval, corp, locs, *ncand);
367
368  cp->maxloc = *maxloc;	/* location of maximum in correlation */
369  cp->maxval = *maxval;	/* max. correlation value (found at maxloc) */
370  cp->rms = (float) sqrt(*engref/size); /* rms in reference window */
371  cp->firstlag = start;
372  get_cand(cp,peaks,locs,nlags,ncand,par->cand_thresh); /* return high peaks in xcorr */
373    if(*ncand >= par->n_cands) {	/* need to prune candidates again? */
374    register int *loc, *locm, lt;
375    register float smaxval, *pe, *pem;
376    register int outer, inner, lim;
377    for(outer=0, lim = par->n_cands-1; outer < lim; outer++)
378      for(inner = *ncand - 1 - outer,
379	  pe = peaks + (*ncand) -1, pem = pe-1,
380	  loc = locs + (*ncand) - 1, locm = loc-1;
381	  inner--;
382	  pe--,pem--,loc--,locm--)
383	if((smaxval = *pe) > *pem) {
384	  *pe = *pem;
385	  *pem = smaxval;
386	  lt = *loc;
387	  *loc = *locm;
388	  *locm = lt;
389	}
390    *ncand = par->n_cands - 1;  /* leave room for the unvoiced hypothesis */
391  }
392}
393
394/* ----------------------------------------------------------------------- */
395float *downsample(input,samsin,state_idx,freq,samsout,decimate, first_time, last_time)
396     double freq;
397     float *input;
398      int samsin, *samsout, decimate, state_idx, first_time, last_time;
399{
400  static float	b[2048];
401  static float *foutput = NULL;
402  float	beta = 0.0f;
403  static int	ncoeff = 127, ncoefft = 0;
404  int init;
405
406  if(input && (samsin > 0) && (decimate > 0) && *samsout) {
407    if(decimate == 1) {
408      return(input);
409    }
410
411    if(first_time){
412      int nbuff = (samsin/decimate) + (2*ncoeff);
413
414      ncoeff = ((int)(freq * .005)) | 1;
415      beta = .5f/decimate;
416      foutput = (float*)ckrealloc((void *)foutput, sizeof(float) * nbuff);
417      /*      spsassert(foutput, "Can't allocate foutput in downsample");*/
418      for( ; nbuff > 0 ;)
419	foutput[--nbuff] = 0.0;
420
421      if( !lc_lin_fir(beta,&ncoeff,b)) {
422	fprintf(stderr,"\nProblems computing interpolation filter\n");
423	ckfree((void *)foutput);
424	return(NULL);
425      }
426      ncoefft = (ncoeff/2) + 1;
427    }		    /*  endif new coefficients need to be computed */
428
429    if(first_time) init = 1;
430    else if (last_time) init = 2;
431    else init = 0;
432
433    if(downsamp(input,foutput,samsin,samsout,state_idx,decimate,ncoefft,b,init)) {
434      return(foutput);
435    } else
436      Fprintf(stderr,"Problems in downsamp() in downsample()\n");
437  } else
438    Fprintf(stderr,"Bad parameters passed to downsample()\n");
439
440  return(NULL);
441}
442
443/* ----------------------------------------------------------------------- */
444/* Get likely candidates for F0 peaks. */
445static void get_cand(cross,peak,loc,nlags,ncand,cand_thresh)
446     Cross *cross;
447     float *peak, cand_thresh;
448     int *loc;
449     int  *ncand, nlags;
450{
451  register int i, lastl, *t;
452  register float o, p, q, *r, *s, clip;
453  int start, ncan, maxl;
454
455  clip = (float) (cand_thresh * cross->maxval);
456  maxl = cross->maxloc;
457  lastl = nlags - 2;
458  start = cross->firstlag;
459
460  r = cross->correl;
461  o= *r++;			/* first point */
462  q = *r++;	                /* middle point */
463  p = *r++;
464  s = peak;
465  t = loc;
466  ncan=0;
467  for(i=1; i < lastl; i++, o=q, q=p, p= *r++){
468    if((q > clip) &&		/* is this a high enough value? */
469      (q >= p) && (q >= o)){ /* NOTE: this finds SHOLDERS and PLATEAUS
470				      as well as peaks (is this a good idea?) */
471	*s++ = q;		/* record the peak value */
472	*t++ = i + start;	/* and its location */
473	ncan++;			/* count number of peaks found */
474      }
475  }
476/*
477  o = q;
478  q = p;
479  if( (q > clip) && (q >=0)){
480    *s++ = q;
481    *t++ = i+start;
482    ncan++;
483  }
484*/
485  *ncand = ncan;
486}
487
488/* ----------------------------------------------------------------------- */
489/* buffer-to-buffer downsample operation */
490/* This is STRICTLY a decimator! (no upsample) */
491static int downsamp(in, out, samples, outsamps, state_idx, decimate, ncoef, fc, init)
492     float *in, *out;
493     int samples, *outsamps, decimate, ncoef, state_idx;
494     float fc[];
495     int init;
496{
497  if(in && out) {
498    do_ffir(in, samples, out, outsamps, state_idx, ncoef, fc, 0, decimate, init);
499    return(TRUE);
500  } else
501    printf("Bad signal(s) passed to downsamp()\n");
502  return(FALSE);
503}
504
505/*      ----------------------------------------------------------      */
506static void do_ffir(buf,in_samps,bufo,out_samps,idx, ncoef,fc,invert,skip,init)
507/* fc contains 1/2 the coefficients of a symmetric FIR filter with unity
508    passband gain.  This filter is convolved with the signal in buf.
509    The output is placed in buf2.  If(invert), the filter magnitude
510    response will be inverted.  If(init&1), beginning of signal is in buf;
511    if(init&2), end of signal is in buf.  out_samps is set to the number of
512    output points placed in bufo. */
513register float	*buf, *bufo;
514float *fc;
515register int in_samps, ncoef, invert, skip, init, *out_samps;
516int idx;
517{
518  register float *dp1, *dp2, *dp3, sum, integral;
519  static float *co=NULL, *mem=NULL;
520  static float state[1000];
521  static int fsize=0, resid=0;
522  register int i, j, k, l;
523  register float *sp;
524  register float *buf1;
525
526  buf1 = buf;
527  if(ncoef > fsize) {/*allocate memory for full coeff. array and filter memory */    fsize = 0;
528    i = (ncoef+1)*2;
529    if(!((co = (float *)ckrealloc((void *)co, sizeof(float)*i)) &&
530	 (mem = (float *)ckrealloc((void *)mem, sizeof(float)*i)))) {
531      fprintf(stderr,"allocation problems in do_fir()\n");
532      return;
533    }
534    fsize = ncoef;
535  }
536
537  /* fill 2nd half with data */
538  for(i=ncoef, dp1=mem+ncoef-1; i-- > 0; )  *dp1++ = *buf++;
539
540  if(init & 1) {	/* Is the beginning of the signal in buf? */
541    /* Copy the half-filter and its mirror image into the coefficient array. */
542    for(i=ncoef-1, dp3=fc+ncoef-1, dp2=co, dp1 = co+((ncoef-1)*2),
543	integral = 0.0; i-- > 0; )
544      if(!invert) *dp1-- = *dp2++ = *dp3--;
545      else {
546	integral += (sum = *dp3--);
547	*dp1-- = *dp2++ = -sum;
548      }
549    if(!invert)  *dp1 = *dp3;	/* point of symmetry */
550    else {
551      integral *= 2;
552      integral += *dp3;
553      *dp1 = integral - *dp3;
554    }
555
556    for(i=ncoef-1, dp1=mem; i-- > 0; ) *dp1++ = 0;
557  }
558  else
559    for(i=ncoef-1, dp1=mem, sp=state; i-- > 0; ) *dp1++ = *sp++;
560
561  i = in_samps;
562  resid = 0;
563
564  k = (ncoef << 1) -1;	/* inner-product loop limit */
565
566  if(skip <= 1) {       /* never used */
567/*    *out_samps = i;
568    for( ; i-- > 0; ) {
569      for(j=k, dp1=mem, dp2=co, dp3=mem+1, sum = 0.0; j-- > 0;
570	  *dp1++ = *dp3++ )
571	sum += *dp2++ * *dp1;
572
573      *--dp1 = *buf++;
574      *bufo++ = (sum < 0.0)? sum -0.5 : sum +0.5;
575    }
576    if(init & 2) {
577      for(i=ncoef; i-- > 0; ) {
578	for(j=k, dp1=mem, dp2=co, dp3=mem+1, sum = 0.0; j-- > 0;
579	    *dp1++ = *dp3++ )
580	  sum += *dp2++ * *dp1;
581	*--dp1 = 0.0;
582	*bufo++ = (sum < 0)? sum -0.5 : sum +0.5;
583      }
584      *out_samps += ncoef;
585    }
586    return;
587*/
588  }
589  else {			/* skip points (e.g. for downsampling) */
590    /* the buffer end is padded with (ncoef-1) data points */
591    for( l=0 ; l < *out_samps; l++ ) {
592      for(j=k-skip, dp1=mem, dp2=co, dp3=mem+skip, sum=0.0; j-- >0;
593	  *dp1++ = *dp3++)
594	sum += *dp2++ * *dp1;
595      for(j=skip; j-- >0; *dp1++ = *buf++) /* new data to memory */
596	sum += *dp2++ * *dp1;
597      *bufo++ = (sum<0.0) ? sum -0.5f : sum +0.5f;
598    }
599    if(init & 2){
600      resid = in_samps - *out_samps * skip;
601      for(l=resid/skip; l-- >0; ){
602	for(j=k-skip, dp1=mem, dp2=co, dp3=mem+skip, sum=0.0; j-- >0;
603	    *dp1++ = *dp3++)
604	    sum += *dp2++ * *dp1;
605	for(j=skip; j-- >0; *dp1++ = 0.0)
606	  sum += *dp2++ * *dp1;
607	*bufo++ = (sum<0.0) ? sum -0.5f : sum +0.5f;
608	(*out_samps)++;
609      }
610    }
611    else
612      for(dp3=buf1+idx-ncoef+1, l=ncoef-1, sp=state; l-- >0; ) *sp++ = *dp3++;
613  }
614}
615
616/*      ----------------------------------------------------------      */
617static int lc_lin_fir(fc,nf,coef)
618/* create the coefficients for a symmetric FIR lowpass filter using the
619   window technique with a Hanning window. */
620register float	fc;
621float	*coef;
622int	*nf;
623{
624    register int	i, n;
625    register double	twopi, fn, c;
626
627    if(((*nf % 2) != 1))
628	*nf = *nf + 1;
629    n = (*nf + 1)/2;
630
631    /*  Compute part of the ideal impulse response (the sin(x)/x kernel). */
632    twopi = M_PI * 2.0;
633    coef[0] = (float) (2.0 * fc);
634    c = M_PI;
635    fn = twopi * fc;
636    for(i=1;i < n; i++) coef[i] = (float)(sin(i * fn)/(c * i));
637
638    /* Now apply a Hanning window to the (infinite) impulse response. */
639    /* (Probably should use a better window, like Kaiser...) */
640    fn = twopi/(double)(*nf);
641    for(i=0;i<n;i++)
642	coef[n-i-1] *= (float)((.5 - (.5 * cos(fn * ((double)i + 0.5)))));
643
644    return(TRUE);
645}
646
647
648/* ----------------------------------------------------------------------- */
649/* Use parabolic interpolation over the three points defining the peak
650 * vicinity to estimate the "true" peak. */
651static void peak(y, xp, yp)
652     float *y,			/* vector of length 3 defining peak */
653       *xp, *yp;  /* x,y values of parabolic peak fitting the input points. */
654{
655  register float a, c;
656
657  a = (float)((y[2]-y[1])+(.5*(y[0]-y[2])));
658  if(fabs(a) > .000001) {
659    *xp = c = (float)((y[0]-y[2])/(4.0*a));
660    *yp = y[1] - (a*c*c);
661  } else {
662    *xp = 0.0;
663    *yp = y[1];
664  }
665}
666
667/* A fundamental frequency estimation algorithm using the normalized
668   cross correlation function and dynamic programming.  The algorithm
669   implemented here is similar to that presented by B. Secrest and
670   G. Doddington, "An integrated pitch tracking algorithm for speech
671   systems", Proc. ICASSP-83, pp.1352-1355.  It is fully described
672   by D. Talkin, "A robust algorithm for ptich tracking (RAPT)", in
673   W. B. Kleijn & K. K. Paliwal (eds.) Speech Coding and Synthesis,
674   (New York: Elsevier, 1995). */
675
676/* For each frame, up to par->n_cands cross correlation peaks are
677   considered as F0 intervals.  Each is scored according to its within-
678   frame properties (relative amplitude, relative location), and
679   according to its connectivity with each of the candidates in the
680   previous frame.  An unvoiced hypothesis is also generated at each
681   frame and is considered in the light of voicing state change cost,
682   the quality of the cross correlation peak, and frequency continuity. */
683
684/* At each frame, each candidate has associated with it the following
685   items:
686	its peak value
687	its peak value modified by its within-frame properties
688	its location
689	the candidate # in the previous frame yielding the min. err.
690		(this is the optimum path pointer!)
691	its cumulative cost: (local cost + connectivity cost +
692		cumulative cost of its best-previous-frame-match). */
693
694/* Dynamic programming is then used to pick the best F0 trajectory and voicing
695   state given the local and transition costs for the entire utterance. */
696
697/* To avoid the necessity of computing the full crosscorrelation at
698   the input sample rate, the signal is downsampled; a full ccf is
699   computed at the lower frequency; interpolation is used to estimate the
700   location of the peaks at the higher sample rate; and the fine-grained
701   ccf is computed only in the vicinity of these estimated peak
702   locations. */
703
704
705/*
706 * READ_SIZE: length of input data frame in sec to read
707 * DP_CIRCULAR: determines the initial size of DP circular buffer in sec
708 * DP_HIST: stored frame history in second before checking for common path
709 *      DP_CIRCULAR > READ_SIZE, DP_CIRCULAR at least 2 times of DP_HIST
710 * DP_LIMIT: in case no convergence is found, DP frames of DP_LIMIT secs
711 *      are kept before output is forced by simply picking the lowest cost
712 *      path
713 */
714
715#define READ_SIZE 0.2
716#define DP_CIRCULAR 1.5
717#define DP_HIST 0.5
718#define DP_LIMIT 1.0
719
720/*
721 * stationarity parameters -
722 * STAT_WSIZE: window size in sec used in measuring frame energy/stationarity
723 * STAT_AINT: analysis interval in sec in measuring frame energy/stationarity
724 */
725#define STAT_WSIZE 0.030
726#define STAT_AINT 0.020
727
728/*
729 * headF points to current frame in the circular buffer,
730 * tailF points to the frame where tracks start
731 * cmpthF points to starting frame of converged path to backtrack
732 */
733
734static Frame *headF = NULL, *tailF = NULL, *cmpthF = NULL;
735
736static  int *pcands = NULL;	/* array for backtracking in convergence check */
737static int cir_buff_growth_count = 0;
738
739static int size_cir_buffer,	/* # of frames in circular DP buffer */
740           size_frame_hist,	/* # of frames required before convergence test */
741           size_frame_out,	/* # of frames before forcing output */
742           num_active_frames,	/* # of frames from tailF to headF */
743           output_buf_size;	/* # of frames allocated to output buffers */
744
745/*
746 * DP parameters
747 */
748static float tcost, tfact_a, tfact_s, frame_int, vbias, fdouble, wdur, ln2,
749             freqwt, lagwt;
750static int step, size, nlags, start, stop, ncomp, *locs = NULL;
751static short maxpeaks;
752
753static int wReuse = 0;  /* number of windows seen before resued */
754static Windstat *windstat = NULL;
755
756static float *f0p = NULL, *vuvp = NULL, *rms_speech = NULL,
757             *acpkp = NULL, *peaks = NULL;
758static int first_time = 1, pad;
759
760
761/*--------------------------------------------------------------------*/
762int
763get_Nframes(buffsize, pad, step)
764    long    buffsize;
765    int     pad, step;
766{
767  if (buffsize < pad)
768    return (0);
769  else
770    return ((buffsize - pad)/step);
771}
772
773
774/*--------------------------------------------------------------------*/
775int
776init_dp_f0(freq, par, buffsize, sdstep)
777    double	freq;
778    F0_params	*par;
779    long	*buffsize, *sdstep;
780{
781  int nframes;
782  int i;
783  int stat_wsize, agap, ind, downpatch;
784
785/*
786 * reassigning some constants
787 */
788
789  tcost = par->trans_cost;
790  tfact_a = par->trans_amp;
791  tfact_s = par->trans_spec;
792  vbias = par->voice_bias;
793  fdouble = par->double_cost;
794  frame_int = par->frame_step;
795
796  step = eround(frame_int * freq);
797  size = eround(par->wind_dur * freq);
798  frame_int = (float)(((float)step)/freq);
799  wdur = (float)(((float)size)/freq);
800  start = eround(freq / par->max_f0);
801  stop = eround(freq / par->min_f0);
802  nlags = stop - start + 1;
803  ncomp = size + stop + 1; /* # of samples required by xcorr
804			      comp. per fr. */
805  maxpeaks = 2 + (nlags/2);	/* maximum number of "peaks" findable in ccf */
806  ln2 = (float)log(2.0);
807  size_frame_hist = (int) (DP_HIST / frame_int);
808  size_frame_out = (int) (DP_LIMIT / frame_int);
809
810/*
811 * SET UP THE D.P. WEIGHTING FACTORS:
812 *      The intent is to make the effectiveness of the various fudge factors
813 *      independent of frame rate or sampling frequency.
814 */
815
816  /* Lag-dependent weighting factor to emphasize early peaks (higher freqs)*/
817  lagwt = par->lag_weight/stop;
818
819  /* Penalty for a frequency skip in F0 per frame */
820  freqwt = par->freq_weight/frame_int;
821
822  i = (int) (READ_SIZE *freq);
823  if(ncomp >= step) nframes = ((i-ncomp)/step ) + 1;
824  else nframes = i / step;
825
826  /* *buffsize is the number of samples needed to make F0 computation
827     of nframes DP frames possible.  The last DP frame is patched with
828     enough points so that F0 computation on it can be carried.  F0
829     computaion on each frame needs enough points to do
830
831     1) xcross or cross correlation measure:
832           enough points to do xcross - ncomp
833
834     2) stationarity measure:
835           enough to make 30 msec windowing possible - ind
836
837     3) downsampling:
838           enough to make filtering possible -- downpatch
839
840     So there are nframes whole DP frames, padded with pad points
841     to make the last frame F0 computation ok.
842
843  */
844
845  /* last point in data frame needs points of 1/2 downsampler filter length
846     long, 0.005 is the filter length used in downsampler */
847  downpatch = (((int) (freq * 0.005))+1) / 2;
848
849  stat_wsize = (int) (STAT_WSIZE * freq);
850  agap = (int) (STAT_AINT * freq);
851  ind = ( agap - stat_wsize ) / 2;
852  i = stat_wsize + ind;
853  pad = downpatch + ((i>ncomp) ? i:ncomp);
854  *buffsize = nframes * step + pad;
855  *sdstep = nframes * step;
856
857  /* Allocate space for the DP storage circularly linked data structure */
858
859  size_cir_buffer = (int) (DP_CIRCULAR / frame_int);
860
861  /* creating circularly linked data structures */
862  tailF = alloc_frame(nlags, par->n_cands);
863  headF = tailF;
864
865  /* link them up */
866  for(i=1; i<size_cir_buffer; i++){
867    headF->next = alloc_frame(nlags, par->n_cands);
868    headF->next->prev = headF;
869    headF = headF->next;
870  }
871  headF->next = tailF;
872  tailF->prev = headF;
873
874  headF = tailF;
875
876  /* Allocate sscratch array to use during backtrack convergence test. */
877  if( ! pcands ) {
878    pcands = (int *) ckalloc( par->n_cands * sizeof(int));
879    /*    spsassert(pcands,"can't allocate pathcands");*/
880  }
881
882  /* Allocate arrays to return F0 and related signals. */
883
884  /* Note: remember to compare *vecsize with size_frame_out, because
885     size_cir_buffer is not constant */
886  output_buf_size = size_cir_buffer;
887  rms_speech = (float*)ckalloc(sizeof(float) * output_buf_size);
888  /*  spsassert(rms_speech,"rms_speech ckalloc failed");*/
889  f0p = (float*)ckalloc(sizeof(float) * output_buf_size);
890  /*  spsassert(f0p,"f0p ckalloc failed");*/
891  vuvp = (float*)ckalloc(sizeof(float)* output_buf_size);
892  /*  spsassert(vuvp,"vuvp ckalloc failed");*/
893  acpkp = (float*)ckalloc(sizeof(float) * output_buf_size);
894  /*  spsassert(acpkp,"acpkp ckalloc failed");*/
895
896  /* Allocate space for peak location and amplitude scratch arrays. */
897  peaks = (float*)ckalloc(sizeof(float) * maxpeaks);
898  /*  spsassert(peaks,"peaks ckalloc failed");*/
899  locs = (int*)ckalloc(sizeof(int) * maxpeaks);
900  /*  spsassert(locs, "locs ckalloc failed");*/
901
902  /* Initialise the retrieval/saving scheme of window statistic measures */
903  wReuse = agap / step;
904  if (wReuse){
905      windstat = (Windstat *) ckalloc( wReuse * sizeof(Windstat));
906      /*      spsassert(windstat, "windstat ckalloc failed");*/
907      for(i=0; i<wReuse; i++){
908	  windstat[i].err = 0;
909	  windstat[i].rms = 0;
910      }
911  }
912
913  if(debug_level){
914    Fprintf(stderr, "done with initialization:\n");
915    Fprintf(stderr,
916	    " size_cir_buffer:%d  xcorr frame size:%d start lag:%d nlags:%d\n",
917	    size_cir_buffer, size, start, nlags);
918  }
919
920  num_active_frames = 0;
921  first_time = 1;
922
923  return(0);
924}
925
926static Stat *get_stationarity();
927
928/*--------------------------------------------------------------------*/
929int
930dp_f0(fdata, buff_size, sdstep, freq,
931      par, f0p_pt, vuvp_pt, rms_speech_pt, acpkp_pt, vecsize, last_time)
932    float	*fdata;
933    int		buff_size, sdstep;
934    double	freq;
935    F0_params	*par;		/* analysis control parameters */
936    float	**f0p_pt, **vuvp_pt, **rms_speech_pt, **acpkp_pt;
937    int		*vecsize, last_time;
938{
939  float  maxval, engref, *sta, *rms_ratio, *dsdata, *downsample();
940  register float ttemp, ftemp, ft1, ferr, err, errmin;
941  register int  i, j, k, loc1, loc2;
942  int   nframes, maxloc, ncand, ncandp, minloc,
943        decimate, samsds;
944
945  Stat *stat = NULL;
946
947  nframes = get_Nframes((long) buff_size, pad, step); /* # of whole frames */
948
949  if(debug_level)
950    Fprintf(stderr,
951	    "******* Computing %d dp frames ******** from %d points\n", nframes, buff_size);
952
953  /* Now downsample the signal for coarse peak estimates. */
954
955  decimate = (int)(freq/2000.0);    /* downsample to about 2kHz */
956  if (decimate <= 1)
957    dsdata = fdata;
958  else {
959    samsds = ((nframes-1) * step + ncomp) / decimate;
960    dsdata = downsample(fdata, buff_size, sdstep, freq, &samsds, decimate,
961			first_time, last_time);
962    if (!dsdata) {
963      Fprintf(stderr, "can't get downsampled data.\n");
964      return 1;
965    }
966  }
967
968  /* Get a function of the "stationarity" of the speech signal. */
969
970  stat = get_stationarity(fdata, freq, buff_size, nframes, step, first_time);
971  if (!stat) {
972    Fprintf(stderr, "can't get stationarity\n");
973    return(1);
974  }
975  sta = stat->stat;
976  rms_ratio = stat->rms_ratio;
977
978  /***********************************************************************/
979  /* MAIN FUNDAMENTAL FREQUENCY ESTIMATION LOOP */
980  /***********************************************************************/
981  if(!first_time && nframes > 0) headF = headF->next;
982
983  for(i = 0; i < nframes; i++) {
984
985    /* NOTE: This buffer growth provision is probably not necessary.
986       It was put in (with errors) by Derek Lin and apparently never
987       tested.  My tests and analysis suggest it is completely
988       superfluous. DT 9/5/96 */
989    /* Dynamically allocating more space for the circular buffer */
990    if(headF == tailF->prev){
991      Frame *frm;
992
993      if(cir_buff_growth_count > 5){
994	Fprintf(stderr,
995		"too many requests (%d) for dynamically allocating space.\n   There may be a problem in finding converged path.\n",cir_buff_growth_count);
996	return(1);
997      }
998      if(debug_level)
999	Fprintf(stderr, "allocating %d more frames for DP circ. buffer.\n", size_cir_buffer);
1000      frm = alloc_frame(nlags, par->n_cands);
1001      headF->next = frm;
1002      frm->prev = headF;
1003      for(k=1; k<size_cir_buffer; k++){
1004	frm->next = alloc_frame(nlags, par->n_cands);
1005	frm->next->prev = frm;
1006	frm = frm->next;
1007      }
1008      frm->next = tailF;
1009      tailF->prev = frm;
1010      cir_buff_growth_count++;
1011    }
1012
1013    headF->rms = stat->rms[i];
1014    get_fast_cands(fdata, dsdata, i, step, size, decimate, start,
1015		   nlags, &engref, &maxloc,
1016		   &maxval, headF->cp, peaks, locs, &ncand, par);
1017
1018    /*    Move the peak value and location arrays into the dp structure */
1019    {
1020      register float *ftp1, *ftp2;
1021      register short *sp1;
1022      register int *sp2;
1023
1024      for(ftp1 = headF->dp->pvals, ftp2 = peaks,
1025	  sp1 = headF->dp->locs, sp2 = locs, j=ncand; j--; ) {
1026	*ftp1++ = *ftp2++;
1027	*sp1++ = *sp2++;
1028      }
1029      *sp1 = -1;		/* distinguish the UNVOICED candidate */
1030      *ftp1 = maxval;
1031      headF->dp->mpvals[ncand] = vbias+maxval; /* (high cost if cor. is high)*/
1032    }
1033
1034    /* Apply a lag-dependent weight to the peaks to encourage the selection
1035       of the first major peak.  Translate the modified peak values into
1036       costs (high peak ==> low cost). */
1037    for(j=0; j < ncand; j++){
1038      ftemp = 1.0f - ((float)locs[j] * lagwt);
1039      headF->dp->mpvals[j] = 1.0f - (peaks[j] * ftemp);
1040    }
1041    ncand++;			/* include the unvoiced candidate */
1042    headF->dp->ncands = ncand;
1043
1044    /*********************************************************************/
1045    /*    COMPUTE THE DISTANCE MEASURES AND ACCUMULATE THE COSTS.       */
1046    /*********************************************************************/
1047
1048    ncandp = headF->prev->dp->ncands;
1049    for(k=0; k<ncand; k++){	/* for each of the current candidates... */
1050      minloc = 0;
1051      errmin = FLT_MAX;
1052      if((loc2 = headF->dp->locs[k]) > 0) { /* current cand. is voiced */
1053	for(j=0; j<ncandp; j++){ /* for each PREVIOUS candidate... */
1054	  /*    Get cost due to inter-frame period change. */
1055	  loc1 = headF->prev->dp->locs[j];
1056	  if (loc1 > 0) { /* prev. was voiced */
1057	    ftemp = (float) log(((double) loc2) / loc1);
1058	    ttemp = (float) fabs(ftemp);
1059	    ft1 = (float) (fdouble + fabs(ftemp + ln2));
1060	    if (ttemp > ft1)
1061	      ttemp = ft1;
1062	    ft1 = (float) (fdouble + fabs(ftemp - ln2));
1063	    if (ttemp > ft1)
1064	      ttemp = ft1;
1065	    ferr = ttemp * freqwt;
1066	  } else {		/* prev. was unvoiced */
1067	    ferr = tcost + (tfact_s * sta[i]) + (tfact_a / rms_ratio[i]);
1068	  }
1069	  /*    Add in cumulative cost associated with previous peak. */
1070	  err = ferr + headF->prev->dp->dpvals[j];
1071	  if(err < errmin){	/* find min. cost */
1072	    errmin = err;
1073	    minloc = j;
1074	  }
1075	}
1076      } else {			/* this is the unvoiced candidate */
1077	for(j=0; j<ncandp; j++){ /* for each PREVIOUS candidate... */
1078
1079	  /*    Get voicing transition cost. */
1080	  if (headF->prev->dp->locs[j] > 0) { /* previous was voiced */
1081	    ferr = tcost + (tfact_s * sta[i]) + (tfact_a * rms_ratio[i]);
1082	  }
1083	  else
1084	    ferr = 0.0;
1085	  /*    Add in cumulative cost associated with previous peak. */
1086	  err = ferr + headF->prev->dp->dpvals[j];
1087	  if(err < errmin){	/* find min. cost */
1088	    errmin = err;
1089	    minloc = j;
1090	  }
1091	}
1092      }
1093      /* Now have found the best path from this cand. to prev. frame */
1094      if (first_time && i==0) {		/* this is the first frame */
1095	headF->dp->dpvals[k] = headF->dp->mpvals[k];
1096	headF->dp->prept[k] = 0;
1097      } else {
1098	headF->dp->dpvals[k] = errmin + headF->dp->mpvals[k];
1099	headF->dp->prept[k] = minloc;
1100      }
1101    } /*    END OF THIS DP FRAME */
1102
1103    if (i < nframes - 1)
1104      headF = headF->next;
1105
1106    if (debug_level >= 2) {
1107      Fprintf(stderr,"%d engref:%10.0f max:%7.5f loc:%4d\n",
1108	      i,engref,maxval,maxloc);
1109    }
1110
1111  } /* end for (i ...) */
1112
1113  /***************************************************************/
1114  /* DONE WITH FILLING DP STRUCTURES FOR THE SET OF SAMPLED DATA */
1115  /*    NOW FIND A CONVERGED DP PATH                             */
1116  /***************************************************************/
1117
1118  *vecsize = 0;			/* # of output frames returned */
1119
1120  num_active_frames += nframes;
1121
1122  if( num_active_frames >= size_frame_hist  || last_time ){
1123    Frame *frm;
1124    int  num_paths, best_cand, frmcnt, checkpath_done = 1;
1125    float patherrmin;
1126
1127    if(debug_level)
1128      Fprintf(stderr, "available frames for backtracking: %d\n",
1129num_active_frames);
1130
1131    patherrmin = FLT_MAX;
1132    best_cand = 0;
1133    num_paths = headF->dp->ncands;
1134
1135    /* Get the best candidate for the final frame and initialize the
1136       paths' backpointers. */
1137    frm = headF;
1138    for(k=0; k < num_paths; k++) {
1139      if (patherrmin > headF->dp->dpvals[k]){
1140	patherrmin = headF->dp->dpvals[k];
1141	best_cand = k;	/* index indicating the best candidate at a path */
1142      }
1143      pcands[k] = frm->dp->prept[k];
1144    }
1145
1146    if(last_time){     /* Input data was exhausted. force final outputs. */
1147      cmpthF = headF;		/* Use the current frame as starting point. */
1148    } else {
1149      /* Starting from the most recent frame, trace back each candidate's
1150	 best path until reaching a common candidate at some past frame. */
1151      frmcnt = 0;
1152      while (1) {
1153	frm = frm->prev;
1154	frmcnt++;
1155	checkpath_done = 1;
1156	for(k=1; k < num_paths; k++){ /* Check for convergence. */
1157	  if(pcands[0] != pcands[k])
1158	    checkpath_done = 0;
1159	}
1160	if( ! checkpath_done) { /* Prepare for checking at prev. frame. */
1161	  for(k=0; k < num_paths; k++){
1162	    pcands[k] = frm->dp->prept[pcands[k]];
1163	  }
1164	} else {	/* All paths have converged. */
1165	  cmpthF = frm;
1166	  best_cand = pcands[0];
1167	  if(debug_level)
1168	    Fprintf(stderr,
1169		    "paths went back %d frames before converging\n",frmcnt);
1170	  break;
1171	}
1172	if(frm == tailF){	/* Used all available data? */
1173	  if( num_active_frames < size_frame_out) { /* Delay some more? */
1174	    checkpath_done = 0; /* Yes, don't backtrack at this time. */
1175	    cmpthF = NULL;
1176	  } else {		/* No more delay! Force best-guess output. */
1177	    checkpath_done = 1;
1178	    cmpthF = headF;
1179	    /*	    Fprintf(stderr,
1180		    "WARNING: no converging path found after going back %d frames, will use the lowest cost path\n",num_active_frames);*/
1181	  }
1182	  break;
1183	} /* end if (frm ...) */
1184      }	/* end while (1) */
1185    } /* end if (last_time) ... else */
1186
1187    /*************************************************************/
1188    /* BACKTRACKING FROM cmpthF (best_cand) ALL THE WAY TO tailF    */
1189    /*************************************************************/
1190    i = 0;
1191    frm = cmpthF;	/* Start where convergence was found (or faked). */
1192    while( frm != tailF->prev && checkpath_done){
1193      if( i == output_buf_size ){ /* Need more room for outputs? */
1194	output_buf_size *= 2;
1195	if(debug_level)
1196	  Fprintf(stderr,
1197		  "reallocating space for output frames: %d\n",
1198		  output_buf_size);
1199	rms_speech = (float *) ckrealloc((void *) rms_speech,
1200				       sizeof(float) * output_buf_size);
1201	/*	spsassert(rms_speech, "rms_speech realloc failed in dp_f0()");*/
1202	f0p = (float *) ckrealloc((void *) f0p,
1203				sizeof(float) * output_buf_size);
1204	/*	spsassert(f0p, "f0p realloc failed in dp_f0()");*/
1205	vuvp = (float *) ckrealloc((void *) vuvp, sizeof(float) * output_buf_size);
1206	/*	spsassert(vuvp, "vuvp realloc failed in dp_f0()");*/
1207	acpkp = (float *) ckrealloc((void *) acpkp, sizeof(float) * output_buf_size);
1208	/*	spsassert(acpkp, "acpkp realloc failed in dp_f0()");*/
1209      }
1210      rms_speech[i] = frm->rms;
1211      acpkp[i] =  frm->dp->pvals[best_cand];
1212      loc1 = frm->dp->locs[best_cand];
1213      vuvp[i] = 1.0;
1214      best_cand = frm->dp->prept[best_cand];
1215      ftemp = (float) loc1;
1216      if(loc1 > 0) {		/* Was f0 actually estimated for this frame? */
1217	if (loc1 > start && loc1 < stop) { /* loc1 must be a local maximum. */
1218	  float cormax, cprev, cnext, den;
1219
1220	  j = loc1 - start;
1221	  cormax = frm->cp->correl[j];
1222	  cprev = frm->cp->correl[j+1];
1223	  cnext = frm->cp->correl[j-1];
1224	  den = (float) (2.0 * ( cprev + cnext - (2.0 * cormax) ));
1225	  /*
1226	   * Only parabolic interpolate if cormax is indeed a local
1227	   * turning point. Find peak of curve that goes though the 3 points
1228	   */
1229
1230	  if (fabs(den) > 0.000001)
1231	    ftemp += 2.0f - ((((5.0f*cprev)+(3.0f*cnext)-(8.0f*cormax))/den));
1232	}
1233	f0p[i] = (float) (freq/ftemp);
1234      } else {		/* No valid estimate; just fake some arbitrary F0. */
1235	f0p[i] = 0;
1236	vuvp[i] = 0.0;
1237      }
1238      frm = frm->prev;
1239
1240      if (debug_level >= 2)
1241	Fprintf(stderr," i:%4d%8.1f%8.1f\n",i,f0p[i],vuvp[i]);
1242      /* f0p[i] starts from the most recent one */
1243      /* Need to reverse the order in the calling function */
1244      i++;
1245    } /* end while() */
1246    if (checkpath_done){
1247      *vecsize = i;
1248      tailF = cmpthF->next;
1249      num_active_frames -= *vecsize;
1250    }
1251  } /* end if() */
1252
1253  if (debug_level)
1254    Fprintf(stderr, "writing out %d frames.\n", *vecsize);
1255
1256  *f0p_pt = f0p;
1257  *vuvp_pt = vuvp;
1258  *acpkp_pt = acpkp;
1259  *rms_speech_pt = rms_speech;
1260  /*  *acpkp_pt = acpkp;*/
1261
1262  if(first_time) first_time = 0;
1263  return(0);
1264}
1265
1266
1267/*--------------------------------------------------------------------*/
1268Frame *
1269alloc_frame(nlags, ncands)
1270    int     nlags, ncands;
1271{
1272  Frame *frm;
1273  int j;
1274
1275  frm = (Frame*)ckalloc(sizeof(Frame));
1276  frm->dp = (Dprec *) ckalloc(sizeof(Dprec));
1277  /*  spsassert(frm->dp,"frm->dp ckalloc failed in alloc_frame");*/
1278  frm->dp->ncands = 0;
1279  frm->cp = (Cross *) ckalloc(sizeof(Cross));
1280  /*  spsassert(frm->cp,"frm->cp ckalloc failed in alloc_frame");*/
1281  frm->cp->correl = (float *) ckalloc(sizeof(float) * nlags);
1282  /*  spsassert(frm->cp->correl, "frm->cp->correl ckalloc failed");*/
1283  /* Allocate space for candidates and working arrays. */
1284  frm->dp->locs = (short*)ckalloc(sizeof(short) * ncands);
1285  /*  spsassert(frm->dp->locs,"frm->dp->locs ckalloc failed in alloc_frame()");*/
1286  frm->dp->pvals = (float*)ckalloc(sizeof(float) * ncands);
1287/*  spsassert(frm->dp->pvals,"frm->dp->pvals ckalloc failed in alloc_frame()");*/
1288  frm->dp->mpvals = (float*)ckalloc(sizeof(float) * ncands);
1289  /*  spsassert(frm->dp->mpvals,"frm->dp->mpvals ckalloc failed in alloc_frame()");*/
1290  frm->dp->prept = (short*)ckalloc(sizeof(short) * ncands);
1291  /*  spsassert(frm->dp->prept,"frm->dp->prept ckalloc failed in alloc_frame()");*/
1292  frm->dp->dpvals = (float*)ckalloc(sizeof(float) * ncands);
1293  /*  spsassert(frm->dp->dpvals,"frm->dp->dpvals ckalloc failed in alloc_frame()");*/
1294
1295  /*  Initialize the cumulative DP costs to zero */
1296  for(j = ncands-1; j >= 0; j--)
1297    frm->dp->dpvals[j] = 0.0;
1298
1299  return(frm);
1300}
1301
1302
1303/*--------------------------------------------------------------------*/
1304/* push window stat to stack, and pop the oldest one */
1305
1306static int
1307save_windstat(rho, order, err, rms)
1308    float   *rho;
1309    int     order;
1310    float   err;
1311    float   rms;
1312{
1313    int i,j;
1314
1315    if(wReuse > 1){               /* push down the stack */
1316	for(j=1; j<wReuse; j++){
1317	    for(i=0;i<=order; i++) windstat[j-1].rho[i] = windstat[j].rho[i];
1318	    windstat[j-1].err = windstat[j].err;
1319	    windstat[j-1].rms = windstat[j].rms;
1320	}
1321	for(i=0;i<=order; i++) windstat[wReuse-1].rho[i] = rho[i]; /*save*/
1322	windstat[wReuse-1].err = (float) err;
1323	windstat[wReuse-1].rms = (float) rms;
1324	return 1;
1325    } else if (wReuse == 1) {
1326	for(i=0;i<=order; i++) windstat[0].rho[i] = rho[i];  /* save */
1327	windstat[0].err = (float) err;
1328	windstat[0].rms = (float) rms;
1329	return 1;
1330    } else
1331	return 0;
1332}
1333
1334
1335/*--------------------------------------------------------------------*/
1336static int
1337retrieve_windstat(rho, order, err, rms)
1338    float   *rho;
1339    int     order;
1340    float   *err;
1341    float   *rms;
1342{
1343    Windstat wstat;
1344    int i;
1345
1346    if(wReuse){
1347	wstat = windstat[0];
1348	for(i=0; i<=order; i++) rho[i] = wstat.rho[i];
1349	*err = wstat.err;
1350	*rms = wstat.rms;
1351	return 1;
1352    }
1353    else return 0;
1354}
1355
1356
1357/*--------------------------------------------------------------------*/
1358static float
1359get_similarity(order, size, pdata, cdata,
1360	       rmsa, rms_ratio, pre, stab, w_type, init)
1361    int     order, size;
1362    float   *pdata, *cdata;
1363    float   *rmsa, *rms_ratio, pre, stab;
1364    int     w_type, init;
1365{
1366  float rho3[BIGSORD+1], err3, rms3, rmsd3, b0, t, a2[BIGSORD+1],
1367      rho1[BIGSORD+1], a1[BIGSORD+1], b[BIGSORD+1], err1, rms1, rmsd1;
1368  float xitakura(), wind_energy();
1369  void xa_to_aca ();
1370  int xlpc();
1371
1372/* (In the lpc() calls below, size-1 is used, since the windowing and
1373   preemphasis function assumes an extra point is available in the
1374   input data array.  This condition is apparently no longer met after
1375   Derek's modifications.) */
1376
1377  /* get current window stat */
1378  xlpc(order, stab, size-1, cdata,
1379      a2, rho3, (float *) NULL, &err3, &rmsd3, pre, w_type);
1380  rms3 = wind_energy(cdata, size, w_type);
1381
1382  if(!init) {
1383      /* get previous window stat */
1384      if( !retrieve_windstat(rho1, order, &err1, &rms1)){
1385	  xlpc(order, stab, size-1, pdata,
1386	      a1, rho1, (float *) NULL, &err1, &rmsd1, pre, w_type);
1387	  rms1 = wind_energy(pdata, size, w_type);
1388      }
1389      xa_to_aca(a2+1,b,&b0,order);
1390      t = xitakura(order,b,&b0,rho1+1,&err1) - .8f;
1391      if(rms1 > 0.0)
1392	  *rms_ratio = (0.001f + rms3)/rms1;
1393      else
1394	  if(rms3 > 0.0)
1395	      *rms_ratio = 2.0;	/* indicate some energy increase */
1396	  else
1397	      *rms_ratio = 1.0;	/* no change */
1398  } else {
1399      *rms_ratio = 1.0;
1400      t = 10.0;
1401  }
1402  *rmsa = rms3;
1403  save_windstat( rho3, order, err3, rms3);
1404  return((float)(0.2/t));
1405}
1406
1407
1408/* -------------------------------------------------------------------- */
1409/* This is an ad hoc signal stationarity function based on Itakura
1410 * distance and relative amplitudes.
1411 */
1412/*
1413  This illustrates the window locations when the very first frame is read.
1414  It shows an example where each frame step |  .  | is 10 msec.  The
1415  frame step size is variable.  The window size is always 30 msec.
1416  The window centers '*' is always 20 msec apart.
1417  The windows cross each other right at the center of the DP frame, or
1418  where the '.' is.
1419
1420                          ---------*---------   current window
1421
1422              ---------*---------  previous window
1423
1424  |  .  |  .  |  .  |  .  |  .  |  .  |  .  |  .  |  .  |
1425              ^           ^  ^
1426              ^           ^  ^
1427              ^           ^  fdata
1428              ^           ^
1429              ^           q
1430	      p
1431
1432                          ---
1433                          ind
1434
1435  fdata, q, p, ind, are variables used below.
1436
1437*/
1438
1439static Stat *stat = NULL;
1440static float *mem = NULL;
1441
1442static Stat*
1443get_stationarity(fdata, freq, buff_size, nframes, frame_step, first_time)
1444    float   *fdata;
1445    double  freq;
1446    int     buff_size, nframes, frame_step, first_time;
1447{
1448  static int nframes_old = 0, memsize;
1449  float preemp = 0.4f, stab = 30.0f;
1450  float *p, *q, *r, *datend;
1451  int ind, i, j, m, size, order, agap, w_type = 3;
1452
1453  agap = (int) (STAT_AINT *freq);
1454  size = (int) (STAT_WSIZE * freq);
1455  ind = (agap - size) / 2;
1456
1457  if( nframes_old < nframes || !stat || first_time){
1458    /* move this to init_dp_f0() later */
1459    nframes_old = nframes;
1460    if(stat){
1461      ckfree((char *) stat->stat);
1462      ckfree((char *) stat->rms);
1463      ckfree((char *) stat->rms_ratio);
1464      ckfree((char *) stat);
1465    }
1466    if (mem) ckfree((void *)mem);
1467    stat = (Stat *) ckalloc(sizeof(Stat));
1468    /*    spsassert(stat,"stat ckalloc failed in get_stationarity");*/
1469    stat->stat = (float*)ckalloc(sizeof(float)*nframes);
1470    /*    spsassert(stat->stat,"stat->stat ckalloc failed in get_stationarity");*/
1471    stat->rms = (float*)ckalloc(sizeof(float)*nframes);
1472    /*    spsassert(stat->rms,"stat->rms ckalloc failed in get_stationarity");*/
1473    stat->rms_ratio = (float*)ckalloc(sizeof(float)*nframes);
1474    /*    spsassert(stat->rms_ratio,"stat->ratio ckalloc failed in get_stationarity");*/
1475    memsize = (int) (STAT_WSIZE * freq) + (int) (STAT_AINT * freq);
1476    mem = (float *) ckalloc( sizeof(float) * memsize);
1477    /*    spsassert(mem, "mem ckalloc failed in get_stationarity()");*/
1478    for(j=0; j<memsize; j++) mem[j] = 0;
1479  }
1480
1481  if(nframes == 0) return(stat);
1482
1483  q = fdata + ind;
1484  datend = fdata + buff_size;
1485
1486  if((order = (int) (2.0 + (freq/1000.0))) > BIGSORD) {
1487    Fprintf(stderr,
1488	    "Optimim order (%d) exceeds that allowable (%d); reduce Fs\n",order, BIGSORD);
1489    order = BIGSORD;
1490  }
1491
1492  /* prepare for the first frame */
1493  for(j=memsize/2, i=0; j<memsize; j++, i++) mem[j] = fdata[i];
1494
1495  /* never run over end of frame, should already taken care of when read */
1496
1497  for(j=0, p = q - agap; j < nframes; j++, p += frame_step, q += frame_step){
1498      if( (p >= fdata) && (q >= fdata) && ( q + size <= datend) )
1499	  stat->stat[j] = get_similarity(order,size, p, q,
1500					     &(stat->rms[j]),
1501					     &(stat->rms_ratio[j]),preemp,
1502					     stab,w_type, 0);
1503      else {
1504	  if(first_time) {
1505	      if( (p < fdata) && (q >= fdata) && (q+size <=datend) )
1506		  stat->stat[j] = get_similarity(order,size, NULL, q,
1507						     &(stat->rms[j]),
1508						     &(stat->rms_ratio[j]),
1509						     preemp,stab,w_type, 1);
1510	      else{
1511		  stat->rms[j] = 0.0;
1512		  stat->stat[j] = 0.01f * 0.2f;   /* a big transition */
1513		  stat->rms_ratio[j] = 1.0;   /* no amplitude change */
1514	      }
1515	  } else {
1516	      if( (p<fdata) && (q+size <=datend) ){
1517		  stat->stat[j] = get_similarity(order,size, mem,
1518						     mem + (memsize/2) + ind,
1519						     &(stat->rms[j]),
1520						     &(stat->rms_ratio[j]),
1521						     preemp, stab,w_type, 0);
1522		  /* prepare for the next frame_step if needed */
1523		  if(p + frame_step < fdata ){
1524		      for( m=0; m<(memsize-frame_step); m++)
1525			  mem[m] = mem[m+frame_step];
1526		      r = q + size;
1527		      for( m=0; m<frame_step; m++)
1528			  mem[memsize-frame_step+m] = *r++;
1529		  }
1530	      }
1531	  }
1532      }
1533  }
1534
1535  /* last frame, prepare for next call */
1536  for(j=(memsize/2)-1, p=fdata + (nframes * frame_step)-1; j>=0 && p>=fdata; j--)
1537    mem[j] = *p--;
1538  return(stat);
1539}
1540
1541
1542/* -------------------------------------------------------------------- */
1543/*	Round the argument to the nearest integer.			*/
1544/*
1545int
1546eround(flnum)
1547    double  flnum;
1548{
1549  return((flnum >= 0.0) ? (int)(flnum + 0.5) : (int)(flnum - 0.5));
1550}
1551
1552*/
1553void free_dp_f0()
1554{
1555  int i;
1556  Frame *frm, *next;
1557
1558  ckfree((void *)pcands);
1559  pcands = NULL;
1560
1561  ckfree((void *)rms_speech);
1562  rms_speech = NULL;
1563
1564  ckfree((void *)f0p);
1565  f0p = NULL;
1566
1567  ckfree((void *)vuvp);
1568  vuvp = NULL;
1569
1570  ckfree((void *)acpkp);
1571  acpkp = NULL;
1572
1573  ckfree((void *)peaks);
1574  peaks = NULL;
1575
1576  ckfree((void *)locs);
1577  locs = NULL;
1578
1579  if (wReuse) {
1580    ckfree((void *)windstat);
1581    windstat = NULL;
1582  }
1583
1584  frm = headF;
1585
1586  for(i = 0; i < size_cir_buffer; i++) {
1587    next = frm->next;
1588    ckfree((void *)frm->cp->correl);
1589    ckfree((void *)frm->dp->locs);
1590    ckfree((void *)frm->dp->pvals);
1591    ckfree((void *)frm->dp->mpvals);
1592    ckfree((void *)frm->dp->prept);
1593    ckfree((void *)frm->dp->dpvals);
1594    ckfree((void *)frm->cp);
1595    ckfree((void *)frm->dp);
1596    ckfree((void *)frm);
1597    frm = next;
1598  }
1599  headF = NULL;
1600  tailF = NULL;
1601
1602  ckfree((void *)stat->stat);
1603  ckfree((void *)stat->rms);
1604  ckfree((void *)stat->rms_ratio);
1605
1606  ckfree((void *)stat);
1607  stat = NULL;
1608
1609  ckfree((void *)mem);
1610  mem = NULL;
1611}
1612
1613int
1614cGet_f0(Sound *sound, Tcl_Interp *interp, float **outlist, int *length)
1615{
1616  float *fdata;
1617  int done;
1618  long buff_size, actsize;
1619  double sf, start_time;
1620  F0_params *par, *read_f0_params();
1621  float *f0p, *vuvp, *rms_speech, *acpkp;
1622  int i, vecsize;
1623  int init_dp_f0(), dp_f0();
1624  static int framestep = -1;
1625  long sdstep = 0, total_samps;
1626  int ndone = 0;
1627  Tcl_Obj *list;
1628  float *tmp = (float *)ckalloc(sizeof(float) * (5 + sound->length / 80));
1629  int count = 0;
1630  int startpos = 0, endpos = -1;
1631
1632  if (sound->cmdPtr != NULL) {
1633    Tcl_DecrRefCount(sound->cmdPtr);
1634    sound->cmdPtr = NULL;
1635  }
1636
1637  par = (F0_params *) ckalloc(sizeof(F0_params));
1638  par->cand_thresh = 0.3f;
1639  par->lag_weight = 0.3f;
1640  par->freq_weight = 0.02f;
1641  par->trans_cost = 0.005f;
1642  par->trans_amp = 0.5f;
1643  par->trans_spec = 0.5f;
1644  par->voice_bias = 0.0f;
1645  par->double_cost = 0.35f;
1646  par->min_f0 = 50;
1647  par->max_f0 = 550;
1648  par->frame_step = 0.01f;
1649  par->wind_dur = 0.0075f;
1650  par->n_cands = 20;
1651  par->mean_f0 = 200;          /* unused */
1652  par->mean_f0_weight = 0.0f;  /* unused */
1653  par->conditioning = 0;       /* unused */
1654
1655  if (startpos < 0) startpos = 0;
1656  if (endpos >= (sound->length - 1) || endpos == -1)
1657    endpos = sound->length - 1;
1658  if (startpos > endpos) return TCL_OK;
1659
1660  sf = (double) sound->samprate;
1661
1662  if (framestep > 0)  /* If a value was specified with -S, use it. */
1663    par->frame_step = (float) (framestep / sf);
1664  start_time = 0.0f;
1665  if(check_f0_params(interp, par, sf)){
1666    Tcl_AppendResult(interp, "invalid/inconsistent parameters -- exiting.", NULL);
1667    return TCL_ERROR;
1668  }
1669
1670  total_samps = endpos - startpos + 1;
1671  if(total_samps < ((par->frame_step * 2.0) + par->wind_dur) * sf) {
1672    Tcl_AppendResult(interp, "input range too small for analysis by get_f0.", NULL);
1673    return TCL_ERROR;
1674  }
1675  /* Initialize variables in get_f0.c; allocate data structures;
1676   * determine length and overlap of input frames to read.
1677   */
1678  if (init_dp_f0(sf, par, &buff_size, &sdstep)
1679      || buff_size > INT_MAX || sdstep > INT_MAX)
1680  {
1681    Tcl_AppendResult(interp, "problem in init_dp_f0().", NULL);
1682    return TCL_ERROR;
1683  }
1684
1685  if (debug_level)
1686    Fprintf(stderr, "init_dp_f0 returned buff_size %ld, sdstep %ld.\n",buff_size, sdstep);
1687
1688  if (buff_size > total_samps)
1689    buff_size = total_samps;
1690
1691  actsize = min(buff_size, sound->length);
1692  fdata = (float *) ckalloc(sizeof(float) * max(buff_size, sdstep));
1693  list = Tcl_NewListObj(0, NULL);
1694  /*  Snack_ProgressCallback(sound->cmdPtr, interp, "Computing pitch", 0.0);*/
1695  ndone = startpos;
1696
1697  while (TRUE) {
1698    done = (actsize < buff_size) || (total_samps == buff_size);
1699    Snack_GetSoundData(sound, ndone, fdata, actsize);
1700    /*if (sound->debug > 0) Snack_WriteLog("dp_f0...\n");*/
1701    if (dp_f0(fdata, (int) actsize, (int) sdstep, sf, par,
1702	      &f0p, &vuvp, &rms_speech, &acpkp, &vecsize, done)) {
1703      Tcl_AppendResult(interp, "problem in dp_f0().", NULL);
1704      return TCL_ERROR;
1705    }
1706    /*if (sound->debug > 0) Snack_WriteLogInt("done dp_f0",vecsize);*/
1707    for (i = vecsize - 1; i >= 0; i--) {
1708      tmp[count] = f0p[i];
1709      count++;
1710    }
1711
1712    if (done) break;
1713
1714    ndone += sdstep;
1715    actsize = min(buff_size, sound->length - ndone);
1716    total_samps -= sdstep;
1717
1718    if (actsize > total_samps)
1719      actsize = total_samps;
1720
1721    /*    if (1) {
1722      int res = Snack_ProgressCallback(sound->cmdPtr, interp, "Computing pitch", (double) ndone / sound->length);
1723      if (res != TCL_OK) {
1724	return TCL_ERROR;
1725      }
1726      }*/
1727  }
1728
1729  /*Snack_ProgressCallback(sound->cmdPtr, interp, "Computing pitch", 1.0);*/
1730
1731  ckfree((void *)fdata);
1732
1733  ckfree((void *)par);
1734
1735  free_dp_f0();
1736
1737  *outlist = tmp;
1738  *length = count;
1739  /*Tcl_SetObjResult(interp, list);*/
1740
1741  return TCL_OK;
1742}
1743