1/* formant.c */
2/*
3 * This software has been licensed to the Centre of Speech Technology, KTH
4 * by AT&T Corp. and Microsoft Corp. with the terms in the accompanying
5 * file BSD.txt, which is a BSD style license.
6 *
7 *    "Copyright (c) 1987-1990  AT&T, Inc.
8 *    "Copyright (c) 1986-1990  Entropic Speech, Inc.
9 *    "Copyright (c) 1990-1994  Entropic Research Laboratory, Inc.
10 *                   All rights reserved"
11 *
12 * Written by:  David Talkin
13 * Revised by: John Shore
14 *
15 */
16
17#include "snack.h"
18#include <stdio.h>
19#include <math.h>
20#include <stdlib.h>
21#include <string.h>
22#include "jkFormant.h"
23
24
25int debug = 0;
26int w_verbose = 0;
27
28/*	dpform.c       */
29
30/* a formant tracker based on LPC polynomial roots and dynamic programming */
31				/***/
32/* At each frame, the LPC poles are ordered by increasing frequency.  All
33   "reasonable" mappings of the poles to F1, F2, ... are performed.
34   The cost of "connecting" each of these mappings with each of the mappings
35   in the previous frame is computed.  The lowest cost connection is then
36   chosen as the optimum one.  At each frame, each mapping has associated
37   with it a cost based on the formant bandwidths and frequencies.  This
38   "local" cost is finally added to the cost of the best "connection."  At
39   end of utterance (or after a reasonable delay like .5sec) the best
40   mappings for the entire utterance may be found by retracing back through
41   best candidate mappings, starting at end of utterance (or current frame).
42*/
43
44/* Here are the major fudge factors for tweaking the formant tracker. */
45#define MAXCAN	300  /* maximum number of candidate mappings allowed */
46static double MISSING = 1, /* equivalent delta-Hz cost for missing formant */
47	NOBAND = 1000, /* equivalent bandwidth cost of a missing formant */
48	DF_FACT =  20.0, /* cost for proportional frequency changes */
49	/* with good "stationarity" function:*/
50  /*        DF_FACT =  80.0, *//*  cost for proportional frequency changes */
51	DFN_FACT = 0.3, /* cost for proportional dev. from nominal freqs. */
52	BAND_FACT = .002, /* cost per Hz of bandwidth in the poles */
53/*	F_BIAS	  = 0.0004,   bias toward selecting low-freq. poles */
54	F_BIAS	  = 0.000, /*  bias toward selecting low-freq. poles */
55	F_MERGE = 2000.0; /* cost of mapping f1 and f2 to same frequency */
56static double	*fre,
57		fnom[]  = {  500, 1500, 2500, 3500, 4500, 5500, 6500},/*  "nominal" freqs.*/
58		fmins[] = {   50,  400, 1000, 2000, 2000, 3000, 3000}, /* frequency bounds */
59		fmaxs[] = { 1500, 3500, 4500, 5000, 6000, 6000, 8000}; /* for 1st 5 formants */
60
61static int	maxp,	/* number of poles to consider */
62		maxf,	/* number of formants to find */
63		ncan,  domerge = TRUE;
64
65static short **pc;
66
67static int canbe(pnumb, fnumb) /* can this pole be this freq.? */
68int	pnumb, fnumb;
69{
70return((fre[pnumb] >= fmins[fnumb])&&(fre[pnumb] <= fmaxs[fnumb]));
71}
72
73/* This does the real work of mapping frequencies to formants. */
74static void candy(cand,pnumb,fnumb)
75     int	cand, /* candidate number being considered */
76       pnumb, /* pole number under consideration */
77       fnumb;	/* formant number under consideration */
78{
79  int i,j;
80
81  if(fnumb < maxf) pc[cand][fnumb] = -1;
82  if((pnumb < maxp)&&(fnumb < maxf)){
83    /*   printf("\ncan:%3d  pnumb:%3d  fnumb:%3d",cand,pnumb,fnumb); */
84    if(canbe(pnumb,fnumb)){
85      pc[cand][fnumb] = pnumb;
86      if(domerge&&(fnumb==0)&&(canbe(pnumb,fnumb+1))){ /* allow for f1,f2 merger */
87	ncan++;
88	pc[ncan][0] = pc[cand][0];
89	candy(ncan,pnumb,fnumb+1); /* same pole, next formant */
90      }
91      candy(cand,pnumb+1,fnumb+1); /* next formant; next pole */
92      if(((pnumb+1) < maxp) && canbe(pnumb+1,fnumb)){
93	/* try other frequencies for this formant */
94	ncan++;			/* add one to the candidate index/tally */
95	/*		printf("\n%4d  %4d  %4d",ncan,pnumb+1,fnumb); */
96	for(i=0; i<fnumb; i++)	/* clone the lower formants */
97	  pc[ncan][i] = pc[cand][i];
98	candy(ncan,pnumb+1,fnumb);
99      }
100    } else {
101      candy(cand,pnumb+1,fnumb);
102    }
103  }
104  /* If all pole frequencies have been examined without finding one which
105     will map onto the current formant, go on to the next formant leaving the
106     current formant null. */
107  if((pnumb >= maxp) && (fnumb < maxf-1) && (pc[cand][fnumb] < 0)){
108    if(fnumb){
109      j=fnumb-1;
110      while((j>0) && pc[cand][j] < 0) j--;
111      i = ((j=pc[cand][j]) >= 0)? j : 0;
112    } else i = 0;
113    candy(cand,i,fnumb+1);
114  }
115}
116
117/* Given a set of pole frequencies and allowable formant frequencies
118   for nform formants, calculate all possible mappings of pole frequencies
119   to formants, including, possibly, mappings with missing formants. */
120void get_fcand(npole,freq,band,nform,pcan)
121     int	npole, nform;
122     short **pcan;
123     double	*freq, *band; /* poles ordered by increasing FREQUENCY */
124{
125
126  ncan = 0;
127  pc = pcan;
128  fre = freq;
129  maxp = npole;
130  maxf = nform;
131  candy(ncan, 0, 0);
132  ncan++;	/* (converts ncan as an index to ncan as a candidate count) */
133}
134
135void set_nominal_freqs(f1)
136     double f1;
137{
138  int i;
139  for(i=0; i < MAXFORMANTS; i++) {
140    fnom[i] = ((i * 2) + 1) * f1;
141    fmins[i] = fnom[i] - ((i+1) * f1) + 50.0;
142    fmaxs[i] = fnom[i] + (i * f1) + 1000.0;
143  }
144}
145
146/*      ----------------------------------------------------------      */
147/* find the maximum in the "stationarity" function (stored in rms) */
148double get_stat_max(pole, nframes)
149     register POLE **pole;
150     register int nframes;
151{
152  register int i;
153  register double amax, t;
154
155  for(i=1, amax = (*pole++)->rms; i++ < nframes; )
156    if((t = (*pole++)->rms) > amax) amax = t;
157
158  return(amax);
159}
160
161Sound *dpform(ps, nform, nom_f1)
162     Sound *ps;
163     int nform;
164     double nom_f1;
165{
166  double pferr, conerr, minerr, dffact, ftemp, berr, ferr, bfact, ffact,
167         rmsmax, fbias, **fr, **ba, rmsdffact, merger=0.0, merge_cost,
168         FBIAS;
169  register int	i, j, k, l, ic, ip, mincan=0;
170  short	**pcan;
171  FORM	**fl;
172  POLE	**pole; /* raw LPC pole data structure array */
173  Sound *fbs;
174  int dmaxc,dminc,dcountc,dcountf;
175
176  if(ps) {
177    if(nom_f1 > 0.0)
178      set_nominal_freqs(nom_f1);
179    pole = (POLE**)ps->extHead;
180    rmsmax = get_stat_max(pole, ps->length);
181    FBIAS = F_BIAS /(.01 * ps->samprate);
182    /* Setup working values of the cost weights. */
183    dffact = (DF_FACT * .01) * ps->samprate; /* keep dffact scaled to frame rate */
184    bfact = BAND_FACT /(.01 * ps->samprate);
185    ffact = DFN_FACT /(.01 * ps->samprate);
186    merge_cost = F_MERGE;
187    if(merge_cost > 1000.0) domerge = FALSE;
188
189    /* Allocate space for the formant and bandwidth arrays to be passed back. */
190    if(debug & DEB_ENTRY){
191      printf("Allocating formant and bandwidth arrays in dpform()\n");
192    }
193    fr = (double**)ckalloc(sizeof(double*) * nform * 2);
194    ba = fr + nform;
195    for(i=0;i < nform*2; i++){
196      fr[i] = (double*)ckalloc(sizeof(double) * ps->length);
197    }
198    /*    cp = new_ext(ps->name,"fb");*/
199    /*    if((fbs=new_signal(cp,SIG_UNKNOWN,dup_header(ps->header),fr,ps->length,		       ps->samprate, nform * 2))) {*/
200    if (1) {
201      /* Allocate space for the raw candidate array. */
202      if(debug & DEB_ENTRY){
203	printf("Allocating raw candidate array in dpform()\n");
204      }
205      pcan = (short**)ckalloc(sizeof(short*) * MAXCAN);
206      for(i=0;i<MAXCAN;i++) pcan[i] = (short*)ckalloc(sizeof(short) * nform);
207
208      /* Allocate space for the dp lattice */
209      if(debug & DEB_ENTRY){
210	printf("Allocating DP lattice structure in dpform()\n");
211      }
212      fl = (FORM**)ckalloc(sizeof(FORM*) * ps->length);
213      for(i=0;i < ps->length; i++)
214	fl[i] = (FORM*)ckalloc(sizeof(FORM));
215
216      /*******************************************************************/
217      /* main formant tracking loop */
218      /*******************************************************************/
219      if(debug & DEB_ENTRY){
220	printf("Entering main computation loop in dpform()\n");
221      }
222      for(i=0; i < ps->length; i++){	/* for all analysis frames... */
223
224	ncan = 0;		/* initialize candidate mapping count to 0 */
225
226	/* moderate the cost of frequency jumps by the relative amplitude */
227	rmsdffact = pole[i]->rms;
228	rmsdffact = rmsdffact/rmsmax;
229	rmsdffact = rmsdffact * dffact;
230
231	/* Get all likely mappings of the poles onto formants for this frame. */
232	if(pole[i]->npoles){	/* if there ARE pole frequencies available... */
233	  get_fcand(pole[i]->npoles,pole[i]->freq,pole[i]->band,nform,pcan);
234
235	  /* Allocate space for this frame's candidates in the dp lattice. */
236	  fl[i]->prept =  (short*)ckalloc(sizeof(short) * ncan);
237	  fl[i]->cumerr = (double*)ckalloc(sizeof(double) * ncan);
238	  fl[i]->cand =   (short**)ckalloc(sizeof(short*) * ncan);
239	  for(j=0;j<ncan;j++){	/* allocate cand. slots and install candidates */
240	    fl[i]->cand[j] = (short*)ckalloc(sizeof(short) * nform);
241	    for(k=0; k<nform; k++)
242	      fl[i]->cand[j][k] = pcan[j][k];
243	  }
244	}
245	fl[i]->ncand = ncan;
246	/* compute the distance between the current and previous mappings */
247	for(j=0;j<ncan;j++){	/* for each CURRENT mapping... */
248	  if( i ){		/* past the first frame? */
249	    minerr = 0;
250	    if(fl[i-1]->ncand) minerr = 2.0e30;
251	    mincan = -1;
252	    for(k=0; k < fl[i-1]->ncand; k++){ /* for each PREVIOUS map... */
253	      for(pferr=0.0, l=0; l<nform; l++){
254		ic = fl[i]->cand[j][l];
255		ip = fl[i-1]->cand[k][l];
256		if((ic >= 0)	&& (ip >= 0)){
257		  ftemp = 2.0 * fabs(pole[i]->freq[ic] - pole[i-1]->freq[ip])/
258		           (pole[i]->freq[ic] + pole[i-1]->freq[ip]);
259    /*		  ftemp = pole[i]->freq[ic] - pole[i-1]->freq[ip];
260		  if(ftemp >= 0.0)
261		    ftemp = ftemp/pole[i-1]->freq[ip];
262		  else
263		    ftemp = ftemp/pole[i]->freq[ic]; */
264		  /* cost prop. to SQUARE of deviation to discourage large jumps */
265		  pferr += ftemp * ftemp;
266		}
267		else pferr += MISSING;
268	      }
269	      /* scale delta-frequency cost and add in prev. cum. cost */
270	      conerr = (rmsdffact * pferr) + fl[i-1]->cumerr[k];
271	      if(conerr < minerr){
272		minerr = conerr;
273		mincan = k;
274	      }
275	    }			/* end for each PREVIOUS mapping... */
276	  }	else {		/* (i.e. if this is the first frame... ) */
277	    minerr = 0;
278	  }
279
280	  fl[i]->prept[j] = mincan; /* point to best previous mapping */
281	  /* (Note that mincan=-1 if there were no candidates in prev. fr.) */
282	  /* Compute the local costs for this current mapping. */
283	  for(k=0, berr=0, ferr=0, fbias=0; k<nform; k++){
284	    ic = fl[i]->cand[j][k];
285	    if(ic >= 0){
286	      if( !k ){		/* F1 candidate? */
287		ftemp = pole[i]->freq[ic];
288		merger = (domerge &&
289			  (ftemp == pole[i]->freq[fl[i]->cand[j][1]]))?
290			  merge_cost: 0.0;
291	      }
292	      berr += pole[i]->band[ic];
293	      ferr += (fabs(pole[i]->freq[ic]-fnom[k])/fnom[k]);
294	      fbias += pole[i]->freq[ic];
295	    } else {		/* if there was no freq. for this formant */
296	      fbias += fnom[k];
297	      berr += NOBAND;
298	      ferr += MISSING;
299	    }
300	  }
301
302	  /* Compute the total cost of this mapping and best previous. */
303	  fl[i]->cumerr[j] = (FBIAS * fbias) + (bfact * berr) + merger +
304	                     (ffact * ferr) + minerr;
305	}			/* end for each CURRENT mapping... */
306
307	if(debug & DEB_LPC_PARS){
308	  printf("\nFrame %4d  # candidates:%3d stat:%f prms:%f",i,ncan,rmsdffact,pole[i]->rms);
309	  for (j=0; j<ncan; j++){
310	    printf("\n	");
311	    for(k=0; k<nform; k++)
312	      if(pcan[j][k] >= 0)
313		printf("%6.0f ",pole[i]->freq[fl[i]->cand[j][k]]);
314	      else
315		printf("  NA   ");
316	    printf("  cum:%7.2f pp:%d",fl[i]->cumerr[j], fl[i]->prept[j]);
317	  }
318	}
319      }				/* end for all analysis frames... */
320      /**************************************************************************/
321
322      /* Pick the candidate in the final frame with the lowest cost. */
323      /* Starting with that min.-cost cand., work back thru the lattice. */
324      if(debug & DEB_ENTRY){
325	printf("Entering backtrack loop in dpform()\n");
326      }
327      dmaxc = 0;
328      dminc = 100;
329      dcountc = dcountf = 0;
330      for(mincan = -1, i=ps->length - 1; i>=0; i--){
331	if(debug & DEB_LPC_PARS){
332	  printf("\nFrame:%4d mincan:%2d ncand:%2d ",i,mincan,fl[i]->ncand);
333	}
334	if(mincan < 0)		/* need to find best starting candidate? */
335	  if(fl[i]->ncand){	/* have candidates at this frame? */
336	    minerr = fl[i]->cumerr[0];
337	    mincan = 0;
338	    for(j=1; j<fl[i]->ncand; j++)
339	      if( fl[i]->cumerr[j] < minerr ){
340		minerr = fl[i]->cumerr[j];
341		mincan = j;
342	      }
343	  }
344	if(mincan >= 0){	/* if there is a "best" candidate at this frame */
345	  if((j = fl[i]->ncand) > dmaxc) dmaxc = j;
346	  else
347	    if( j < dminc) dminc = j;
348	  dcountc += j;
349	  dcountf++;
350	  for(j=0; j<nform; j++){
351	    k = fl[i]->cand[mincan][j];
352	    if(k >= 0){
353	      fr[j][i] = pole[i]->freq[k];
354	      if(debug & DEB_LPC_PARS){
355		printf("%6.0f",fr[j][i]);
356	      }
357	      ba[j][i] = pole[i]->band[k];
358	    } else {		/* IF FORMANT IS MISSING... */
359	      if(i < ps->length - 1){
360		fr[j][i] = fr[j][i+1]; /* replicate backwards */
361		ba[j][i] = ba[j][i+1];
362	      } else {
363		fr[j][i] = fnom[j]; /* or insert neutral values */
364		ba[j][i] = NOBAND;
365	      }
366	      if(debug & DEB_LPC_PARS){
367		printf("%6.0f",fr[j][i]);
368	      }
369	    }
370	  }
371	  mincan = fl[i]->prept[mincan];
372	} else {		/* if no candidates, fake with "nominal" frequencies. */
373	  for(j=0; j < nform; j++){
374	    fr[j][i] = fnom[j];
375	    ba[j][i] = NOBAND;
376	    if(debug & DEB_LPC_PARS){
377	      printf("%6.0f",fr[j][i]);
378	    }
379	  }
380	}			/* note that mincan will remain =-1 if no candidates */
381      }				/* end unpacking formant tracks from the dp lattice */
382      /* Deallocate all the DP lattice work space. */
383      /*if(debug & DEB_ENTRY){
384	printf("%s complete; max. cands:%d  min. cands.:%d average cands.:%f\n",
385	     fbs->name,dmaxc,dminc,((double)dcountc)/dcountf);
386	printf("Entering memory deallocation in dpform()\n");
387      }*/
388      for(i=ps->length - 1; i>=0; i--){
389	if(fl[i]->ncand){
390	  if(fl[i]->cand) {
391	    for(j=0; j<fl[i]->ncand; j++) ckfree((void *)fl[i]->cand[j]);
392	    ckfree((void *)fl[i]->cand);
393	    ckfree((void *)fl[i]->cumerr);
394	    ckfree((void *)fl[i]->prept);
395	  }
396	}
397      }
398      for(i=0; i<ps->length; i++)	ckfree((void *)fl[i]);
399      ckfree((void *)fl);
400      fl = 0;
401
402      for(i=0; i<ps->length; i++) {
403	ckfree((void *)pole[i]->freq);
404	ckfree((void *)pole[i]->band);
405	ckfree((void *)pole[i]);
406      }
407      ckfree((void *)pole);
408
409      /* Deallocate space for the raw candidate aray. */
410      for(i=0;i<MAXCAN;i++) ckfree((void *)pcan[i]);
411      ckfree((void *)pcan);
412
413      fbs = Snack_NewSound(ps->samprate, SNACK_FLOAT, nform * 2);
414      Snack_ResizeSoundStorage(fbs, ps->length);
415      for (i = 0; i < ps->length; i++) {
416	for (j = 0; j < nform * 2; j++) {
417	  Snack_SetSample(fbs, j, i, (float)fr[j][i]);
418	}
419      }
420      fbs->length = ps->length;
421
422      for(i = 0; i < nform*2; i++) ckfree((void *)fr[i]);
423      ckfree((void *)fr);
424
425      return(fbs);
426    } else
427      printf("Can't create a new Signal in dpform()\n");
428  } else
429    printf("Bad data pointers passed into dpform()\n");
430  return(NULL);
431}
432
433/* lpc_poles.c */
434
435/* computation and I/O routines for dealing with LPC poles */
436
437#define MAXORDER 30
438
439extern int formant(), lpc(), lpcbsa(), dlpcwtd(), w_covar();
440
441/*************************************************************************/
442double integerize(time, freq)
443     register double time, freq;
444{
445  register int i;
446
447  i = (int) (.5 + (freq * time));
448  return(((double)i)/freq);
449}
450
451/*	Round the argument to the nearest integer.			*/
452int eround(flnum)
453register double	flnum;
454{
455	return((flnum >= 0.0) ? (int)(flnum + 0.5) : (int)(flnum - 0.5));
456}
457
458/*************************************************************************/
459Sound *lpc_poles(sp,wdur,frame_int,lpc_ord,preemp,lpc_type,w_type)
460     Sound *sp;
461     int lpc_ord, lpc_type, w_type;
462     double wdur, frame_int, preemp;
463{
464  int i, j, size, step, nform, init, nfrm;
465  POLE **pole;
466  double lpc_stabl = 70.0, energy, lpca[MAXORDER], normerr,
467         *bap=NULL, *frp=NULL, *rhp=NULL;
468  short *datap, *dporg;
469  Sound *lp;
470
471  if(lpc_type == 1) { /* force "standard" stabilized covariance (ala bsa) */
472    wdur = 0.025;
473    preemp = exp(-62.831853 * 90. / sp->samprate); /* exp(-1800*pi*T) */
474  }
475  if((lpc_ord > MAXORDER) || (lpc_ord < 2)/* || (! ((short**)sp->data)[0])*/)
476    return(NULL);
477  /*  np = (char*)new_ext(sp->name,"pole");*/
478  wdur = integerize(wdur,(double)sp->samprate);
479  frame_int = integerize(frame_int,(double)sp->samprate);
480  nfrm= 1 + (int) (((((double)sp->length)/sp->samprate) - wdur)/(frame_int));
481  if(nfrm >= 1/*lp->buff_size >= 1*/) {
482    size = (int) (.5 + (wdur * sp->samprate));
483    step = (int) (.5 + (frame_int * sp->samprate));
484    pole = (POLE**)ckalloc(nfrm/*lp->buff_size*/ * sizeof(POLE*));
485    datap = dporg = (short *) ckalloc(sizeof(short) * sp->length);
486    for (i = 0; i < Snack_GetLength(sp); i++) {
487      datap[i] = (short) Snack_GetSample(sp, 0, i);
488    }
489    for(j=0, init=TRUE/*, datap=((short**)sp->data)[0]*/; j < nfrm/*lp->buff_size*/;j++, datap += step){
490      pole[j] = (POLE*)ckalloc(sizeof(POLE));
491      pole[j]->freq = frp = (double*)ckalloc(sizeof(double)*lpc_ord);
492      pole[j]->band = bap = (double*)ckalloc(sizeof(double)*lpc_ord);
493
494      switch(lpc_type) {
495      case 0:
496	if(! lpc(lpc_ord,lpc_stabl,size,datap,lpca,rhp,NULL,&normerr,
497		 &energy, preemp, w_type)){
498	  printf("Problems with lpc in lpc_poles()");
499	  break;
500	}
501	break;
502      case 1:
503	if(! lpcbsa(lpc_ord,lpc_stabl,size,datap,lpca,rhp,NULL,&normerr,
504		    &energy, preemp)){
505          printf("Problems with lpc in lpc_poles()");
506	  break;
507	}
508	break;
509      case 2:
510	{
511	  int Ord = lpc_ord;
512	  double alpha, r0;
513
514	  w_covar(datap, &Ord, size, 0, lpca, &alpha, &r0, preemp, 0);
515	  if((Ord != lpc_ord) || (alpha <= 0.0))
516	    printf("Problems with covar(); alpha:%f  Ord:%d\n",alpha,Ord);
517	  energy = sqrt(r0/(size-Ord));
518	}
519	break;
520      }
521      pole[j]->change = 0.0;
522       /* don't waste time on low energy frames */
523       if((pole[j]->rms = energy) > 1.0){
524	 formant(lpc_ord,(double)sp->samprate, lpca, &nform, frp, bap, init);
525	 pole[j]->npoles = nform;
526	 init=FALSE;		/* use old poles to start next search */
527       } else {			/* write out no pole frequencies */
528	 pole[j]->npoles = 0;
529	 init = TRUE;		/* restart root search in a neutral zone */
530       }
531/*     if(debug & 4) {
532	 printf("\nfr:%4d np:%4d rms:%7.0f  ",j,pole[j]->npoles,pole[j]->rms);
533	 for(k=0; k<pole[j]->npoles; k++)
534	   printf(" %7.1f",pole[j]->freq[k]);
535	 printf("\n                   ");
536	 for(k=0; k<pole[j]->npoles; k++)
537	   printf(" %7.1f",pole[j]->band[k]);
538	 printf("\n");
539	 }*/
540     } /* end LPC pole computation for all lp->buff_size frames */
541    /*     lp->data = (caddr_t)pole;*/
542    ckfree((void *)dporg);
543    lp = Snack_NewSound((int)(1.0/frame_int), LIN16, lpc_ord);
544    Snack_ResizeSoundStorage(lp, nfrm);
545    for (i = 0; i < nfrm; i++) {
546      for (j = 0; j < lpc_ord; j++) {
547      Snack_SetSample(lp, j, i, (float)pole[i]->freq[j]);
548      }
549    }
550    lp->length = nfrm;
551    lp->extHead = (char *)pole;
552    return(lp);
553  } else {
554    printf("Bad buffer size in lpc_poles()\n");
555  }
556  return(NULL);
557}
558
559/**********************************************************************/
560double frand()
561{
562  return (((double)rand())/(double)RAND_MAX);
563}
564
565/**********************************************************************/
566/* a quick and dirty interface to bsa's stabilized covariance LPC */
567#define NPM	30	/* max lpc order		*/
568
569int lpcbsa(np, lpc_stabl, wind, data, lpc, rho, nul1, nul2, energy, preemp)
570     int np, wind;
571     short *data;
572     double *lpc, *rho, *nul1, *nul2, *energy, lpc_stabl, preemp;
573{
574  static int i, mm, owind=0, wind1;
575  static double w[1000];
576  double rc[NPM],phi[NPM*NPM],shi[NPM],sig[1000];
577  double xl = .09, fham, amax;
578  register double *psp1, *psp3, *pspl;
579
580  if(owind != wind) {		/* need to compute a new window? */
581    fham = 6.28318506 / wind;
582    for(psp1=w,i=0;i<wind;i++,psp1++)
583      *psp1 = .54 - .46 * cos(i * fham);
584    owind = wind;
585  }
586  wind += np + 1;
587  wind1 = wind-1;
588
589  for(psp3=sig,pspl=sig+wind; psp3 < pspl; )
590    *psp3++ = (double)(*data++) + .016 * frand() - .008;
591  for(psp3=sig+1,pspl=sig+wind;psp3<pspl;psp3++)
592    *(psp3-1) = *psp3 - preemp * *(psp3-1);
593  for(amax = 0.,psp3=sig+np,pspl=sig+wind1;psp3<pspl;psp3++)
594    amax += *psp3 * *psp3;
595  *energy = sqrt(amax / (double)owind);
596  amax = 1.0/(*energy);
597
598  for(psp3=sig,pspl=sig+wind1;psp3<pspl;psp3++)
599    *psp3 *= amax;
600  if((mm=dlpcwtd(sig,&wind1,lpc,&np,rc,phi,shi,&xl,w))!=np) {
601    printf("LPCWTD error mm<np %d %d\n",mm,np);
602    return(FALSE);
603  }
604  return(TRUE);
605}
606
607/*	Copyright (c) 1987, 1988, 1989 AT&T	*/
608/*	  All Rights Reserved	*/
609
610/*	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T	*/
611/*	The copyright notice above does not evidence any	*/
612/*	actual or intended publication of such source code.	*/
613
614/* downsample.c */
615/* a quick and dirty downsampler */
616
617#ifndef TRUE
618# define TRUE 1
619# define FALSE 0
620#endif
621
622#define PI 3.1415927
623
624/*      ----------------------------------------------------------      */
625int lc_lin_fir(fc,nf,coef)
626/* create the coefficients for a symmetric FIR lowpass filter using the
627   window technique with a Hanning window. */
628register double	fc;
629double	coef[];
630int	*nf;
631{
632    register int	i, n;
633    register double	twopi, fn, c;
634
635    if(((*nf % 2) != 1) || (*nf > 127)) {
636	if(*nf <= 126) *nf = *nf + 1;
637	else *nf = 127;
638    }
639    n = (*nf + 1)/2;
640
641    /*  compute part of the ideal impulse response */
642    twopi = PI * 2.0;
643    coef[0] = 2.0 * fc;
644    c = PI;
645    fn = twopi * fc;
646    for(i=1;i < n; i++) coef[i] = sin(i * fn)/(c * i);
647
648    /* Now apply a Hanning window to the (infinite) impulse response. */
649    fn = twopi/((double)(*nf - 1));
650    for(i=0;i<n;i++)
651	coef[i] *= (.5 + (.5 * cos(fn * ((double)i))));
652
653    return(TRUE);
654}
655
656/*      ----------------------------------------------------------      */
657
658void do_fir(buf,in_samps,bufo,ncoef,ic,invert)
659/* ic contains 1/2 the coefficients of a symmetric FIR filter with unity
660    passband gain.  This filter is convolved with the signal in buf.
661    The output is placed in buf2.  If invert != 0, the filter magnitude
662    response will be inverted. */
663short	*buf, *bufo, ic[];
664int	in_samps, ncoef, invert;
665{
666    register short  *buft, *bufp, *bufp2, stem;
667    short co[256], mem[256];
668    register int i, j, k, l, m, sum, integral;
669
670    for(i=ncoef-1, bufp=ic+ncoef-1, bufp2=co, buft = co+((ncoef-1)*2),
671	integral = 0; i-- > 0; )
672      if(!invert) *buft-- = *bufp2++ = *bufp--;
673      else {
674	integral += (stem = *bufp--);
675	*buft-- = *bufp2++ = -stem;
676      }
677    if(!invert)  *buft-- = *bufp2++ = *bufp--; /* point of symmetry */
678    else {
679      integral *= 2;
680      integral += *bufp;
681      *buft-- = integral - *bufp;
682    }
683/*         for(i=(ncoef*2)-2; i >= 0; i--) printf("\n%4d%7d",i,co[i]);  */
684    for(i=ncoef-1, buft=mem; i-- > 0; ) *buft++ = 0;
685    for(i=ncoef; i-- > 0; ) *buft++ = *buf++;
686    l = 16384;
687    m = 15;
688    k = (ncoef << 1) -1;
689    for(i=in_samps-ncoef; i-- > 0; ) {
690      for(j=k, buft=mem, bufp=co, bufp2=mem+1, sum = 0; j-- > 0;
691	  *buft++ = *bufp2++ )
692	sum += (((*bufp++ * *buft) + l) >> m);
693
694      *--buft = *buf++;		/* new data to memory */
695      *bufo++ = sum;
696    }
697    for(i=ncoef; i-- > 0; ) {	/* pad data end with zeros */
698      for(j=k, buft=mem, bufp=co, bufp2=mem+1, sum = 0; j-- > 0;
699	  *buft++ = *bufp2++ )
700	sum += (((*bufp++ * *buft) + l) >> m);
701      *--buft = 0;
702      *bufo++ = sum;
703    }
704}
705
706/* ******************************************************************** */
707
708int get_abs_maximum(d,n)
709     register short *d;
710     register int n;
711{
712  register int i;
713  register short amax, t;
714
715  if((t = *d++) >= 0) amax = t;
716  else amax = -t;
717
718  for(i = n-1; i-- > 0; ) {
719    if((t = *d++) > amax) amax = t;
720    else {
721      if(-t > amax) amax = -t;
722    }
723  }
724  return((int)amax);
725}
726
727/* ******************************************************************** */
728
729int dwnsamp(buf,in_samps,buf2,out_samps,insert,decimate,ncoef,ic,smin,smax)
730     short	*buf, **buf2;
731     int	in_samps, *out_samps, insert, decimate, ncoef, *smin, *smax;
732     short ic[];
733{
734  register short  *bufp, *bufp2;
735  short	*buft;
736  register int i, j, k, l, m;
737  int imax, imin;
738
739  if(!(*buf2 = buft = (short*)ckalloc(sizeof(short)*insert*in_samps))) {
740    perror("ckalloc() in dwnsamp()");
741    return(FALSE);
742  }
743
744  k = imax = get_abs_maximum(buf,in_samps);
745  if (k == 0) k = 1;
746  if(insert > 1) k = (32767 * 32767)/k;	/*  prepare to scale data */
747  else k = (16384 * 32767)/k;
748  l = 16384;
749  m = 15;
750
751
752  /* Insert zero samples to boost the sampling frequency and scale the
753     signal to maintain maximum precision. */
754  for(i=0, bufp=buft, bufp2=buf; i < in_samps; i++) {
755    *bufp++ = ((k * (*bufp2++)) + l) >> m ;
756    for(j=1; j < insert; j++) *bufp++ = 0;
757  }
758
759  do_fir(buft,in_samps*insert,buft,ncoef,ic,0);
760
761  /*	Finally, decimate and return the downsampled signal. */
762  *out_samps = j = (in_samps * insert)/decimate;
763  k = decimate;
764  for(i=0, bufp=buft, imax = imin = *bufp; i < j; bufp += k,i++) {
765    *buft++ = *bufp;
766    if(imax < *bufp) imax = *bufp;
767    else
768      if(imin > *bufp) imin = *bufp;
769  }
770  *smin = imin;
771  *smax = imax;
772  *buf2 = (short*)ckrealloc((void *) *buf2,sizeof(short) * (*out_samps));
773  return(TRUE);
774}
775
776/*      ----------------------------------------------------------      */
777
778int ratprx(a,k,l,qlim)
779double	a;
780int	*l, *k, qlim;
781{
782    double aa, af, q, em, qq = 0, pp = 0, ps, e;
783    int	ai, ip, i;
784
785    aa = fabs(a);
786    ai = (int) aa;
787/*    af = fmod(aa,1.0); */
788    i = (int) aa;
789    af = aa - i;
790    q = 0;
791    em = 1.0;
792    while(++q <= qlim) {
793	ps = q * af;
794	ip = (int) (ps + 0.5);
795	e = fabs((ps - (double)ip)/q);
796	if(e < em) {
797	    em = e;
798	    pp = ip;
799	    qq = q;
800	}
801    };
802    *k = (int) ((ai * qq) + pp);
803    *k = (a > 0)? *k : -(*k);
804    *l = (int) qq;
805    return(TRUE);
806}
807
808/* ----------------------------------------------------------------------- */
809
810Sound *Fdownsample(s,freq2,start,end)
811     double freq2;
812     Sound *s;
813     int start;
814     int end;
815{
816  short	*bufin, **bufout;
817  static double	beta = 0.0, b[256];
818  double	ratio_t, maxi, ratio, beta_new, freq1;
819  static int	ncoeff = 127, ncoefft = 0, nbits = 15;
820  static short	ic[256];
821  int	insert, decimate, out_samps, smin, smax;
822  Sound *so;
823
824  register int i, j;
825
826  freq1 = s->samprate;
827
828  if((bufout = (short**)ckalloc(sizeof(short*)))) {
829    bufin = (short *) ckalloc(sizeof(short) * (end - start + 1));
830    for (i = start; i <= end; i++) {
831      bufin[i-start] = (short) Snack_GetSample(s, 0, i);
832    }
833
834    ratio = freq2/freq1;
835    ratprx(ratio,&insert,&decimate,10);
836    ratio_t = ((double)insert)/((double)decimate);
837
838    if(ratio_t > .99) return(s);
839
840    freq2 = ratio_t * freq1;
841    beta_new = (.5 * freq2)/(insert * freq1);
842
843    if(beta != beta_new){
844      beta = beta_new;
845      if( !lc_lin_fir(beta,&ncoeff,b)) {
846	printf("\nProblems computing interpolation filter\n");
847	return(FALSE);
848      }
849      maxi = (1 << nbits) - 1;
850      j = (ncoeff/2) + 1;
851      for(ncoefft = 0, i=0; i < j; i++){
852	ic[i] = (int) (0.5 + (maxi * b[i]));
853	if(ic[i]) ncoefft = i+1;
854      }
855    }				/*  endif new coefficients need to be computed */
856
857    if(dwnsamp(bufin,end-start+1,bufout,&out_samps,insert,decimate,ncoefft,ic,
858	       &smin,&smax)){
859      /*      so->buff_size = so->file_size = out_samps;*/
860      so = Snack_NewSound(0, LIN16, s->nchannels);
861      Snack_ResizeSoundStorage(so, out_samps);
862      for (i = 0; i < out_samps; i++) {
863	Snack_SetSample(so, 0, i, (float)(*bufout)[i]);
864      }
865      so->length = out_samps;
866      so->samprate = (int)freq2;
867      ckfree((void *)*bufout);
868      ckfree((void *)bufout);
869      ckfree((void *)bufin);
870      return(so);
871    } else
872      printf("Problems in dwnsamp() in downsample()\n");
873  } else
874       printf("Can't create a new Signal in downsample()\n");
875
876  return(NULL);
877}
878
879/*      ----------------------------------------------------------      */
880
881Sound
882*highpass(s)
883     Sound *s;
884{
885
886  short *datain, *dataout;
887  static short *lcf;
888  static int len = 0;
889  double scale, fn;
890  register int i;
891  Sound *so;
892
893  /*  Header *h, *dup_header();*/
894
895#define LCSIZ 101
896  /* This assumes the sampling frequency is 10kHz and that the FIR
897     is a Hanning function of (LCSIZ/10)ms duration. */
898
899  datain = (short *) ckalloc(sizeof(short) * s->length);
900  dataout = (short *) ckalloc(sizeof(short) * s->length);
901  for (i = 0; i < Snack_GetLength(s); i++) {
902    datain[i] = (short) Snack_GetSample(s, 0, i);
903  }
904
905  if(!len) {		/* need to create a Hanning FIR? */
906    lcf = (short*)ckalloc(sizeof(short) * LCSIZ);
907    len = 1 + (LCSIZ/2);
908    fn = PI * 2.0 / (LCSIZ - 1);
909    scale = 32767.0/(.5 * LCSIZ);
910    for(i=0; i < len; i++)
911      lcf[i] = (short) (scale * (.5 + (.4 * cos(fn * ((double)i)))));
912  }
913  do_fir(datain,s->length,dataout,len,lcf,1); /* in downsample.c */
914  so = Snack_NewSound(s->samprate, LIN16, s->nchannels);
915  if (so == NULL) return(NULL);
916  Snack_ResizeSoundStorage(so, s->length);
917  for (i = 0; i < s->length; i++) {
918    Snack_SetSample(so, 0, i, (float)dataout[i]);
919  }
920  so->length = s->length;
921  ckfree((void *)dataout);
922  ckfree((void *)datain);
923  return(so);
924}
925
926int
927formantCmd(Sound *s, Tcl_Interp *interp, int objc,
928	   Tcl_Obj *CONST objv[])
929{
930  int nform, i,j, lpc_ord, lpc_type, w_type;
931  char *w_type_str = NULL;
932  double frame_int, wdur,
933  ds_freq, nom_f1 = -10.0, preemp;
934  double cor_wdur;
935  Sound *dssnd = NULL, *hpsnd = NULL, *polesnd = NULL;
936  Sound *formantsnd = NULL, *hpsrcsnd, *polesrcsnd;
937  Tcl_Obj *list;
938  int arg, startpos = 0, endpos = -1;
939  static CONST84 char *subOptionStrings[] = {
940    "-start", "-end", "-progress",
941    "-framelength", "-preemphasisfactor", "-numformants",
942    "-lpcorder", "-windowlength", "-windowtype", "-lpctype",
943    "-ds_freq", "-nom_f1_freq", NULL
944  };
945  enum subOptions {
946    START, END, PROGRESS, FRAME, PREEMP, NUMFORM, ORDER, WINLEN,
947    WINTYPE, LPCTYPE, DSFREQ, NOMFREQ
948  };
949
950  lpc_ord = 12;
951  lpc_type = 0;			/* use bsa's stabilized covariance if != 0 */
952  w_type = 2;			/* window type: 0=rectangular; 1=Hamming; 2=cos**4 */
953  ds_freq = 10000.0;
954  wdur = .049;			/* for LPC analysis */
955  cor_wdur = .01;		/* for crosscorrelation F0 estimator */
956  frame_int = .01;
957  preemp = .7;
958  nform = 4;
959
960  for (arg = 2; arg < objc; arg += 2) {
961    int index;
962
963    if (Tcl_GetIndexFromObj(interp, objv[arg], subOptionStrings,
964			    "option", 0, &index) != TCL_OK) {
965      return TCL_ERROR;
966    }
967
968    if (arg + 1 == objc) {
969      Tcl_AppendResult(interp, "No argument given for ",
970		       subOptionStrings[index], " option", (char *) NULL);
971      return TCL_ERROR;
972    }
973
974    switch ((enum subOptions) index) {
975    case START:
976      {
977	if (Tcl_GetIntFromObj(interp, objv[arg+1], &startpos) != TCL_OK)
978	  return TCL_ERROR;
979	break;
980      }
981    case END:
982      {
983	if (Tcl_GetIntFromObj(interp, objv[arg+1], &endpos) != TCL_OK)
984	  return TCL_ERROR;
985	break;
986      }
987    case PROGRESS:
988      {
989	char *str = Tcl_GetStringFromObj(objv[arg+1], NULL);
990
991	if (strlen(str) > 0) {
992	  Tcl_IncrRefCount(objv[arg+1]);
993	  s->cmdPtr = objv[arg+1];
994	}
995	break;
996      }
997    case FRAME:
998      {
999	if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &frame_int)
1000	    != TCL_OK)
1001	  return TCL_ERROR;
1002	break;
1003      }
1004    case PREEMP:
1005      {
1006	if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &preemp)
1007	    != TCL_OK)
1008	  return TCL_ERROR;
1009	break;
1010      }
1011    case NUMFORM:
1012      {
1013	if (Tcl_GetIntFromObj(interp, objv[arg+1], &nform) != TCL_OK)
1014	  return TCL_ERROR;
1015	break;
1016      }
1017    case ORDER:
1018      {
1019	if (Tcl_GetIntFromObj(interp, objv[arg+1], &lpc_ord) != TCL_OK)
1020	  return TCL_ERROR;
1021	break;
1022      }
1023    case WINLEN:
1024      {
1025	if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &wdur)
1026	    != TCL_OK)
1027	  return TCL_ERROR;
1028	break;
1029      }
1030    case WINTYPE:
1031      {
1032	w_type_str = Tcl_GetStringFromObj(objv[arg+1], NULL);
1033	break;
1034      }
1035    case LPCTYPE:
1036      {
1037	if (Tcl_GetIntFromObj(interp, objv[arg+1], &lpc_type) != TCL_OK)
1038	  return TCL_ERROR;
1039	break;
1040      }
1041    case DSFREQ:
1042      {
1043	if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &ds_freq)
1044	    != TCL_OK)
1045	  return TCL_ERROR;
1046	break;
1047      }
1048    case NOMFREQ:
1049      {
1050	if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &nom_f1)
1051	    != TCL_OK)
1052	  return TCL_ERROR;
1053	break;
1054      }
1055    }
1056  }
1057  if (startpos < 0) startpos = 0;
1058  if (endpos >= (s->length - 1) || endpos == -1)
1059    endpos = s->length - 1;
1060  if (startpos > endpos) return TCL_OK;
1061
1062  /*
1063   * Check for errors in specifying parameters
1064   */
1065
1066  if(nform > (lpc_ord-4)/2){
1067    Tcl_AppendResult(interp, "Number of formants must be <= (lpc order - 4)/2", NULL);
1068    return TCL_ERROR;
1069  }
1070
1071  if(nform > MAXFORMANTS){
1072    Tcl_AppendResult(interp, "A maximum of 7 formants are supported at this time", NULL);
1073    return TCL_ERROR;
1074  }
1075
1076  if (s->storeType != SOUND_IN_MEMORY ) {
1077    Tcl_AppendResult(interp, "formant only works with in-memory sounds",
1078		     (char *) NULL);
1079    return TCL_ERROR;
1080  }
1081
1082  if (w_type_str != NULL) {
1083    int len = strlen(w_type_str);
1084    if (strncasecmp(w_type_str, "rectangular", len) == 0 ||
1085	strncasecmp(w_type_str, "0", len) == 0) {
1086      w_type = 0;
1087    } else if (strncasecmp(w_type_str, "hamming", len) == 0 ||
1088	       strncasecmp(w_type_str, "1", len) == 0) {
1089      w_type = 1;
1090    } else if (strncasecmp(w_type_str, "cos^4", len) == 0 ||
1091	       strncasecmp(w_type_str, "2", len) == 0) {
1092      w_type = 2;
1093    } else if (strncasecmp(w_type_str, "hanning", len) == 0 ||
1094	       strncasecmp(w_type_str, "3", len) == 0) {
1095      w_type = 3;
1096    } else {
1097      Tcl_AppendResult(interp, "unknown window type: ", w_type_str,
1098		       (char *) NULL);
1099      return TCL_ERROR;
1100    }
1101  }
1102
1103  Snack_ProgressCallback(s->cmdPtr, interp,"Computing formants",0.05);
1104
1105  if(ds_freq < s->samprate) {
1106    dssnd = Fdownsample(s,ds_freq, startpos, endpos);
1107  }
1108
1109  Snack_ProgressCallback(s->cmdPtr, interp, "Computing formants",
1110			 0.5);
1111
1112  hpsrcsnd = (dssnd ? dssnd : s);
1113
1114  if (preemp < 1.0) { /* be sure DC and rumble are gone! */
1115    hpsnd = highpass(hpsrcsnd);
1116  }
1117
1118  Snack_ProgressCallback(s->cmdPtr, interp, "Computing formants",
1119			 0.6);
1120
1121  polesrcsnd = (hpsnd ? hpsnd : s);
1122
1123  if(!(polesnd = lpc_poles(polesrcsnd, wdur, frame_int, lpc_ord,
1124			   preemp, lpc_type, w_type))) {
1125    Tcl_AppendResult(interp, "Problems in lpc_poles()", NULL);
1126    return TCL_ERROR;
1127  }
1128
1129  Snack_ProgressCallback(s->cmdPtr, interp, "Computing formants",
1130			 0.7);
1131
1132  /* LPC poles are now available for the formant estimator. */
1133  if (!(formantsnd = dpform(polesnd, nform, nom_f1))) {
1134    Tcl_AppendResult(interp, "Problems in dpform()", NULL);
1135    return TCL_ERROR;
1136  }
1137
1138  Snack_ProgressCallback(s->cmdPtr, interp, "Computing formants",
1139			 0.95);
1140
1141  /*
1142    SaveSound(formantsnd, interp, "outt.wav", NULL,
1143    0, NULL, 0, formantsnd->length, WAV_STRING);
1144  */
1145
1146  if (dssnd) Snack_DeleteSound(dssnd);
1147  if (hpsnd) Snack_DeleteSound(hpsnd);
1148  Snack_DeleteSound(polesnd);
1149
1150  list = Tcl_NewListObj(0, NULL);
1151
1152  for (i = 0; i < formantsnd->length; i++) {
1153    Tcl_Obj *frameList;
1154    frameList = Tcl_NewListObj(0, NULL);
1155    Tcl_ListObjAppendElement(interp, list, frameList);
1156    for (j = 0; j < nform * 2; j++) {
1157      Tcl_ListObjAppendElement(interp, frameList,
1158	  Tcl_NewDoubleObj((double) Snack_GetSample(formantsnd, j, i)));
1159    }
1160  }
1161
1162  Snack_DeleteSound(formantsnd);
1163
1164  Snack_ProgressCallback(s->cmdPtr, interp,"Computing formants",1.0);
1165
1166  Tcl_SetObjResult(interp, list);
1167
1168  return TCL_OK;
1169}
1170