1/* 2 * This software has been licensed to the Centre of Speech Technology, KTH 3 * by Microsoft Corp. with the terms in the accompanying file BSD.txt, 4 * which is a BSD style license. 5 * 6 * "Copyright (c) 1990-1996 Entropic Research Laboratory, Inc. 7 * All rights reserved" 8 * 9 * Written by: David Talkin 10 * Checked by: 11 * Revised by: Derek Lin, David Talkin 12 * 13 * Brief description: 14 * A collection of pretty generic signal-processing routines. 15 * 16 * 17 */ 18 19#include <math.h> 20#include <stdlib.h> 21#include <stdio.h> 22#ifndef TRUE 23# define TRUE 1 24# define FALSE 0 25#endif 26#include "jkGetF0.h" 27#include "snack.h" 28 29/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ 30/* Return a time-weighting window of type type and length n in dout. 31 * Dout is assumed to be at least n elements long. Type is decoded in 32 * the switch statement below. 33 */ 34int xget_window(dout, n, type) 35 register float *dout; 36 register int n, type; 37{ 38 static float *din = NULL; 39 static int n0 = 0; 40 float preemp = 0.0; 41 42 if(n > n0) { 43 register float *p; 44 register int i; 45 46 if(din) ckfree((void *)din); 47 din = NULL; 48 if(!(din = (float*)ckalloc(sizeof(float)*n))) { 49 Fprintf(stderr,"Allocation problems in xget_window()\n"); 50 return(FALSE); 51 } 52 for(i=0, p=din; i++ < n; ) *p++ = 1; 53 n0 = n; 54 } 55 return(window(din, dout, n, preemp, type)); 56} 57 58/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ 59/* Apply a rectangular window (i.e. none). Optionally, preemphasize. */ 60void xrwindow(din, dout, n, preemp) 61 register float *din; 62 register float *dout, preemp; 63 register int n; 64{ 65 register float *p; 66 67/* If preemphasis is to be performed, this assumes that there are n+1 valid 68 samples in the input buffer (din). */ 69 if(preemp != 0.0) { 70 for( p=din+1; n-- > 0; ) 71 *dout++ = (float)((*p++) - (preemp * *din++)); 72 } else { 73 for( ; n-- > 0; ) 74 *dout++ = *din++; 75 } 76} 77 78/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ 79/* Generate a cos^4 window, if one does not already exist. */ 80void xcwindow(din, dout, n, preemp) 81 register float *din; 82 register float *dout, preemp; 83 register int n; 84{ 85 register int i; 86 register float *p; 87 static int wsize = 0; 88 static float *wind=NULL; 89 register float *q, co; 90 91 if(wsize != n) { /* Need to create a new cos**4 window? */ 92 register double arg, half=0.5; 93 94 if(wind) wind = (float*)ckrealloc((void *)wind,n*sizeof(float)); 95 else wind = (float*)ckalloc(n*sizeof(float)); 96 wsize = n; 97 for(i=0, arg=3.1415927*2.0/(wsize), q=wind; i < n; ) { 98 co = (float) (half*(1.0 - cos((half + (double)i++) * arg))); 99 *q++ = co * co * co * co; 100 } 101 } 102/* If preemphasis is to be performed, this assumes that there are n+1 valid 103 samples in the input buffer (din). */ 104 if(preemp != 0.0) { 105 for(i=n, p=din+1, q=wind; i--; ) 106 *dout++ = (float) (*q++ * ((float)(*p++) - (preemp * *din++))); 107 } else { 108 for(i=n, q=wind; i--; ) 109 *dout++ = *q++ * *din++; 110 } 111} 112 113/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ 114/* Generate a Hamming window, if one does not already exist. */ 115void xhwindow(din, dout, n, preemp) 116 register float *din; 117 register float *dout, preemp; 118 register int n; 119{ 120 register int i; 121 register float *p; 122 static int wsize = 0; 123 static float *wind=NULL; 124 register float *q; 125 126 if(wsize != n) { /* Need to create a new Hamming window? */ 127 register double arg, half=0.5; 128 129 if(wind) wind = (float*)ckrealloc((void *)wind,n*sizeof(float)); 130 else wind = (float*)ckalloc(n*sizeof(float)); 131 wsize = n; 132 for(i=0, arg=3.1415927*2.0/(wsize), q=wind; i < n; ) 133 *q++ = (float) (.54 - .46 * cos((half + (double)i++) * arg)); 134 } 135/* If preemphasis is to be performed, this assumes that there are n+1 valid 136 samples in the input buffer (din). */ 137 if(preemp != 0.0) { 138 for(i=n, p=din+1, q=wind; i--; ) 139 *dout++ = (float) (*q++ * ((float)(*p++) - (preemp * *din++))); 140 } else { 141 for(i=n, q=wind; i--; ) 142 *dout++ = *q++ * *din++; 143 } 144} 145 146/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ 147/* Generate a Hanning window, if one does not already exist. */ 148void xhnwindow(din, dout, n, preemp) 149 register float *din; 150 register float *dout, preemp; 151 register int n; 152{ 153 register int i; 154 register float *p; 155 static int wsize = 0; 156 static float *wind=NULL; 157 register float *q; 158 159 if(wsize != n) { /* Need to create a new Hanning window? */ 160 register double arg, half=0.5; 161 162 if(wind) wind = (float*)ckrealloc((void *)wind,n*sizeof(float)); 163 else wind = (float*)ckalloc(n*sizeof(float)); 164 wsize = n; 165 for(i=0, arg=3.1415927*2.0/(wsize), q=wind; i < n; ) 166 *q++ = (float) (half - half * cos((half + (double)i++) * arg)); 167 } 168/* If preemphasis is to be performed, this assumes that there are n+1 valid 169 samples in the input buffer (din). */ 170 if(preemp != 0.0) { 171 for(i=n, p=din+1, q=wind; i--; ) 172 *dout++ = (float) (*q++ * ((float)(*p++) - (preemp * *din++))); 173 } else { 174 for(i=n, q=wind; i--; ) 175 *dout++ = *q++ * *din++; 176 } 177} 178 179/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ 180/* Apply a window of type type to the short PCM sequence of length n 181 * in din. Return the floating-point result sequence in dout. If preemp 182 * is non-zero, apply preemphasis to tha data as it is windowed. 183 */ 184int window(din, dout, n, preemp, type) 185 register float *din; 186 register float *dout, preemp; 187 register int n; 188 int type; 189{ 190 switch(type) { 191 case 0: /* rectangular */ 192 xrwindow(din, dout, n, preemp); 193 break; 194 case 1: /* Hamming */ 195 xhwindow(din, dout, n, preemp); 196 break; 197 case 2: /* cos^4 */ 198 xcwindow(din, dout, n, preemp); 199 break; 200 case 3: /* Hanning */ 201 xhnwindow(din, dout, n, preemp); 202 break; 203 default: 204 Fprintf(stderr,"Unknown window type (%d) requested in window()\n",type); 205 return(FALSE); 206 } 207 return(TRUE); 208} 209 210/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ 211/* Compute the pp+1 autocorrelation lags of the windowsize samples in s. 212 * Return the normalized autocorrelation coefficients in r. 213 * The rms is returned in e. 214 */ 215void xautoc( windowsize, s, p, r, e ) 216 register int p, windowsize; 217 register float *s, *e; 218 register float *r; 219{ 220 register int i, j; 221 register float *q, *t, sum, sum0; 222 223 for( i=windowsize, q=s, sum0=0.0; i--;) { 224 sum = *q++; 225 sum0 += sum*sum; 226 } 227 *r = 1.; /* r[0] will always =1. */ 228 if(sum0 == 0.0) { /* No energy: fake low-energy white noise. */ 229 *e = 1.; /* Arbitrarily assign 1 to rms. */ 230 /* Now fake autocorrelation of white noise. */ 231 for ( i=1; i<=p; i++){ 232 r[i] = 0.; 233 } 234 return; 235 } 236 *e = (float) sqrt((double)(sum0/windowsize)); 237 sum0 = (float) (1.0/sum0); 238 for( i=1; i <= p; i++){ 239 for( sum=0.0, j=windowsize-i, q=s, t=s+i; j--; ) 240 sum += (*q++) * (*t++); 241 *(++r) = sum*sum0; 242 } 243} 244 245/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ 246/* Using Durbin's recursion, convert the autocorrelation sequence in r 247 * to reflection coefficients in k and predictor coefficients in a. 248 * The prediction error energy (gain) is left in *ex. 249 * Note: durbin returns the coefficients in normal sign format. 250 * (i.e. a[0] is assumed to be = +1.) 251 */ 252void xdurbin ( r, k, a, p, ex) 253 register int p; /* analysis order */ 254 register float *r, *k, *a, *ex; 255{ 256 float bb[BIGSORD]; 257 register int i, j; 258 register float e, s, *b = bb; 259 260 e = *r; 261 *k = -r[1]/e; 262 *a = *k; 263 e *= (float) (1. - (*k) * (*k)); 264 for ( i=1; i < p; i++){ 265 s = 0; 266 for ( j=0; j<i; j++){ 267 s -= a[j] * r[i-j]; 268 } 269 k[i] = ( s - r[i+1] )/e; 270 a[i] = k[i]; 271 for ( j=0; j<=i; j++){ 272 b[j] = a[j]; 273 } 274 for ( j=0; j<i; j++){ 275 a[j] += k[i] * b[i-j-1]; 276 } 277 e *= (float) ( 1. - (k[i] * k[i]) ); 278 } 279 *ex = e; 280} 281 282/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ 283/* Compute the autocorrelations of the p LP coefficients in a. 284 * (a[0] is assumed to be = 1 and not explicitely accessed.) 285 * The magnitude of a is returned in c. 286 * 2* the other autocorrelation coefficients are returned in b. 287 */ 288void xa_to_aca ( a, b, c, p ) 289float *a, *b, *c; 290register int p; 291{ 292 register float s, *ap, *a0; 293 register int i, j; 294 295 for ( s=1., ap=a, i = p; i--; ap++ ) 296 s += *ap * *ap; 297 298 *c = s; 299 for ( i = 1; i <= p; i++){ 300 s = a[i-1]; 301 for (a0 = a, ap = a+i, j = p-i; j--; ) 302 s += (*a0++ * *ap++); 303 *b++ = (float) (2. * s); 304 } 305 306} 307 308/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ 309/* Compute the Itakura LPC distance between the model represented 310 * by the signal autocorrelation (r) and its residual (gain) and 311 * the model represented by an LPC autocorrelation (c, b). 312 * Both models are of order p. 313 * r is assumed normalized and r[0]=1 is not explicitely accessed. 314 * Values returned by the function are >= 1. 315 */ 316float xitakura ( p, b, c, r, gain ) 317 register float *b, *c, *r, *gain; 318 register int p; 319{ 320 register float s; 321 322 for( s= *c; p--; ) 323 s += *r++ * *b++; 324 325 return (s/ *gain); 326} 327 328/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ 329/* Compute the time-weighted RMS of a size segment of data. The data 330 * is weighted by a window of type w_type before RMS computation. w_type 331 * is decoded above in window(). 332 */ 333float wind_energy(data,size,w_type) 334 register float *data; /* input PCM data */ 335 register int size, /* size of window */ 336 w_type; /* window type */ 337{ 338 static int nwind = 0; 339 static float *dwind = NULL; 340 register float *dp, sum, f; 341 register int i; 342 343 if(nwind < size) { 344 if(dwind) dwind = (float*)ckrealloc((void *)dwind,size*sizeof(float)); 345 else dwind = (float*)ckalloc(size*sizeof(float)); 346 if(!dwind) { 347 Fprintf(stderr,"Can't allocate scratch memory in wind_energy()\n"); 348 return(0.0); 349 } 350 } 351 if(nwind != size) { 352 xget_window(dwind, size, w_type); 353 nwind = size; 354 } 355 for(i=size, dp = dwind, sum = 0.0; i-- > 0; ) { 356 f = *dp++ * (float)(*data++); 357 sum += f*f; 358 } 359 return((float)sqrt((double)(sum/size))); 360} 361 362/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ 363/* Generic autocorrelation LPC analysis of the short-integer data 364 * sequence in data. 365 */ 366int xlpc(lpc_ord,lpc_stabl,wsize,data,lpca,ar,lpck,normerr,rms,preemp,type) 367 int lpc_ord, /* Analysis order */ 368 wsize, /* window size in points */ 369 type; /* window type (decoded in window() above) */ 370 float lpc_stabl, /* Stability factor to prevent numerical problems. */ 371 *lpca, /* if non-NULL, return vvector for predictors */ 372 *ar, /* if non-NULL, return vector for normalized autoc. */ 373 *lpck, /* if non-NULL, return vector for PARCOR's */ 374 *normerr, /* return scaler for normalized error */ 375 *rms, /* return scaler for energy in preemphasized window */ 376 preemp; 377 float *data; /* input data sequence; assumed to be wsize+1 long */ 378{ 379 static float *dwind=NULL; 380 static int nwind=0; 381 float rho[BIGSORD+1], k[BIGSORD], a[BIGSORD+1],*r,*kp,*ap,en,er,wfact=1.0; 382 383 if((wsize <= 0) || (!data) || (lpc_ord > BIGSORD)) return(FALSE); 384 385 if(nwind != wsize) { 386 if(dwind) dwind = (float*)ckrealloc((void *)dwind,wsize*sizeof(float)); 387 else dwind = (float*)ckalloc(wsize*sizeof(float)); 388 if(!dwind) { 389 Fprintf(stderr,"Can't allocate scratch memory in lpc()\n"); 390 return(FALSE); 391 } 392 nwind = wsize; 393 } 394 395 window(data, dwind, wsize, preemp, type); 396 if(!(r = ar)) r = rho; /* Permit optional return of the various */ 397 if(!(kp = lpck)) kp = k; /* coefficients and intermediate results. */ 398 if(!(ap = lpca)) ap = a; 399 xautoc( wsize, dwind, lpc_ord, r, &en ); 400 if(lpc_stabl > 1.0) { /* add a little to the diagonal for stability */ 401 int i; 402 float ffact; 403 ffact = (float) (1.0/(1.0 + exp((-lpc_stabl/20.0) * log(10.0)))); 404 for(i=1; i <= lpc_ord; i++) rho[i] = ffact * r[i]; 405 *rho = *r; 406 r = rho; 407 if(ar) 408 for(i=0;i<=lpc_ord; i++) ar[i] = r[i]; 409 } 410 xdurbin ( r, kp, &ap[1], lpc_ord, &er); 411 switch(type) { /* rms correction for window */ 412 case 0: 413 wfact = 1.0; /* rectangular */ 414 break; 415 case 1: 416 wfact = .630397f; /* Hamming */ 417 break; 418 case 2: 419 wfact = .443149f; /* (.5 - .5*cos)^4 */ 420 break; 421 case 3: 422 wfact = .612372f; /* Hanning */ 423 break; 424 } 425 *ap = 1.0; 426 if(rms) *rms = en/wfact; 427 if(normerr) *normerr = er; 428 return(TRUE); 429} 430 431 432/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ 433/* Return a sequence based on the normalized crosscorrelation of the signal 434 in data. 435 * 436 data is the input speech array 437 size is the number of samples in each correlation 438 start is the first lag to compute (governed by the highest expected F0) 439 nlags is the number of cross correlations to compute (set by lowest F0) 440 engref is the energy computed at lag=0 (i.e. energy in ref. window) 441 maxloc is the lag at which the maximum in the correlation was found 442 maxval is the value of the maximum in the CCF over the requested lag interval 443 correl is the array of nlags cross-correlation coefficients (-1.0 to 1.0) 444 * 445 */ 446void crossf(data, size, start, nlags, engref, maxloc, maxval, correl) 447 int *maxloc; 448 float *engref, *maxval, *data, *correl; 449 int size, start, nlags; 450{ 451 static float *dbdata=NULL; 452 static int dbsize = 0; 453 register float *dp, *ds, sum, st; 454 register int j; 455 register float *dq, t, *p, engr, *dds, amax; 456 register double engc; 457 int i, iloc, total; 458 int sizei, sizeo, maxsize; 459 460 /* Compute mean in reference window and subtract this from the 461 entire sequence. This doesn't do too much damage to the data 462 sequenced for the purposes of F0 estimation and removes the need for 463 more principled (and costly) low-cut filtering. */ 464 if((total = size+start+nlags) > dbsize) { 465 if(dbdata) 466 ckfree((void *)dbdata); 467 dbdata = NULL; 468 dbsize = 0; 469 if(!(dbdata = (float*)ckalloc(sizeof(float)*total))) { 470 Fprintf(stderr,"Allocation failure in crossf()\n"); 471 return;/*exit(-1);*/ 472 } 473 dbsize = total; 474 } 475 for(engr=0.0, j=size, p=data; j--; ) engr += *p++; 476 engr /= size; 477 for(j=size+nlags+start, dq = dbdata, p=data; j--; ) *dq++ = *p++ - engr; 478 479 maxsize = start + nlags; 480 sizei = size + start + nlags + 1; 481 sizeo = nlags + 1; 482 483 /* Compute energy in reference window. */ 484 for(j=size, dp=dbdata, sum=0.0; j--; ) { 485 st = *dp++; 486 sum += st * st; 487 } 488 489 *engref = engr = sum; 490 if(engr > 0.0) { /* If there is any signal energy to work with... */ 491 /* Compute energy at the first requested lag. */ 492 for(j=size, dp=dbdata+start, sum=0.0; j--; ) { 493 st = *dp++; 494 sum += st * st; 495 } 496 engc = sum; 497 498 /* COMPUTE CORRELATIONS AT ALL OTHER REQUESTED LAGS. */ 499 for(i=0, dq=correl, amax=0.0, iloc = -1; i < nlags; i++) { 500 for(j=size, sum=0.0, dp=dbdata, dds = ds = dbdata+i+start; j--; ) 501 sum += *dp++ * *ds++; 502 *dq++ = t = (float) (sum/sqrt((double)(engc*engr))); /* output norm. CC */ 503 engc -= (double)(*dds * *dds); /* adjust norm. energy for next lag */ 504 if((engc += (double)(*ds * *ds)) < 1.0) 505 engc = 1.0; /* (hack: in case of roundoff error) */ 506 if(t > amax) { /* Find abs. max. as we go. */ 507 amax = t; 508 iloc = i+start; 509 } 510 } 511 *maxloc = iloc; 512 *maxval = amax; 513 } else { /* No energy in signal; fake reasonable return vals. */ 514 *maxloc = 0; 515 *maxval = 0.0; 516 for(p=correl,i=nlags; i-- > 0; ) 517 *p++ = 0.0; 518 } 519} 520 521/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ 522/* Return a sequence based on the normalized crosscorrelation of the 523 signal in data. This is similar to crossf(), but is designed to 524 compute only small patches of the correlation sequence. The length of 525 each patch is determined by nlags; the number of patches by nlocs, and 526 the locations of the patches is specified by the array locs. Regions 527 of the CCF that are not computed are set to 0. 528 * 529 data is the input speech array 530 size is the number of samples in each correlation 531 start0 is the first (virtual) lag to compute (governed by highest F0) 532 nlags0 is the number of lags (virtual+actual) in the correlation sequence 533 nlags is the number of cross correlations to compute at each location 534 engref is the energy computed at lag=0 (i.e. energy in ref. window) 535 maxloc is the lag at which the maximum in the correlation was found 536 maxval is the value of the maximum in the CCF over the requested lag interval 537 correl is the array of nlags cross-correlation coefficients (-1.0 to 1.0) 538 locs is an array of indices pointing to the center of a patches where the 539 cross correlation is to be computed. 540 nlocs is the number of correlation patches to compute. 541 * 542 */ 543void crossfi(data, size, start0, nlags0, nlags, engref, maxloc, maxval, correl, locs, nlocs) 544 int *maxloc; 545 float *engref, *maxval, *data, *correl; 546 int size, start0, nlags0, nlags, *locs, nlocs; 547{ 548 static float *dbdata=NULL; 549 static int dbsize = 0; 550 register float *dp, *ds, sum, st; 551 register int j; 552 register float *dq, t, *p, engr, *dds, amax; 553 register double engc; 554 int i, iloc, start, total; 555 556 /* Compute mean in reference window and subtract this from the 557 entire sequence. */ 558 if((total = size+start0+nlags0) > dbsize) { 559 if(dbdata) 560 ckfree((void *)dbdata); 561 dbdata = NULL; 562 dbsize = 0; 563 if(!(dbdata = (float*)ckalloc(sizeof(float)*total))) { 564 Fprintf(stderr,"Allocation failure in crossfi()\n"); 565 return;/*exit(-1);*/ 566 } 567 dbsize = total; 568 } 569 for(engr=0.0, j=size, p=data; j--; ) engr += *p++; 570 engr /= size; 571/* for(j=size+nlags0+start0, t = -2.1, amax = 2.1, dq = dbdata, p=data; j--; ) { 572 if(((smax = *p++ - engr) > t) && (smax < amax)) 573 smax = 0.0; 574 *dq++ = smax; 575 } */ 576 for(j=size+nlags0+start0, dq = dbdata, p=data; j--; ) { 577 *dq++ = *p++ - engr; 578 } 579 580 /* Zero the correlation output array to avoid confusing the peak 581 picker (since all lags will not be computed). */ 582 for(p=correl,i=nlags0; i-- > 0; ) 583 *p++ = 0.0; 584 585 /* compute energy in reference window */ 586 for(j=size, dp=dbdata, sum=0.0; j--; ) { 587 st = *dp++; 588 sum += st * st; 589 } 590 591 *engref = engr = sum; 592 amax=0.0; 593 iloc = -1; 594 if(engr > 0.0) { 595 for( ; nlocs > 0; nlocs--, locs++ ) { 596 start = *locs - (nlags>>1); 597 if(start < start0) 598 start = start0; 599 dq = correl + start - start0; 600 /* compute energy at first requested lag */ 601 for(j=size, dp=dbdata+start, sum=0.0; j--; ) { 602 st = *dp++; 603 sum += st * st; 604 } 605 engc = sum; 606 607 /* COMPUTE CORRELATIONS AT ALL REQUESTED LAGS */ 608 for(i=0; i < nlags; i++) { 609 for(j=size, sum=0.0, dp=dbdata, dds = ds = dbdata+i+start; j--; ) 610 sum += *dp++ * *ds++; 611 if(engc < 1.0) 612 engc = 1.0; /* in case of roundoff error */ 613 *dq++ = t = (float) (sum/sqrt((double)(10000.0 + (engc*engr)))); 614 engc -= (double)(*dds * *dds); 615 engc += (double)(*ds * *ds); 616 if(t > amax) { 617 amax = t; 618 iloc = i+start; 619 } 620 } 621 } 622 *maxloc = iloc; 623 *maxval = amax; 624 } else { 625 *maxloc = 0; 626 *maxval = 0.0; 627 } 628} 629