1/* 2 * Copyright (C) 1997-2005 Kare Sjolander <kare@speech.kth.se> 3 * 4 * This file is part of the Snack Sound Toolkit. 5 * The latest version can be found at http://www.speech.kth.se/snack/ 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 */ 21 22#include "tcl.h" 23#include "snack.h" 24#include <string.h> 25#include <math.h> 26 27#define SNACK_PI 3.141592653589793 28 29void 30Snack_InitWindow(float *win, int winlen, int fftlen, int type) 31{ 32 int i; 33 34 if (winlen > fftlen) 35 winlen = fftlen; 36 37 if (type == SNACK_WIN_RECT) { 38 for (i = 0; i < winlen; i++) 39 win[i] = (float) 1.0; 40 } else if (type == SNACK_WIN_HANNING) { 41 for (i = 0; i < winlen; i++) 42 win[i] = (float)(0.5 * (1.0 - cos(i * 2.0 * SNACK_PI / (winlen - 1)))); 43 } else if (type == SNACK_WIN_BARTLETT) { 44 for (i = 0; i < winlen/2; i++) 45 win[i] = (float)(((2.0 * i) / (winlen - 1))); 46 for (i = winlen/2; i < winlen; i++) 47 win[i] = (float)(2.0 * (1.0 - ((float)i / (winlen - 1)))); 48 } else if (type == SNACK_WIN_BLACKMAN) { 49 for (i = 0; i < winlen; i++) 50 win[i] = (float)((0.42 - 0.5 * cos(i * 2.0 * SNACK_PI / (winlen - 1)) 51 + 0.08 * cos(i * 4.0 * SNACK_PI / (winlen - 1)))); 52 } else { /* default: Hamming window */ 53 for (i = 0; i < winlen; i++) 54 win[i] = (float)(0.54 - 0.46 * cos(i * 2.0 * SNACK_PI / (winlen - 1))); 55 } 56 57 for (i = winlen; i < fftlen; i++) 58 win[i] = 0.0; 59} 60 61int 62CheckFFTlen(Tcl_Interp *interp, int fftlen) 63{ 64 int n = NMIN; 65 char str[10]; 66 67 while (n <= NMAX) { 68 if (n == fftlen) return TCL_OK; 69 n *= 2; 70 } 71 72 Tcl_AppendResult(interp, "-fftlength must be one of { ", NULL); 73 74 for (n = NMIN; n <= NMAX; n*=2) { 75 sprintf(str, "%d ", n); 76 Tcl_AppendResult(interp, str, NULL); 77 } 78 Tcl_AppendResult(interp, "}", NULL); 79 return TCL_ERROR; 80} 81 82int 83CheckWinlen(Tcl_Interp *interp, int winlen, int fftlen) 84{ 85 char str[10]; 86 87 if (winlen < 1) { 88 Tcl_AppendResult(interp, "-winlength must be > 0", NULL); 89 return TCL_ERROR; 90 } 91 if (winlen > fftlen) { 92 Tcl_AppendResult(interp, "-winlength must be <= fftlength (", NULL); 93 sprintf(str, "%d)", fftlen); 94 Tcl_AppendResult(interp, str, NULL); 95 return TCL_ERROR; 96 } 97 98 return TCL_OK; 99} 100 101void 102GetFloatMonoSig(Sound *s,SnackLinkedFileInfo *info, 103 float *sig, int beg, int len, int channel) { 104 /* sig buffer must be allocated, file must be open! */ 105 106 int i; 107 108 if (s->storeType == SOUND_IN_MEMORY) { 109 if (s->nchannels == 1 || channel != -1) { 110 int p = beg * s->nchannels + channel; 111 112 for (i = 0; i < len; i++) { 113 sig[i] = (float) (FSAMPLE(s, p)); 114 p += s->nchannels; 115 } 116 } else { 117 int c; 118 119 for (i = 0; i < len; i++) { 120 sig[i] = 0.0; 121 } 122 for (c = 0; c < s->nchannels; c++) { 123 int p = beg * s->nchannels + c; 124 125 for (i = 0; i < len; i++) { 126 sig[i] += (float) (FSAMPLE(s, p)); 127 p += s->nchannels; 128 } 129 } 130 for (i = 0; i < len; i++) { 131 sig[i] /= s->nchannels; 132 } 133 } 134 } else { /* storeType != SOUND_IN_MEMORY */ 135 if (s->nchannels == 1 || channel != -1) { 136 int p = beg * s->nchannels + channel; 137 138 for (i = 0; i < len; i++) { 139 sig[i] = (float) (GetSample(info, p)); 140 p += s->nchannels; 141 } 142 } else { 143 int c; 144 145 for (i = 0; i < len; i++) { 146 sig[i] = 0.0; 147 } 148 for (c = 0; c < s->nchannels; c++) { 149 int p = beg * s->nchannels + c; 150 151 for (i = 0; i < len; i++) { 152 sig[i] += (float) (GetSample(info, p)); 153 p += s->nchannels; 154 } 155 } 156 for (i = 0; i < len; i++) { 157 sig[i] /= s->nchannels; 158 } 159 } 160 } 161} 162 163static float xfft[NMAX]; 164static float ffts[NMAX]; 165static float hamwin[NMAX]; 166#define SNACK_DEFAULT_DBPWINTYPE SNACK_WIN_HAMMING 167#define SNACK_DEFAULT_DBP_LPC_ORDER 20 168#define SNACK_MAX_LPC_ORDER 40 169 170int 171CheckLPCorder(Tcl_Interp *interp, int lpcorder) 172{ 173 char str[10]; 174 if (lpcorder < 1) { 175 Tcl_AppendResult(interp, "-lpcorder must be > 0", NULL); 176 return TCL_ERROR; 177 } 178 if (lpcorder > SNACK_MAX_LPC_ORDER) { 179 Tcl_AppendResult(interp, "-lpcorder must be <= ", NULL); 180 sprintf(str, "%d)", SNACK_MAX_LPC_ORDER); 181 Tcl_AppendResult(interp, str, NULL); 182 return TCL_ERROR; 183 } 184 185 return TCL_OK; 186} 187 188extern void Snack_PowerSpectrum(float *z); 189 190int 191dBPowerSpectrumCmd(Sound *s, Tcl_Interp *interp, int objc, 192 Tcl_Obj *CONST objv[]) 193{ 194 double dpreemph = 0.0; 195 float preemph = 0.0; 196 int i, j, n = 0, arg; 197 int channel = 0, winlen = 256, fftlen = 512; 198 int startpos = 0, endpos = -1, skip = -1; 199 Tcl_Obj *list; 200 SnackLinkedFileInfo info; 201 SnackWindowType wintype = SNACK_DEFAULT_DBPWINTYPE; 202 static CONST84 char *subOptionStrings[] = { 203 "-start", "-end", "-channel", "-fftlength", "-winlength", "-windowlength", 204 "-preemphasisfactor", "-skip", "-windowtype", "-analysistype", 205 "-lpcorder", NULL 206 }; 207 enum subOptions { 208 START, END, CHANNEL, FFTLEN, WINLEN, WINDOWLEN, PREEMPH, SKIP, WINTYPE, 209 ANATYPE, LPCORDER 210 }; 211 float *sig_lpc; 212 float presample = 0.0; 213 int siglen, type = 0, lpcOrder = SNACK_DEFAULT_DBP_LPC_ORDER; 214 float g_lpc; 215 216 if (s->debug > 0) Snack_WriteLog("Enter dBPowerSpectrumCmd\n"); 217 218 for (arg = 2; arg < objc; arg += 2) { 219 int index; 220 221 if (Tcl_GetIndexFromObj(interp, objv[arg], subOptionStrings, 222 "option", 0, &index) != TCL_OK) { 223 return TCL_ERROR; 224 } 225 226 if (arg + 1 == objc) { 227 Tcl_AppendResult(interp, "No argument given for ", 228 subOptionStrings[index], " option", (char *) NULL); 229 return TCL_ERROR; 230 } 231 232 switch ((enum subOptions) index) { 233 case START: 234 { 235 if (Tcl_GetIntFromObj(interp, objv[arg+1], &startpos) != TCL_OK) 236 return TCL_ERROR; 237 break; 238 } 239 case END: 240 { 241 if (Tcl_GetIntFromObj(interp, objv[arg+1], &endpos) != TCL_OK) 242 return TCL_ERROR; 243 break; 244 } 245 case CHANNEL: 246 { 247 char *str = Tcl_GetStringFromObj(objv[arg+1], NULL); 248 if (GetChannel(interp, str, s->nchannels, &channel) != TCL_OK) { 249 return TCL_ERROR; 250 } 251 break; 252 } 253 case FFTLEN: 254 { 255 if (Tcl_GetIntFromObj(interp, objv[arg+1], &fftlen) != TCL_OK) 256 return TCL_ERROR; 257 break; 258 } 259 case WINDOWLEN: 260 case WINLEN: 261 { 262 if (Tcl_GetIntFromObj(interp, objv[arg+1], &winlen) != TCL_OK) 263 return TCL_ERROR; 264 break; 265 } 266 case PREEMPH: 267 { 268 if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &dpreemph) != TCL_OK) 269 return TCL_ERROR; 270 break; 271 } 272 case SKIP: 273 { 274 if (Tcl_GetIntFromObj(interp, objv[arg+1], &skip) != TCL_OK) 275 return TCL_ERROR; 276 break; 277 } 278 case WINTYPE: 279 { 280 char *str = Tcl_GetStringFromObj(objv[arg+1], NULL); 281 if (GetWindowType(interp, str, &wintype) != TCL_OK) 282 return TCL_ERROR; 283 break; 284 } 285 case ANATYPE: 286 { 287 int len; 288 char *str = Tcl_GetStringFromObj(objv[arg+1], &len); 289 290 if (strncasecmp(str, "lpc", len) == 0) { 291 type = 1; 292 } else if (strncasecmp(str, "fft", len) == 0) { 293 type = 0; 294 } else { 295 Tcl_AppendResult(interp, "-type should be FFT or LPC", (char *)NULL); 296 return TCL_ERROR; 297 } 298 break; 299 } 300 case LPCORDER: 301 { 302 if (Tcl_GetIntFromObj(interp, objv[arg+1], &lpcOrder) != TCL_OK) 303 return TCL_ERROR; 304 if (CheckLPCorder(interp, lpcOrder) != TCL_OK) 305 return TCL_ERROR; 306 break; 307 } 308 } 309 } 310 311 if (CheckFFTlen(interp, fftlen) != TCL_OK) return TCL_ERROR; 312 313 if (CheckWinlen(interp, winlen, fftlen) != TCL_OK) return TCL_ERROR; 314 315 preemph = (float) dpreemph; 316 317 if (startpos < 0 || startpos > s->length - fftlen) { 318 Tcl_AppendResult(interp, "FFT window out of bounds", NULL); 319 return TCL_ERROR; 320 } 321 322 if (endpos < 0) 323 endpos = startpos; 324 325 if (endpos > s->length - 1) { 326 Tcl_AppendResult(interp, "FFT window out of bounds", NULL); 327 return TCL_ERROR; 328 } 329 330 for (i = 0; i < NMAX/2; i++) { 331 ffts[i] = 0.0; 332 } 333 334 if (s->storeType != SOUND_IN_MEMORY) { 335 if (OpenLinkedFile(s, &info) != TCL_OK) { 336 return TCL_OK; 337 } 338 } 339 340 Snack_InitWindow(hamwin, winlen, fftlen, wintype); 341 Snack_InitFFT(fftlen); 342 343 if (skip < 1) { 344 skip = fftlen; 345 } 346 siglen = endpos - startpos; 347 n = siglen / skip; 348 if (n < 1) { 349 n = 1; 350 } 351 352 if (s->nchannels == 1) { 353 channel = 0; 354 } 355 356 if (type != 0 && n > 0) { /* LPC + FFT */ 357 if (siglen < fftlen) siglen = fftlen; 358 sig_lpc = (float *) ckalloc(siglen * sizeof(float)); 359 360 GetFloatMonoSig(s, &info, sig_lpc, startpos, siglen, channel); 361 if (startpos > 0) 362 GetFloatMonoSig(s, &info, &presample, startpos - 1, 1, channel); 363 PreEmphase(sig_lpc, presample, siglen, preemph); 364 365 /* windowing signal to make lpc look more like the fft spectrum ??? */ 366 for (i = 0; i < winlen/2; i++) { 367 sig_lpc[i] = sig_lpc[i] * hamwin[i]; 368 } 369 for (i = winlen/2; i < winlen; i++) { 370 sig_lpc[i + siglen - winlen] = sig_lpc[i + siglen - winlen] * hamwin[i]; 371 } 372 373 g_lpc = LpcAnalysis(sig_lpc, siglen, xfft, lpcOrder); 374 ckfree((char *) sig_lpc); 375 376 for (i = 0; i <= lpcOrder; i++) { 377 /* the factor is a guess, try looking for analytical value */ 378 xfft[i] = xfft[i] * 5000000000.0f; 379 } 380 for (i = lpcOrder + 1; i < fftlen; i++) { 381 xfft[i] = 0.0; 382 } 383 384 Snack_DBPowerSpectrum(xfft); 385 386 for (i = 0; i < fftlen/2; i++) { 387 ffts[i] = -xfft[i]; 388 } 389 390 } else { /* usual FFT */ 391 392 for (j = 0; j < n; j++) { 393 if (s->storeType == SOUND_IN_MEMORY) { 394 if (s->nchannels == 1 || channel != -1) { 395 int p = (startpos + j * skip) * s->nchannels + channel; 396 397 for (i = 0; i < fftlen; i++) { 398 xfft[i] = (float) ((FSAMPLE(s, p + s->nchannels) 399 - preemph * FSAMPLE(s, p)) 400 * hamwin[i]); 401 p += s->nchannels; 402 } 403 } else { 404 int c; 405 406 for (i = 0; i < fftlen; i++) { 407 xfft[i] = 0.0; 408 } 409 for (c = 0; c < s->nchannels; c++) { 410 int p = (startpos + j * skip) * s->nchannels + c; 411 412 for (i = 0; i < fftlen; i++) { 413 xfft[i] += (float) ((FSAMPLE(s, p + s->nchannels) 414 - preemph * FSAMPLE(s, p)) 415 * hamwin[i]); 416 p += s->nchannels; 417 } 418 } 419 for (i = 0; i < fftlen; i++) { 420 xfft[i] /= s->nchannels; 421 } 422 } 423 } else { /* storeType != SOUND_IN_MEMORY */ 424 if (s->nchannels == 1 || channel != -1) { 425 int p = (startpos + j * skip) * s->nchannels + channel; 426 427 for (i = 0; i < fftlen; i++) { 428 xfft[i] = (float) ((GetSample(&info, p + s->nchannels) 429 - preemph * GetSample(&info, p)) 430 * hamwin[i]); 431 p += s->nchannels; 432 } 433 } else { 434 int c; 435 436 for (i = 0; i < fftlen; i++) { 437 xfft[i] = 0.0; 438 } 439 for (c = 0; c < s->nchannels; c++) { 440 int p = (startpos + j * skip) * s->nchannels + c; 441 442 for (i = 0; i < fftlen; i++) { 443 xfft[i] += (float) ((GetSample(&info, p + s->nchannels) 444 - preemph * GetSample(&info, p)) 445 * hamwin[i]); 446 p += s->nchannels; 447 } 448 } 449 for (i = 0; i < fftlen; i++) { 450 xfft[i] /= s->nchannels; 451 } 452 } 453 } 454 455 Snack_PowerSpectrum(xfft); 456 457 for (i = 0; i < fftlen/2; i++) { 458 ffts[i] += xfft[i]; 459 } 460 } 461 } 462 463 if (s->storeType != SOUND_IN_MEMORY) { 464 CloseLinkedFile(&info); 465 } 466 467 if (type == 0) { 468 for (i = 0; i < fftlen/2; i++) { 469 ffts[i] = ffts[i] / (float) n; 470 } 471 for (i = 1; i < fftlen/2; i++) { 472 if (ffts[i] < SNACK_INTLOGARGMIN) 473 ffts[i] = SNACK_INTLOGARGMIN; 474 ffts[i] = (float) (SNACK_DB * log(ffts[i]) - SNACK_CORRN); 475 } 476 if (ffts[0] < SNACK_INTLOGARGMIN) 477 ffts[0] = SNACK_INTLOGARGMIN; 478 ffts[0] = (float) (SNACK_DB * log(ffts[0]) - SNACK_CORR0); 479 } 480 list = Tcl_NewListObj(0, NULL); 481 for (i = 0; i < fftlen/2; i++) { 482 Tcl_ListObjAppendElement(interp, list, Tcl_NewDoubleObj(ffts[i])); 483 } 484 485 Tcl_SetObjResult(interp, list); 486 487 if (s->debug > 0) Snack_WriteLog("Exit dBPowerSpectrumCmd\n"); 488 489 return TCL_OK; 490} 491 492int 493GetChannel(Tcl_Interp *interp, char *str, int nchan, int *channel) 494{ 495 int n = -2; 496 int len = strlen(str); 497 498 if (strncasecmp(str, "left", len) == 0) { 499 n = 0; 500 } else if (strncasecmp(str, "right", len) == 0) { 501 n = 1; 502 } else if (strncasecmp(str, "all", len) == 0) { 503 n = -1; 504 } else if (strncasecmp(str, "both", len) == 0) { 505 n = -1; 506 } else { 507 Tcl_GetInt(interp, str, &n); 508 } 509 510 if (n < -1 || n >= nchan) { 511 Tcl_AppendResult(interp, "-channel must be left, right, both, all, -1, or an integer between 0 and the number channels - 1", NULL); 512 return TCL_ERROR; 513 } 514 515 *channel = n; 516 517 return TCL_OK; 518} 519 520int 521GetWindowType(Tcl_Interp *interp, char *str, SnackWindowType *wintype) 522{ 523 SnackWindowType type = -1; 524 int len = strlen(str); 525 526 if (strncasecmp(str, "hamming", len) == 0) { 527 type = SNACK_WIN_HAMMING; 528 } else if (strncasecmp(str, "hanning", len) == 0) { 529 type = SNACK_WIN_HANNING; 530 } else if (strncasecmp(str, "bartlett", len) == 0) { 531 type = SNACK_WIN_BARTLETT; 532 } else if (strncasecmp(str, "blackman", len) == 0) { 533 type = SNACK_WIN_BLACKMAN; 534 } else if (strncasecmp(str, "rectangle", len) == 0) { 535 type = SNACK_WIN_RECT; 536 } 537 538 if (type == -1) { 539 Tcl_AppendResult(interp, "-windowtype must be hanning, hamming, bartlett," 540 "blackman, or rectangle", NULL); 541 return TCL_ERROR; 542 } 543 544 *wintype = type; 545 546 return TCL_OK; 547} 548 549float 550LpcAnalysis (float *data, int N, float *f, int order) { 551 int i,m; 552 float sumU = 0.0; 553 float sumD = 0.0; 554 float b[SNACK_MAX_LPC_ORDER+1]; 555 float KK; 556 557 float ParcorCoeffs[SNACK_MAX_LPC_ORDER]; 558 /* PreData should be used when signal is not windowed */ 559 float PreData[SNACK_MAX_LPC_ORDER]; 560 float *errF; 561 float *errB; 562 float errF_m = 0.0; 563 564 if ((order < 1) || (order > SNACK_MAX_LPC_ORDER)) return 0.0f; 565 566 errF = (float *) ckalloc((N+SNACK_MAX_LPC_ORDER) * sizeof(float)); 567 errB = (float *) ckalloc((N+SNACK_MAX_LPC_ORDER) * sizeof(float)); 568 569 for (i=0; i<order; i++) { 570 ParcorCoeffs[i] = 0.0; 571 PreData[i] = 0.0; /* delete here and use as argument when sig not windowed */ 572 }; 573 574 for (m=0; m<order; m++) 575 errF[m] = PreData[m]; 576 for (m=0; m<N; m++) 577 errF[m+order] = data[m] ; 578 579 errB[0] = 0.0; 580 for (m=1; m<N+order; m++) 581 errB[m] = errF[m-1]; 582 583 for (i=0; i<order; i++) { 584 585 sumU=0.0; 586 sumD=0.0; 587 for (m=i+1; m<N+order; m++) { 588 sumU -= errF[m] * errB[m]; 589 sumD += errF[m] * errF[m] + errB[m] * errB[m]; 590 }; 591 592 if (sumD != 0) KK = 2.0f * sumU / sumD; 593 else KK = 0; 594 ParcorCoeffs[i] = KK; 595 596 597 for (m=N+order-1; m>i; m--) { 598 errF[m] += KK * errB[m]; 599 errB[m] = errB[m-1] + KK * errF[m-1]; 600 }; 601 }; 602 603 for (i=order; i<N+order; i++) { 604 errF_m += errF[i]*errF[i]; 605 } 606 errF_m /= N; 607 608 ckfree((char *)errF); 609 ckfree((char *)errB); 610 611 /* convert to direct filter coefficients */ 612 f[0] = 1.0; 613 for (m=1; m<=order; m++) { 614 f[m] = ParcorCoeffs[m-1]; 615 for (i=1; i<m; i++) 616 b[i] = f[i]; 617 for (i=1; i<m; i++) 618 f[i] = b[i] + ParcorCoeffs[m-1] * b[m-i]; 619 } 620 621 return (float)sqrt(errF_m); 622} 623 624 625void PreEmphase(float *sig, float presample, int len, float preemph) { 626 int i; 627 float temp; 628 629 if (preemph == 0.0) return; 630 631 for (i = 0; i < len; i++) { 632 temp = sig[i]; 633 sig[i] = temp - preemph * presample; 634 presample = temp; 635 } 636} 637 638int 639powerSpectrumCmd(Sound *s, Tcl_Interp *interp, int objc, 640 Tcl_Obj *CONST objv[]) 641{ 642 double dpreemph = 0.0; 643 float preemph = 0.0; 644 int i, j, n = 0, arg; 645 int channel = 0, winlen = 256, fftlen = 512; 646 int startpos = 0, endpos = -1, skip = -1; 647 Tcl_Obj *list; 648 SnackLinkedFileInfo info; 649 SnackWindowType wintype = SNACK_DEFAULT_DBPWINTYPE; 650 static CONST84 char *subOptionStrings[] = { 651 "-start", "-end", "-channel", "-fftlength", "-winlength", "-windowlength", 652 "-preemphasisfactor", "-skip", "-windowtype", "-analysistype", 653 "-lpcorder", NULL 654 }; 655 enum subOptions { 656 START, END, CHANNEL, FFTLEN, WINLEN, WINDOWLEN, PREEMPH, SKIP, WINTYPE, 657 ANATYPE, LPCORDER 658 }; 659 float *sig_lpc; 660 float presample = 0.0; 661 int siglen, type = 0, lpcOrder = SNACK_DEFAULT_DBP_LPC_ORDER; 662 float g_lpc; 663 664 if (s->debug > 0) Snack_WriteLog("Enter powerSpectrumCmd\n"); 665 666 for (arg = 2; arg < objc; arg += 2) { 667 int index; 668 669 if (Tcl_GetIndexFromObj(interp, objv[arg], subOptionStrings, 670 "option", 0, &index) != TCL_OK) { 671 return TCL_ERROR; 672 } 673 674 if (arg + 1 == objc) { 675 Tcl_AppendResult(interp, "No argument given for ", 676 subOptionStrings[index], " option", (char *) NULL); 677 return TCL_ERROR; 678 } 679 680 switch ((enum subOptions) index) { 681 case START: 682 { 683 if (Tcl_GetIntFromObj(interp, objv[arg+1], &startpos) != TCL_OK) 684 return TCL_ERROR; 685 break; 686 } 687 case END: 688 { 689 if (Tcl_GetIntFromObj(interp, objv[arg+1], &endpos) != TCL_OK) 690 return TCL_ERROR; 691 break; 692 } 693 case CHANNEL: 694 { 695 char *str = Tcl_GetStringFromObj(objv[arg+1], NULL); 696 if (GetChannel(interp, str, s->nchannels, &channel) != TCL_OK) { 697 return TCL_ERROR; 698 } 699 break; 700 } 701 case FFTLEN: 702 { 703 if (Tcl_GetIntFromObj(interp, objv[arg+1], &fftlen) != TCL_OK) 704 return TCL_ERROR; 705 break; 706 } 707 case WINDOWLEN: 708 case WINLEN: 709 { 710 if (Tcl_GetIntFromObj(interp, objv[arg+1], &winlen) != TCL_OK) 711 return TCL_ERROR; 712 break; 713 } 714 case PREEMPH: 715 { 716 if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &dpreemph) != TCL_OK) 717 return TCL_ERROR; 718 break; 719 } 720 case SKIP: 721 { 722 if (Tcl_GetIntFromObj(interp, objv[arg+1], &skip) != TCL_OK) 723 return TCL_ERROR; 724 break; 725 } 726 case WINTYPE: 727 { 728 char *str = Tcl_GetStringFromObj(objv[arg+1], NULL); 729 if (GetWindowType(interp, str, &wintype) != TCL_OK) 730 return TCL_ERROR; 731 break; 732 } 733 case ANATYPE: 734 { 735 int len; 736 char *str = Tcl_GetStringFromObj(objv[arg+1], &len); 737 738 if (strncasecmp(str, "lpc", len) == 0) { 739 type = 1; 740 } else if (strncasecmp(str, "fft", len) == 0) { 741 type = 0; 742 } else { 743 Tcl_AppendResult(interp, "-type should be FFT or LPC", (char *)NULL); 744 return TCL_ERROR; 745 } 746 break; 747 } 748 case LPCORDER: 749 { 750 if (Tcl_GetIntFromObj(interp, objv[arg+1], &lpcOrder) != TCL_OK) 751 return TCL_ERROR; 752 if (CheckLPCorder(interp, lpcOrder) != TCL_OK) 753 return TCL_ERROR; 754 break; 755 } 756 } 757 } 758 759 if (CheckFFTlen(interp, fftlen) != TCL_OK) return TCL_ERROR; 760 761 if (CheckWinlen(interp, winlen, fftlen) != TCL_OK) return TCL_ERROR; 762 763 preemph = (float) dpreemph; 764 765 if (startpos < 0 || startpos > s->length - fftlen) { 766 Tcl_AppendResult(interp, "FFT window out of bounds", NULL); 767 return TCL_ERROR; 768 } 769 770 if (endpos < 0) 771 endpos = startpos; 772 773 if (endpos > s->length - 1) { 774 Tcl_AppendResult(interp, "FFT window out of bounds", NULL); 775 return TCL_ERROR; 776 } 777 778 for (i = 0; i < NMAX/2; i++) { 779 ffts[i] = 0.0; 780 } 781 782 if (s->storeType != SOUND_IN_MEMORY) { 783 if (OpenLinkedFile(s, &info) != TCL_OK) { 784 return TCL_OK; 785 } 786 } 787 788 Snack_InitWindow(hamwin, winlen, fftlen, wintype); 789 790 Snack_InitFFT(fftlen); 791 792 if (skip < 1) { 793 skip = fftlen; 794 } 795 siglen = endpos - startpos; 796 n = siglen / skip; 797 if (n < 1) { 798 n = 1; 799 } 800 801 if (s->nchannels == 1) { 802 channel = 0; 803 } 804 805 if (type != 0 && n > 0) { /* LPC + FFT */ 806 if (siglen < fftlen) siglen = fftlen; 807 sig_lpc = (float *) ckalloc(siglen * sizeof(float)); 808 809 GetFloatMonoSig(s, &info, sig_lpc, startpos, siglen, channel); 810 if (startpos > 0) 811 GetFloatMonoSig(s, &info, &presample, startpos - 1, 1, channel); 812 PreEmphase(sig_lpc, presample, siglen, preemph); 813 814 /* windowing signal to make lpc look more like the fft spectrum ??? */ 815 for (i = 0; i < winlen/2; i++) { 816 sig_lpc[i] = sig_lpc[i] * hamwin[i]; 817 } 818 for (i = winlen/2; i < winlen; i++) { 819 sig_lpc[i + siglen - winlen] = sig_lpc[i + siglen - winlen] * hamwin[i]; 820 } 821 822 g_lpc = LpcAnalysis(sig_lpc, siglen, xfft, lpcOrder); 823 ckfree((char *) sig_lpc); 824 825 for (i = 0; i <= lpcOrder; i++) { 826 /* the factor is a guess, try looking for analytical value */ 827 xfft[i] = xfft[i] * 5000000000.0f; 828 } 829 for (i = lpcOrder + 1; i < fftlen; i++) { 830 xfft[i] = 0.0; 831 } 832 833 Snack_PowerSpectrum(xfft); 834 835 for (i = 0; i < fftlen/2; i++) { 836 ffts[i] = -xfft[i]; 837 } 838 839 } else { /* usual FFT */ 840 841 for (j = 0; j < n; j++) { 842 if (s->storeType == SOUND_IN_MEMORY) { 843 if (s->nchannels == 1 || channel != -1) { 844 int p = (startpos + j * skip) * s->nchannels + channel; 845 846 for (i = 0; i < fftlen; i++) { 847 /* if (i < 4) printf("a %f %d\n", FSAMPLE(s, i), n);*/ 848 xfft[i] = (float) ((FSAMPLE(s, p + s->nchannels) 849 - preemph * FSAMPLE(s, p)) 850 * hamwin[i]); 851 /* if (i < 4) printf("b %f %f\n", xfft[i], hamwin[i]);*/ 852 p += s->nchannels; 853 } 854 } else { 855 int c; 856 857 for (i = 0; i < fftlen; i++) { 858 xfft[i] = 0.0; 859 } 860 for (c = 0; c < s->nchannels; c++) { 861 int p = (startpos + j * skip) * s->nchannels + c; 862 863 for (i = 0; i < fftlen; i++) { 864 xfft[i] += (float) ((FSAMPLE(s, p + s->nchannels) 865 - preemph * FSAMPLE(s, p)) 866 * hamwin[i]); 867 p += s->nchannels; 868 } 869 } 870 for (i = 0; i < fftlen; i++) { 871 xfft[i] /= s->nchannels; 872 } 873 } 874 } else { /* storeType != SOUND_IN_MEMORY */ 875 if (s->nchannels == 1 || channel != -1) { 876 int p = (startpos + j * skip) * s->nchannels + channel; 877 878 for (i = 0; i < fftlen; i++) { 879 xfft[i] = (float) ((GetSample(&info, p + s->nchannels) 880 - preemph * GetSample(&info, p)) 881 * hamwin[i]); 882 p += s->nchannels; 883 } 884 } else { 885 int c; 886 887 for (i = 0; i < fftlen; i++) { 888 xfft[i] = 0.0; 889 } 890 for (c = 0; c < s->nchannels; c++) { 891 int p = (startpos + j * skip) * s->nchannels + c; 892 893 for (i = 0; i < fftlen; i++) { 894 xfft[i] += (float) ((GetSample(&info, p + s->nchannels) 895 - preemph * GetSample(&info, p)) 896 * hamwin[i]); 897 p += s->nchannels; 898 } 899 } 900 for (i = 0; i < fftlen; i++) { 901 xfft[i] /= s->nchannels; 902 } 903 } 904 } 905 906 Snack_PowerSpectrum(xfft); 907 for (i = 0; i < fftlen/2; i++) { 908 ffts[i] += (float)sqrt(xfft[i]); 909 /* if (i < 4) printf("power %f %f\n", xfft[i],ffts[i]);*/ 910 } 911 } 912 } 913 914 if (s->storeType != SOUND_IN_MEMORY) { 915 CloseLinkedFile(&info); 916 } 917 918 for (i = 0; i < fftlen/2; i++) { 919 ffts[i] = ffts[i] / (float) n; 920 } 921 922 list = Tcl_NewListObj(0, NULL); 923 for (i = 0; i < fftlen/2; i++) { 924 Tcl_ListObjAppendElement(interp, list, Tcl_NewDoubleObj(ffts[i])); 925 } 926 927 Tcl_SetObjResult(interp, list); 928 929 if (s->debug > 0) Snack_WriteLog("Exit powerSpectrumCmd\n"); 930 931 return TCL_OK; 932} 933 934#define SNACK_DEFAULT_POWERWINTYPE SNACK_WIN_HAMMING 935 936int 937powerCmd(Sound *s, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]) 938{ 939 double dpreemph = 0.0, dscale = 1.0, dframelen = -1.0; 940 float preemph = 0.0, scale = 1.0; 941 int i, j, n = 0; 942 int channel = 0, winlen = 256; 943 int arg, startpos = 0, endpos = -1, framelen; 944 float *powers = NULL; 945 Tcl_Obj *list; 946 SnackLinkedFileInfo info; 947 SnackWindowType wintype = SNACK_DEFAULT_POWERWINTYPE; 948 static CONST84 char *subOptionStrings[] = { 949 "-start", "-end", "-channel", "-framelength", "-windowlength", 950 "-windowtype", "-preemphasisfactor", "-scale", "-progress", NULL 951 }; 952 enum subOptions { 953 START, END, CHANNEL, FRAMELEN, WINDOWLEN, WINTYPE, PREEMPH, SCALE, 954 PROGRESS 955 }; 956 957 if (s->debug > 0) { Snack_WriteLog("Enter powerCmd\n"); } 958 959 if (s->cmdPtr != NULL) { 960 Tcl_DecrRefCount(s->cmdPtr); 961 s->cmdPtr = NULL; 962 } 963 964 for (arg = 2; arg < objc; arg += 2) { 965 int index; 966 967 if (Tcl_GetIndexFromObj(interp, objv[arg], subOptionStrings, 968 "option", 0, &index) != TCL_OK) { 969 return TCL_ERROR; 970 } 971 972 if (arg + 1 == objc) { 973 Tcl_AppendResult(interp, "No argument given for ", 974 subOptionStrings[index], " option", (char *) NULL); 975 return TCL_ERROR; 976 } 977 978 switch ((enum subOptions) index) { 979 case START: 980 { 981 if (Tcl_GetIntFromObj(interp, objv[arg+1], &startpos) != TCL_OK) 982 return TCL_ERROR; 983 break; 984 } 985 case END: 986 { 987 if (Tcl_GetIntFromObj(interp, objv[arg+1], &endpos) != TCL_OK) 988 return TCL_ERROR; 989 break; 990 } 991 case CHANNEL: 992 { 993 char *str = Tcl_GetStringFromObj(objv[arg+1], NULL); 994 if (GetChannel(interp, str, s->nchannels, &channel) != TCL_OK) { 995 return TCL_ERROR; 996 } 997 break; 998 } 999 case FRAMELEN: 1000 { 1001 if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &dframelen) != TCL_OK) 1002 return TCL_ERROR; 1003 break; 1004 } 1005 case WINDOWLEN: 1006 { 1007 if (Tcl_GetIntFromObj(interp, objv[arg+1], &winlen) != TCL_OK) 1008 return TCL_ERROR; 1009 break; 1010 } 1011 case WINTYPE: 1012 { 1013 char *str = Tcl_GetStringFromObj(objv[arg+1], NULL); 1014 if (GetWindowType(interp, str, &wintype) != TCL_OK) 1015 return TCL_ERROR; 1016 break; 1017 } 1018 case PREEMPH: 1019 { 1020 if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &dpreemph) != TCL_OK) 1021 return TCL_ERROR; 1022 break; 1023 } 1024 case SCALE: 1025 { 1026 if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &dscale) != TCL_OK) 1027 return TCL_ERROR; 1028 break; 1029 } 1030 case PROGRESS: 1031 { 1032 char *str = Tcl_GetStringFromObj(objv[arg+1], NULL); 1033 1034 if (strlen(str) > 0) { 1035 Tcl_IncrRefCount(objv[arg+1]); 1036 s->cmdPtr = objv[arg+1]; 1037 } 1038 break; 1039 } 1040 } 1041 } 1042 1043 if (winlen < 1) { 1044 Tcl_AppendResult(interp, "-windowlength must be > 0", NULL); 1045 return TCL_ERROR; 1046 } 1047 if (winlen > NMAX) { 1048 char str[10]; 1049 1050 sprintf(str, "%d", NMAX); 1051 Tcl_AppendResult(interp, "-windowlength must be <= ", str, NULL); 1052 return TCL_ERROR; 1053 } 1054 1055 preemph = (float) dpreemph; 1056 scale = (float) dscale; 1057 1058 if (startpos < 0) startpos = 0; 1059 if (endpos >= (s->length - 1) || endpos == -1) 1060 endpos = s->length - 1; 1061 if (startpos > endpos) return TCL_OK; 1062 1063 if (s->storeType != SOUND_IN_MEMORY) { 1064 if (OpenLinkedFile(s, &info) != TCL_OK) { 1065 return TCL_OK; 1066 } 1067 } 1068 1069 Snack_InitWindow(hamwin, winlen, winlen, wintype); 1070 1071 if (dframelen == -1.0) { 1072 dframelen = 0.01; 1073 } 1074 framelen = (int) (dframelen * s->samprate); 1075 n = (endpos - startpos - winlen / 2) / framelen + 1; 1076 if (n < 1) { 1077 n = 1; 1078 } 1079 if (s->nchannels == 1) { 1080 channel = 0; 1081 } 1082 1083 powers = (float *) ckalloc(sizeof(float) * n); 1084 1085 Snack_ProgressCallback(s->cmdPtr, interp, "Computing power", 0.0); 1086 1087 for (j = 0; j < n; j++) { 1088 float power; 1089 int winstart = 0; 1090 if (s->storeType == SOUND_IN_MEMORY) { 1091 if (s->nchannels == 1 || channel != -1) { 1092 int p = (startpos + j * framelen - winlen/2) * s->nchannels + channel; 1093 1094 if (p < 0) p = 0; 1095 if (p < winlen / 2) winstart = winlen / 2 - p; 1096 for (i = winstart; i < winlen; i++) { 1097 xfft[i] = (float) ((FSAMPLE(s, p + s->nchannels) 1098 - preemph * FSAMPLE(s, p)) 1099 * hamwin[i]); 1100 p += s->nchannels; 1101 } 1102 } else { 1103 int c; 1104 1105 for (i = 0; i < winlen; i++) { 1106 xfft[i] = 0.0; 1107 } 1108 for (c = 0; c < s->nchannels; c++) { 1109 int p = (startpos + j * framelen - winlen/2) * s->nchannels + c; 1110 1111 if (p < 0) p = 0; 1112 if (p < winlen / 2) winstart = winlen / 2 - p; 1113 for (i = winstart; i < winlen; i++) { 1114 xfft[i] += (float) ((FSAMPLE(s, p + s->nchannels) 1115 - preemph * FSAMPLE(s, p)) 1116 * hamwin[i]); 1117 p += s->nchannels; 1118 } 1119 } 1120 for (i = 0; i < winlen; i++) { 1121 xfft[i] /= s->nchannels; 1122 } 1123 } 1124 } else { /* storeType != SOUND_IN_MEMORY */ 1125 if (s->nchannels == 1 || channel != -1) { 1126 int p = (startpos + j * framelen - winlen/2) * s->nchannels + channel; 1127 1128 if (p < 0) p = 0; 1129 if (p < winlen / 2) winstart = winlen / 2 - p; 1130 for (i = winstart; i < winlen; i++) { 1131 xfft[i] = (float) ((GetSample(&info, p + s->nchannels) 1132 - preemph * GetSample(&info, p)) 1133 * hamwin[i]); 1134 p += s->nchannels; 1135 } 1136 } else { 1137 int c; 1138 1139 for (i = 0; i < winlen; i++) { 1140 xfft[i] = 0.0; 1141 } 1142 for (c = 0; c < s->nchannels; c++) { 1143 int p = (startpos + j * framelen - winlen/2) * s->nchannels + c; 1144 1145 if (p < 0) p = 0; 1146 if (p < winlen / 2) winstart = winlen / 2 - p; 1147 for (i = winstart; i < winlen; i++) { 1148 xfft[i] += (float) ((GetSample(&info, p + s->nchannels) 1149 - preemph * GetSample(&info, p)) 1150 * hamwin[i]); 1151 p += s->nchannels; 1152 } 1153 } 1154 for (i = 0; i < winlen; i++) { 1155 xfft[i] /= s->nchannels; 1156 } 1157 } 1158 } 1159 1160 power = 0.0f; 1161 for (i = winstart; i < winlen; i++) { 1162 power += xfft[i] * xfft[i]; 1163 } 1164 if (power < 1.0) power = 1.0; 1165 powers[j] = (float) (SNACK_DB * log(scale * power / (float)(winlen - winstart))); 1166 1167 if ((j % 10000) == 9999) { 1168 int res = Snack_ProgressCallback(s->cmdPtr, interp, "Computing power", 1169 (double) j / n); 1170 if (res != TCL_OK) { 1171 ckfree((char *) powers); 1172 return TCL_ERROR; 1173 } 1174 } 1175 } 1176 1177 Snack_ProgressCallback(s->cmdPtr, interp, "Computing power", 1.0); 1178 1179 list = Tcl_NewListObj(0, NULL); 1180 for (i = 0; i < n; i++) { 1181 Tcl_ListObjAppendElement(interp,list, Tcl_NewDoubleObj((double)powers[i])); 1182 } 1183 Tcl_SetObjResult(interp, list); 1184 1185 if (s->storeType != SOUND_IN_MEMORY) { 1186 CloseLinkedFile(&info); 1187 } 1188 ckfree((char *) powers); 1189 1190 if (s->debug > 0) { Snack_WriteLog("Exit powerCmd\n"); } 1191 1192 return TCL_OK; 1193} 1194 1195float 1196mel(float f) 1197{ 1198 return((float)(1127.0 * log(1.0f + f / 700.0f))); 1199} 1200 1201float regarr[15][5]; 1202 1203static void 1204obsv(Sound *s, int nStatCoeffs, float *v, int t, int nReg) 1205{ 1206 int i, j, ne, pr; 1207 float sum; 1208 1209 for (i = 0; i < nStatCoeffs; i++) { 1210 for (j = 1; j < 5; j++) { 1211 regarr[i][j-1] = regarr[i][j]; 1212 } 1213 } 1214 1215 if (t >= 0) { 1216 for (i = 0; i < nStatCoeffs; i++) { 1217 v[i] = FSAMPLE(s, s->nchannels * t + i); 1218 } 1219 } 1220 if (nReg) { 1221 for (i = 0; i < nStatCoeffs; i++) { 1222 sum = 0.0f; 1223 for (j = 1; j <= 2; j++) { 1224 pr = t + 2 - j; 1225 if (pr < 0) pr = 0; 1226 ne = t + 2 + j; 1227 if (ne >= s->length) ne = s->length - 1; 1228 sum += j * (FSAMPLE(s, s->nchannels * ne + i)-FSAMPLE(s, s->nchannels * pr + i)); 1229 } 1230 regarr[i][4] = sum / 10.0f; 1231 v[i+nStatCoeffs] = regarr[i][2]; 1232 } 1233 } 1234 if (nReg > 1) { 1235 for (i = 0; i < nStatCoeffs; i++) { 1236 sum = 0.0f; 1237 for (j = 1; j <= 2; j++) { 1238 pr = 2 - j; 1239 if (t == 0) pr = 2; 1240 if (t == 1 && j == 2) pr = 1; 1241 ne = 2 + j; 1242 if (t == s->length-1) ne = 2; 1243 if (t == s->length-2 && j == 2) ne = 3; 1244 sum += j * (regarr[i][ne]-regarr[i][pr]); 1245 } 1246 v[i+2*nStatCoeffs] = sum / 10.0f; 1247 } 1248 } 1249 1250 /* for (i = 0; i < 13; i++) { printf("%f ",v[i]); } printf("\n"); 1251 for (i=13; i < 26; i++) { printf("%f ",v[i]); } printf("\n"); 1252 for (i=26; i < 39; i++) { printf("%f ",v[i]); } printf("\n");*/ 1253} 1254 1255int 1256speaturesCmd(Sound *s, Tcl_Interp *interp, int objc, 1257 Tcl_Obj *CONST objv[]) 1258{ 1259 double tmpdouble = 0.0, dframelen = 0.01; 1260 float preemph = 0.97f, lowcut = 0.0f, highcut = (float) (s->samprate / 2.0); 1261 int i, j, k, n = 1, arg; 1262 int channel = 0; 1263 double dwinlen = 0.0256; 1264 int winlen; 1265 int fftlen = 2; 1266 int startpos = 0, endpos = -1, framelen; 1267 SnackLinkedFileInfo info; 1268 SnackWindowType wintype = SNACK_DEFAULT_DBPWINTYPE; 1269 static CONST84 char *subOptionStrings[] = { 1270 "-start", "-end", "-channel", "-framelength", "-windowlength", 1271 "-preemphasisfactor", "-windowtype", 1272 "-nchannels", "-ncoeff", "-cepstrallifter", "-energy", "-zeromean", 1273 "-zerothcepstralcoeff", "-lowcutoff", "-highcutoff", 1274 "-regressionorder", NULL 1275 }; 1276 enum subOptions { 1277 START, END, CHANNEL, FRAMELEN, WINDOWLEN, PREEMPH, WINTYPE, 1278 NCHANNELS, NCOEFF, CEPLIFT, ENERGY, ZEROMEAN, ZEROTHCC, 1279 LOWCUT, HIGHCUT, REGRESSION 1280 }; 1281 float *outarr; 1282 int numchannels = 20, ncoeff = 12; 1283 int lowInd, highInd; 1284 float lowMel, highMel; 1285 float cf[40]; 1286 float fb[40]; 1287 float lifter[20]; 1288 float mfcc[20]; 1289 int map[512]; 1290 float fbw[512]; 1291 float frame[1024]; 1292 double ceplifter = 22.0; 1293 int doEnergy = 0, doZeroMean = 0, doZerothCC = 0, regression = 0; 1294 int vectorLength, regVecLen; 1295 Sound *outsnd; 1296 char *string; 1297 1298 if (s->debug > 0) Snack_WriteLog("Enter speaturesCmd\n"); 1299 1300 string = Tcl_GetStringFromObj(objv[2], NULL); 1301 1302 if ((outsnd = Snack_GetSound(interp, string)) == NULL) { 1303 return TCL_ERROR; 1304 } 1305 1306 for (arg = 3; arg < objc; arg += 2) { 1307 int index; 1308 1309 if (Tcl_GetIndexFromObj(interp, objv[arg], subOptionStrings, 1310 "option", 0, &index) != TCL_OK) { 1311 return TCL_ERROR; 1312 } 1313 1314 if (arg + 1 == objc) { 1315 Tcl_AppendResult(interp, "No argument given for ", 1316 subOptionStrings[index], " option", (char *) NULL); 1317 return TCL_ERROR; 1318 } 1319 1320 switch ((enum subOptions) index) { 1321 case START: 1322 { 1323 if (Tcl_GetIntFromObj(interp, objv[arg+1], &startpos) != TCL_OK) 1324 return TCL_ERROR; 1325 break; 1326 } 1327 case END: 1328 { 1329 if (Tcl_GetIntFromObj(interp, objv[arg+1], &endpos) != TCL_OK) 1330 return TCL_ERROR; 1331 break; 1332 } 1333 case CHANNEL: 1334 { 1335 char *str = Tcl_GetStringFromObj(objv[arg+1], NULL); 1336 if (GetChannel(interp, str, s->nchannels, &channel) != TCL_OK) { 1337 return TCL_ERROR; 1338 } 1339 break; 1340 } 1341 case FRAMELEN: 1342 { 1343 if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &dframelen) != TCL_OK) 1344 return TCL_ERROR; 1345 break; 1346 } 1347 case WINDOWLEN: 1348 { 1349 if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &dwinlen) != TCL_OK) 1350 return TCL_ERROR; 1351 break; 1352 } 1353 case PREEMPH: 1354 { 1355 if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &tmpdouble) != TCL_OK) 1356 return TCL_ERROR; 1357 preemph = (float) tmpdouble; 1358 break; 1359 } 1360 case WINTYPE: 1361 { 1362 char *str = Tcl_GetStringFromObj(objv[arg+1], NULL); 1363 if (GetWindowType(interp, str, &wintype) != TCL_OK) 1364 return TCL_ERROR; 1365 break; 1366 } 1367 case NCHANNELS: 1368 { 1369 if (Tcl_GetIntFromObj(interp, objv[arg+1], &numchannels) != TCL_OK) 1370 return TCL_ERROR; 1371 break; 1372 } 1373 case NCOEFF: 1374 { 1375 if (Tcl_GetIntFromObj(interp, objv[arg+1], &ncoeff) != TCL_OK) 1376 return TCL_ERROR; 1377 break; 1378 } 1379 case CEPLIFT: 1380 { 1381 if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &ceplifter) != TCL_OK) 1382 return TCL_ERROR; 1383 break; 1384 } 1385 case ENERGY: 1386 { 1387 if (Tcl_GetIntFromObj(interp, objv[arg+1], &doEnergy) != TCL_OK) 1388 return TCL_ERROR; 1389 break; 1390 } 1391 case ZEROMEAN: 1392 { 1393 if (Tcl_GetIntFromObj(interp, objv[arg+1], &doZeroMean) != TCL_OK) 1394 return TCL_ERROR; 1395 break; 1396 } 1397 case ZEROTHCC: 1398 { 1399 if (Tcl_GetIntFromObj(interp, objv[arg+1], &doZerothCC) != TCL_OK) 1400 return TCL_ERROR; 1401 break; 1402 } 1403 case LOWCUT: 1404 { 1405 if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &tmpdouble) != TCL_OK) 1406 return TCL_ERROR; 1407 lowcut = (float) tmpdouble; 1408 break; 1409 } 1410 case HIGHCUT: 1411 { 1412 if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &tmpdouble) != TCL_OK) 1413 return TCL_ERROR; 1414 highcut = (float) tmpdouble; 1415 break; 1416 } 1417 case REGRESSION: 1418 { 1419 if (Tcl_GetIntFromObj(interp, objv[arg+1], ®ression) != TCL_OK) 1420 return TCL_ERROR; 1421 break; 1422 } 1423 } 1424 } 1425 1426 winlen = (int) (s->samprate * dwinlen); 1427 1428 if ((startpos < 0 || startpos > s->length - fftlen) && s->length > 0) { 1429 Tcl_AppendResult(interp, "FFT window out of bounds", NULL); 1430 return TCL_ERROR; 1431 } 1432 1433 if (startpos < 0) startpos = 0; 1434 if (endpos >= (s->length - 1) || endpos == -1) 1435 endpos = s->length - 1; 1436 if (startpos > endpos) return TCL_OK; 1437 1438 if (s->storeType != SOUND_IN_MEMORY) { 1439 if (OpenLinkedFile(s, &info) != TCL_OK) { 1440 return TCL_OK; 1441 } 1442 } 1443 1444 while (fftlen < winlen) fftlen *= 2; 1445 1446 framelen = (int) (dframelen * s->samprate); 1447 n = (endpos - startpos - winlen) / framelen + 1; 1448 if (n < 1) { 1449 n = 0; 1450 } 1451 1452 vectorLength = ncoeff; 1453 if (doEnergy) vectorLength++; 1454 if (doZerothCC) vectorLength++; 1455 1456 outarr = (float *) ckalloc(sizeof(float) * vectorLength * n + 1); 1457 1458 lowMel = (float) mel(lowcut); 1459 lowInd = (int) ((lowcut * fftlen / s->samprate) + 1.5); 1460 if (lowInd < 1) lowInd = 1; 1461 highMel = (float) mel(highcut); 1462 highInd = (int) ((highcut * fftlen / s->samprate) - 0.5); 1463 if (highInd > fftlen / 2 - 1) highInd = fftlen / 2 - 1; 1464 1465 for (i = 0; i < numchannels + 1; i++) { 1466 cf[i] = ((float) (i+1) / (numchannels+1)) * (highMel - lowMel) + lowMel; 1467 } 1468 1469 for (i = 0; i < fftlen / 2; i++) { 1470 float melInd = mel((float) i * s->samprate / fftlen); 1471 float new; 1472 int ch = 0; 1473 1474 if (i < lowInd || i > highInd) { 1475 map[i] = -1; 1476 new = 0.0; 1477 } else { 1478 while (cf[ch] < melInd && ch < numchannels + 1) ++ch; 1479 map[i] = ch - 1; 1480 if (ch - 1 >= 0) { 1481 new = ((cf[ch] - mel((float) i*s->samprate/fftlen)) / 1482 (cf[ch] - cf[ch-1])); 1483 } else { 1484 new = (cf[0] - mel((float) i*s->samprate/fftlen)) / (cf[0] - lowMel); 1485 } 1486 } 1487 fbw[i] = new; 1488 } 1489 1490 for (i = 0; i <= ncoeff; i++) { 1491 lifter[i] = (float)(1.0 + ceplifter /2.0 * sin(SNACK_PI * i / ceplifter)); 1492 } 1493 1494 Snack_InitWindow(hamwin, winlen, fftlen, wintype); 1495 1496 Snack_InitFFT(fftlen); 1497 1498 for (j = 0; j < n; j++) { 1499 float sum = 0.0; 1500 1501 for (i = 0; i < fftlen; i++) { 1502 frame[i] = 0.0f; 1503 } 1504 if (s->storeType == SOUND_IN_MEMORY) { 1505 if (s->nchannels == 1 || channel != -1) { 1506 int p = (startpos + j * framelen) * s->nchannels + channel; 1507 1508 for (i = 0; i < winlen; i++) { 1509 frame[i] = FSAMPLE(s, p); 1510 p += s->nchannels; 1511 } 1512 } else { 1513 int c; 1514 1515 for (c = 0; c < s->nchannels; c++) { 1516 int p = (startpos + j * framelen) * s->nchannels + c; 1517 1518 for (i = 0; i < winlen; i++) { 1519 frame[i] += FSAMPLE(s, p); 1520 p += s->nchannels; 1521 } 1522 } 1523 for (i = 0; i < winlen; i++) { 1524 frame[i] /= s->nchannels; 1525 } 1526 } 1527 } else { /* storeType != SOUND_IN_MEMORY */ 1528 if (s->nchannels == 1 || channel != -1) { 1529 int p = (startpos + j * framelen) * s->nchannels + channel; 1530 1531 for (i = 0; i < winlen; i++) { 1532 frame[i] = GetSample(&info, p); 1533 p += s->nchannels; 1534 } 1535 } else { 1536 int c; 1537 1538 for (c = 0; c < s->nchannels; c++) { 1539 int p = (startpos + j * framelen) * s->nchannels + c; 1540 1541 for (i = 0; i < winlen; i++) { 1542 frame[i] += GetSample(&info, p); 1543 p += s->nchannels; 1544 } 1545 } 1546 for (i = 0; i < winlen; i++) { 1547 frame[i] /= s->nchannels; 1548 } 1549 } 1550 } 1551 1552 for (i = 0; i < winlen; i++) { 1553 float sample = frame[i]; 1554 sum += sample * sample; 1555 } 1556 if (sum < 1.0) sum = 1.0f; 1557 1558 xfft[0] = (float) (1.0f - preemph) * frame[0] * hamwin[0]; 1559 for (i = 1; i < fftlen; i++) { 1560 xfft[i] = (float) ((frame[i] - preemph * frame[i - 1]) * hamwin[i]); 1561 } 1562 1563 Snack_PowerSpectrum(xfft); 1564 1565 for (i = 0; i < numchannels + 1; i++) { 1566 fb[i] = 0.0; 1567 } 1568 for (i = lowInd; i <= highInd; i++) { 1569 float e = (float) (0.5 * sqrt(xfft[i])); 1570 int bin = map[i]; 1571 float we = fbw[i] * e; 1572 1573 if (bin > -1) fb[bin] += we; 1574 if (bin < numchannels - 1) fb[bin + 1] += e - we; 1575 } 1576 for (i = 0; i < numchannels; i++) { 1577 if (fb[i] < 1.0) { 1578 fb[i] = 0.0f; 1579 } else { 1580 fb[i] = (float) log(fb[i]); 1581 } 1582 } 1583 1584 for (i = 0; i < ncoeff; i++) { 1585 mfcc[i] = 0.0f; 1586 for (k = 0; k < numchannels; k++) { 1587 mfcc[i] += (float) (fb[k] * cos(SNACK_PI * (i + 1) / numchannels * 1588 (k + 0.5f))); 1589 } 1590 mfcc[i] *= (float) (sqrt(2.0f / numchannels) * lifter[i + 1]); 1591 } 1592 mfcc[ncoeff] = (float) log(sum); 1593 for (i = 0; i < ncoeff + 1; i++) { 1594 outarr[j * vectorLength + i] = mfcc[i]; 1595 } 1596 if (doZerothCC) { 1597 float sum = 0.0f; 1598 1599 for (i = 0; i < numchannels; i++) { 1600 sum += fb[i]; 1601 } 1602 outarr[j*vectorLength+ncoeff] = (float) (sum * sqrt(2.0 / numchannels)); 1603 } 1604 } /* for (j = 0;... */ 1605 1606 if (doZeroMean) { 1607 for (i = 0; i < ncoeff; i++) { 1608 float sum = 0.0f, offset; 1609 1610 for (j = 0; j < n; j++) { 1611 sum += outarr[j * vectorLength + i]; 1612 } 1613 offset = sum / n; 1614 for (j = 0; j < n; j++) { 1615 outarr[j * vectorLength + i] -= offset; 1616 } 1617 } 1618 } 1619 if (doEnergy) { 1620 float max = -1000000.0f, min; 1621 1622 for (i = ncoeff; i < n * vectorLength; i += vectorLength) { 1623 if (outarr[i] > max) max = outarr[i]; 1624 } 1625 min = (float) (max - 50.0f * log(10.0f) / 10.0f); 1626 for (i = ncoeff; i < n * vectorLength; i += vectorLength) { 1627 if (outarr[i] < min) outarr[i] = min; 1628 outarr[i] = 1.0f - 0.1f * (max - outarr[i]); 1629 } 1630 } 1631 1632 if (s->storeType != SOUND_IN_MEMORY) { 1633 CloseLinkedFile(&info); 1634 } 1635 regVecLen = vectorLength * (regression + 1); 1636 if (outsnd->nchannels < regVecLen) { 1637 outsnd->nchannels = regVecLen; 1638 } 1639 if (Snack_ResizeSoundStorage(outsnd, n) != TCL_OK) { 1640 return TCL_ERROR; 1641 } 1642 outsnd->length = n; 1643 1644 for (i = 0; i < n; i++) { 1645 for (k = 0; k < vectorLength; k++) { 1646 FSAMPLE(outsnd, i*outsnd->nchannels+k) = outarr[i*vectorLength+k]; 1647 } 1648 } 1649 1650 if (regression) { 1651 float obs[45]; 1652 1653 for (k = -2; k < 2; k++) { 1654 for (i = 0; i < vectorLength; i++) { 1655 float sum = 0.0f; 1656 for (j = 1; j <= 2; j++) { 1657 int pr = k - j; 1658 int ne = k + j; 1659 if (pr < 0) pr = 0; 1660 if (ne < 0) ne = 0; 1661 if (ne >= n) ne = n - 1; 1662 sum += j * (FSAMPLE(outsnd, outsnd->nchannels * ne + i) 1663 - FSAMPLE(outsnd, outsnd->nchannels * pr + i)); 1664 } 1665 regarr[i][k+3] = sum / 10.0f; 1666 } 1667 } 1668 for (i = 0; i < n; i++) { 1669 obsv(outsnd, vectorLength, obs, i, regression); 1670 for (k = vectorLength; k < regVecLen; k++) { 1671 FSAMPLE(outsnd, i * outsnd->nchannels + k) = obs[k]; 1672 } 1673 } 1674 } 1675 1676 ckfree((char*) outarr); 1677 1678 outsnd->encoding = SNACK_FLOAT; 1679 outsnd->samprate = (int) (1.0 / dframelen + 0.5); 1680 Snack_UpdateExtremes(outsnd, 0, n, SNACK_NEW_SOUND); 1681 Snack_ExecCallbacks(outsnd, SNACK_NEW_SOUND); 1682 1683 if (s->debug > 0) Snack_WriteLog("Exit speaturesCmd\n"); 1684 1685 return TCL_OK; 1686} 1687 1688extern int cGet_f0(Sound *s, Tcl_Interp *interp, float **outlist, int *length); 1689 1690static int 1691searchZX(Sound *s, int pos) 1692{ 1693 int i, j; 1694 1695 for(i = 0; i < 20000; i++) { 1696 j = pos + i; 1697 if (j>0 && j<s->length && FSAMPLE(s, j-1) < 0.0 && FSAMPLE(s, j) >= 0.0) { 1698 return(j); 1699 } 1700 j = pos - i; 1701 if (j>0 && j<s->length && FSAMPLE(s, j-1) < 0.0 && FSAMPLE(s, j) >= 0.0) { 1702 return(j); 1703 } 1704 } 1705 return(pos); 1706} 1707 1708int 1709stretchCmd(Sound *s, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]) 1710{ 1711 int i, ind, last, start, arg, segOnly = 0; 1712 static CONST84 char *subOptionStrings[] = { 1713 "-segments", NULL 1714 }; 1715 enum subOptions { 1716 SEGMENTS 1717 }; 1718 float *cPitchList; 1719 int *segs; 1720 int *sege; 1721 int cPitchLength = 0; 1722 int skip = s->samprate / 100; 1723 1724 if (s->debug > 0) Snack_WriteLog("Enter stretchCmd\n"); 1725 1726 for (arg = 2; arg < objc; arg += 2) { 1727 int index; 1728 1729 if (Tcl_GetIndexFromObj(interp, objv[arg], subOptionStrings, 1730 "option", 0, &index) != TCL_OK) { 1731 return TCL_ERROR; 1732 } 1733 1734 if (arg + 1 == objc) { 1735 Tcl_AppendResult(interp, "No argument given for ", 1736 subOptionStrings[index], " option", (char *) NULL); 1737 return TCL_ERROR; 1738 } 1739 1740 switch ((enum subOptions) index) { 1741 case SEGMENTS: 1742 { 1743 if (Tcl_GetIntFromObj(interp, objv[arg+1], &segOnly) != TCL_OK) 1744 return TCL_ERROR; 1745 break; 1746 } 1747 } 1748 } 1749 1750 if (s->length == 0) { 1751 return TCL_OK; 1752 } 1753 1754 cGet_f0(s, interp, &cPitchList, &cPitchLength); 1755 1756 ind = 0; 1757 start = 0; 1758 last = 0; 1759 segs = (int *) ckalloc(sizeof(int) * cPitchLength * 2); 1760 sege = (int *) ckalloc(sizeof(int) * cPitchLength * 2); 1761 1762 if (s->length < 8000 && cPitchList[0] == 0.0 && cPitchList[1] == 0.0 && \ 1763 cPitchList[2] == 0.0) { 1764 } else { 1765 for (i = 1; i < s->length; i++) { 1766 int pitchListIndex = (int) (i/(float)skip+.5); 1767 float point; 1768 1769 if (pitchListIndex >= cPitchLength) { 1770 pitchListIndex = cPitchLength - 1; 1771 } 1772 if (ind >= cPitchLength*2) { 1773 ind = cPitchLength * 2 - 1; 1774 } 1775 point = cPitchList[pitchListIndex]; 1776 1777 if (point == 0.0) { 1778 i += 9; 1779 continue; 1780 } 1781 if (start == 0) { 1782 i = searchZX(s, (int)(i+s->samprate/point)); 1783 segs[ind] = start; 1784 sege[ind] = i; 1785 start = i; 1786 ind++; 1787 } else { 1788 i = searchZX(s, (int)(i+s->samprate/point)); 1789 if (i == last) { 1790 int j = i + 10; 1791 while (last == i) { 1792 i = searchZX(s, j); 1793 j += 10; 1794 } 1795 } 1796 /* this is needed to make stretch.test happy, can surely be improved*/ 1797 if (i - last < (int)(0.8*s->samprate/point) && s->length - i < 200) { 1798 i = -1; 1799 } 1800 last = i; 1801 if (i > 0) { 1802 segs[ind] = start; 1803 sege[ind] = i; 1804 start = i; 1805 ind++; 1806 } else { 1807 segs[ind] = start; 1808 sege[ind] = s->length; 1809 start = s->length; 1810 ind++; 1811 break; 1812 } 1813 } 1814 } 1815 if (ind == 0) { 1816 segs[ind] = start; 1817 ind = 1; 1818 } 1819 sege[ind-1] = s->length-1; 1820 } 1821 1822 if (segOnly) { 1823 Tcl_Obj *list = Tcl_NewListObj(0, NULL); 1824 for (i = 0; i < ind; i++) { 1825 Tcl_ListObjAppendElement(interp, list, 1826 Tcl_NewIntObj((int) segs[i])); 1827 } 1828 Tcl_SetObjResult(interp, list); 1829 ckfree((char *)segs); 1830 ckfree((char *)sege); 1831 ckfree((char *)cPitchList); 1832 1833 if (s->debug > 0) Snack_WriteLog("Exit stretchCmd\n"); 1834 1835 return TCL_OK; 1836 } 1837 1838 return TCL_OK; 1839} 1840 1841int 1842joinCmd(Sound *s, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]) 1843{ 1844 return TCL_OK; 1845} 1846 1847int 1848ocCmd(Sound *s, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]) 1849{ 1850 return TCL_OK; 1851} 1852 1853int 1854fitCmd(Sound *s, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]) 1855{ 1856 return TCL_OK; 1857} 1858 1859#define PI 3.141592653589793 1860 1861double singtabf[32]; 1862double singtabb[32]; 1863float futdat[512+10]; 1864float smerg[512+10]; 1865 1866int 1867inaCmd(Sound *s, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]) 1868{ 1869 float a1[32],a2[32],mn[32]; 1870 int i,j,noli,nofpts=512,shobeg=2,nosing; 1871 int start; 1872 int plLen = 0; 1873 Tcl_Obj** plElems; 1874 Tcl_Obj *list, *listInv, *listFlow; 1875 1876 if (Tcl_GetIntFromObj(interp, objv[2], &start) != TCL_OK) return TCL_ERROR; 1877 1878 if (Tcl_ListObjGetElements(interp, objv[3], &plLen, &plElems) != TCL_OK) 1879 return TCL_ERROR; 1880 1881 nosing = plLen / 2; 1882 1883 for (i=0;i<nosing;i++) { 1884 if (Tcl_GetDoubleFromObj(interp, plElems[i], &singtabf[i]) != TCL_OK) 1885 return TCL_ERROR; 1886 if (Tcl_GetDoubleFromObj(interp, plElems[i+nosing], &singtabb[i]) != TCL_OK) 1887 return TCL_ERROR; 1888 } 1889 1890 for (i = 0; i < nofpts; i++) { 1891 futdat[i] = FSAMPLE(s, start + i); 1892 } 1893 for (i = nofpts; i < nofpts+4; i++) { 1894 futdat[i] = 0.0f; 1895 } 1896 1897 noli=0; /* zero 2nd order filter */ 1898 for (i=0;i<nosing;i++) { 1899 if ((singtabf[i]>0.0) && (singtabb[i]>0.0)) { 1900 a2[noli]=(float)exp((double)(-PI*singtabb[i]/s->samprate)); 1901 a1[noli]= -2*a2[noli]*(float)cos((double)(2.0*PI*singtabf[i]/s->samprate)); 1902 a2[noli]=a2[noli]*a2[noli]; 1903 mn[noli]=1.0f/(1.0f+a1[noli]+a2[noli]); 1904 a1[noli]=mn[noli]*a1[noli]; 1905 a2[noli]=mn[noli]*a2[noli]; 1906 noli++; 1907 } 1908 } 1909 for (j=0;j<noli;j++) { 1910 for (i=shobeg+nofpts;i>=shobeg;i--) { 1911 futdat[i]=mn[j]*futdat[i]+a1[j]*futdat[i-1]+a2[j]*futdat[i-2]; 1912 } 1913 } 1914 1915 noli=0; /* pole 2nd order filter */ 1916 for (i=0;i<nosing;i++) { 1917 if ((singtabf[i]>0.0) && (singtabb[i]<0.0)) { 1918 a2[noli]=(float)exp((double)(PI*singtabb[i]/s->samprate)); 1919 a1[noli]= -2*a2[noli]*(float)cos((double)(2.0*PI*singtabf[i]/s->samprate)); 1920 a2[noli]=a2[noli]*a2[noli]; 1921 mn[noli]=1.0f+a1[noli]+a2[noli]; 1922 noli++; 1923 } 1924 } 1925 for (j=0;j<noli;j++) { 1926 for (i=shobeg;i<shobeg+nofpts;i++) { 1927 futdat[i]=mn[j]*futdat[i]-a1[j]*futdat[i-1]-a2[j]*futdat[i-2]; 1928 } 1929 } 1930 1931 noli=0; /* pole 1st order filter */ 1932 for (i=0;i<nosing;i++) { 1933 if ((singtabf[i]==0.0) && (singtabb[i]<0.0)) { 1934 a1[noli]= -(float)exp((double)(PI*singtabb[i]/s->samprate)); 1935 mn[noli]=1.0f+a1[noli]; 1936 noli++; 1937 } 1938 } 1939 for (j=0;j<noli;j++) { 1940 for (i=shobeg;i<shobeg+nofpts;i++) { 1941 futdat[i]=mn[j]*(futdat[i]-futdat[i-1])+futdat[i-1]; 1942 } 1943 } 1944 /*shobeg = 1;*/ /* ugly fix, think about this*/ 1945 smerg[shobeg-1]=0.0; 1946 for (i=shobeg;i<shobeg+nofpts;i++) { 1947 smerg[i]=(futdat[i]-smerg[i-1])/32.0f+smerg[i-1]; 1948 } 1949 1950 list = Tcl_NewListObj(0, NULL); 1951 listInv = Tcl_NewListObj(0, NULL); 1952 listFlow = Tcl_NewListObj(0, NULL); 1953 for (i = shobeg; i < shobeg+nofpts; i++) { 1954 Tcl_ListObjAppendElement(interp, listInv, Tcl_NewDoubleObj(futdat[i])); 1955 Tcl_ListObjAppendElement(interp, listFlow, Tcl_NewDoubleObj(smerg[i])); 1956 } 1957 Tcl_ListObjAppendElement(interp, list, listInv); 1958 Tcl_ListObjAppendElement(interp, list, listFlow); 1959 Tcl_SetObjResult(interp, list); 1960 1961 return TCL_OK; 1962} 1963 1964Tcl_HashTable *arHashTable; 1965 1966int 1967Snack_arCmd(ClientData cdata, Tcl_Interp *interp, int objc, 1968 Tcl_Obj *CONST objv[]) 1969{ 1970 return TCL_OK; 1971} 1972 1973void 1974Snack_arDeleteCmd(ClientData clientData) 1975{ 1976} 1977 1978int 1979vpCmd(Sound *s, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]) 1980{ 1981 return TCL_OK; 1982} 1983 1984Tcl_HashTable *hsetHashTable; 1985 1986int 1987Snack_HSetCmd(ClientData cdata, Tcl_Interp *interp, int objc, 1988 Tcl_Obj *CONST objv[]) 1989{ 1990 1991 return TCL_OK; 1992} 1993 1994void 1995Snack_HSetDeleteCmd(ClientData clientData) 1996{ 1997} 1998 1999int 2000alCmd(Sound *s, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]) 2001{ 2002 return TCL_OK; 2003} 2004 2005int 2006isynCmd(ClientData cdata, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]) 2007{ 2008 return TCL_OK; 2009} 2010 2011int 2012osynCmd(ClientData cdata, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]) 2013{ 2014 return TCL_OK; 2015} 2016