1/* 2 * Copyright (c) 2014 Muhammad Faiz <mfcc64@gmail.com> 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#include "config.h" 22#include "libavcodec/avfft.h" 23#include "libavutil/avassert.h" 24#include "libavutil/channel_layout.h" 25#include "libavutil/opt.h" 26#include "libavutil/xga_font_data.h" 27#include "libavutil/qsort.h" 28#include "libavutil/time.h" 29#include "avfilter.h" 30#include "internal.h" 31 32#include <math.h> 33#include <stdlib.h> 34 35#if CONFIG_LIBFREETYPE 36#include <ft2build.h> 37#include FT_FREETYPE_H 38#endif 39 40/* this filter is designed to do 16 bins/semitones constant Q transform with Brown-Puckette algorithm 41 * start from E0 to D#10 (10 octaves) 42 * so there are 16 bins/semitones * 12 semitones/octaves * 10 octaves = 1920 bins 43 * match with full HD resolution */ 44 45#define VIDEO_WIDTH 1920 46#define VIDEO_HEIGHT 1080 47#define FONT_HEIGHT 32 48#define SPECTOGRAM_HEIGHT ((VIDEO_HEIGHT-FONT_HEIGHT)/2) 49#define SPECTOGRAM_START (VIDEO_HEIGHT-SPECTOGRAM_HEIGHT) 50#define BASE_FREQ 20.051392800492 51#define COEFF_CLAMP 1.0e-4 52 53typedef struct { 54 FFTSample value; 55 int index; 56} SparseCoeff; 57 58typedef struct { 59 const AVClass *class; 60 AVFrame *outpicref; 61 FFTContext *fft_context; 62 FFTComplex *fft_data; 63 FFTComplex *fft_result_left; 64 FFTComplex *fft_result_right; 65 uint8_t *spectogram; 66 SparseCoeff *coeff_sort; 67 SparseCoeff *coeffs[VIDEO_WIDTH]; 68 uint8_t *font_alpha; 69 char *fontfile; /* using freetype */ 70 int coeffs_len[VIDEO_WIDTH]; 71 uint8_t font_color[VIDEO_WIDTH]; 72 int64_t frame_count; 73 int spectogram_count; 74 int spectogram_index; 75 int fft_bits; 76 int req_fullfilled; 77 int remaining_fill; 78 double volume; 79 double timeclamp; /* lower timeclamp, time-accurate, higher timeclamp, freq-accurate (at low freq)*/ 80 float coeffclamp; /* lower coeffclamp, more precise, higher coeffclamp, faster */ 81 int fullhd; /* if true, output video is at full HD resolution, otherwise it will be halved */ 82 float gamma; /* lower gamma, more contrast, higher gamma, more range */ 83 int fps; /* the required fps is so strict, so it's enough to be int, but 24000/1001 etc cannot be encoded */ 84 int count; /* fps * count = transform rate */ 85} ShowCQTContext; 86 87#define OFFSET(x) offsetof(ShowCQTContext, x) 88#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM 89 90static const AVOption showcqt_options[] = { 91 { "volume", "set volume", OFFSET(volume), AV_OPT_TYPE_DOUBLE, { .dbl = 16 }, 0.1, 100, FLAGS }, 92 { "timeclamp", "set timeclamp", OFFSET(timeclamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.17 }, 0.1, 1.0, FLAGS }, 93 { "coeffclamp", "set coeffclamp", OFFSET(coeffclamp), AV_OPT_TYPE_FLOAT, { .dbl = 1 }, 0.1, 10, FLAGS }, 94 { "gamma", "set gamma", OFFSET(gamma), AV_OPT_TYPE_FLOAT, { .dbl = 3 }, 1, 7, FLAGS }, 95 { "fullhd", "set full HD resolution", OFFSET(fullhd), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, FLAGS }, 96 { "fps", "set video fps", OFFSET(fps), AV_OPT_TYPE_INT, { .i64 = 25 }, 10, 100, FLAGS }, 97 { "count", "set number of transform per frame", OFFSET(count), AV_OPT_TYPE_INT, { .i64 = 6 }, 1, 30, FLAGS }, 98 { "fontfile", "set font file", OFFSET(fontfile), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, FLAGS }, 99 { NULL } 100}; 101 102AVFILTER_DEFINE_CLASS(showcqt); 103 104static av_cold void uninit(AVFilterContext *ctx) 105{ 106 int k; 107 108 ShowCQTContext *s = ctx->priv; 109 av_fft_end(s->fft_context); 110 s->fft_context = NULL; 111 for (k = 0; k < VIDEO_WIDTH; k++) 112 av_freep(&s->coeffs[k]); 113 av_freep(&s->fft_data); 114 av_freep(&s->fft_result_left); 115 av_freep(&s->fft_result_right); 116 av_freep(&s->coeff_sort); 117 av_freep(&s->spectogram); 118 av_freep(&s->font_alpha); 119 av_frame_free(&s->outpicref); 120} 121 122static int query_formats(AVFilterContext *ctx) 123{ 124 AVFilterFormats *formats = NULL; 125 AVFilterChannelLayouts *layouts = NULL; 126 AVFilterLink *inlink = ctx->inputs[0]; 127 AVFilterLink *outlink = ctx->outputs[0]; 128 static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_NONE }; 129 static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE }; 130 static const int64_t channel_layouts[] = { AV_CH_LAYOUT_STEREO, AV_CH_LAYOUT_STEREO_DOWNMIX, -1 }; 131 static const int samplerates[] = { 44100, 48000, -1 }; 132 133 /* set input audio formats */ 134 formats = ff_make_format_list(sample_fmts); 135 if (!formats) 136 return AVERROR(ENOMEM); 137 ff_formats_ref(formats, &inlink->out_formats); 138 139 layouts = avfilter_make_format64_list(channel_layouts); 140 if (!layouts) 141 return AVERROR(ENOMEM); 142 ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts); 143 144 formats = ff_make_format_list(samplerates); 145 if (!formats) 146 return AVERROR(ENOMEM); 147 ff_formats_ref(formats, &inlink->out_samplerates); 148 149 /* set output video format */ 150 formats = ff_make_format_list(pix_fmts); 151 if (!formats) 152 return AVERROR(ENOMEM); 153 ff_formats_ref(formats, &outlink->in_formats); 154 155 return 0; 156} 157 158#if CONFIG_LIBFREETYPE 159static void load_freetype_font(AVFilterContext *ctx) 160{ 161 static const char str[] = "EF G A BC D "; 162 ShowCQTContext *s = ctx->priv; 163 FT_Library lib = NULL; 164 FT_Face face = NULL; 165 int video_scale = s->fullhd ? 2 : 1; 166 int video_width = (VIDEO_WIDTH/2) * video_scale; 167 int font_height = (FONT_HEIGHT/2) * video_scale; 168 int font_width = 8 * video_scale; 169 int font_repeat = font_width * 12; 170 int linear_hori_advance = font_width * 65536; 171 int non_monospace_warning = 0; 172 int x; 173 174 s->font_alpha = NULL; 175 176 if (!s->fontfile) 177 return; 178 179 if (FT_Init_FreeType(&lib)) 180 goto fail; 181 182 if (FT_New_Face(lib, s->fontfile, 0, &face)) 183 goto fail; 184 185 if (FT_Set_Char_Size(face, 16*64, 0, 0, 0)) 186 goto fail; 187 188 if (FT_Load_Char(face, 'A', FT_LOAD_RENDER)) 189 goto fail; 190 191 if (FT_Set_Char_Size(face, 16*64 * linear_hori_advance / face->glyph->linearHoriAdvance, 0, 0, 0)) 192 goto fail; 193 194 s->font_alpha = av_malloc(font_height * video_width); 195 if (!s->font_alpha) 196 goto fail; 197 198 memset(s->font_alpha, 0, font_height * video_width); 199 200 for (x = 0; x < 12; x++) { 201 int sx, sy, rx, bx, by, dx, dy; 202 203 if (str[x] == ' ') 204 continue; 205 206 if (FT_Load_Char(face, str[x], FT_LOAD_RENDER)) 207 goto fail; 208 209 if (face->glyph->advance.x != font_width*64 && !non_monospace_warning) { 210 av_log(ctx, AV_LOG_WARNING, "Font is not monospace\n"); 211 non_monospace_warning = 1; 212 } 213 214 sy = font_height - 4*video_scale - face->glyph->bitmap_top; 215 for (rx = 0; rx < 10; rx++) { 216 sx = rx * font_repeat + x * font_width + face->glyph->bitmap_left; 217 for (by = 0; by < face->glyph->bitmap.rows; by++) { 218 dy = by + sy; 219 if (dy < 0) 220 continue; 221 if (dy >= font_height) 222 break; 223 224 for (bx = 0; bx < face->glyph->bitmap.width; bx++) { 225 dx = bx + sx; 226 if (dx < 0) 227 continue; 228 if (dx >= video_width) 229 break; 230 s->font_alpha[dy*video_width+dx] = face->glyph->bitmap.buffer[by*face->glyph->bitmap.width+bx]; 231 } 232 } 233 } 234 } 235 236 FT_Done_Face(face); 237 FT_Done_FreeType(lib); 238 return; 239 240 fail: 241 av_log(ctx, AV_LOG_WARNING, "Error while loading freetype font, using default font instead\n"); 242 FT_Done_Face(face); 243 FT_Done_FreeType(lib); 244 av_freep(&s->font_alpha); 245 return; 246} 247#endif 248 249static inline int qsort_sparsecoeff(const SparseCoeff *a, const SparseCoeff *b) 250{ 251 if (fabsf(a->value) >= fabsf(b->value)) 252 return 1; 253 else 254 return -1; 255} 256 257static int config_output(AVFilterLink *outlink) 258{ 259 AVFilterContext *ctx = outlink->src; 260 AVFilterLink *inlink = ctx->inputs[0]; 261 ShowCQTContext *s = ctx->priv; 262 int fft_len, k, x, y; 263 int num_coeffs = 0; 264 int rate = inlink->sample_rate; 265 double max_len = rate * (double) s->timeclamp; 266 int64_t start_time, end_time; 267 int video_scale = s->fullhd ? 2 : 1; 268 int video_width = (VIDEO_WIDTH/2) * video_scale; 269 int video_height = (VIDEO_HEIGHT/2) * video_scale; 270 int spectogram_height = (SPECTOGRAM_HEIGHT/2) * video_scale; 271 272 s->fft_bits = ceil(log2(max_len)); 273 fft_len = 1 << s->fft_bits; 274 275 if (rate % (s->fps * s->count)) { 276 av_log(ctx, AV_LOG_ERROR, "Rate (%u) is not divisible by fps*count (%u*%u)\n", rate, s->fps, s->count); 277 return AVERROR(EINVAL); 278 } 279 280 s->fft_data = av_malloc_array(fft_len, sizeof(*s->fft_data)); 281 s->coeff_sort = av_malloc_array(fft_len, sizeof(*s->coeff_sort)); 282 s->fft_result_left = av_malloc_array(fft_len, sizeof(*s->fft_result_left)); 283 s->fft_result_right = av_malloc_array(fft_len, sizeof(*s->fft_result_right)); 284 s->fft_context = av_fft_init(s->fft_bits, 0); 285 286 if (!s->fft_data || !s->coeff_sort || !s->fft_result_left || !s->fft_result_right || !s->fft_context) 287 return AVERROR(ENOMEM); 288 289 /* initializing font */ 290 for (x = 0; x < video_width; x++) { 291 if (x >= (12*3+8)*8*video_scale && x < (12*4+8)*8*video_scale) { 292 float fx = (x-(12*3+8)*8*video_scale) * (2.0f/(192.0f*video_scale)); 293 float sv = sinf(M_PI*fx); 294 s->font_color[x] = sv*sv*255.0f + 0.5f; 295 } else { 296 s->font_color[x] = 0; 297 } 298 } 299 300#if CONFIG_LIBFREETYPE 301 load_freetype_font(ctx); 302#else 303 if (s->fontfile) 304 av_log(ctx, AV_LOG_WARNING, "Freetype is not available, ignoring fontfile option\n"); 305 s->font_alpha = NULL; 306#endif 307 308 av_log(ctx, AV_LOG_INFO, "Calculating spectral kernel, please wait\n"); 309 start_time = av_gettime_relative(); 310 for (k = 0; k < VIDEO_WIDTH; k++) { 311 int hlen = fft_len >> 1; 312 float total = 0; 313 float partial = 0; 314 double freq = BASE_FREQ * exp2(k * (1.0/192.0)); 315 double tlen = rate * (24.0 * 16.0) /freq; 316 /* a window function from Albert H. Nuttall, 317 * "Some Windows with Very Good Sidelobe Behavior" 318 * -93.32 dB peak sidelobe and 18 dB/octave asymptotic decay 319 * coefficient normalized to a0 = 1 */ 320 double a0 = 0.355768; 321 double a1 = 0.487396/a0; 322 double a2 = 0.144232/a0; 323 double a3 = 0.012604/a0; 324 double sv_step, cv_step, sv, cv; 325 double sw_step, cw_step, sw, cw, w; 326 327 tlen = tlen * max_len / (tlen + max_len); 328 s->fft_data[0].re = 0; 329 s->fft_data[0].im = 0; 330 s->fft_data[hlen].re = (1.0 + a1 + a2 + a3) * (1.0/tlen) * s->volume * (1.0/fft_len); 331 s->fft_data[hlen].im = 0; 332 sv_step = sv = sin(2.0*M_PI*freq*(1.0/rate)); 333 cv_step = cv = cos(2.0*M_PI*freq*(1.0/rate)); 334 /* also optimizing window func */ 335 sw_step = sw = sin(2.0*M_PI*(1.0/tlen)); 336 cw_step = cw = cos(2.0*M_PI*(1.0/tlen)); 337 for (x = 1; x < 0.5 * tlen; x++) { 338 double cv_tmp, cw_tmp; 339 double cw2, cw3, sw2; 340 341 cw2 = cw * cw - sw * sw; 342 sw2 = cw * sw + sw * cw; 343 cw3 = cw * cw2 - sw * sw2; 344 w = (1.0 + a1 * cw + a2 * cw2 + a3 * cw3) * (1.0/tlen) * s->volume * (1.0/fft_len); 345 s->fft_data[hlen + x].re = w * cv; 346 s->fft_data[hlen + x].im = w * sv; 347 s->fft_data[hlen - x].re = s->fft_data[hlen + x].re; 348 s->fft_data[hlen - x].im = -s->fft_data[hlen + x].im; 349 350 cv_tmp = cv * cv_step - sv * sv_step; 351 sv = sv * cv_step + cv * sv_step; 352 cv = cv_tmp; 353 cw_tmp = cw * cw_step - sw * sw_step; 354 sw = sw * cw_step + cw * sw_step; 355 cw = cw_tmp; 356 } 357 for (; x < hlen; x++) { 358 s->fft_data[hlen + x].re = 0; 359 s->fft_data[hlen + x].im = 0; 360 s->fft_data[hlen - x].re = 0; 361 s->fft_data[hlen - x].im = 0; 362 } 363 av_fft_permute(s->fft_context, s->fft_data); 364 av_fft_calc(s->fft_context, s->fft_data); 365 366 for (x = 0; x < fft_len; x++) { 367 s->coeff_sort[x].index = x; 368 s->coeff_sort[x].value = s->fft_data[x].re; 369 } 370 371 AV_QSORT(s->coeff_sort, fft_len, SparseCoeff, qsort_sparsecoeff); 372 for (x = 0; x < fft_len; x++) 373 total += fabsf(s->coeff_sort[x].value); 374 375 for (x = 0; x < fft_len; x++) { 376 partial += fabsf(s->coeff_sort[x].value); 377 if (partial > total * s->coeffclamp * COEFF_CLAMP) { 378 s->coeffs_len[k] = fft_len - x; 379 num_coeffs += s->coeffs_len[k]; 380 s->coeffs[k] = av_malloc_array(s->coeffs_len[k], sizeof(*s->coeffs[k])); 381 if (!s->coeffs[k]) 382 return AVERROR(ENOMEM); 383 for (y = 0; y < s->coeffs_len[k]; y++) 384 s->coeffs[k][y] = s->coeff_sort[x+y]; 385 break; 386 } 387 } 388 } 389 end_time = av_gettime_relative(); 390 av_log(ctx, AV_LOG_INFO, "Elapsed time %.6f s (fft_len=%u, num_coeffs=%u)\n", 1e-6 * (end_time-start_time), fft_len, num_coeffs); 391 392 outlink->w = video_width; 393 outlink->h = video_height; 394 395 s->req_fullfilled = 0; 396 s->spectogram_index = 0; 397 s->frame_count = 0; 398 s->spectogram_count = 0; 399 s->remaining_fill = fft_len >> 1; 400 memset(s->fft_data, 0, fft_len * sizeof(*s->fft_data)); 401 402 s->outpicref = ff_get_video_buffer(outlink, outlink->w, outlink->h); 403 if (!s->outpicref) 404 return AVERROR(ENOMEM); 405 406 s->spectogram = av_calloc(spectogram_height, s->outpicref->linesize[0]); 407 if (!s->spectogram) 408 return AVERROR(ENOMEM); 409 410 outlink->sample_aspect_ratio = av_make_q(1, 1); 411 outlink->time_base = av_make_q(1, s->fps); 412 outlink->frame_rate = av_make_q(s->fps, 1); 413 return 0; 414} 415 416static int plot_cqt(AVFilterLink *inlink) 417{ 418 AVFilterContext *ctx = inlink->dst; 419 ShowCQTContext *s = ctx->priv; 420 AVFilterLink *outlink = ctx->outputs[0]; 421 int fft_len = 1 << s->fft_bits; 422 FFTSample result[VIDEO_WIDTH][4]; 423 int x, y, ret = 0; 424 int linesize = s->outpicref->linesize[0]; 425 int video_scale = s->fullhd ? 2 : 1; 426 int video_width = (VIDEO_WIDTH/2) * video_scale; 427 int spectogram_height = (SPECTOGRAM_HEIGHT/2) * video_scale; 428 int spectogram_start = (SPECTOGRAM_START/2) * video_scale; 429 int font_height = (FONT_HEIGHT/2) * video_scale; 430 431 /* real part contains left samples, imaginary part contains right samples */ 432 memcpy(s->fft_result_left, s->fft_data, fft_len * sizeof(*s->fft_data)); 433 av_fft_permute(s->fft_context, s->fft_result_left); 434 av_fft_calc(s->fft_context, s->fft_result_left); 435 436 /* separate left and right, (and multiply by 2.0) */ 437 s->fft_result_right[0].re = 2.0f * s->fft_result_left[0].im; 438 s->fft_result_right[0].im = 0; 439 s->fft_result_left[0].re = 2.0f * s->fft_result_left[0].re; 440 s->fft_result_left[0].im = 0; 441 for (x = 1; x <= fft_len >> 1; x++) { 442 FFTSample tmpy = s->fft_result_left[fft_len-x].im - s->fft_result_left[x].im; 443 444 s->fft_result_right[x].re = s->fft_result_left[x].im + s->fft_result_left[fft_len-x].im; 445 s->fft_result_right[x].im = s->fft_result_left[x].re - s->fft_result_left[fft_len-x].re; 446 s->fft_result_right[fft_len-x].re = s->fft_result_right[x].re; 447 s->fft_result_right[fft_len-x].im = -s->fft_result_right[x].im; 448 449 s->fft_result_left[x].re = s->fft_result_left[x].re + s->fft_result_left[fft_len-x].re; 450 s->fft_result_left[x].im = tmpy; 451 s->fft_result_left[fft_len-x].re = s->fft_result_left[x].re; 452 s->fft_result_left[fft_len-x].im = -s->fft_result_left[x].im; 453 } 454 455 /* calculating cqt */ 456 for (x = 0; x < VIDEO_WIDTH; x++) { 457 int u; 458 float g = 1.0f / s->gamma; 459 FFTComplex l = {0,0}; 460 FFTComplex r = {0,0}; 461 462 for (u = 0; u < s->coeffs_len[x]; u++) { 463 FFTSample value = s->coeffs[x][u].value; 464 int index = s->coeffs[x][u].index; 465 l.re += value * s->fft_result_left[index].re; 466 l.im += value * s->fft_result_left[index].im; 467 r.re += value * s->fft_result_right[index].re; 468 r.im += value * s->fft_result_right[index].im; 469 } 470 /* result is power, not amplitude */ 471 result[x][0] = l.re * l.re + l.im * l.im; 472 result[x][2] = r.re * r.re + r.im * r.im; 473 result[x][1] = 0.5f * (result[x][0] + result[x][2]); 474 result[x][3] = result[x][1]; 475 result[x][0] = 255.0f * powf(FFMIN(1.0f,result[x][0]), g); 476 result[x][1] = 255.0f * powf(FFMIN(1.0f,result[x][1]), g); 477 result[x][2] = 255.0f * powf(FFMIN(1.0f,result[x][2]), g); 478 } 479 480 if (!s->fullhd) { 481 for (x = 0; x < video_width; x++) { 482 result[x][0] = 0.5f * (result[2*x][0] + result[2*x+1][0]); 483 result[x][1] = 0.5f * (result[2*x][1] + result[2*x+1][1]); 484 result[x][2] = 0.5f * (result[2*x][2] + result[2*x+1][2]); 485 result[x][3] = 0.5f * (result[2*x][3] + result[2*x+1][3]); 486 } 487 } 488 489 for (x = 0; x < video_width; x++) { 490 s->spectogram[s->spectogram_index*linesize + 3*x] = result[x][0] + 0.5f; 491 s->spectogram[s->spectogram_index*linesize + 3*x + 1] = result[x][1] + 0.5f; 492 s->spectogram[s->spectogram_index*linesize + 3*x + 2] = result[x][2] + 0.5f; 493 } 494 495 /* drawing */ 496 if (!s->spectogram_count) { 497 uint8_t *data = (uint8_t*) s->outpicref->data[0]; 498 float rcp_result[VIDEO_WIDTH]; 499 int total_length = linesize * spectogram_height; 500 int back_length = linesize * s->spectogram_index; 501 502 for (x = 0; x < video_width; x++) 503 rcp_result[x] = 1.0f / (result[x][3]+0.0001f); 504 505 /* drawing bar */ 506 for (y = 0; y < spectogram_height; y++) { 507 float height = (spectogram_height - y) * (1.0f/spectogram_height); 508 uint8_t *lineptr = data + y * linesize; 509 for (x = 0; x < video_width; x++) { 510 float mul; 511 if (result[x][3] <= height) { 512 *lineptr++ = 0; 513 *lineptr++ = 0; 514 *lineptr++ = 0; 515 } else { 516 mul = (result[x][3] - height) * rcp_result[x]; 517 *lineptr++ = mul * result[x][0] + 0.5f; 518 *lineptr++ = mul * result[x][1] + 0.5f; 519 *lineptr++ = mul * result[x][2] + 0.5f; 520 } 521 } 522 } 523 524 /* drawing font */ 525 if (s->font_alpha) { 526 for (y = 0; y < font_height; y++) { 527 uint8_t *lineptr = data + (spectogram_height + y) * linesize; 528 uint8_t *spectogram_src = s->spectogram + s->spectogram_index * linesize; 529 for (x = 0; x < video_width; x++) { 530 uint8_t alpha = s->font_alpha[y*video_width+x]; 531 uint8_t color = s->font_color[x]; 532 lineptr[3*x] = (spectogram_src[3*x] * (255-alpha) + (255-color) * alpha + 255) >> 8; 533 lineptr[3*x+1] = (spectogram_src[3*x+1] * (255-alpha) + 255) >> 8; 534 lineptr[3*x+2] = (spectogram_src[3*x+2] * (255-alpha) + color * alpha + 255) >> 8; 535 } 536 } 537 } else { 538 for (y = 0; y < font_height; y++) { 539 uint8_t *lineptr = data + (spectogram_height + y) * linesize; 540 memcpy(lineptr, s->spectogram + s->spectogram_index * linesize, video_width*3); 541 } 542 for (x = 0; x < video_width; x += video_width/10) { 543 int u; 544 static const char str[] = "EF G A BC D "; 545 uint8_t *startptr = data + spectogram_height * linesize + x * 3; 546 for (u = 0; str[u]; u++) { 547 int v; 548 for (v = 0; v < 16; v++) { 549 uint8_t *p = startptr + v * linesize * video_scale + 8 * 3 * u * video_scale; 550 int ux = x + 8 * u * video_scale; 551 int mask; 552 for (mask = 0x80; mask; mask >>= 1) { 553 if (mask & avpriv_vga16_font[str[u] * 16 + v]) { 554 p[0] = 255 - s->font_color[ux]; 555 p[1] = 0; 556 p[2] = s->font_color[ux]; 557 if (video_scale == 2) { 558 p[linesize] = p[0]; 559 p[linesize+1] = p[1]; 560 p[linesize+2] = p[2]; 561 p[3] = p[linesize+3] = 255 - s->font_color[ux+1]; 562 p[4] = p[linesize+4] = 0; 563 p[5] = p[linesize+5] = s->font_color[ux+1]; 564 } 565 } 566 p += 3 * video_scale; 567 ux += video_scale; 568 } 569 } 570 } 571 } 572 } 573 574 /* drawing spectogram/sonogram */ 575 data += spectogram_start * linesize; 576 memcpy(data, s->spectogram + s->spectogram_index*linesize, total_length - back_length); 577 578 data += total_length - back_length; 579 if (back_length) 580 memcpy(data, s->spectogram, back_length); 581 582 s->outpicref->pts = s->frame_count; 583 ret = ff_filter_frame(outlink, av_frame_clone(s->outpicref)); 584 s->req_fullfilled = 1; 585 s->frame_count++; 586 } 587 s->spectogram_count = (s->spectogram_count + 1) % s->count; 588 s->spectogram_index = (s->spectogram_index + spectogram_height - 1) % spectogram_height; 589 return ret; 590} 591 592static int filter_frame(AVFilterLink *inlink, AVFrame *insamples) 593{ 594 AVFilterContext *ctx = inlink->dst; 595 ShowCQTContext *s = ctx->priv; 596 int step = inlink->sample_rate / (s->fps * s->count); 597 int fft_len = 1 << s->fft_bits; 598 int remaining; 599 float *audio_data; 600 601 if (!insamples) { 602 while (s->remaining_fill < (fft_len >> 1)) { 603 int ret, x; 604 memset(&s->fft_data[fft_len - s->remaining_fill], 0, sizeof(*s->fft_data) * s->remaining_fill); 605 ret = plot_cqt(inlink); 606 if (ret < 0) 607 return ret; 608 for (x = 0; x < (fft_len-step); x++) 609 s->fft_data[x] = s->fft_data[x+step]; 610 s->remaining_fill += step; 611 } 612 return AVERROR(EOF); 613 } 614 615 remaining = insamples->nb_samples; 616 audio_data = (float*) insamples->data[0]; 617 618 while (remaining) { 619 if (remaining >= s->remaining_fill) { 620 int i = insamples->nb_samples - remaining; 621 int j = fft_len - s->remaining_fill; 622 int m, ret; 623 for (m = 0; m < s->remaining_fill; m++) { 624 s->fft_data[j+m].re = audio_data[2*(i+m)]; 625 s->fft_data[j+m].im = audio_data[2*(i+m)+1]; 626 } 627 ret = plot_cqt(inlink); 628 if (ret < 0) { 629 av_frame_free(&insamples); 630 return ret; 631 } 632 remaining -= s->remaining_fill; 633 for (m = 0; m < fft_len-step; m++) 634 s->fft_data[m] = s->fft_data[m+step]; 635 s->remaining_fill = step; 636 } else { 637 int i = insamples->nb_samples - remaining; 638 int j = fft_len - s->remaining_fill; 639 int m; 640 for (m = 0; m < remaining; m++) { 641 s->fft_data[m+j].re = audio_data[2*(i+m)]; 642 s->fft_data[m+j].im = audio_data[2*(i+m)+1]; 643 } 644 s->remaining_fill -= remaining; 645 remaining = 0; 646 } 647 } 648 av_frame_free(&insamples); 649 return 0; 650} 651 652static int request_frame(AVFilterLink *outlink) 653{ 654 ShowCQTContext *s = outlink->src->priv; 655 AVFilterLink *inlink = outlink->src->inputs[0]; 656 int ret; 657 658 s->req_fullfilled = 0; 659 do { 660 ret = ff_request_frame(inlink); 661 } while (!s->req_fullfilled && ret >= 0); 662 663 if (ret == AVERROR_EOF && s->outpicref) 664 filter_frame(inlink, NULL); 665 return ret; 666} 667 668static const AVFilterPad showcqt_inputs[] = { 669 { 670 .name = "default", 671 .type = AVMEDIA_TYPE_AUDIO, 672 .filter_frame = filter_frame, 673 }, 674 { NULL } 675}; 676 677static const AVFilterPad showcqt_outputs[] = { 678 { 679 .name = "default", 680 .type = AVMEDIA_TYPE_VIDEO, 681 .config_props = config_output, 682 .request_frame = request_frame, 683 }, 684 { NULL } 685}; 686 687AVFilter ff_avf_showcqt = { 688 .name = "showcqt", 689 .description = NULL_IF_CONFIG_SMALL("Convert input audio to a CQT (Constant Q Transform) spectrum video output."), 690 .uninit = uninit, 691 .query_formats = query_formats, 692 .priv_size = sizeof(ShowCQTContext), 693 .inputs = showcqt_inputs, 694 .outputs = showcqt_outputs, 695 .priv_class = &showcqt_class, 696}; 697