1/* 2 * Simple free lossless/lossy audio codec 3 * Copyright (c) 2004 Alex Beregszaszi 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21#include "avcodec.h" 22#include "get_bits.h" 23#include "golomb.h" 24#include "internal.h" 25#include "rangecoder.h" 26 27 28/** 29 * @file 30 * Simple free lossless/lossy audio codec 31 * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk) 32 * Written and designed by Alex Beregszaszi 33 * 34 * TODO: 35 * - CABAC put/get_symbol 36 * - independent quantizer for channels 37 * - >2 channels support 38 * - more decorrelation types 39 * - more tap_quant tests 40 * - selectable intlist writers/readers (bonk-style, golomb, cabac) 41 */ 42 43#define MAX_CHANNELS 2 44 45#define MID_SIDE 0 46#define LEFT_SIDE 1 47#define RIGHT_SIDE 2 48 49typedef struct SonicContext { 50 int version; 51 int minor_version; 52 int lossless, decorrelation; 53 54 int num_taps, downsampling; 55 double quantization; 56 57 int channels, samplerate, block_align, frame_size; 58 59 int *tap_quant; 60 int *int_samples; 61 int *coded_samples[MAX_CHANNELS]; 62 63 // for encoding 64 int *tail; 65 int tail_size; 66 int *window; 67 int window_size; 68 69 // for decoding 70 int *predictor_k; 71 int *predictor_state[MAX_CHANNELS]; 72} SonicContext; 73 74#define LATTICE_SHIFT 10 75#define SAMPLE_SHIFT 4 76#define LATTICE_FACTOR (1 << LATTICE_SHIFT) 77#define SAMPLE_FACTOR (1 << SAMPLE_SHIFT) 78 79#define BASE_QUANT 0.6 80#define RATE_VARIATION 3.0 81 82static inline int shift(int a,int b) 83{ 84 return (a+(1<<(b-1))) >> b; 85} 86 87static inline int shift_down(int a,int b) 88{ 89 return (a>>b)+(a<0); 90} 91 92static av_always_inline av_flatten void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){ 93 int i; 94 95#define put_rac(C,S,B) \ 96do{\ 97 if(rc_stat){\ 98 rc_stat[*(S)][B]++;\ 99 rc_stat2[(S)-state][B]++;\ 100 }\ 101 put_rac(C,S,B);\ 102}while(0) 103 104 if(v){ 105 const int a= FFABS(v); 106 const int e= av_log2(a); 107 put_rac(c, state+0, 0); 108 if(e<=9){ 109 for(i=0; i<e; i++){ 110 put_rac(c, state+1+i, 1); //1..10 111 } 112 put_rac(c, state+1+i, 0); 113 114 for(i=e-1; i>=0; i--){ 115 put_rac(c, state+22+i, (a>>i)&1); //22..31 116 } 117 118 if(is_signed) 119 put_rac(c, state+11 + e, v < 0); //11..21 120 }else{ 121 for(i=0; i<e; i++){ 122 put_rac(c, state+1+FFMIN(i,9), 1); //1..10 123 } 124 put_rac(c, state+1+9, 0); 125 126 for(i=e-1; i>=0; i--){ 127 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31 128 } 129 130 if(is_signed) 131 put_rac(c, state+11 + 10, v < 0); //11..21 132 } 133 }else{ 134 put_rac(c, state+0, 1); 135 } 136#undef put_rac 137} 138 139static inline av_flatten int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ 140 if(get_rac(c, state+0)) 141 return 0; 142 else{ 143 int i, e, a; 144 e= 0; 145 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10 146 e++; 147 } 148 149 a= 1; 150 for(i=e-1; i>=0; i--){ 151 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31 152 } 153 154 e= -(is_signed && get_rac(c, state+11 + FFMIN(e, 10))); //11..21 155 return (a^e)-e; 156 } 157} 158 159#if 1 160static inline int intlist_write(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part) 161{ 162 int i; 163 164 for (i = 0; i < entries; i++) 165 put_symbol(c, state, buf[i], 1, NULL, NULL); 166 167 return 1; 168} 169 170static inline int intlist_read(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part) 171{ 172 int i; 173 174 for (i = 0; i < entries; i++) 175 buf[i] = get_symbol(c, state, 1); 176 177 return 1; 178} 179#elif 1 180static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part) 181{ 182 int i; 183 184 for (i = 0; i < entries; i++) 185 set_se_golomb(pb, buf[i]); 186 187 return 1; 188} 189 190static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part) 191{ 192 int i; 193 194 for (i = 0; i < entries; i++) 195 buf[i] = get_se_golomb(gb); 196 197 return 1; 198} 199 200#else 201 202#define ADAPT_LEVEL 8 203 204static int bits_to_store(uint64_t x) 205{ 206 int res = 0; 207 208 while(x) 209 { 210 res++; 211 x >>= 1; 212 } 213 return res; 214} 215 216static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max) 217{ 218 int i, bits; 219 220 if (!max) 221 return; 222 223 bits = bits_to_store(max); 224 225 for (i = 0; i < bits-1; i++) 226 put_bits(pb, 1, value & (1 << i)); 227 228 if ( (value | (1 << (bits-1))) <= max) 229 put_bits(pb, 1, value & (1 << (bits-1))); 230} 231 232static unsigned int read_uint_max(GetBitContext *gb, int max) 233{ 234 int i, bits, value = 0; 235 236 if (!max) 237 return 0; 238 239 bits = bits_to_store(max); 240 241 for (i = 0; i < bits-1; i++) 242 if (get_bits1(gb)) 243 value += 1 << i; 244 245 if ( (value | (1<<(bits-1))) <= max) 246 if (get_bits1(gb)) 247 value += 1 << (bits-1); 248 249 return value; 250} 251 252static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part) 253{ 254 int i, j, x = 0, low_bits = 0, max = 0; 255 int step = 256, pos = 0, dominant = 0, any = 0; 256 int *copy, *bits; 257 258 copy = av_calloc(entries, sizeof(*copy)); 259 if (!copy) 260 return AVERROR(ENOMEM); 261 262 if (base_2_part) 263 { 264 int energy = 0; 265 266 for (i = 0; i < entries; i++) 267 energy += abs(buf[i]); 268 269 low_bits = bits_to_store(energy / (entries * 2)); 270 if (low_bits > 15) 271 low_bits = 15; 272 273 put_bits(pb, 4, low_bits); 274 } 275 276 for (i = 0; i < entries; i++) 277 { 278 put_bits(pb, low_bits, abs(buf[i])); 279 copy[i] = abs(buf[i]) >> low_bits; 280 if (copy[i] > max) 281 max = abs(copy[i]); 282 } 283 284 bits = av_calloc(entries*max, sizeof(*bits)); 285 if (!bits) 286 { 287 av_free(copy); 288 return AVERROR(ENOMEM); 289 } 290 291 for (i = 0; i <= max; i++) 292 { 293 for (j = 0; j < entries; j++) 294 if (copy[j] >= i) 295 bits[x++] = copy[j] > i; 296 } 297 298 // store bitstream 299 while (pos < x) 300 { 301 int steplet = step >> 8; 302 303 if (pos + steplet > x) 304 steplet = x - pos; 305 306 for (i = 0; i < steplet; i++) 307 if (bits[i+pos] != dominant) 308 any = 1; 309 310 put_bits(pb, 1, any); 311 312 if (!any) 313 { 314 pos += steplet; 315 step += step / ADAPT_LEVEL; 316 } 317 else 318 { 319 int interloper = 0; 320 321 while (((pos + interloper) < x) && (bits[pos + interloper] == dominant)) 322 interloper++; 323 324 // note change 325 write_uint_max(pb, interloper, (step >> 8) - 1); 326 327 pos += interloper + 1; 328 step -= step / ADAPT_LEVEL; 329 } 330 331 if (step < 256) 332 { 333 step = 65536 / step; 334 dominant = !dominant; 335 } 336 } 337 338 // store signs 339 for (i = 0; i < entries; i++) 340 if (buf[i]) 341 put_bits(pb, 1, buf[i] < 0); 342 343 av_free(bits); 344 av_free(copy); 345 346 return 0; 347} 348 349static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part) 350{ 351 int i, low_bits = 0, x = 0; 352 int n_zeros = 0, step = 256, dominant = 0; 353 int pos = 0, level = 0; 354 int *bits = av_calloc(entries, sizeof(*bits)); 355 356 if (!bits) 357 return AVERROR(ENOMEM); 358 359 if (base_2_part) 360 { 361 low_bits = get_bits(gb, 4); 362 363 if (low_bits) 364 for (i = 0; i < entries; i++) 365 buf[i] = get_bits(gb, low_bits); 366 } 367 368// av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits); 369 370 while (n_zeros < entries) 371 { 372 int steplet = step >> 8; 373 374 if (!get_bits1(gb)) 375 { 376 for (i = 0; i < steplet; i++) 377 bits[x++] = dominant; 378 379 if (!dominant) 380 n_zeros += steplet; 381 382 step += step / ADAPT_LEVEL; 383 } 384 else 385 { 386 int actual_run = read_uint_max(gb, steplet-1); 387 388// av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run); 389 390 for (i = 0; i < actual_run; i++) 391 bits[x++] = dominant; 392 393 bits[x++] = !dominant; 394 395 if (!dominant) 396 n_zeros += actual_run; 397 else 398 n_zeros++; 399 400 step -= step / ADAPT_LEVEL; 401 } 402 403 if (step < 256) 404 { 405 step = 65536 / step; 406 dominant = !dominant; 407 } 408 } 409 410 // reconstruct unsigned values 411 n_zeros = 0; 412 for (i = 0; n_zeros < entries; i++) 413 { 414 while(1) 415 { 416 if (pos >= entries) 417 { 418 pos = 0; 419 level += 1 << low_bits; 420 } 421 422 if (buf[pos] >= level) 423 break; 424 425 pos++; 426 } 427 428 if (bits[i]) 429 buf[pos] += 1 << low_bits; 430 else 431 n_zeros++; 432 433 pos++; 434 } 435 av_free(bits); 436 437 // read signs 438 for (i = 0; i < entries; i++) 439 if (buf[i] && get_bits1(gb)) 440 buf[i] = -buf[i]; 441 442// av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos); 443 444 return 0; 445} 446#endif 447 448static void predictor_init_state(int *k, int *state, int order) 449{ 450 int i; 451 452 for (i = order-2; i >= 0; i--) 453 { 454 int j, p, x = state[i]; 455 456 for (j = 0, p = i+1; p < order; j++,p++) 457 { 458 int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT); 459 state[p] += shift_down(k[j]*x, LATTICE_SHIFT); 460 x = tmp; 461 } 462 } 463} 464 465static int predictor_calc_error(int *k, int *state, int order, int error) 466{ 467 int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT); 468 469#if 1 470 int *k_ptr = &(k[order-2]), 471 *state_ptr = &(state[order-2]); 472 for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--) 473 { 474 int k_value = *k_ptr, state_value = *state_ptr; 475 x -= shift_down(k_value * state_value, LATTICE_SHIFT); 476 state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT); 477 } 478#else 479 for (i = order-2; i >= 0; i--) 480 { 481 x -= shift_down(k[i] * state[i], LATTICE_SHIFT); 482 state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT); 483 } 484#endif 485 486 // don't drift too far, to avoid overflows 487 if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16); 488 if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16); 489 490 state[0] = x; 491 492 return x; 493} 494 495#if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER 496// Heavily modified Levinson-Durbin algorithm which 497// copes better with quantization, and calculates the 498// actual whitened result as it goes. 499 500static void modified_levinson_durbin(int *window, int window_entries, 501 int *out, int out_entries, int channels, int *tap_quant) 502{ 503 int i; 504 int *state = av_calloc(window_entries, sizeof(*state)); 505 506 memcpy(state, window, 4* window_entries); 507 508 for (i = 0; i < out_entries; i++) 509 { 510 int step = (i+1)*channels, k, j; 511 double xx = 0.0, xy = 0.0; 512#if 1 513 int *x_ptr = &(window[step]); 514 int *state_ptr = &(state[0]); 515 j = window_entries - step; 516 for (;j>0;j--,x_ptr++,state_ptr++) 517 { 518 double x_value = *x_ptr; 519 double state_value = *state_ptr; 520 xx += state_value*state_value; 521 xy += x_value*state_value; 522 } 523#else 524 for (j = 0; j <= (window_entries - step); j++); 525 { 526 double stepval = window[step+j]; 527 double stateval = window[j]; 528// xx += (double)window[j]*(double)window[j]; 529// xy += (double)window[step+j]*(double)window[j]; 530 xx += stateval*stateval; 531 xy += stepval*stateval; 532 } 533#endif 534 if (xx == 0.0) 535 k = 0; 536 else 537 k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5)); 538 539 if (k > (LATTICE_FACTOR/tap_quant[i])) 540 k = LATTICE_FACTOR/tap_quant[i]; 541 if (-k > (LATTICE_FACTOR/tap_quant[i])) 542 k = -(LATTICE_FACTOR/tap_quant[i]); 543 544 out[i] = k; 545 k *= tap_quant[i]; 546 547#if 1 548 x_ptr = &(window[step]); 549 state_ptr = &(state[0]); 550 j = window_entries - step; 551 for (;j>0;j--,x_ptr++,state_ptr++) 552 { 553 int x_value = *x_ptr; 554 int state_value = *state_ptr; 555 *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT); 556 *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT); 557 } 558#else 559 for (j=0; j <= (window_entries - step); j++) 560 { 561 int stepval = window[step+j]; 562 int stateval=state[j]; 563 window[step+j] += shift_down(k * stateval, LATTICE_SHIFT); 564 state[j] += shift_down(k * stepval, LATTICE_SHIFT); 565 } 566#endif 567 } 568 569 av_free(state); 570} 571 572static inline int code_samplerate(int samplerate) 573{ 574 switch (samplerate) 575 { 576 case 44100: return 0; 577 case 22050: return 1; 578 case 11025: return 2; 579 case 96000: return 3; 580 case 48000: return 4; 581 case 32000: return 5; 582 case 24000: return 6; 583 case 16000: return 7; 584 case 8000: return 8; 585 } 586 return AVERROR(EINVAL); 587} 588 589static av_cold int sonic_encode_init(AVCodecContext *avctx) 590{ 591 SonicContext *s = avctx->priv_data; 592 PutBitContext pb; 593 int i; 594 595 s->version = 2; 596 597 if (avctx->channels > MAX_CHANNELS) 598 { 599 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n"); 600 return AVERROR(EINVAL); /* only stereo or mono for now */ 601 } 602 603 if (avctx->channels == 2) 604 s->decorrelation = MID_SIDE; 605 else 606 s->decorrelation = 3; 607 608 if (avctx->codec->id == AV_CODEC_ID_SONIC_LS) 609 { 610 s->lossless = 1; 611 s->num_taps = 32; 612 s->downsampling = 1; 613 s->quantization = 0.0; 614 } 615 else 616 { 617 s->num_taps = 128; 618 s->downsampling = 2; 619 s->quantization = 1.0; 620 } 621 622 // max tap 2048 623 if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) { 624 av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n"); 625 return AVERROR_INVALIDDATA; 626 } 627 628 // generate taps 629 s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant)); 630 for (i = 0; i < s->num_taps; i++) 631 s->tap_quant[i] = ff_sqrt(i+1); 632 633 s->channels = avctx->channels; 634 s->samplerate = avctx->sample_rate; 635 636 s->block_align = 2048LL*s->samplerate/(44100*s->downsampling); 637 s->frame_size = s->channels*s->block_align*s->downsampling; 638 639 s->tail_size = s->num_taps*s->channels; 640 s->tail = av_calloc(s->tail_size, sizeof(*s->tail)); 641 if (!s->tail) 642 return AVERROR(ENOMEM); 643 644 s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) ); 645 if (!s->predictor_k) 646 return AVERROR(ENOMEM); 647 648 for (i = 0; i < s->channels; i++) 649 { 650 s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples)); 651 if (!s->coded_samples[i]) 652 return AVERROR(ENOMEM); 653 } 654 655 s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples)); 656 657 s->window_size = ((2*s->tail_size)+s->frame_size); 658 s->window = av_calloc(s->window_size, sizeof(*s->window)); 659 if (!s->window) 660 return AVERROR(ENOMEM); 661 662 avctx->extradata = av_mallocz(16); 663 if (!avctx->extradata) 664 return AVERROR(ENOMEM); 665 init_put_bits(&pb, avctx->extradata, 16*8); 666 667 put_bits(&pb, 2, s->version); // version 668 if (s->version >= 1) 669 { 670 if (s->version >= 2) { 671 put_bits(&pb, 8, s->version); 672 put_bits(&pb, 8, s->minor_version); 673 } 674 put_bits(&pb, 2, s->channels); 675 put_bits(&pb, 4, code_samplerate(s->samplerate)); 676 } 677 put_bits(&pb, 1, s->lossless); 678 if (!s->lossless) 679 put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision 680 put_bits(&pb, 2, s->decorrelation); 681 put_bits(&pb, 2, s->downsampling); 682 put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024 683 put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table 684 685 flush_put_bits(&pb); 686 avctx->extradata_size = put_bits_count(&pb)/8; 687 688 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n", 689 s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling); 690 691 avctx->frame_size = s->block_align*s->downsampling; 692 693 return 0; 694} 695 696static av_cold int sonic_encode_close(AVCodecContext *avctx) 697{ 698 SonicContext *s = avctx->priv_data; 699 int i; 700 701 for (i = 0; i < s->channels; i++) 702 av_freep(&s->coded_samples[i]); 703 704 av_freep(&s->predictor_k); 705 av_freep(&s->tail); 706 av_freep(&s->tap_quant); 707 av_freep(&s->window); 708 av_freep(&s->int_samples); 709 710 return 0; 711} 712 713static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, 714 const AVFrame *frame, int *got_packet_ptr) 715{ 716 SonicContext *s = avctx->priv_data; 717 RangeCoder c; 718 int i, j, ch, quant = 0, x = 0; 719 int ret; 720 const short *samples = (const int16_t*)frame->data[0]; 721 uint8_t state[32]; 722 723 if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000)) < 0) 724 return ret; 725 726 ff_init_range_encoder(&c, avpkt->data, avpkt->size); 727 ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8); 728 memset(state, 128, sizeof(state)); 729 730 // short -> internal 731 for (i = 0; i < s->frame_size; i++) 732 s->int_samples[i] = samples[i]; 733 734 if (!s->lossless) 735 for (i = 0; i < s->frame_size; i++) 736 s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT; 737 738 switch(s->decorrelation) 739 { 740 case MID_SIDE: 741 for (i = 0; i < s->frame_size; i += s->channels) 742 { 743 s->int_samples[i] += s->int_samples[i+1]; 744 s->int_samples[i+1] -= shift(s->int_samples[i], 1); 745 } 746 break; 747 case LEFT_SIDE: 748 for (i = 0; i < s->frame_size; i += s->channels) 749 s->int_samples[i+1] -= s->int_samples[i]; 750 break; 751 case RIGHT_SIDE: 752 for (i = 0; i < s->frame_size; i += s->channels) 753 s->int_samples[i] -= s->int_samples[i+1]; 754 break; 755 } 756 757 memset(s->window, 0, 4* s->window_size); 758 759 for (i = 0; i < s->tail_size; i++) 760 s->window[x++] = s->tail[i]; 761 762 for (i = 0; i < s->frame_size; i++) 763 s->window[x++] = s->int_samples[i]; 764 765 for (i = 0; i < s->tail_size; i++) 766 s->window[x++] = 0; 767 768 for (i = 0; i < s->tail_size; i++) 769 s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i]; 770 771 // generate taps 772 modified_levinson_durbin(s->window, s->window_size, 773 s->predictor_k, s->num_taps, s->channels, s->tap_quant); 774 if ((ret = intlist_write(&c, state, s->predictor_k, s->num_taps, 0)) < 0) 775 return ret; 776 777 for (ch = 0; ch < s->channels; ch++) 778 { 779 x = s->tail_size+ch; 780 for (i = 0; i < s->block_align; i++) 781 { 782 int sum = 0; 783 for (j = 0; j < s->downsampling; j++, x += s->channels) 784 sum += s->window[x]; 785 s->coded_samples[ch][i] = sum; 786 } 787 } 788 789 // simple rate control code 790 if (!s->lossless) 791 { 792 double energy1 = 0.0, energy2 = 0.0; 793 for (ch = 0; ch < s->channels; ch++) 794 { 795 for (i = 0; i < s->block_align; i++) 796 { 797 double sample = s->coded_samples[ch][i]; 798 energy2 += sample*sample; 799 energy1 += fabs(sample); 800 } 801 } 802 803 energy2 = sqrt(energy2/(s->channels*s->block_align)); 804 energy1 = M_SQRT2*energy1/(s->channels*s->block_align); 805 806 // increase bitrate when samples are like a gaussian distribution 807 // reduce bitrate when samples are like a two-tailed exponential distribution 808 809 if (energy2 > energy1) 810 energy2 += (energy2-energy1)*RATE_VARIATION; 811 812 quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR); 813// av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2); 814 815 quant = av_clip(quant, 1, 65534); 816 817 put_symbol(&c, state, quant, 0, NULL, NULL); 818 819 quant *= SAMPLE_FACTOR; 820 } 821 822 // write out coded samples 823 for (ch = 0; ch < s->channels; ch++) 824 { 825 if (!s->lossless) 826 for (i = 0; i < s->block_align; i++) 827 s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant); 828 829 if ((ret = intlist_write(&c, state, s->coded_samples[ch], s->block_align, 1)) < 0) 830 return ret; 831 } 832 833// av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8); 834 835 avpkt->size = ff_rac_terminate(&c); 836 *got_packet_ptr = 1; 837 return 0; 838 839} 840#endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */ 841 842#if CONFIG_SONIC_DECODER 843static const int samplerate_table[] = 844 { 44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000 }; 845 846static av_cold int sonic_decode_init(AVCodecContext *avctx) 847{ 848 SonicContext *s = avctx->priv_data; 849 GetBitContext gb; 850 int i; 851 852 s->channels = avctx->channels; 853 s->samplerate = avctx->sample_rate; 854 855 if (!avctx->extradata) 856 { 857 av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n"); 858 return AVERROR_INVALIDDATA; 859 } 860 861 init_get_bits8(&gb, avctx->extradata, avctx->extradata_size); 862 863 s->version = get_bits(&gb, 2); 864 if (s->version >= 2) { 865 s->version = get_bits(&gb, 8); 866 s->minor_version = get_bits(&gb, 8); 867 } 868 if (s->version != 2) 869 { 870 av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n"); 871 return AVERROR_INVALIDDATA; 872 } 873 874 if (s->version >= 1) 875 { 876 s->channels = get_bits(&gb, 2); 877 s->samplerate = samplerate_table[get_bits(&gb, 4)]; 878 av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n", 879 s->channels, s->samplerate); 880 } 881 882 if (s->channels > MAX_CHANNELS) 883 { 884 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n"); 885 return AVERROR_INVALIDDATA; 886 } 887 888 s->lossless = get_bits1(&gb); 889 if (!s->lossless) 890 skip_bits(&gb, 3); // XXX FIXME 891 s->decorrelation = get_bits(&gb, 2); 892 if (s->decorrelation != 3 && s->channels != 2) { 893 av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation); 894 return AVERROR_INVALIDDATA; 895 } 896 897 s->downsampling = get_bits(&gb, 2); 898 if (!s->downsampling) { 899 av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n"); 900 return AVERROR_INVALIDDATA; 901 } 902 903 s->num_taps = (get_bits(&gb, 5)+1)<<5; 904 if (get_bits1(&gb)) // XXX FIXME 905 av_log(avctx, AV_LOG_INFO, "Custom quant table\n"); 906 907 s->block_align = 2048LL*s->samplerate/(44100*s->downsampling); 908 s->frame_size = s->channels*s->block_align*s->downsampling; 909// avctx->frame_size = s->block_align; 910 911 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n", 912 s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling); 913 914 // generate taps 915 s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant)); 916 for (i = 0; i < s->num_taps; i++) 917 s->tap_quant[i] = ff_sqrt(i+1); 918 919 s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k)); 920 921 for (i = 0; i < s->channels; i++) 922 { 923 s->predictor_state[i] = av_calloc(s->num_taps, sizeof(**s->predictor_state)); 924 if (!s->predictor_state[i]) 925 return AVERROR(ENOMEM); 926 } 927 928 for (i = 0; i < s->channels; i++) 929 { 930 s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples)); 931 if (!s->coded_samples[i]) 932 return AVERROR(ENOMEM); 933 } 934 s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples)); 935 936 avctx->sample_fmt = AV_SAMPLE_FMT_S16; 937 return 0; 938} 939 940static av_cold int sonic_decode_close(AVCodecContext *avctx) 941{ 942 SonicContext *s = avctx->priv_data; 943 int i; 944 945 av_freep(&s->int_samples); 946 av_freep(&s->tap_quant); 947 av_freep(&s->predictor_k); 948 949 for (i = 0; i < s->channels; i++) 950 { 951 av_freep(&s->predictor_state[i]); 952 av_freep(&s->coded_samples[i]); 953 } 954 955 return 0; 956} 957 958static int sonic_decode_frame(AVCodecContext *avctx, 959 void *data, int *got_frame_ptr, 960 AVPacket *avpkt) 961{ 962 const uint8_t *buf = avpkt->data; 963 int buf_size = avpkt->size; 964 SonicContext *s = avctx->priv_data; 965 RangeCoder c; 966 uint8_t state[32]; 967 int i, quant, ch, j, ret; 968 int16_t *samples; 969 AVFrame *frame = data; 970 971 if (buf_size == 0) return 0; 972 973 frame->nb_samples = s->frame_size / avctx->channels; 974 if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) 975 return ret; 976 samples = (int16_t *)frame->data[0]; 977 978// av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size); 979 980 memset(state, 128, sizeof(state)); 981 ff_init_range_decoder(&c, buf, buf_size); 982 ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8); 983 984 intlist_read(&c, state, s->predictor_k, s->num_taps, 0); 985 986 // dequantize 987 for (i = 0; i < s->num_taps; i++) 988 s->predictor_k[i] *= s->tap_quant[i]; 989 990 if (s->lossless) 991 quant = 1; 992 else 993 quant = get_symbol(&c, state, 0) * SAMPLE_FACTOR; 994 995// av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant); 996 997 for (ch = 0; ch < s->channels; ch++) 998 { 999 int x = ch; 1000 1001 predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps); 1002 1003 intlist_read(&c, state, s->coded_samples[ch], s->block_align, 1); 1004 1005 for (i = 0; i < s->block_align; i++) 1006 { 1007 for (j = 0; j < s->downsampling - 1; j++) 1008 { 1009 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0); 1010 x += s->channels; 1011 } 1012 1013 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant); 1014 x += s->channels; 1015 } 1016 1017 for (i = 0; i < s->num_taps; i++) 1018 s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels]; 1019 } 1020 1021 switch(s->decorrelation) 1022 { 1023 case MID_SIDE: 1024 for (i = 0; i < s->frame_size; i += s->channels) 1025 { 1026 s->int_samples[i+1] += shift(s->int_samples[i], 1); 1027 s->int_samples[i] -= s->int_samples[i+1]; 1028 } 1029 break; 1030 case LEFT_SIDE: 1031 for (i = 0; i < s->frame_size; i += s->channels) 1032 s->int_samples[i+1] += s->int_samples[i]; 1033 break; 1034 case RIGHT_SIDE: 1035 for (i = 0; i < s->frame_size; i += s->channels) 1036 s->int_samples[i] += s->int_samples[i+1]; 1037 break; 1038 } 1039 1040 if (!s->lossless) 1041 for (i = 0; i < s->frame_size; i++) 1042 s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT); 1043 1044 // internal -> short 1045 for (i = 0; i < s->frame_size; i++) 1046 samples[i] = av_clip_int16(s->int_samples[i]); 1047 1048 *got_frame_ptr = 1; 1049 1050 return buf_size; 1051} 1052 1053AVCodec ff_sonic_decoder = { 1054 .name = "sonic", 1055 .long_name = NULL_IF_CONFIG_SMALL("Sonic"), 1056 .type = AVMEDIA_TYPE_AUDIO, 1057 .id = AV_CODEC_ID_SONIC, 1058 .priv_data_size = sizeof(SonicContext), 1059 .init = sonic_decode_init, 1060 .close = sonic_decode_close, 1061 .decode = sonic_decode_frame, 1062 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_EXPERIMENTAL, 1063}; 1064#endif /* CONFIG_SONIC_DECODER */ 1065 1066#if CONFIG_SONIC_ENCODER 1067AVCodec ff_sonic_encoder = { 1068 .name = "sonic", 1069 .long_name = NULL_IF_CONFIG_SMALL("Sonic"), 1070 .type = AVMEDIA_TYPE_AUDIO, 1071 .id = AV_CODEC_ID_SONIC, 1072 .priv_data_size = sizeof(SonicContext), 1073 .init = sonic_encode_init, 1074 .encode2 = sonic_encode_frame, 1075 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE }, 1076 .capabilities = CODEC_CAP_EXPERIMENTAL, 1077 .close = sonic_encode_close, 1078}; 1079#endif 1080 1081#if CONFIG_SONIC_LS_ENCODER 1082AVCodec ff_sonic_ls_encoder = { 1083 .name = "sonicls", 1084 .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"), 1085 .type = AVMEDIA_TYPE_AUDIO, 1086 .id = AV_CODEC_ID_SONIC_LS, 1087 .priv_data_size = sizeof(SonicContext), 1088 .init = sonic_encode_init, 1089 .encode2 = sonic_encode_frame, 1090 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE }, 1091 .capabilities = CODEC_CAP_EXPERIMENTAL, 1092 .close = sonic_encode_close, 1093}; 1094#endif 1095