1/* 2 * AAC decoder 3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org ) 4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com ) 5 * 6 * This file is part of FFmpeg. 7 * 8 * FFmpeg is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * FFmpeg is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with FFmpeg; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 */ 22 23/** 24 * @file 25 * AAC decoder 26 * @author Oded Shimon ( ods15 ods15 dyndns org ) 27 * @author Maxim Gavrilov ( maxim.gavrilov gmail com ) 28 */ 29 30/* 31 * supported tools 32 * 33 * Support? Name 34 * N (code in SoC repo) gain control 35 * Y block switching 36 * Y window shapes - standard 37 * N window shapes - Low Delay 38 * Y filterbank - standard 39 * N (code in SoC repo) filterbank - Scalable Sample Rate 40 * Y Temporal Noise Shaping 41 * N (code in SoC repo) Long Term Prediction 42 * Y intensity stereo 43 * Y channel coupling 44 * Y frequency domain prediction 45 * Y Perceptual Noise Substitution 46 * Y Mid/Side stereo 47 * N Scalable Inverse AAC Quantization 48 * N Frequency Selective Switch 49 * N upsampling filter 50 * Y quantization & coding - AAC 51 * N quantization & coding - TwinVQ 52 * N quantization & coding - BSAC 53 * N AAC Error Resilience tools 54 * N Error Resilience payload syntax 55 * N Error Protection tool 56 * N CELP 57 * N Silence Compression 58 * N HVXC 59 * N HVXC 4kbits/s VR 60 * N Structured Audio tools 61 * N Structured Audio Sample Bank Format 62 * N MIDI 63 * N Harmonic and Individual Lines plus Noise 64 * N Text-To-Speech Interface 65 * Y Spectral Band Replication 66 * Y (not in this code) Layer-1 67 * Y (not in this code) Layer-2 68 * Y (not in this code) Layer-3 69 * N SinuSoidal Coding (Transient, Sinusoid, Noise) 70 * N (planned) Parametric Stereo 71 * N Direct Stream Transfer 72 * 73 * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication. 74 * - HE AAC v2 comprises LC AAC with Spectral Band Replication and 75 Parametric Stereo. 76 */ 77 78 79#include "avcodec.h" 80#include "internal.h" 81#include "get_bits.h" 82#include "dsputil.h" 83#include "fft.h" 84#include "lpc.h" 85 86#include "aac.h" 87#include "aactab.h" 88#include "aacdectab.h" 89#include "cbrt_tablegen.h" 90#include "sbr.h" 91#include "aacsbr.h" 92#include "mpeg4audio.h" 93#include "aac_parser.h" 94 95#include <assert.h> 96#include <errno.h> 97#include <math.h> 98#include <string.h> 99 100#if ARCH_ARM 101# include "arm/aac.h" 102#endif 103 104union float754 { 105 float f; 106 uint32_t i; 107}; 108 109static VLC vlc_scalefactors; 110static VLC vlc_spectral[11]; 111 112static const char overread_err[] = "Input buffer exhausted before END element found\n"; 113 114static ChannelElement *get_che(AACContext *ac, int type, int elem_id) 115{ 116 if (ac->tag_che_map[type][elem_id]) { 117 return ac->tag_che_map[type][elem_id]; 118 } 119 if (ac->tags_mapped >= tags_per_config[ac->m4ac.chan_config]) { 120 return NULL; 121 } 122 switch (ac->m4ac.chan_config) { 123 case 7: 124 if (ac->tags_mapped == 3 && type == TYPE_CPE) { 125 ac->tags_mapped++; 126 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2]; 127 } 128 case 6: 129 /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1] 130 instead of SCE[0] CPE[0] CPE[0] LFE[0]. If we seem to have 131 encountered such a stream, transfer the LFE[0] element to SCE[1] */ 132 if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) { 133 ac->tags_mapped++; 134 return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0]; 135 } 136 case 5: 137 if (ac->tags_mapped == 2 && type == TYPE_CPE) { 138 ac->tags_mapped++; 139 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1]; 140 } 141 case 4: 142 if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) { 143 ac->tags_mapped++; 144 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1]; 145 } 146 case 3: 147 case 2: 148 if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) { 149 ac->tags_mapped++; 150 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0]; 151 } else if (ac->m4ac.chan_config == 2) { 152 return NULL; 153 } 154 case 1: 155 if (!ac->tags_mapped && type == TYPE_SCE) { 156 ac->tags_mapped++; 157 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0]; 158 } 159 default: 160 return NULL; 161 } 162} 163 164/** 165 * Check for the channel element in the current channel position configuration. 166 * If it exists, make sure the appropriate element is allocated and map the 167 * channel order to match the internal FFmpeg channel layout. 168 * 169 * @param che_pos current channel position configuration 170 * @param type channel element type 171 * @param id channel element id 172 * @param channels count of the number of channels in the configuration 173 * 174 * @return Returns error status. 0 - OK, !0 - error 175 */ 176static av_cold int che_configure(AACContext *ac, 177 enum ChannelPosition che_pos[4][MAX_ELEM_ID], 178 int type, int id, 179 int *channels) 180{ 181 if (che_pos[type][id]) { 182 if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement)))) 183 return AVERROR(ENOMEM); 184 ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr); 185 if (type != TYPE_CCE) { 186 ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret; 187 if (type == TYPE_CPE) { 188 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret; 189 } 190 } 191 } else { 192 if (ac->che[type][id]) 193 ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr); 194 av_freep(&ac->che[type][id]); 195 } 196 return 0; 197} 198 199/** 200 * Configure output channel order based on the current program configuration element. 201 * 202 * @param che_pos current channel position configuration 203 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one. 204 * 205 * @return Returns error status. 0 - OK, !0 - error 206 */ 207static av_cold int output_configure(AACContext *ac, 208 enum ChannelPosition che_pos[4][MAX_ELEM_ID], 209 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID], 210 int channel_config, enum OCStatus oc_type) 211{ 212 AVCodecContext *avctx = ac->avccontext; 213 int i, type, channels = 0, ret; 214 215 memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0])); 216 217 if (channel_config) { 218 for (i = 0; i < tags_per_config[channel_config]; i++) { 219 if ((ret = che_configure(ac, che_pos, 220 aac_channel_layout_map[channel_config - 1][i][0], 221 aac_channel_layout_map[channel_config - 1][i][1], 222 &channels))) 223 return ret; 224 } 225 226 memset(ac->tag_che_map, 0, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0])); 227 ac->tags_mapped = 0; 228 229 avctx->channel_layout = aac_channel_layout[channel_config - 1]; 230 } else { 231 /* Allocate or free elements depending on if they are in the 232 * current program configuration. 233 * 234 * Set up default 1:1 output mapping. 235 * 236 * For a 5.1 stream the output order will be: 237 * [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ] 238 */ 239 240 for (i = 0; i < MAX_ELEM_ID; i++) { 241 for (type = 0; type < 4; type++) { 242 if ((ret = che_configure(ac, che_pos, type, i, &channels))) 243 return ret; 244 } 245 } 246 247 memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0])); 248 ac->tags_mapped = 4 * MAX_ELEM_ID; 249 250 avctx->channel_layout = 0; 251 } 252 253 avctx->channels = channels; 254 255 ac->output_configured = oc_type; 256 257 return 0; 258} 259 260/** 261 * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit. 262 * 263 * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present. 264 * @param sce_map mono (Single Channel Element) map 265 * @param type speaker type/position for these channels 266 */ 267static void decode_channel_map(enum ChannelPosition *cpe_map, 268 enum ChannelPosition *sce_map, 269 enum ChannelPosition type, 270 GetBitContext *gb, int n) 271{ 272 while (n--) { 273 enum ChannelPosition *map = cpe_map && get_bits1(gb) ? cpe_map : sce_map; // stereo or mono map 274 map[get_bits(gb, 4)] = type; 275 } 276} 277 278/** 279 * Decode program configuration element; reference: table 4.2. 280 * 281 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one. 282 * 283 * @return Returns error status. 0 - OK, !0 - error 284 */ 285static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_ELEM_ID], 286 GetBitContext *gb) 287{ 288 int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index; 289 int comment_len; 290 291 skip_bits(gb, 2); // object_type 292 293 sampling_index = get_bits(gb, 4); 294 if (ac->m4ac.sampling_index != sampling_index) 295 av_log(ac->avccontext, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n"); 296 297 num_front = get_bits(gb, 4); 298 num_side = get_bits(gb, 4); 299 num_back = get_bits(gb, 4); 300 num_lfe = get_bits(gb, 2); 301 num_assoc_data = get_bits(gb, 3); 302 num_cc = get_bits(gb, 4); 303 304 if (get_bits1(gb)) 305 skip_bits(gb, 4); // mono_mixdown_tag 306 if (get_bits1(gb)) 307 skip_bits(gb, 4); // stereo_mixdown_tag 308 309 if (get_bits1(gb)) 310 skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround 311 312 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front); 313 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE, gb, num_side ); 314 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK, gb, num_back ); 315 decode_channel_map(NULL, new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE, gb, num_lfe ); 316 317 skip_bits_long(gb, 4 * num_assoc_data); 318 319 decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC, gb, num_cc ); 320 321 align_get_bits(gb); 322 323 /* comment field, first byte is length */ 324 comment_len = get_bits(gb, 8) * 8; 325 if (get_bits_left(gb) < comment_len) { 326 av_log(ac->avccontext, AV_LOG_ERROR, overread_err); 327 return -1; 328 } 329 skip_bits_long(gb, comment_len); 330 return 0; 331} 332 333/** 334 * Set up channel positions based on a default channel configuration 335 * as specified in table 1.17. 336 * 337 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one. 338 * 339 * @return Returns error status. 0 - OK, !0 - error 340 */ 341static av_cold int set_default_channel_config(AACContext *ac, 342 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID], 343 int channel_config) 344{ 345 if (channel_config < 1 || channel_config > 7) { 346 av_log(ac->avccontext, AV_LOG_ERROR, "invalid default channel configuration (%d)\n", 347 channel_config); 348 return -1; 349 } 350 351 /* default channel configurations: 352 * 353 * 1ch : front center (mono) 354 * 2ch : L + R (stereo) 355 * 3ch : front center + L + R 356 * 4ch : front center + L + R + back center 357 * 5ch : front center + L + R + back stereo 358 * 6ch : front center + L + R + back stereo + LFE 359 * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE 360 */ 361 362 if (channel_config != 2) 363 new_che_pos[TYPE_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono) 364 if (channel_config > 1) 365 new_che_pos[TYPE_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo) 366 if (channel_config == 4) 367 new_che_pos[TYPE_SCE][1] = AAC_CHANNEL_BACK; // back center 368 if (channel_config > 4) 369 new_che_pos[TYPE_CPE][(channel_config == 7) + 1] 370 = AAC_CHANNEL_BACK; // back stereo 371 if (channel_config > 5) 372 new_che_pos[TYPE_LFE][0] = AAC_CHANNEL_LFE; // LFE 373 if (channel_config == 7) 374 new_che_pos[TYPE_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right 375 376 return 0; 377} 378 379/** 380 * Decode GA "General Audio" specific configuration; reference: table 4.1. 381 * 382 * @return Returns error status. 0 - OK, !0 - error 383 */ 384static int decode_ga_specific_config(AACContext *ac, GetBitContext *gb, 385 int channel_config) 386{ 387 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID]; 388 int extension_flag, ret; 389 390 if (get_bits1(gb)) { // frameLengthFlag 391 av_log_missing_feature(ac->avccontext, "960/120 MDCT window is", 1); 392 return -1; 393 } 394 395 if (get_bits1(gb)) // dependsOnCoreCoder 396 skip_bits(gb, 14); // coreCoderDelay 397 extension_flag = get_bits1(gb); 398 399 if (ac->m4ac.object_type == AOT_AAC_SCALABLE || 400 ac->m4ac.object_type == AOT_ER_AAC_SCALABLE) 401 skip_bits(gb, 3); // layerNr 402 403 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0])); 404 if (channel_config == 0) { 405 skip_bits(gb, 4); // element_instance_tag 406 if ((ret = decode_pce(ac, new_che_pos, gb))) 407 return ret; 408 } else { 409 if ((ret = set_default_channel_config(ac, new_che_pos, channel_config))) 410 return ret; 411 } 412 if ((ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR))) 413 return ret; 414 415 if (extension_flag) { 416 switch (ac->m4ac.object_type) { 417 case AOT_ER_BSAC: 418 skip_bits(gb, 5); // numOfSubFrame 419 skip_bits(gb, 11); // layer_length 420 break; 421 case AOT_ER_AAC_LC: 422 case AOT_ER_AAC_LTP: 423 case AOT_ER_AAC_SCALABLE: 424 case AOT_ER_AAC_LD: 425 skip_bits(gb, 3); /* aacSectionDataResilienceFlag 426 * aacScalefactorDataResilienceFlag 427 * aacSpectralDataResilienceFlag 428 */ 429 break; 430 } 431 skip_bits1(gb); // extensionFlag3 (TBD in version 3) 432 } 433 return 0; 434} 435 436/** 437 * Decode audio specific configuration; reference: table 1.13. 438 * 439 * @param data pointer to AVCodecContext extradata 440 * @param data_size size of AVCCodecContext extradata 441 * 442 * @return Returns error status. 0 - OK, !0 - error 443 */ 444static int decode_audio_specific_config(AACContext *ac, void *data, 445 int data_size) 446{ 447 GetBitContext gb; 448 int i; 449 450 init_get_bits(&gb, data, data_size * 8); 451 452 if ((i = ff_mpeg4audio_get_config(&ac->m4ac, data, data_size)) < 0) 453 return -1; 454 if (ac->m4ac.sampling_index > 12) { 455 av_log(ac->avccontext, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index); 456 return -1; 457 } 458 459 skip_bits_long(&gb, i); 460 461 switch (ac->m4ac.object_type) { 462 case AOT_AAC_MAIN: 463 case AOT_AAC_LC: 464 if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config)) 465 return -1; 466 break; 467 default: 468 av_log(ac->avccontext, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n", 469 ac->m4ac.sbr == 1? "SBR+" : "", ac->m4ac.object_type); 470 return -1; 471 } 472 return 0; 473} 474 475/** 476 * linear congruential pseudorandom number generator 477 * 478 * @param previous_val pointer to the current state of the generator 479 * 480 * @return Returns a 32-bit pseudorandom integer 481 */ 482static av_always_inline int lcg_random(int previous_val) 483{ 484 return previous_val * 1664525 + 1013904223; 485} 486 487static av_always_inline void reset_predict_state(PredictorState *ps) 488{ 489 ps->r0 = 0.0f; 490 ps->r1 = 0.0f; 491 ps->cor0 = 0.0f; 492 ps->cor1 = 0.0f; 493 ps->var0 = 1.0f; 494 ps->var1 = 1.0f; 495} 496 497static void reset_all_predictors(PredictorState *ps) 498{ 499 int i; 500 for (i = 0; i < MAX_PREDICTORS; i++) 501 reset_predict_state(&ps[i]); 502} 503 504static void reset_predictor_group(PredictorState *ps, int group_num) 505{ 506 int i; 507 for (i = group_num - 1; i < MAX_PREDICTORS; i += 30) 508 reset_predict_state(&ps[i]); 509} 510 511static av_cold int aac_decode_init(AVCodecContext *avccontext) 512{ 513 AACContext *ac = avccontext->priv_data; 514 int i; 515 516 ac->avccontext = avccontext; 517 ac->m4ac.sample_rate = avccontext->sample_rate; 518 519 if (avccontext->extradata_size > 0) { 520 if (decode_audio_specific_config(ac, avccontext->extradata, avccontext->extradata_size)) 521 return -1; 522 } 523 524 avccontext->sample_fmt = SAMPLE_FMT_S16; 525 526 AAC_INIT_VLC_STATIC( 0, 304); 527 AAC_INIT_VLC_STATIC( 1, 270); 528 AAC_INIT_VLC_STATIC( 2, 550); 529 AAC_INIT_VLC_STATIC( 3, 300); 530 AAC_INIT_VLC_STATIC( 4, 328); 531 AAC_INIT_VLC_STATIC( 5, 294); 532 AAC_INIT_VLC_STATIC( 6, 306); 533 AAC_INIT_VLC_STATIC( 7, 268); 534 AAC_INIT_VLC_STATIC( 8, 510); 535 AAC_INIT_VLC_STATIC( 9, 366); 536 AAC_INIT_VLC_STATIC(10, 462); 537 538 ff_aac_sbr_init(); 539 540 dsputil_init(&ac->dsp, avccontext); 541 542 ac->random_state = 0x1f2e3d4c; 543 544 // -1024 - Compensate wrong IMDCT method. 545 // 32768 - Required to scale values to the correct range for the bias method 546 // for float to int16 conversion. 547 548 if (ac->dsp.float_to_int16_interleave == ff_float_to_int16_interleave_c) { 549 ac->add_bias = 385.0f; 550 ac->sf_scale = 1. / (-1024. * 32768.); 551 ac->sf_offset = 0; 552 } else { 553 ac->add_bias = 0.0f; 554 ac->sf_scale = 1. / -1024.; 555 ac->sf_offset = 60; 556 } 557 558#if !CONFIG_HARDCODED_TABLES 559 for (i = 0; i < 428; i++) 560 ff_aac_pow2sf_tab[i] = pow(2, (i - 200) / 4.); 561#endif /* CONFIG_HARDCODED_TABLES */ 562 563 INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code), 564 ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]), 565 ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]), 566 352); 567 568 ff_mdct_init(&ac->mdct, 11, 1, 1.0); 569 ff_mdct_init(&ac->mdct_small, 8, 1, 1.0); 570 // window initialization 571 ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024); 572 ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128); 573 ff_init_ff_sine_windows(10); 574 ff_init_ff_sine_windows( 7); 575 576 cbrt_tableinit(); 577 578 return 0; 579} 580 581/** 582 * Skip data_stream_element; reference: table 4.10. 583 */ 584static int skip_data_stream_element(AACContext *ac, GetBitContext *gb) 585{ 586 int byte_align = get_bits1(gb); 587 int count = get_bits(gb, 8); 588 if (count == 255) 589 count += get_bits(gb, 8); 590 if (byte_align) 591 align_get_bits(gb); 592 593 if (get_bits_left(gb) < 8 * count) { 594 av_log(ac->avccontext, AV_LOG_ERROR, overread_err); 595 return -1; 596 } 597 skip_bits_long(gb, 8 * count); 598 return 0; 599} 600 601static int decode_prediction(AACContext *ac, IndividualChannelStream *ics, 602 GetBitContext *gb) 603{ 604 int sfb; 605 if (get_bits1(gb)) { 606 ics->predictor_reset_group = get_bits(gb, 5); 607 if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) { 608 av_log(ac->avccontext, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n"); 609 return -1; 610 } 611 } 612 for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) { 613 ics->prediction_used[sfb] = get_bits1(gb); 614 } 615 return 0; 616} 617 618/** 619 * Decode Individual Channel Stream info; reference: table 4.6. 620 * 621 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information. 622 */ 623static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics, 624 GetBitContext *gb, int common_window) 625{ 626 if (get_bits1(gb)) { 627 av_log(ac->avccontext, AV_LOG_ERROR, "Reserved bit set.\n"); 628 memset(ics, 0, sizeof(IndividualChannelStream)); 629 return -1; 630 } 631 ics->window_sequence[1] = ics->window_sequence[0]; 632 ics->window_sequence[0] = get_bits(gb, 2); 633 ics->use_kb_window[1] = ics->use_kb_window[0]; 634 ics->use_kb_window[0] = get_bits1(gb); 635 ics->num_window_groups = 1; 636 ics->group_len[0] = 1; 637 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 638 int i; 639 ics->max_sfb = get_bits(gb, 4); 640 for (i = 0; i < 7; i++) { 641 if (get_bits1(gb)) { 642 ics->group_len[ics->num_window_groups - 1]++; 643 } else { 644 ics->num_window_groups++; 645 ics->group_len[ics->num_window_groups - 1] = 1; 646 } 647 } 648 ics->num_windows = 8; 649 ics->swb_offset = ff_swb_offset_128[ac->m4ac.sampling_index]; 650 ics->num_swb = ff_aac_num_swb_128[ac->m4ac.sampling_index]; 651 ics->tns_max_bands = ff_tns_max_bands_128[ac->m4ac.sampling_index]; 652 ics->predictor_present = 0; 653 } else { 654 ics->max_sfb = get_bits(gb, 6); 655 ics->num_windows = 1; 656 ics->swb_offset = ff_swb_offset_1024[ac->m4ac.sampling_index]; 657 ics->num_swb = ff_aac_num_swb_1024[ac->m4ac.sampling_index]; 658 ics->tns_max_bands = ff_tns_max_bands_1024[ac->m4ac.sampling_index]; 659 ics->predictor_present = get_bits1(gb); 660 ics->predictor_reset_group = 0; 661 if (ics->predictor_present) { 662 if (ac->m4ac.object_type == AOT_AAC_MAIN) { 663 if (decode_prediction(ac, ics, gb)) { 664 memset(ics, 0, sizeof(IndividualChannelStream)); 665 return -1; 666 } 667 } else if (ac->m4ac.object_type == AOT_AAC_LC) { 668 av_log(ac->avccontext, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n"); 669 memset(ics, 0, sizeof(IndividualChannelStream)); 670 return -1; 671 } else { 672 av_log_missing_feature(ac->avccontext, "Predictor bit set but LTP is", 1); 673 memset(ics, 0, sizeof(IndividualChannelStream)); 674 return -1; 675 } 676 } 677 } 678 679 if (ics->max_sfb > ics->num_swb) { 680 av_log(ac->avccontext, AV_LOG_ERROR, 681 "Number of scalefactor bands in group (%d) exceeds limit (%d).\n", 682 ics->max_sfb, ics->num_swb); 683 memset(ics, 0, sizeof(IndividualChannelStream)); 684 return -1; 685 } 686 687 return 0; 688} 689 690/** 691 * Decode band types (section_data payload); reference: table 4.46. 692 * 693 * @param band_type array of the used band type 694 * @param band_type_run_end array of the last scalefactor band of a band type run 695 * 696 * @return Returns error status. 0 - OK, !0 - error 697 */ 698static int decode_band_types(AACContext *ac, enum BandType band_type[120], 699 int band_type_run_end[120], GetBitContext *gb, 700 IndividualChannelStream *ics) 701{ 702 int g, idx = 0; 703 const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5; 704 for (g = 0; g < ics->num_window_groups; g++) { 705 int k = 0; 706 while (k < ics->max_sfb) { 707 uint8_t sect_end = k; 708 int sect_len_incr; 709 int sect_band_type = get_bits(gb, 4); 710 if (sect_band_type == 12) { 711 av_log(ac->avccontext, AV_LOG_ERROR, "invalid band type\n"); 712 return -1; 713 } 714 while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1) 715 sect_end += sect_len_incr; 716 sect_end += sect_len_incr; 717 if (get_bits_left(gb) < 0) { 718 av_log(ac->avccontext, AV_LOG_ERROR, overread_err); 719 return -1; 720 } 721 if (sect_end > ics->max_sfb) { 722 av_log(ac->avccontext, AV_LOG_ERROR, 723 "Number of bands (%d) exceeds limit (%d).\n", 724 sect_end, ics->max_sfb); 725 return -1; 726 } 727 for (; k < sect_end; k++) { 728 band_type [idx] = sect_band_type; 729 band_type_run_end[idx++] = sect_end; 730 } 731 } 732 } 733 return 0; 734} 735 736/** 737 * Decode scalefactors; reference: table 4.47. 738 * 739 * @param global_gain first scalefactor value as scalefactors are differentially coded 740 * @param band_type array of the used band type 741 * @param band_type_run_end array of the last scalefactor band of a band type run 742 * @param sf array of scalefactors or intensity stereo positions 743 * 744 * @return Returns error status. 0 - OK, !0 - error 745 */ 746static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb, 747 unsigned int global_gain, 748 IndividualChannelStream *ics, 749 enum BandType band_type[120], 750 int band_type_run_end[120]) 751{ 752 const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0); 753 int g, i, idx = 0; 754 int offset[3] = { global_gain, global_gain - 90, 100 }; 755 int noise_flag = 1; 756 static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" }; 757 for (g = 0; g < ics->num_window_groups; g++) { 758 for (i = 0; i < ics->max_sfb;) { 759 int run_end = band_type_run_end[idx]; 760 if (band_type[idx] == ZERO_BT) { 761 for (; i < run_end; i++, idx++) 762 sf[idx] = 0.; 763 } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) { 764 for (; i < run_end; i++, idx++) { 765 offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60; 766 if (offset[2] > 255U) { 767 av_log(ac->avccontext, AV_LOG_ERROR, 768 "%s (%d) out of range.\n", sf_str[2], offset[2]); 769 return -1; 770 } 771 sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300]; 772 } 773 } else if (band_type[idx] == NOISE_BT) { 774 for (; i < run_end; i++, idx++) { 775 if (noise_flag-- > 0) 776 offset[1] += get_bits(gb, 9) - 256; 777 else 778 offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60; 779 if (offset[1] > 255U) { 780 av_log(ac->avccontext, AV_LOG_ERROR, 781 "%s (%d) out of range.\n", sf_str[1], offset[1]); 782 return -1; 783 } 784 sf[idx] = -ff_aac_pow2sf_tab[offset[1] + sf_offset + 100]; 785 } 786 } else { 787 for (; i < run_end; i++, idx++) { 788 offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60; 789 if (offset[0] > 255U) { 790 av_log(ac->avccontext, AV_LOG_ERROR, 791 "%s (%d) out of range.\n", sf_str[0], offset[0]); 792 return -1; 793 } 794 sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset]; 795 } 796 } 797 } 798 } 799 return 0; 800} 801 802/** 803 * Decode pulse data; reference: table 4.7. 804 */ 805static int decode_pulses(Pulse *pulse, GetBitContext *gb, 806 const uint16_t *swb_offset, int num_swb) 807{ 808 int i, pulse_swb; 809 pulse->num_pulse = get_bits(gb, 2) + 1; 810 pulse_swb = get_bits(gb, 6); 811 if (pulse_swb >= num_swb) 812 return -1; 813 pulse->pos[0] = swb_offset[pulse_swb]; 814 pulse->pos[0] += get_bits(gb, 5); 815 if (pulse->pos[0] > 1023) 816 return -1; 817 pulse->amp[0] = get_bits(gb, 4); 818 for (i = 1; i < pulse->num_pulse; i++) { 819 pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1]; 820 if (pulse->pos[i] > 1023) 821 return -1; 822 pulse->amp[i] = get_bits(gb, 4); 823 } 824 return 0; 825} 826 827/** 828 * Decode Temporal Noise Shaping data; reference: table 4.48. 829 * 830 * @return Returns error status. 0 - OK, !0 - error 831 */ 832static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns, 833 GetBitContext *gb, const IndividualChannelStream *ics) 834{ 835 int w, filt, i, coef_len, coef_res, coef_compress; 836 const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE; 837 const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12; 838 for (w = 0; w < ics->num_windows; w++) { 839 if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) { 840 coef_res = get_bits1(gb); 841 842 for (filt = 0; filt < tns->n_filt[w]; filt++) { 843 int tmp2_idx; 844 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8); 845 846 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) { 847 av_log(ac->avccontext, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n", 848 tns->order[w][filt], tns_max_order); 849 tns->order[w][filt] = 0; 850 return -1; 851 } 852 if (tns->order[w][filt]) { 853 tns->direction[w][filt] = get_bits1(gb); 854 coef_compress = get_bits1(gb); 855 coef_len = coef_res + 3 - coef_compress; 856 tmp2_idx = 2 * coef_compress + coef_res; 857 858 for (i = 0; i < tns->order[w][filt]; i++) 859 tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)]; 860 } 861 } 862 } 863 } 864 return 0; 865} 866 867/** 868 * Decode Mid/Side data; reference: table 4.54. 869 * 870 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s; 871 * [1] mask is decoded from bitstream; [2] mask is all 1s; 872 * [3] reserved for scalable AAC 873 */ 874static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb, 875 int ms_present) 876{ 877 int idx; 878 if (ms_present == 1) { 879 for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++) 880 cpe->ms_mask[idx] = get_bits1(gb); 881 } else if (ms_present == 2) { 882 memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0])); 883 } 884} 885 886#ifndef VMUL2 887static inline float *VMUL2(float *dst, const float *v, unsigned idx, 888 const float *scale) 889{ 890 float s = *scale; 891 *dst++ = v[idx & 15] * s; 892 *dst++ = v[idx>>4 & 15] * s; 893 return dst; 894} 895#endif 896 897#ifndef VMUL4 898static inline float *VMUL4(float *dst, const float *v, unsigned idx, 899 const float *scale) 900{ 901 float s = *scale; 902 *dst++ = v[idx & 3] * s; 903 *dst++ = v[idx>>2 & 3] * s; 904 *dst++ = v[idx>>4 & 3] * s; 905 *dst++ = v[idx>>6 & 3] * s; 906 return dst; 907} 908#endif 909 910#ifndef VMUL2S 911static inline float *VMUL2S(float *dst, const float *v, unsigned idx, 912 unsigned sign, const float *scale) 913{ 914 union float754 s0, s1; 915 916 s0.f = s1.f = *scale; 917 s0.i ^= sign >> 1 << 31; 918 s1.i ^= sign << 31; 919 920 *dst++ = v[idx & 15] * s0.f; 921 *dst++ = v[idx>>4 & 15] * s1.f; 922 923 return dst; 924} 925#endif 926 927#ifndef VMUL4S 928static inline float *VMUL4S(float *dst, const float *v, unsigned idx, 929 unsigned sign, const float *scale) 930{ 931 unsigned nz = idx >> 12; 932 union float754 s = { .f = *scale }; 933 union float754 t; 934 935 t.i = s.i ^ (sign & 1<<31); 936 *dst++ = v[idx & 3] * t.f; 937 938 sign <<= nz & 1; nz >>= 1; 939 t.i = s.i ^ (sign & 1<<31); 940 *dst++ = v[idx>>2 & 3] * t.f; 941 942 sign <<= nz & 1; nz >>= 1; 943 t.i = s.i ^ (sign & 1<<31); 944 *dst++ = v[idx>>4 & 3] * t.f; 945 946 sign <<= nz & 1; nz >>= 1; 947 t.i = s.i ^ (sign & 1<<31); 948 *dst++ = v[idx>>6 & 3] * t.f; 949 950 return dst; 951} 952#endif 953 954/** 955 * Decode spectral data; reference: table 4.50. 956 * Dequantize and scale spectral data; reference: 4.6.3.3. 957 * 958 * @param coef array of dequantized, scaled spectral data 959 * @param sf array of scalefactors or intensity stereo positions 960 * @param pulse_present set if pulses are present 961 * @param pulse pointer to pulse data struct 962 * @param band_type array of the used band type 963 * 964 * @return Returns error status. 0 - OK, !0 - error 965 */ 966static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], 967 GetBitContext *gb, const float sf[120], 968 int pulse_present, const Pulse *pulse, 969 const IndividualChannelStream *ics, 970 enum BandType band_type[120]) 971{ 972 int i, k, g, idx = 0; 973 const int c = 1024 / ics->num_windows; 974 const uint16_t *offsets = ics->swb_offset; 975 float *coef_base = coef; 976 int err_idx; 977 978 for (g = 0; g < ics->num_windows; g++) 979 memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb])); 980 981 for (g = 0; g < ics->num_window_groups; g++) { 982 unsigned g_len = ics->group_len[g]; 983 984 for (i = 0; i < ics->max_sfb; i++, idx++) { 985 const unsigned cbt_m1 = band_type[idx] - 1; 986 float *cfo = coef + offsets[i]; 987 int off_len = offsets[i + 1] - offsets[i]; 988 int group; 989 990 if (cbt_m1 >= INTENSITY_BT2 - 1) { 991 for (group = 0; group < g_len; group++, cfo+=128) { 992 memset(cfo, 0, off_len * sizeof(float)); 993 } 994 } else if (cbt_m1 == NOISE_BT - 1) { 995 for (group = 0; group < g_len; group++, cfo+=128) { 996 float scale; 997 float band_energy; 998 999 for (k = 0; k < off_len; k++) { 1000 ac->random_state = lcg_random(ac->random_state); 1001 cfo[k] = ac->random_state; 1002 } 1003 1004 band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len); 1005 scale = sf[idx] / sqrtf(band_energy); 1006 ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len); 1007 } 1008 } else { 1009 const float *vq = ff_aac_codebook_vector_vals[cbt_m1]; 1010 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1]; 1011 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table; 1012 const int cb_size = ff_aac_spectral_sizes[cbt_m1]; 1013 OPEN_READER(re, gb); 1014 1015 switch (cbt_m1 >> 1) { 1016 case 0: 1017 for (group = 0; group < g_len; group++, cfo+=128) { 1018 float *cf = cfo; 1019 int len = off_len; 1020 1021 do { 1022 int code; 1023 unsigned cb_idx; 1024 1025 UPDATE_CACHE(re, gb); 1026 GET_VLC(code, re, gb, vlc_tab, 8, 2); 1027 1028 if (code >= cb_size) { 1029 err_idx = code; 1030 goto err_cb_overflow; 1031 } 1032 1033 cb_idx = cb_vector_idx[code]; 1034 cf = VMUL4(cf, vq, cb_idx, sf + idx); 1035 } while (len -= 4); 1036 } 1037 break; 1038 1039 case 1: 1040 for (group = 0; group < g_len; group++, cfo+=128) { 1041 float *cf = cfo; 1042 int len = off_len; 1043 1044 do { 1045 int code; 1046 unsigned nnz; 1047 unsigned cb_idx; 1048 uint32_t bits; 1049 1050 UPDATE_CACHE(re, gb); 1051 GET_VLC(code, re, gb, vlc_tab, 8, 2); 1052 1053 if (code >= cb_size) { 1054 err_idx = code; 1055 goto err_cb_overflow; 1056 } 1057 1058#if MIN_CACHE_BITS < 20 1059 UPDATE_CACHE(re, gb); 1060#endif 1061 cb_idx = cb_vector_idx[code]; 1062 nnz = cb_idx >> 8 & 15; 1063 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz); 1064 LAST_SKIP_BITS(re, gb, nnz); 1065 cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx); 1066 } while (len -= 4); 1067 } 1068 break; 1069 1070 case 2: 1071 for (group = 0; group < g_len; group++, cfo+=128) { 1072 float *cf = cfo; 1073 int len = off_len; 1074 1075 do { 1076 int code; 1077 unsigned cb_idx; 1078 1079 UPDATE_CACHE(re, gb); 1080 GET_VLC(code, re, gb, vlc_tab, 8, 2); 1081 1082 if (code >= cb_size) { 1083 err_idx = code; 1084 goto err_cb_overflow; 1085 } 1086 1087 cb_idx = cb_vector_idx[code]; 1088 cf = VMUL2(cf, vq, cb_idx, sf + idx); 1089 } while (len -= 2); 1090 } 1091 break; 1092 1093 case 3: 1094 case 4: 1095 for (group = 0; group < g_len; group++, cfo+=128) { 1096 float *cf = cfo; 1097 int len = off_len; 1098 1099 do { 1100 int code; 1101 unsigned nnz; 1102 unsigned cb_idx; 1103 unsigned sign; 1104 1105 UPDATE_CACHE(re, gb); 1106 GET_VLC(code, re, gb, vlc_tab, 8, 2); 1107 1108 if (code >= cb_size) { 1109 err_idx = code; 1110 goto err_cb_overflow; 1111 } 1112 1113 cb_idx = cb_vector_idx[code]; 1114 nnz = cb_idx >> 8 & 15; 1115 sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12); 1116 LAST_SKIP_BITS(re, gb, nnz); 1117 cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx); 1118 } while (len -= 2); 1119 } 1120 break; 1121 1122 default: 1123 for (group = 0; group < g_len; group++, cfo+=128) { 1124 float *cf = cfo; 1125 uint32_t *icf = (uint32_t *) cf; 1126 int len = off_len; 1127 1128 do { 1129 int code; 1130 unsigned nzt, nnz; 1131 unsigned cb_idx; 1132 uint32_t bits; 1133 int j; 1134 1135 UPDATE_CACHE(re, gb); 1136 GET_VLC(code, re, gb, vlc_tab, 8, 2); 1137 1138 if (!code) { 1139 *icf++ = 0; 1140 *icf++ = 0; 1141 continue; 1142 } 1143 1144 if (code >= cb_size) { 1145 err_idx = code; 1146 goto err_cb_overflow; 1147 } 1148 1149 cb_idx = cb_vector_idx[code]; 1150 nnz = cb_idx >> 12; 1151 nzt = cb_idx >> 8; 1152 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz); 1153 LAST_SKIP_BITS(re, gb, nnz); 1154 1155 for (j = 0; j < 2; j++) { 1156 if (nzt & 1<<j) { 1157 uint32_t b; 1158 int n; 1159 /* The total length of escape_sequence must be < 22 bits according 1160 to the specification (i.e. max is 111111110xxxxxxxxxxxx). */ 1161 UPDATE_CACHE(re, gb); 1162 b = GET_CACHE(re, gb); 1163 b = 31 - av_log2(~b); 1164 1165 if (b > 8) { 1166 av_log(ac->avccontext, AV_LOG_ERROR, "error in spectral data, ESC overflow\n"); 1167 return -1; 1168 } 1169 1170#if MIN_CACHE_BITS < 21 1171 LAST_SKIP_BITS(re, gb, b + 1); 1172 UPDATE_CACHE(re, gb); 1173#else 1174 SKIP_BITS(re, gb, b + 1); 1175#endif 1176 b += 4; 1177 n = (1 << b) + SHOW_UBITS(re, gb, b); 1178 LAST_SKIP_BITS(re, gb, b); 1179 *icf++ = cbrt_tab[n] | (bits & 1<<31); 1180 bits <<= 1; 1181 } else { 1182 unsigned v = ((const uint32_t*)vq)[cb_idx & 15]; 1183 *icf++ = (bits & 1<<31) | v; 1184 bits <<= !!v; 1185 } 1186 cb_idx >>= 4; 1187 } 1188 } while (len -= 2); 1189 1190 ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len); 1191 } 1192 } 1193 1194 CLOSE_READER(re, gb); 1195 } 1196 } 1197 coef += g_len << 7; 1198 } 1199 1200 if (pulse_present) { 1201 idx = 0; 1202 for (i = 0; i < pulse->num_pulse; i++) { 1203 float co = coef_base[ pulse->pos[i] ]; 1204 while (offsets[idx + 1] <= pulse->pos[i]) 1205 idx++; 1206 if (band_type[idx] != NOISE_BT && sf[idx]) { 1207 float ico = -pulse->amp[i]; 1208 if (co) { 1209 co /= sf[idx]; 1210 ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico); 1211 } 1212 coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx]; 1213 } 1214 } 1215 } 1216 return 0; 1217 1218err_cb_overflow: 1219 av_log(ac->avccontext, AV_LOG_ERROR, 1220 "Read beyond end of ff_aac_codebook_vectors[%d][]. index %d >= %d\n", 1221 band_type[idx], err_idx, ff_aac_spectral_sizes[band_type[idx]]); 1222 return -1; 1223} 1224 1225static av_always_inline float flt16_round(float pf) 1226{ 1227 union float754 tmp; 1228 tmp.f = pf; 1229 tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U; 1230 return tmp.f; 1231} 1232 1233static av_always_inline float flt16_even(float pf) 1234{ 1235 union float754 tmp; 1236 tmp.f = pf; 1237 tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U; 1238 return tmp.f; 1239} 1240 1241static av_always_inline float flt16_trunc(float pf) 1242{ 1243 union float754 pun; 1244 pun.f = pf; 1245 pun.i &= 0xFFFF0000U; 1246 return pun.f; 1247} 1248 1249static av_always_inline void predict(AACContext *ac, PredictorState *ps, float *coef, 1250 int output_enable) 1251{ 1252 const float a = 0.953125; // 61.0 / 64 1253 const float alpha = 0.90625; // 29.0 / 32 1254 float e0, e1; 1255 float pv; 1256 float k1, k2; 1257 1258 k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0; 1259 k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0; 1260 1261 pv = flt16_round(k1 * ps->r0 + k2 * ps->r1); 1262 if (output_enable) 1263 *coef += pv * ac->sf_scale; 1264 1265 e0 = *coef / ac->sf_scale; 1266 e1 = e0 - k1 * ps->r0; 1267 1268 ps->cor1 = flt16_trunc(alpha * ps->cor1 + ps->r1 * e1); 1269 ps->var1 = flt16_trunc(alpha * ps->var1 + 0.5 * (ps->r1 * ps->r1 + e1 * e1)); 1270 ps->cor0 = flt16_trunc(alpha * ps->cor0 + ps->r0 * e0); 1271 ps->var0 = flt16_trunc(alpha * ps->var0 + 0.5 * (ps->r0 * ps->r0 + e0 * e0)); 1272 1273 ps->r1 = flt16_trunc(a * (ps->r0 - k1 * e0)); 1274 ps->r0 = flt16_trunc(a * e0); 1275} 1276 1277/** 1278 * Apply AAC-Main style frequency domain prediction. 1279 */ 1280static void apply_prediction(AACContext *ac, SingleChannelElement *sce) 1281{ 1282 int sfb, k; 1283 1284 if (!sce->ics.predictor_initialized) { 1285 reset_all_predictors(sce->predictor_state); 1286 sce->ics.predictor_initialized = 1; 1287 } 1288 1289 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) { 1290 for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) { 1291 for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) { 1292 predict(ac, &sce->predictor_state[k], &sce->coeffs[k], 1293 sce->ics.predictor_present && sce->ics.prediction_used[sfb]); 1294 } 1295 } 1296 if (sce->ics.predictor_reset_group) 1297 reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group); 1298 } else 1299 reset_all_predictors(sce->predictor_state); 1300} 1301 1302/** 1303 * Decode an individual_channel_stream payload; reference: table 4.44. 1304 * 1305 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information. 1306 * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.) 1307 * 1308 * @return Returns error status. 0 - OK, !0 - error 1309 */ 1310static int decode_ics(AACContext *ac, SingleChannelElement *sce, 1311 GetBitContext *gb, int common_window, int scale_flag) 1312{ 1313 Pulse pulse; 1314 TemporalNoiseShaping *tns = &sce->tns; 1315 IndividualChannelStream *ics = &sce->ics; 1316 float *out = sce->coeffs; 1317 int global_gain, pulse_present = 0; 1318 1319 /* This assignment is to silence a GCC warning about the variable being used 1320 * uninitialized when in fact it always is. 1321 */ 1322 pulse.num_pulse = 0; 1323 1324 global_gain = get_bits(gb, 8); 1325 1326 if (!common_window && !scale_flag) { 1327 if (decode_ics_info(ac, ics, gb, 0) < 0) 1328 return -1; 1329 } 1330 1331 if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0) 1332 return -1; 1333 if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0) 1334 return -1; 1335 1336 pulse_present = 0; 1337 if (!scale_flag) { 1338 if ((pulse_present = get_bits1(gb))) { 1339 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 1340 av_log(ac->avccontext, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n"); 1341 return -1; 1342 } 1343 if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) { 1344 av_log(ac->avccontext, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n"); 1345 return -1; 1346 } 1347 } 1348 if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics)) 1349 return -1; 1350 if (get_bits1(gb)) { 1351 av_log_missing_feature(ac->avccontext, "SSR", 1); 1352 return -1; 1353 } 1354 } 1355 1356 if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0) 1357 return -1; 1358 1359 if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window) 1360 apply_prediction(ac, sce); 1361 1362 return 0; 1363} 1364 1365/** 1366 * Mid/Side stereo decoding; reference: 4.6.8.1.3. 1367 */ 1368static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe) 1369{ 1370 const IndividualChannelStream *ics = &cpe->ch[0].ics; 1371 float *ch0 = cpe->ch[0].coeffs; 1372 float *ch1 = cpe->ch[1].coeffs; 1373 int g, i, group, idx = 0; 1374 const uint16_t *offsets = ics->swb_offset; 1375 for (g = 0; g < ics->num_window_groups; g++) { 1376 for (i = 0; i < ics->max_sfb; i++, idx++) { 1377 if (cpe->ms_mask[idx] && 1378 cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) { 1379 for (group = 0; group < ics->group_len[g]; group++) { 1380 ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i], 1381 ch1 + group * 128 + offsets[i], 1382 offsets[i+1] - offsets[i]); 1383 } 1384 } 1385 } 1386 ch0 += ics->group_len[g] * 128; 1387 ch1 += ics->group_len[g] * 128; 1388 } 1389} 1390 1391/** 1392 * intensity stereo decoding; reference: 4.6.8.2.3 1393 * 1394 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s; 1395 * [1] mask is decoded from bitstream; [2] mask is all 1s; 1396 * [3] reserved for scalable AAC 1397 */ 1398static void apply_intensity_stereo(ChannelElement *cpe, int ms_present) 1399{ 1400 const IndividualChannelStream *ics = &cpe->ch[1].ics; 1401 SingleChannelElement *sce1 = &cpe->ch[1]; 1402 float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs; 1403 const uint16_t *offsets = ics->swb_offset; 1404 int g, group, i, k, idx = 0; 1405 int c; 1406 float scale; 1407 for (g = 0; g < ics->num_window_groups; g++) { 1408 for (i = 0; i < ics->max_sfb;) { 1409 if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) { 1410 const int bt_run_end = sce1->band_type_run_end[idx]; 1411 for (; i < bt_run_end; i++, idx++) { 1412 c = -1 + 2 * (sce1->band_type[idx] - 14); 1413 if (ms_present) 1414 c *= 1 - 2 * cpe->ms_mask[idx]; 1415 scale = c * sce1->sf[idx]; 1416 for (group = 0; group < ics->group_len[g]; group++) 1417 for (k = offsets[i]; k < offsets[i + 1]; k++) 1418 coef1[group * 128 + k] = scale * coef0[group * 128 + k]; 1419 } 1420 } else { 1421 int bt_run_end = sce1->band_type_run_end[idx]; 1422 idx += bt_run_end - i; 1423 i = bt_run_end; 1424 } 1425 } 1426 coef0 += ics->group_len[g] * 128; 1427 coef1 += ics->group_len[g] * 128; 1428 } 1429} 1430 1431/** 1432 * Decode a channel_pair_element; reference: table 4.4. 1433 * 1434 * @param elem_id Identifies the instance of a syntax element. 1435 * 1436 * @return Returns error status. 0 - OK, !0 - error 1437 */ 1438static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe) 1439{ 1440 int i, ret, common_window, ms_present = 0; 1441 1442 common_window = get_bits1(gb); 1443 if (common_window) { 1444 if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1)) 1445 return -1; 1446 i = cpe->ch[1].ics.use_kb_window[0]; 1447 cpe->ch[1].ics = cpe->ch[0].ics; 1448 cpe->ch[1].ics.use_kb_window[1] = i; 1449 ms_present = get_bits(gb, 2); 1450 if (ms_present == 3) { 1451 av_log(ac->avccontext, AV_LOG_ERROR, "ms_present = 3 is reserved.\n"); 1452 return -1; 1453 } else if (ms_present) 1454 decode_mid_side_stereo(cpe, gb, ms_present); 1455 } 1456 if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0))) 1457 return ret; 1458 if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0))) 1459 return ret; 1460 1461 if (common_window) { 1462 if (ms_present) 1463 apply_mid_side_stereo(ac, cpe); 1464 if (ac->m4ac.object_type == AOT_AAC_MAIN) { 1465 apply_prediction(ac, &cpe->ch[0]); 1466 apply_prediction(ac, &cpe->ch[1]); 1467 } 1468 } 1469 1470 apply_intensity_stereo(cpe, ms_present); 1471 return 0; 1472} 1473 1474/** 1475 * Decode coupling_channel_element; reference: table 4.8. 1476 * 1477 * @param elem_id Identifies the instance of a syntax element. 1478 * 1479 * @return Returns error status. 0 - OK, !0 - error 1480 */ 1481static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che) 1482{ 1483 int num_gain = 0; 1484 int c, g, sfb, ret; 1485 int sign; 1486 float scale; 1487 SingleChannelElement *sce = &che->ch[0]; 1488 ChannelCoupling *coup = &che->coup; 1489 1490 coup->coupling_point = 2 * get_bits1(gb); 1491 coup->num_coupled = get_bits(gb, 3); 1492 for (c = 0; c <= coup->num_coupled; c++) { 1493 num_gain++; 1494 coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE; 1495 coup->id_select[c] = get_bits(gb, 4); 1496 if (coup->type[c] == TYPE_CPE) { 1497 coup->ch_select[c] = get_bits(gb, 2); 1498 if (coup->ch_select[c] == 3) 1499 num_gain++; 1500 } else 1501 coup->ch_select[c] = 2; 1502 } 1503 coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1); 1504 1505 sign = get_bits(gb, 1); 1506 scale = pow(2., pow(2., (int)get_bits(gb, 2) - 3)); 1507 1508 if ((ret = decode_ics(ac, sce, gb, 0, 0))) 1509 return ret; 1510 1511 for (c = 0; c < num_gain; c++) { 1512 int idx = 0; 1513 int cge = 1; 1514 int gain = 0; 1515 float gain_cache = 1.; 1516 if (c) { 1517 cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb); 1518 gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0; 1519 gain_cache = pow(scale, -gain); 1520 } 1521 if (coup->coupling_point == AFTER_IMDCT) { 1522 coup->gain[c][0] = gain_cache; 1523 } else { 1524 for (g = 0; g < sce->ics.num_window_groups; g++) { 1525 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) { 1526 if (sce->band_type[idx] != ZERO_BT) { 1527 if (!cge) { 1528 int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60; 1529 if (t) { 1530 int s = 1; 1531 t = gain += t; 1532 if (sign) { 1533 s -= 2 * (t & 0x1); 1534 t >>= 1; 1535 } 1536 gain_cache = pow(scale, -t) * s; 1537 } 1538 } 1539 coup->gain[c][idx] = gain_cache; 1540 } 1541 } 1542 } 1543 } 1544 } 1545 return 0; 1546} 1547 1548/** 1549 * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53. 1550 * 1551 * @return Returns number of bytes consumed. 1552 */ 1553static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc, 1554 GetBitContext *gb) 1555{ 1556 int i; 1557 int num_excl_chan = 0; 1558 1559 do { 1560 for (i = 0; i < 7; i++) 1561 che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb); 1562 } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb)); 1563 1564 return num_excl_chan / 7; 1565} 1566 1567/** 1568 * Decode dynamic range information; reference: table 4.52. 1569 * 1570 * @param cnt length of TYPE_FIL syntactic element in bytes 1571 * 1572 * @return Returns number of bytes consumed. 1573 */ 1574static int decode_dynamic_range(DynamicRangeControl *che_drc, 1575 GetBitContext *gb, int cnt) 1576{ 1577 int n = 1; 1578 int drc_num_bands = 1; 1579 int i; 1580 1581 /* pce_tag_present? */ 1582 if (get_bits1(gb)) { 1583 che_drc->pce_instance_tag = get_bits(gb, 4); 1584 skip_bits(gb, 4); // tag_reserved_bits 1585 n++; 1586 } 1587 1588 /* excluded_chns_present? */ 1589 if (get_bits1(gb)) { 1590 n += decode_drc_channel_exclusions(che_drc, gb); 1591 } 1592 1593 /* drc_bands_present? */ 1594 if (get_bits1(gb)) { 1595 che_drc->band_incr = get_bits(gb, 4); 1596 che_drc->interpolation_scheme = get_bits(gb, 4); 1597 n++; 1598 drc_num_bands += che_drc->band_incr; 1599 for (i = 0; i < drc_num_bands; i++) { 1600 che_drc->band_top[i] = get_bits(gb, 8); 1601 n++; 1602 } 1603 } 1604 1605 /* prog_ref_level_present? */ 1606 if (get_bits1(gb)) { 1607 che_drc->prog_ref_level = get_bits(gb, 7); 1608 skip_bits1(gb); // prog_ref_level_reserved_bits 1609 n++; 1610 } 1611 1612 for (i = 0; i < drc_num_bands; i++) { 1613 che_drc->dyn_rng_sgn[i] = get_bits1(gb); 1614 che_drc->dyn_rng_ctl[i] = get_bits(gb, 7); 1615 n++; 1616 } 1617 1618 return n; 1619} 1620 1621/** 1622 * Decode extension data (incomplete); reference: table 4.51. 1623 * 1624 * @param cnt length of TYPE_FIL syntactic element in bytes 1625 * 1626 * @return Returns number of bytes consumed 1627 */ 1628static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt, 1629 ChannelElement *che, enum RawDataBlockType elem_type) 1630{ 1631 int crc_flag = 0; 1632 int res = cnt; 1633 switch (get_bits(gb, 4)) { // extension type 1634 case EXT_SBR_DATA_CRC: 1635 crc_flag++; 1636 case EXT_SBR_DATA: 1637 if (!che) { 1638 av_log(ac->avccontext, AV_LOG_ERROR, "SBR was found before the first channel element.\n"); 1639 return res; 1640 } else if (!ac->m4ac.sbr) { 1641 av_log(ac->avccontext, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n"); 1642 skip_bits_long(gb, 8 * cnt - 4); 1643 return res; 1644 } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) { 1645 av_log(ac->avccontext, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n"); 1646 skip_bits_long(gb, 8 * cnt - 4); 1647 return res; 1648 } else { 1649 ac->m4ac.sbr = 1; 1650 } 1651 res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type); 1652 break; 1653 case EXT_DYNAMIC_RANGE: 1654 res = decode_dynamic_range(&ac->che_drc, gb, cnt); 1655 break; 1656 case EXT_FILL: 1657 case EXT_FILL_DATA: 1658 case EXT_DATA_ELEMENT: 1659 default: 1660 skip_bits_long(gb, 8 * cnt - 4); 1661 break; 1662 }; 1663 return res; 1664} 1665 1666/** 1667 * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3. 1668 * 1669 * @param decode 1 if tool is used normally, 0 if tool is used in LTP. 1670 * @param coef spectral coefficients 1671 */ 1672static void apply_tns(float coef[1024], TemporalNoiseShaping *tns, 1673 IndividualChannelStream *ics, int decode) 1674{ 1675 const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb); 1676 int w, filt, m, i; 1677 int bottom, top, order, start, end, size, inc; 1678 float lpc[TNS_MAX_ORDER]; 1679 1680 for (w = 0; w < ics->num_windows; w++) { 1681 bottom = ics->num_swb; 1682 for (filt = 0; filt < tns->n_filt[w]; filt++) { 1683 top = bottom; 1684 bottom = FFMAX(0, top - tns->length[w][filt]); 1685 order = tns->order[w][filt]; 1686 if (order == 0) 1687 continue; 1688 1689 // tns_decode_coef 1690 compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0); 1691 1692 start = ics->swb_offset[FFMIN(bottom, mmm)]; 1693 end = ics->swb_offset[FFMIN( top, mmm)]; 1694 if ((size = end - start) <= 0) 1695 continue; 1696 if (tns->direction[w][filt]) { 1697 inc = -1; 1698 start = end - 1; 1699 } else { 1700 inc = 1; 1701 } 1702 start += w * 128; 1703 1704 // ar filter 1705 for (m = 0; m < size; m++, start += inc) 1706 for (i = 1; i <= FFMIN(m, order); i++) 1707 coef[start] -= coef[start - i * inc] * lpc[i - 1]; 1708 } 1709 } 1710} 1711 1712/** 1713 * Conduct IMDCT and windowing. 1714 */ 1715static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float bias) 1716{ 1717 IndividualChannelStream *ics = &sce->ics; 1718 float *in = sce->coeffs; 1719 float *out = sce->ret; 1720 float *saved = sce->saved; 1721 const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; 1722 const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024; 1723 const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; 1724 float *buf = ac->buf_mdct; 1725 float *temp = ac->temp; 1726 int i; 1727 1728 // imdct 1729 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 1730 if (ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) 1731 av_log(ac->avccontext, AV_LOG_WARNING, 1732 "Transition from an ONLY_LONG or LONG_STOP to an EIGHT_SHORT sequence detected. " 1733 "If you heard an audible artifact, please submit the sample to the FFmpeg developers.\n"); 1734 for (i = 0; i < 1024; i += 128) 1735 ff_imdct_half(&ac->mdct_small, buf + i, in + i); 1736 } else 1737 ff_imdct_half(&ac->mdct, buf, in); 1738 1739 /* window overlapping 1740 * NOTE: To simplify the overlapping code, all 'meaningless' short to long 1741 * and long to short transitions are considered to be short to short 1742 * transitions. This leaves just two cases (long to long and short to short) 1743 * with a little special sauce for EIGHT_SHORT_SEQUENCE. 1744 */ 1745 if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) && 1746 (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) { 1747 ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, bias, 512); 1748 } else { 1749 for (i = 0; i < 448; i++) 1750 out[i] = saved[i] + bias; 1751 1752 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 1753 ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, bias, 64); 1754 ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, bias, 64); 1755 ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, bias, 64); 1756 ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, bias, 64); 1757 ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, bias, 64); 1758 memcpy( out + 448 + 4*128, temp, 64 * sizeof(float)); 1759 } else { 1760 ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, bias, 64); 1761 for (i = 576; i < 1024; i++) 1762 out[i] = buf[i-512] + bias; 1763 } 1764 } 1765 1766 // buffer update 1767 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 1768 for (i = 0; i < 64; i++) 1769 saved[i] = temp[64 + i] - bias; 1770 ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 0, 64); 1771 ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64); 1772 ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64); 1773 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float)); 1774 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { 1775 memcpy( saved, buf + 512, 448 * sizeof(float)); 1776 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float)); 1777 } else { // LONG_STOP or ONLY_LONG 1778 memcpy( saved, buf + 512, 512 * sizeof(float)); 1779 } 1780} 1781 1782/** 1783 * Apply dependent channel coupling (applied before IMDCT). 1784 * 1785 * @param index index into coupling gain array 1786 */ 1787static void apply_dependent_coupling(AACContext *ac, 1788 SingleChannelElement *target, 1789 ChannelElement *cce, int index) 1790{ 1791 IndividualChannelStream *ics = &cce->ch[0].ics; 1792 const uint16_t *offsets = ics->swb_offset; 1793 float *dest = target->coeffs; 1794 const float *src = cce->ch[0].coeffs; 1795 int g, i, group, k, idx = 0; 1796 if (ac->m4ac.object_type == AOT_AAC_LTP) { 1797 av_log(ac->avccontext, AV_LOG_ERROR, 1798 "Dependent coupling is not supported together with LTP\n"); 1799 return; 1800 } 1801 for (g = 0; g < ics->num_window_groups; g++) { 1802 for (i = 0; i < ics->max_sfb; i++, idx++) { 1803 if (cce->ch[0].band_type[idx] != ZERO_BT) { 1804 const float gain = cce->coup.gain[index][idx]; 1805 for (group = 0; group < ics->group_len[g]; group++) { 1806 for (k = offsets[i]; k < offsets[i + 1]; k++) { 1807 // XXX dsputil-ize 1808 dest[group * 128 + k] += gain * src[group * 128 + k]; 1809 } 1810 } 1811 } 1812 } 1813 dest += ics->group_len[g] * 128; 1814 src += ics->group_len[g] * 128; 1815 } 1816} 1817 1818/** 1819 * Apply independent channel coupling (applied after IMDCT). 1820 * 1821 * @param index index into coupling gain array 1822 */ 1823static void apply_independent_coupling(AACContext *ac, 1824 SingleChannelElement *target, 1825 ChannelElement *cce, int index) 1826{ 1827 int i; 1828 const float gain = cce->coup.gain[index][0]; 1829 const float bias = ac->add_bias; 1830 const float *src = cce->ch[0].ret; 1831 float *dest = target->ret; 1832 const int len = 1024 << (ac->m4ac.sbr == 1); 1833 1834 for (i = 0; i < len; i++) 1835 dest[i] += gain * (src[i] - bias); 1836} 1837 1838/** 1839 * channel coupling transformation interface 1840 * 1841 * @param index index into coupling gain array 1842 * @param apply_coupling_method pointer to (in)dependent coupling function 1843 */ 1844static void apply_channel_coupling(AACContext *ac, ChannelElement *cc, 1845 enum RawDataBlockType type, int elem_id, 1846 enum CouplingPoint coupling_point, 1847 void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index)) 1848{ 1849 int i, c; 1850 1851 for (i = 0; i < MAX_ELEM_ID; i++) { 1852 ChannelElement *cce = ac->che[TYPE_CCE][i]; 1853 int index = 0; 1854 1855 if (cce && cce->coup.coupling_point == coupling_point) { 1856 ChannelCoupling *coup = &cce->coup; 1857 1858 for (c = 0; c <= coup->num_coupled; c++) { 1859 if (coup->type[c] == type && coup->id_select[c] == elem_id) { 1860 if (coup->ch_select[c] != 1) { 1861 apply_coupling_method(ac, &cc->ch[0], cce, index); 1862 if (coup->ch_select[c] != 0) 1863 index++; 1864 } 1865 if (coup->ch_select[c] != 2) 1866 apply_coupling_method(ac, &cc->ch[1], cce, index++); 1867 } else 1868 index += 1 + (coup->ch_select[c] == 3); 1869 } 1870 } 1871 } 1872} 1873 1874/** 1875 * Convert spectral data to float samples, applying all supported tools as appropriate. 1876 */ 1877static void spectral_to_sample(AACContext *ac) 1878{ 1879 int i, type; 1880 float imdct_bias = (ac->m4ac.sbr <= 0) ? ac->add_bias : 0.0f; 1881 for (type = 3; type >= 0; type--) { 1882 for (i = 0; i < MAX_ELEM_ID; i++) { 1883 ChannelElement *che = ac->che[type][i]; 1884 if (che) { 1885 if (type <= TYPE_CPE) 1886 apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling); 1887 if (che->ch[0].tns.present) 1888 apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1); 1889 if (che->ch[1].tns.present) 1890 apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1); 1891 if (type <= TYPE_CPE) 1892 apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling); 1893 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) { 1894 imdct_and_windowing(ac, &che->ch[0], imdct_bias); 1895 if (type == TYPE_CPE) { 1896 imdct_and_windowing(ac, &che->ch[1], imdct_bias); 1897 } 1898 if (ac->m4ac.sbr > 0) { 1899 ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret); 1900 } 1901 } 1902 if (type <= TYPE_CCE) 1903 apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling); 1904 } 1905 } 1906 } 1907} 1908 1909static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb) 1910{ 1911 int size; 1912 AACADTSHeaderInfo hdr_info; 1913 1914 size = ff_aac_parse_header(gb, &hdr_info); 1915 if (size > 0) { 1916 if (ac->output_configured != OC_LOCKED && hdr_info.chan_config) { 1917 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID]; 1918 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0])); 1919 ac->m4ac.chan_config = hdr_info.chan_config; 1920 if (set_default_channel_config(ac, new_che_pos, hdr_info.chan_config)) 1921 return -7; 1922 if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME)) 1923 return -7; 1924 } else if (ac->output_configured != OC_LOCKED) { 1925 ac->output_configured = OC_NONE; 1926 } 1927 if (ac->output_configured != OC_LOCKED) 1928 ac->m4ac.sbr = -1; 1929 ac->m4ac.sample_rate = hdr_info.sample_rate; 1930 ac->m4ac.sampling_index = hdr_info.sampling_index; 1931 ac->m4ac.object_type = hdr_info.object_type; 1932 if (!ac->avccontext->sample_rate) 1933 ac->avccontext->sample_rate = hdr_info.sample_rate; 1934 if (hdr_info.num_aac_frames == 1) { 1935 if (!hdr_info.crc_absent) 1936 skip_bits(gb, 16); 1937 } else { 1938 av_log_missing_feature(ac->avccontext, "More than one AAC RDB per ADTS frame is", 0); 1939 return -1; 1940 } 1941 } 1942 return size; 1943} 1944 1945static int aac_decode_frame(AVCodecContext *avccontext, void *data, 1946 int *data_size, AVPacket *avpkt) 1947{ 1948 const uint8_t *buf = avpkt->data; 1949 int buf_size = avpkt->size; 1950 AACContext *ac = avccontext->priv_data; 1951 ChannelElement *che = NULL, *che_prev = NULL; 1952 GetBitContext gb; 1953 enum RawDataBlockType elem_type, elem_type_prev = TYPE_END; 1954 int err, elem_id, data_size_tmp; 1955 int buf_consumed; 1956 int samples = 1024, multiplier; 1957 int buf_offset; 1958 1959 init_get_bits(&gb, buf, buf_size * 8); 1960 1961 if (show_bits(&gb, 12) == 0xfff) { 1962 if (parse_adts_frame_header(ac, &gb) < 0) { 1963 av_log(avccontext, AV_LOG_ERROR, "Error decoding AAC frame header.\n"); 1964 return -1; 1965 } 1966 if (ac->m4ac.sampling_index > 12) { 1967 av_log(ac->avccontext, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index); 1968 return -1; 1969 } 1970 } 1971 1972 // parse 1973 while ((elem_type = get_bits(&gb, 3)) != TYPE_END) { 1974 elem_id = get_bits(&gb, 4); 1975 1976 if (elem_type < TYPE_DSE && !(che=get_che(ac, elem_type, elem_id))) { 1977 av_log(ac->avccontext, AV_LOG_ERROR, "channel element %d.%d is not allocated\n", elem_type, elem_id); 1978 return -1; 1979 } 1980 1981 switch (elem_type) { 1982 1983 case TYPE_SCE: 1984 err = decode_ics(ac, &che->ch[0], &gb, 0, 0); 1985 break; 1986 1987 case TYPE_CPE: 1988 err = decode_cpe(ac, &gb, che); 1989 break; 1990 1991 case TYPE_CCE: 1992 err = decode_cce(ac, &gb, che); 1993 break; 1994 1995 case TYPE_LFE: 1996 err = decode_ics(ac, &che->ch[0], &gb, 0, 0); 1997 break; 1998 1999 case TYPE_DSE: 2000 err = skip_data_stream_element(ac, &gb); 2001 break; 2002 2003 case TYPE_PCE: { 2004 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID]; 2005 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0])); 2006 if ((err = decode_pce(ac, new_che_pos, &gb))) 2007 break; 2008 if (ac->output_configured > OC_TRIAL_PCE) 2009 av_log(avccontext, AV_LOG_ERROR, 2010 "Not evaluating a further program_config_element as this construct is dubious at best.\n"); 2011 else 2012 err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE); 2013 break; 2014 } 2015 2016 case TYPE_FIL: 2017 if (elem_id == 15) 2018 elem_id += get_bits(&gb, 8) - 1; 2019 if (get_bits_left(&gb) < 8 * elem_id) { 2020 av_log(avccontext, AV_LOG_ERROR, overread_err); 2021 return -1; 2022 } 2023 while (elem_id > 0) 2024 elem_id -= decode_extension_payload(ac, &gb, elem_id, che_prev, elem_type_prev); 2025 err = 0; /* FIXME */ 2026 break; 2027 2028 default: 2029 err = -1; /* should not happen, but keeps compiler happy */ 2030 break; 2031 } 2032 2033 che_prev = che; 2034 elem_type_prev = elem_type; 2035 2036 if (err) 2037 return err; 2038 2039 if (get_bits_left(&gb) < 3) { 2040 av_log(avccontext, AV_LOG_ERROR, overread_err); 2041 return -1; 2042 } 2043 } 2044 2045 spectral_to_sample(ac); 2046 2047 multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0; 2048 samples <<= multiplier; 2049 if (ac->output_configured < OC_LOCKED) { 2050 avccontext->sample_rate = ac->m4ac.sample_rate << multiplier; 2051 avccontext->frame_size = samples; 2052 } 2053 2054 data_size_tmp = samples * avccontext->channels * sizeof(int16_t); 2055 if (*data_size < data_size_tmp) { 2056 av_log(avccontext, AV_LOG_ERROR, 2057 "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n", 2058 *data_size, data_size_tmp); 2059 return -1; 2060 } 2061 *data_size = data_size_tmp; 2062 2063 ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avccontext->channels); 2064 2065 if (ac->output_configured) 2066 ac->output_configured = OC_LOCKED; 2067 2068 buf_consumed = (get_bits_count(&gb) + 7) >> 3; 2069 for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++) 2070 if (buf[buf_offset]) 2071 break; 2072 2073 return buf_size > buf_offset ? buf_consumed : buf_size; 2074} 2075 2076static av_cold int aac_decode_close(AVCodecContext *avccontext) 2077{ 2078 AACContext *ac = avccontext->priv_data; 2079 int i, type; 2080 2081 for (i = 0; i < MAX_ELEM_ID; i++) { 2082 for (type = 0; type < 4; type++) { 2083 if (ac->che[type][i]) 2084 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr); 2085 av_freep(&ac->che[type][i]); 2086 } 2087 } 2088 2089 ff_mdct_end(&ac->mdct); 2090 ff_mdct_end(&ac->mdct_small); 2091 return 0; 2092} 2093 2094AVCodec aac_decoder = { 2095 "aac", 2096 AVMEDIA_TYPE_AUDIO, 2097 CODEC_ID_AAC, 2098 sizeof(AACContext), 2099 aac_decode_init, 2100 NULL, 2101 aac_decode_close, 2102 aac_decode_frame, 2103 .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"), 2104 .sample_fmts = (const enum SampleFormat[]) { 2105 SAMPLE_FMT_S16,SAMPLE_FMT_NONE 2106 }, 2107 .channel_layouts = aac_channel_layout, 2108}; 2109