1/* 2 * AAC decoder 3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org ) 4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com ) 5 * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com> 6 * 7 * AAC LATM decoder 8 * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz> 9 * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net> 10 * 11 * This file is part of FFmpeg. 12 * 13 * FFmpeg is free software; you can redistribute it and/or 14 * modify it under the terms of the GNU Lesser General Public 15 * License as published by the Free Software Foundation; either 16 * version 2.1 of the License, or (at your option) any later version. 17 * 18 * FFmpeg is distributed in the hope that it will be useful, 19 * but WITHOUT ANY WARRANTY; without even the implied warranty of 20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 * Lesser General Public License for more details. 22 * 23 * You should have received a copy of the GNU Lesser General Public 24 * License along with FFmpeg; if not, write to the Free Software 25 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 26 */ 27 28/** 29 * @file 30 * AAC decoder 31 * @author Oded Shimon ( ods15 ods15 dyndns org ) 32 * @author Maxim Gavrilov ( maxim.gavrilov gmail com ) 33 */ 34 35/* 36 * supported tools 37 * 38 * Support? 
Name 39 * N (code in SoC repo) gain control 40 * Y block switching 41 * Y window shapes - standard 42 * N window shapes - Low Delay 43 * Y filterbank - standard 44 * N (code in SoC repo) filterbank - Scalable Sample Rate 45 * Y Temporal Noise Shaping 46 * Y Long Term Prediction 47 * Y intensity stereo 48 * Y channel coupling 49 * Y frequency domain prediction 50 * Y Perceptual Noise Substitution 51 * Y Mid/Side stereo 52 * N Scalable Inverse AAC Quantization 53 * N Frequency Selective Switch 54 * N upsampling filter 55 * Y quantization & coding - AAC 56 * N quantization & coding - TwinVQ 57 * N quantization & coding - BSAC 58 * N AAC Error Resilience tools 59 * N Error Resilience payload syntax 60 * N Error Protection tool 61 * N CELP 62 * N Silence Compression 63 * N HVXC 64 * N HVXC 4kbits/s VR 65 * N Structured Audio tools 66 * N Structured Audio Sample Bank Format 67 * N MIDI 68 * N Harmonic and Individual Lines plus Noise 69 * N Text-To-Speech Interface 70 * Y Spectral Band Replication 71 * Y (not in this code) Layer-1 72 * Y (not in this code) Layer-2 73 * Y (not in this code) Layer-3 74 * N SinuSoidal Coding (Transient, Sinusoid, Noise) 75 * Y Parametric Stereo 76 * N Direct Stream Transfer 77 * Y Enhanced AAC Low Delay (ER AAC ELD) 78 * 79 * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication. 80 * - HE AAC v2 comprises LC AAC with Spectral Band Replication and 81 Parametric Stereo. 
82 */ 83 84#include "libavutil/float_dsp.h" 85#include "libavutil/opt.h" 86#include "avcodec.h" 87#include "internal.h" 88#include "get_bits.h" 89#include "fft.h" 90#include "fmtconvert.h" 91#include "lpc.h" 92#include "kbdwin.h" 93#include "sinewin.h" 94 95#include "aac.h" 96#include "aactab.h" 97#include "aacdectab.h" 98#include "cbrt_tablegen.h" 99#include "sbr.h" 100#include "aacsbr.h" 101#include "mpeg4audio.h" 102#include "aacadtsdec.h" 103#include "libavutil/intfloat.h" 104 105#include <assert.h> 106#include <errno.h> 107#include <math.h> 108#include <stdint.h> 109#include <string.h> 110 111#if ARCH_ARM 112# include "arm/aac.h" 113#elif ARCH_MIPS 114# include "mips/aacdec_mips.h" 115#endif 116 117static VLC vlc_scalefactors; 118static VLC vlc_spectral[11]; 119 120static int output_configure(AACContext *ac, 121 uint8_t layout_map[MAX_ELEM_ID*4][3], int tags, 122 enum OCStatus oc_type, int get_new_frame); 123 124#define overread_err "Input buffer exhausted before END element found\n" 125 126static int count_channels(uint8_t (*layout)[3], int tags) 127{ 128 int i, sum = 0; 129 for (i = 0; i < tags; i++) { 130 int syn_ele = layout[i][0]; 131 int pos = layout[i][2]; 132 sum += (1 + (syn_ele == TYPE_CPE)) * 133 (pos != AAC_CHANNEL_OFF && pos != AAC_CHANNEL_CC); 134 } 135 return sum; 136} 137 138/** 139 * Check for the channel element in the current channel position configuration. 140 * If it exists, make sure the appropriate element is allocated and map the 141 * channel order to match the internal FFmpeg channel layout. 142 * 143 * @param che_pos current channel position configuration 144 * @param type channel element type 145 * @param id channel element id 146 * @param channels count of the number of channels in the configuration 147 * 148 * @return Returns error status. 
0 - OK, !0 - error 149 */ 150static av_cold int che_configure(AACContext *ac, 151 enum ChannelPosition che_pos, 152 int type, int id, int *channels) 153{ 154 if (*channels >= MAX_CHANNELS) 155 return AVERROR_INVALIDDATA; 156 if (che_pos) { 157 if (!ac->che[type][id]) { 158 if (!(ac->che[type][id] = av_mallocz(sizeof(ChannelElement)))) 159 return AVERROR(ENOMEM); 160 ff_aac_sbr_ctx_init(ac, &ac->che[type][id]->sbr); 161 } 162 if (type != TYPE_CCE) { 163 if (*channels >= MAX_CHANNELS - (type == TYPE_CPE || (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1))) { 164 av_log(ac->avctx, AV_LOG_ERROR, "Too many channels\n"); 165 return AVERROR_INVALIDDATA; 166 } 167 ac->output_element[(*channels)++] = &ac->che[type][id]->ch[0]; 168 if (type == TYPE_CPE || 169 (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1)) { 170 ac->output_element[(*channels)++] = &ac->che[type][id]->ch[1]; 171 } 172 } 173 } else { 174 if (ac->che[type][id]) 175 ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr); 176 av_freep(&ac->che[type][id]); 177 } 178 return 0; 179} 180 181static int frame_configure_elements(AVCodecContext *avctx) 182{ 183 AACContext *ac = avctx->priv_data; 184 int type, id, ch, ret; 185 186 /* set channel pointers to internal buffers by default */ 187 for (type = 0; type < 4; type++) { 188 for (id = 0; id < MAX_ELEM_ID; id++) { 189 ChannelElement *che = ac->che[type][id]; 190 if (che) { 191 che->ch[0].ret = che->ch[0].ret_buf; 192 che->ch[1].ret = che->ch[1].ret_buf; 193 } 194 } 195 } 196 197 /* get output buffer */ 198 av_frame_unref(ac->frame); 199 if (!avctx->channels) 200 return 1; 201 202 ac->frame->nb_samples = 2048; 203 if ((ret = ff_get_buffer(avctx, ac->frame, 0)) < 0) 204 return ret; 205 206 /* map output channel pointers to AVFrame data */ 207 for (ch = 0; ch < avctx->channels; ch++) { 208 if (ac->output_element[ch]) 209 ac->output_element[ch]->ret = (float *)ac->frame->extended_data[ch]; 210 } 211 212 return 0; 213} 214 215struct elem_to_channel { 216 uint64_t av_position; 217 uint8_t 
syn_ele; 218 uint8_t elem_id; 219 uint8_t aac_position; 220}; 221 222static int assign_pair(struct elem_to_channel e2c_vec[MAX_ELEM_ID], 223 uint8_t (*layout_map)[3], int offset, uint64_t left, 224 uint64_t right, int pos) 225{ 226 if (layout_map[offset][0] == TYPE_CPE) { 227 e2c_vec[offset] = (struct elem_to_channel) { 228 .av_position = left | right, 229 .syn_ele = TYPE_CPE, 230 .elem_id = layout_map[offset][1], 231 .aac_position = pos 232 }; 233 return 1; 234 } else { 235 e2c_vec[offset] = (struct elem_to_channel) { 236 .av_position = left, 237 .syn_ele = TYPE_SCE, 238 .elem_id = layout_map[offset][1], 239 .aac_position = pos 240 }; 241 e2c_vec[offset + 1] = (struct elem_to_channel) { 242 .av_position = right, 243 .syn_ele = TYPE_SCE, 244 .elem_id = layout_map[offset + 1][1], 245 .aac_position = pos 246 }; 247 return 2; 248 } 249} 250 251static int count_paired_channels(uint8_t (*layout_map)[3], int tags, int pos, 252 int *current) 253{ 254 int num_pos_channels = 0; 255 int first_cpe = 0; 256 int sce_parity = 0; 257 int i; 258 for (i = *current; i < tags; i++) { 259 if (layout_map[i][2] != pos) 260 break; 261 if (layout_map[i][0] == TYPE_CPE) { 262 if (sce_parity) { 263 if (pos == AAC_CHANNEL_FRONT && !first_cpe) { 264 sce_parity = 0; 265 } else { 266 return -1; 267 } 268 } 269 num_pos_channels += 2; 270 first_cpe = 1; 271 } else { 272 num_pos_channels++; 273 sce_parity ^= 1; 274 } 275 } 276 if (sce_parity && 277 ((pos == AAC_CHANNEL_FRONT && first_cpe) || pos == AAC_CHANNEL_SIDE)) 278 return -1; 279 *current = i; 280 return num_pos_channels; 281} 282 283static uint64_t sniff_channel_order(uint8_t (*layout_map)[3], int tags) 284{ 285 int i, n, total_non_cc_elements; 286 struct elem_to_channel e2c_vec[4 * MAX_ELEM_ID] = { { 0 } }; 287 int num_front_channels, num_side_channels, num_back_channels; 288 uint64_t layout; 289 290 if (FF_ARRAY_ELEMS(e2c_vec) < tags) 291 return 0; 292 293 i = 0; 294 num_front_channels = 295 count_paired_channels(layout_map, tags, 
AAC_CHANNEL_FRONT, &i); 296 if (num_front_channels < 0) 297 return 0; 298 num_side_channels = 299 count_paired_channels(layout_map, tags, AAC_CHANNEL_SIDE, &i); 300 if (num_side_channels < 0) 301 return 0; 302 num_back_channels = 303 count_paired_channels(layout_map, tags, AAC_CHANNEL_BACK, &i); 304 if (num_back_channels < 0) 305 return 0; 306 307 i = 0; 308 if (num_front_channels & 1) { 309 e2c_vec[i] = (struct elem_to_channel) { 310 .av_position = AV_CH_FRONT_CENTER, 311 .syn_ele = TYPE_SCE, 312 .elem_id = layout_map[i][1], 313 .aac_position = AAC_CHANNEL_FRONT 314 }; 315 i++; 316 num_front_channels--; 317 } 318 if (num_front_channels >= 4) { 319 i += assign_pair(e2c_vec, layout_map, i, 320 AV_CH_FRONT_LEFT_OF_CENTER, 321 AV_CH_FRONT_RIGHT_OF_CENTER, 322 AAC_CHANNEL_FRONT); 323 num_front_channels -= 2; 324 } 325 if (num_front_channels >= 2) { 326 i += assign_pair(e2c_vec, layout_map, i, 327 AV_CH_FRONT_LEFT, 328 AV_CH_FRONT_RIGHT, 329 AAC_CHANNEL_FRONT); 330 num_front_channels -= 2; 331 } 332 while (num_front_channels >= 2) { 333 i += assign_pair(e2c_vec, layout_map, i, 334 UINT64_MAX, 335 UINT64_MAX, 336 AAC_CHANNEL_FRONT); 337 num_front_channels -= 2; 338 } 339 340 if (num_side_channels >= 2) { 341 i += assign_pair(e2c_vec, layout_map, i, 342 AV_CH_SIDE_LEFT, 343 AV_CH_SIDE_RIGHT, 344 AAC_CHANNEL_FRONT); 345 num_side_channels -= 2; 346 } 347 while (num_side_channels >= 2) { 348 i += assign_pair(e2c_vec, layout_map, i, 349 UINT64_MAX, 350 UINT64_MAX, 351 AAC_CHANNEL_SIDE); 352 num_side_channels -= 2; 353 } 354 355 while (num_back_channels >= 4) { 356 i += assign_pair(e2c_vec, layout_map, i, 357 UINT64_MAX, 358 UINT64_MAX, 359 AAC_CHANNEL_BACK); 360 num_back_channels -= 2; 361 } 362 if (num_back_channels >= 2) { 363 i += assign_pair(e2c_vec, layout_map, i, 364 AV_CH_BACK_LEFT, 365 AV_CH_BACK_RIGHT, 366 AAC_CHANNEL_BACK); 367 num_back_channels -= 2; 368 } 369 if (num_back_channels) { 370 e2c_vec[i] = (struct elem_to_channel) { 371 .av_position = AV_CH_BACK_CENTER, 
372 .syn_ele = TYPE_SCE, 373 .elem_id = layout_map[i][1], 374 .aac_position = AAC_CHANNEL_BACK 375 }; 376 i++; 377 num_back_channels--; 378 } 379 380 if (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) { 381 e2c_vec[i] = (struct elem_to_channel) { 382 .av_position = AV_CH_LOW_FREQUENCY, 383 .syn_ele = TYPE_LFE, 384 .elem_id = layout_map[i][1], 385 .aac_position = AAC_CHANNEL_LFE 386 }; 387 i++; 388 } 389 while (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) { 390 e2c_vec[i] = (struct elem_to_channel) { 391 .av_position = UINT64_MAX, 392 .syn_ele = TYPE_LFE, 393 .elem_id = layout_map[i][1], 394 .aac_position = AAC_CHANNEL_LFE 395 }; 396 i++; 397 } 398 399 // Must choose a stable sort 400 total_non_cc_elements = n = i; 401 do { 402 int next_n = 0; 403 for (i = 1; i < n; i++) 404 if (e2c_vec[i - 1].av_position > e2c_vec[i].av_position) { 405 FFSWAP(struct elem_to_channel, e2c_vec[i - 1], e2c_vec[i]); 406 next_n = i; 407 } 408 n = next_n; 409 } while (n > 0); 410 411 layout = 0; 412 for (i = 0; i < total_non_cc_elements; i++) { 413 layout_map[i][0] = e2c_vec[i].syn_ele; 414 layout_map[i][1] = e2c_vec[i].elem_id; 415 layout_map[i][2] = e2c_vec[i].aac_position; 416 if (e2c_vec[i].av_position != UINT64_MAX) { 417 layout |= e2c_vec[i].av_position; 418 } 419 } 420 421 return layout; 422} 423 424/** 425 * Save current output configuration if and only if it has been locked. 426 */ 427static void push_output_configuration(AACContext *ac) { 428 if (ac->oc[1].status == OC_LOCKED) { 429 ac->oc[0] = ac->oc[1]; 430 } 431 ac->oc[1].status = OC_NONE; 432} 433 434/** 435 * Restore the previous output configuration if and only if the current 436 * configuration is unlocked. 
437 */ 438static void pop_output_configuration(AACContext *ac) { 439 if (ac->oc[1].status != OC_LOCKED && ac->oc[0].status != OC_NONE) { 440 ac->oc[1] = ac->oc[0]; 441 ac->avctx->channels = ac->oc[1].channels; 442 ac->avctx->channel_layout = ac->oc[1].channel_layout; 443 output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags, 444 ac->oc[1].status, 0); 445 } 446} 447 448/** 449 * Configure output channel order based on the current program 450 * configuration element. 451 * 452 * @return Returns error status. 0 - OK, !0 - error 453 */ 454static int output_configure(AACContext *ac, 455 uint8_t layout_map[MAX_ELEM_ID * 4][3], int tags, 456 enum OCStatus oc_type, int get_new_frame) 457{ 458 AVCodecContext *avctx = ac->avctx; 459 int i, channels = 0, ret; 460 uint64_t layout = 0; 461 462 if (ac->oc[1].layout_map != layout_map) { 463 memcpy(ac->oc[1].layout_map, layout_map, tags * sizeof(layout_map[0])); 464 ac->oc[1].layout_map_tags = tags; 465 } 466 467 // Try to sniff a reasonable channel order, otherwise output the 468 // channels in the order the PCE declared them. 469 if (avctx->request_channel_layout != AV_CH_LAYOUT_NATIVE) 470 layout = sniff_channel_order(layout_map, tags); 471 for (i = 0; i < tags; i++) { 472 int type = layout_map[i][0]; 473 int id = layout_map[i][1]; 474 int position = layout_map[i][2]; 475 // Allocate or free elements depending on if they are in the 476 // current program configuration. 
477 ret = che_configure(ac, position, type, id, &channels); 478 if (ret < 0) 479 return ret; 480 } 481 if (ac->oc[1].m4ac.ps == 1 && channels == 2) { 482 if (layout == AV_CH_FRONT_CENTER) { 483 layout = AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT; 484 } else { 485 layout = 0; 486 } 487 } 488 489 memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0])); 490 if (layout) avctx->channel_layout = layout; 491 ac->oc[1].channel_layout = layout; 492 avctx->channels = ac->oc[1].channels = channels; 493 ac->oc[1].status = oc_type; 494 495 if (get_new_frame) { 496 if ((ret = frame_configure_elements(ac->avctx)) < 0) 497 return ret; 498 } 499 500 return 0; 501} 502 503static void flush(AVCodecContext *avctx) 504{ 505 AACContext *ac= avctx->priv_data; 506 int type, i, j; 507 508 for (type = 3; type >= 0; type--) { 509 for (i = 0; i < MAX_ELEM_ID; i++) { 510 ChannelElement *che = ac->che[type][i]; 511 if (che) { 512 for (j = 0; j <= 1; j++) { 513 memset(che->ch[j].saved, 0, sizeof(che->ch[j].saved)); 514 } 515 } 516 } 517 } 518} 519 520/** 521 * Set up channel positions based on a default channel configuration 522 * as specified in table 1.17. 523 * 524 * @return Returns error status. 0 - OK, !0 - error 525 */ 526static int set_default_channel_config(AVCodecContext *avctx, 527 uint8_t (*layout_map)[3], 528 int *tags, 529 int channel_config) 530{ 531 if (channel_config < 1 || channel_config > 7) { 532 av_log(avctx, AV_LOG_ERROR, 533 "invalid default channel configuration (%d)\n", 534 channel_config); 535 return AVERROR_INVALIDDATA; 536 } 537 *tags = tags_per_config[channel_config]; 538 memcpy(layout_map, aac_channel_layout_map[channel_config - 1], 539 *tags * sizeof(*layout_map)); 540 541 /* 542 * AAC specification has 7.1(wide) as a default layout for 8-channel streams. 
543 * However, at least Nero AAC encoder encodes 7.1 streams using the default 544 * channel config 7, mapping the side channels of the original audio stream 545 * to the second AAC_CHANNEL_FRONT pair in the AAC stream. Similarly, e.g. FAAD 546 * decodes the second AAC_CHANNEL_FRONT pair as side channels, therefore decoding 547 * the incorrect streams as if they were correct (and as the encoder intended). 548 * 549 * As actual intended 7.1(wide) streams are very rare, default to assuming a 550 * 7.1 layout was intended. 551 */ 552 if (channel_config == 7 && avctx->strict_std_compliance < FF_COMPLIANCE_STRICT) { 553 av_log(avctx, AV_LOG_INFO, "Assuming an incorrectly encoded 7.1 channel layout" 554 " instead of a spec-compliant 7.1(wide) layout, use -strict %d to decode" 555 " according to the specification instead.\n", FF_COMPLIANCE_STRICT); 556 layout_map[2][2] = AAC_CHANNEL_SIDE; 557 } 558 559 return 0; 560} 561 562static ChannelElement *get_che(AACContext *ac, int type, int elem_id) 563{ 564 /* For PCE based channel configurations map the channels solely based 565 * on tags. */ 566 if (!ac->oc[1].m4ac.chan_config) { 567 return ac->tag_che_map[type][elem_id]; 568 } 569 // Allow single CPE stereo files to be signalled with mono configuration. 
570 if (!ac->tags_mapped && type == TYPE_CPE && 571 ac->oc[1].m4ac.chan_config == 1) { 572 uint8_t layout_map[MAX_ELEM_ID*4][3]; 573 int layout_map_tags; 574 push_output_configuration(ac); 575 576 av_log(ac->avctx, AV_LOG_DEBUG, "mono with CPE\n"); 577 578 if (set_default_channel_config(ac->avctx, layout_map, 579 &layout_map_tags, 2) < 0) 580 return NULL; 581 if (output_configure(ac, layout_map, layout_map_tags, 582 OC_TRIAL_FRAME, 1) < 0) 583 return NULL; 584 585 ac->oc[1].m4ac.chan_config = 2; 586 ac->oc[1].m4ac.ps = 0; 587 } 588 // And vice-versa 589 if (!ac->tags_mapped && type == TYPE_SCE && 590 ac->oc[1].m4ac.chan_config == 2) { 591 uint8_t layout_map[MAX_ELEM_ID * 4][3]; 592 int layout_map_tags; 593 push_output_configuration(ac); 594 595 av_log(ac->avctx, AV_LOG_DEBUG, "stereo with SCE\n"); 596 597 if (set_default_channel_config(ac->avctx, layout_map, 598 &layout_map_tags, 1) < 0) 599 return NULL; 600 if (output_configure(ac, layout_map, layout_map_tags, 601 OC_TRIAL_FRAME, 1) < 0) 602 return NULL; 603 604 ac->oc[1].m4ac.chan_config = 1; 605 if (ac->oc[1].m4ac.sbr) 606 ac->oc[1].m4ac.ps = -1; 607 } 608 /* For indexed channel configurations map the channels solely based 609 * on position. */ 610 switch (ac->oc[1].m4ac.chan_config) { 611 case 7: 612 if (ac->tags_mapped == 3 && type == TYPE_CPE) { 613 ac->tags_mapped++; 614 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2]; 615 } 616 case 6: 617 /* Some streams incorrectly code 5.1 audio as 618 * SCE[0] CPE[0] CPE[1] SCE[1] 619 * instead of 620 * SCE[0] CPE[0] CPE[1] LFE[0]. 
621 * If we seem to have encountered such a stream, transfer 622 * the LFE[0] element to the SCE[1]'s mapping */ 623 if (ac->tags_mapped == tags_per_config[ac->oc[1].m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) { 624 ac->tags_mapped++; 625 return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0]; 626 } 627 case 5: 628 if (ac->tags_mapped == 2 && type == TYPE_CPE) { 629 ac->tags_mapped++; 630 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1]; 631 } 632 case 4: 633 if (ac->tags_mapped == 2 && 634 ac->oc[1].m4ac.chan_config == 4 && 635 type == TYPE_SCE) { 636 ac->tags_mapped++; 637 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1]; 638 } 639 case 3: 640 case 2: 641 if (ac->tags_mapped == (ac->oc[1].m4ac.chan_config != 2) && 642 type == TYPE_CPE) { 643 ac->tags_mapped++; 644 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0]; 645 } else if (ac->oc[1].m4ac.chan_config == 2) { 646 return NULL; 647 } 648 case 1: 649 if (!ac->tags_mapped && type == TYPE_SCE) { 650 ac->tags_mapped++; 651 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0]; 652 } 653 default: 654 return NULL; 655 } 656} 657 658/** 659 * Decode an array of 4 bit element IDs, optionally interleaved with a 660 * stereo/mono switching bit. 
661 * 662 * @param type speaker type/position for these channels 663 */ 664static void decode_channel_map(uint8_t layout_map[][3], 665 enum ChannelPosition type, 666 GetBitContext *gb, int n) 667{ 668 while (n--) { 669 enum RawDataBlockType syn_ele; 670 switch (type) { 671 case AAC_CHANNEL_FRONT: 672 case AAC_CHANNEL_BACK: 673 case AAC_CHANNEL_SIDE: 674 syn_ele = get_bits1(gb); 675 break; 676 case AAC_CHANNEL_CC: 677 skip_bits1(gb); 678 syn_ele = TYPE_CCE; 679 break; 680 case AAC_CHANNEL_LFE: 681 syn_ele = TYPE_LFE; 682 break; 683 default: 684 av_assert0(0); 685 } 686 layout_map[0][0] = syn_ele; 687 layout_map[0][1] = get_bits(gb, 4); 688 layout_map[0][2] = type; 689 layout_map++; 690 } 691} 692 693/** 694 * Decode program configuration element; reference: table 4.2. 695 * 696 * @return Returns error status. 0 - OK, !0 - error 697 */ 698static int decode_pce(AVCodecContext *avctx, MPEG4AudioConfig *m4ac, 699 uint8_t (*layout_map)[3], 700 GetBitContext *gb) 701{ 702 int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc; 703 int sampling_index; 704 int comment_len; 705 int tags; 706 707 skip_bits(gb, 2); // object_type 708 709 sampling_index = get_bits(gb, 4); 710 if (m4ac->sampling_index != sampling_index) 711 av_log(avctx, AV_LOG_WARNING, 712 "Sample rate index in program config element does not " 713 "match the sample rate index configured by the container.\n"); 714 715 num_front = get_bits(gb, 4); 716 num_side = get_bits(gb, 4); 717 num_back = get_bits(gb, 4); 718 num_lfe = get_bits(gb, 2); 719 num_assoc_data = get_bits(gb, 3); 720 num_cc = get_bits(gb, 4); 721 722 if (get_bits1(gb)) 723 skip_bits(gb, 4); // mono_mixdown_tag 724 if (get_bits1(gb)) 725 skip_bits(gb, 4); // stereo_mixdown_tag 726 727 if (get_bits1(gb)) 728 skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround 729 730 if (get_bits_left(gb) < 4 * (num_front + num_side + num_back + num_lfe + num_assoc_data + num_cc)) { 731 av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err); 
732 return -1; 733 } 734 decode_channel_map(layout_map , AAC_CHANNEL_FRONT, gb, num_front); 735 tags = num_front; 736 decode_channel_map(layout_map + tags, AAC_CHANNEL_SIDE, gb, num_side); 737 tags += num_side; 738 decode_channel_map(layout_map + tags, AAC_CHANNEL_BACK, gb, num_back); 739 tags += num_back; 740 decode_channel_map(layout_map + tags, AAC_CHANNEL_LFE, gb, num_lfe); 741 tags += num_lfe; 742 743 skip_bits_long(gb, 4 * num_assoc_data); 744 745 decode_channel_map(layout_map + tags, AAC_CHANNEL_CC, gb, num_cc); 746 tags += num_cc; 747 748 align_get_bits(gb); 749 750 /* comment field, first byte is length */ 751 comment_len = get_bits(gb, 8) * 8; 752 if (get_bits_left(gb) < comment_len) { 753 av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err); 754 return AVERROR_INVALIDDATA; 755 } 756 skip_bits_long(gb, comment_len); 757 return tags; 758} 759 760/** 761 * Decode GA "General Audio" specific configuration; reference: table 4.1. 762 * 763 * @param ac pointer to AACContext, may be null 764 * @param avctx pointer to AVCCodecContext, used for logging 765 * 766 * @return Returns error status. 
0 - OK, !0 - error 767 */ 768static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx, 769 GetBitContext *gb, 770 MPEG4AudioConfig *m4ac, 771 int channel_config) 772{ 773 int extension_flag, ret, ep_config, res_flags; 774 uint8_t layout_map[MAX_ELEM_ID*4][3]; 775 int tags = 0; 776 777 if (get_bits1(gb)) { // frameLengthFlag 778 avpriv_request_sample(avctx, "960/120 MDCT window"); 779 return AVERROR_PATCHWELCOME; 780 } 781 782 if (get_bits1(gb)) // dependsOnCoreCoder 783 skip_bits(gb, 14); // coreCoderDelay 784 extension_flag = get_bits1(gb); 785 786 if (m4ac->object_type == AOT_AAC_SCALABLE || 787 m4ac->object_type == AOT_ER_AAC_SCALABLE) 788 skip_bits(gb, 3); // layerNr 789 790 if (channel_config == 0) { 791 skip_bits(gb, 4); // element_instance_tag 792 tags = decode_pce(avctx, m4ac, layout_map, gb); 793 if (tags < 0) 794 return tags; 795 } else { 796 if ((ret = set_default_channel_config(avctx, layout_map, 797 &tags, channel_config))) 798 return ret; 799 } 800 801 if (count_channels(layout_map, tags) > 1) { 802 m4ac->ps = 0; 803 } else if (m4ac->sbr == 1 && m4ac->ps == -1) 804 m4ac->ps = 1; 805 806 if (ac && (ret = output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0))) 807 return ret; 808 809 if (extension_flag) { 810 switch (m4ac->object_type) { 811 case AOT_ER_BSAC: 812 skip_bits(gb, 5); // numOfSubFrame 813 skip_bits(gb, 11); // layer_length 814 break; 815 case AOT_ER_AAC_LC: 816 case AOT_ER_AAC_LTP: 817 case AOT_ER_AAC_SCALABLE: 818 case AOT_ER_AAC_LD: 819 res_flags = get_bits(gb, 3); 820 if (res_flags) { 821 avpriv_report_missing_feature(avctx, 822 "AAC data resilience (flags %x)", 823 res_flags); 824 return AVERROR_PATCHWELCOME; 825 } 826 break; 827 } 828 skip_bits1(gb); // extensionFlag3 (TBD in version 3) 829 } 830 switch (m4ac->object_type) { 831 case AOT_ER_AAC_LC: 832 case AOT_ER_AAC_LTP: 833 case AOT_ER_AAC_SCALABLE: 834 case AOT_ER_AAC_LD: 835 ep_config = get_bits(gb, 2); 836 if (ep_config) { 837 
avpriv_report_missing_feature(avctx, 838 "epConfig %d", ep_config); 839 return AVERROR_PATCHWELCOME; 840 } 841 } 842 return 0; 843} 844 845static int decode_eld_specific_config(AACContext *ac, AVCodecContext *avctx, 846 GetBitContext *gb, 847 MPEG4AudioConfig *m4ac, 848 int channel_config) 849{ 850 int ret, ep_config, res_flags; 851 uint8_t layout_map[MAX_ELEM_ID*4][3]; 852 int tags = 0; 853 const int ELDEXT_TERM = 0; 854 855 m4ac->ps = 0; 856 m4ac->sbr = 0; 857 858 if (get_bits1(gb)) { // frameLengthFlag 859 avpriv_request_sample(avctx, "960/120 MDCT window"); 860 return AVERROR_PATCHWELCOME; 861 } 862 863 res_flags = get_bits(gb, 3); 864 if (res_flags) { 865 avpriv_report_missing_feature(avctx, 866 "AAC data resilience (flags %x)", 867 res_flags); 868 return AVERROR_PATCHWELCOME; 869 } 870 871 if (get_bits1(gb)) { // ldSbrPresentFlag 872 avpriv_report_missing_feature(avctx, 873 "Low Delay SBR"); 874 return AVERROR_PATCHWELCOME; 875 } 876 877 while (get_bits(gb, 4) != ELDEXT_TERM) { 878 int len = get_bits(gb, 4); 879 if (len == 15) 880 len += get_bits(gb, 8); 881 if (len == 15 + 255) 882 len += get_bits(gb, 16); 883 if (get_bits_left(gb) < len * 8 + 4) { 884 av_log(ac->avctx, AV_LOG_ERROR, overread_err); 885 return AVERROR_INVALIDDATA; 886 } 887 skip_bits_long(gb, 8 * len); 888 } 889 890 if ((ret = set_default_channel_config(avctx, layout_map, 891 &tags, channel_config))) 892 return ret; 893 894 if (ac && (ret = output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0))) 895 return ret; 896 897 ep_config = get_bits(gb, 2); 898 if (ep_config) { 899 avpriv_report_missing_feature(avctx, 900 "epConfig %d", ep_config); 901 return AVERROR_PATCHWELCOME; 902 } 903 return 0; 904} 905 906/** 907 * Decode audio specific configuration; reference: table 1.13. 
908 * 909 * @param ac pointer to AACContext, may be null 910 * @param avctx pointer to AVCCodecContext, used for logging 911 * @param m4ac pointer to MPEG4AudioConfig, used for parsing 912 * @param data pointer to buffer holding an audio specific config 913 * @param bit_size size of audio specific config or data in bits 914 * @param sync_extension look for an appended sync extension 915 * 916 * @return Returns error status or number of consumed bits. <0 - error 917 */ 918static int decode_audio_specific_config(AACContext *ac, 919 AVCodecContext *avctx, 920 MPEG4AudioConfig *m4ac, 921 const uint8_t *data, int bit_size, 922 int sync_extension) 923{ 924 GetBitContext gb; 925 int i, ret; 926 927 av_dlog(avctx, "audio specific config size %d\n", bit_size >> 3); 928 for (i = 0; i < bit_size >> 3; i++) 929 av_dlog(avctx, "%02x ", data[i]); 930 av_dlog(avctx, "\n"); 931 932 if ((ret = init_get_bits(&gb, data, bit_size)) < 0) 933 return ret; 934 935 if ((i = avpriv_mpeg4audio_get_config(m4ac, data, bit_size, 936 sync_extension)) < 0) 937 return AVERROR_INVALIDDATA; 938 if (m4ac->sampling_index > 12) { 939 av_log(avctx, AV_LOG_ERROR, 940 "invalid sampling rate index %d\n", 941 m4ac->sampling_index); 942 return AVERROR_INVALIDDATA; 943 } 944 if (m4ac->object_type == AOT_ER_AAC_LD && 945 (m4ac->sampling_index < 3 || m4ac->sampling_index > 7)) { 946 av_log(avctx, AV_LOG_ERROR, 947 "invalid low delay sampling rate index %d\n", 948 m4ac->sampling_index); 949 return AVERROR_INVALIDDATA; 950 } 951 952 skip_bits_long(&gb, i); 953 954 switch (m4ac->object_type) { 955 case AOT_AAC_MAIN: 956 case AOT_AAC_LC: 957 case AOT_AAC_LTP: 958 case AOT_ER_AAC_LC: 959 case AOT_ER_AAC_LD: 960 if ((ret = decode_ga_specific_config(ac, avctx, &gb, 961 m4ac, m4ac->chan_config)) < 0) 962 return ret; 963 break; 964 case AOT_ER_AAC_ELD: 965 if ((ret = decode_eld_specific_config(ac, avctx, &gb, 966 m4ac, m4ac->chan_config)) < 0) 967 return ret; 968 break; 969 default: 970 
avpriv_report_missing_feature(avctx, 971 "Audio object type %s%d", 972 m4ac->sbr == 1 ? "SBR+" : "", 973 m4ac->object_type); 974 return AVERROR(ENOSYS); 975 } 976 977 av_dlog(avctx, 978 "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n", 979 m4ac->object_type, m4ac->chan_config, m4ac->sampling_index, 980 m4ac->sample_rate, m4ac->sbr, 981 m4ac->ps); 982 983 return get_bits_count(&gb); 984} 985 986/** 987 * linear congruential pseudorandom number generator 988 * 989 * @param previous_val pointer to the current state of the generator 990 * 991 * @return Returns a 32-bit pseudorandom integer 992 */ 993static av_always_inline int lcg_random(unsigned previous_val) 994{ 995 union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 }; 996 return v.s; 997} 998 999static av_always_inline void reset_predict_state(PredictorState *ps) 1000{ 1001 ps->r0 = 0.0f; 1002 ps->r1 = 0.0f; 1003 ps->cor0 = 0.0f; 1004 ps->cor1 = 0.0f; 1005 ps->var0 = 1.0f; 1006 ps->var1 = 1.0f; 1007} 1008 1009static void reset_all_predictors(PredictorState *ps) 1010{ 1011 int i; 1012 for (i = 0; i < MAX_PREDICTORS; i++) 1013 reset_predict_state(&ps[i]); 1014} 1015 1016static int sample_rate_idx (int rate) 1017{ 1018 if (92017 <= rate) return 0; 1019 else if (75132 <= rate) return 1; 1020 else if (55426 <= rate) return 2; 1021 else if (46009 <= rate) return 3; 1022 else if (37566 <= rate) return 4; 1023 else if (27713 <= rate) return 5; 1024 else if (23004 <= rate) return 6; 1025 else if (18783 <= rate) return 7; 1026 else if (13856 <= rate) return 8; 1027 else if (11502 <= rate) return 9; 1028 else if (9391 <= rate) return 10; 1029 else return 11; 1030} 1031 1032static void reset_predictor_group(PredictorState *ps, int group_num) 1033{ 1034 int i; 1035 for (i = group_num - 1; i < MAX_PREDICTORS; i += 30) 1036 reset_predict_state(&ps[i]); 1037} 1038 1039#define AAC_INIT_VLC_STATIC(num, size) \ 1040 INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \ 1041 
ff_aac_spectral_bits[num], sizeof(ff_aac_spectral_bits[num][0]), \ 1042 sizeof(ff_aac_spectral_bits[num][0]), \ 1043 ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), \ 1044 sizeof(ff_aac_spectral_codes[num][0]), \ 1045 size); 1046 1047static void aacdec_init(AACContext *ac); 1048 1049static av_cold int aac_decode_init(AVCodecContext *avctx) 1050{ 1051 AACContext *ac = avctx->priv_data; 1052 int ret; 1053 1054 ac->avctx = avctx; 1055 ac->oc[1].m4ac.sample_rate = avctx->sample_rate; 1056 1057 aacdec_init(ac); 1058 1059 avctx->sample_fmt = AV_SAMPLE_FMT_FLTP; 1060 1061 if (avctx->extradata_size > 0) { 1062 if ((ret = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac, 1063 avctx->extradata, 1064 avctx->extradata_size * 8, 1065 1)) < 0) 1066 return ret; 1067 } else { 1068 int sr, i; 1069 uint8_t layout_map[MAX_ELEM_ID*4][3]; 1070 int layout_map_tags; 1071 1072 sr = sample_rate_idx(avctx->sample_rate); 1073 ac->oc[1].m4ac.sampling_index = sr; 1074 ac->oc[1].m4ac.channels = avctx->channels; 1075 ac->oc[1].m4ac.sbr = -1; 1076 ac->oc[1].m4ac.ps = -1; 1077 1078 for (i = 0; i < FF_ARRAY_ELEMS(ff_mpeg4audio_channels); i++) 1079 if (ff_mpeg4audio_channels[i] == avctx->channels) 1080 break; 1081 if (i == FF_ARRAY_ELEMS(ff_mpeg4audio_channels)) { 1082 i = 0; 1083 } 1084 ac->oc[1].m4ac.chan_config = i; 1085 1086 if (ac->oc[1].m4ac.chan_config) { 1087 int ret = set_default_channel_config(avctx, layout_map, 1088 &layout_map_tags, ac->oc[1].m4ac.chan_config); 1089 if (!ret) 1090 output_configure(ac, layout_map, layout_map_tags, 1091 OC_GLOBAL_HDR, 0); 1092 else if (avctx->err_recognition & AV_EF_EXPLODE) 1093 return AVERROR_INVALIDDATA; 1094 } 1095 } 1096 1097 if (avctx->channels > MAX_CHANNELS) { 1098 av_log(avctx, AV_LOG_ERROR, "Too many channels\n"); 1099 return AVERROR_INVALIDDATA; 1100 } 1101 1102 AAC_INIT_VLC_STATIC( 0, 304); 1103 AAC_INIT_VLC_STATIC( 1, 270); 1104 AAC_INIT_VLC_STATIC( 2, 550); 1105 AAC_INIT_VLC_STATIC( 3, 300); 1106 
AAC_INIT_VLC_STATIC( 4, 328); 1107 AAC_INIT_VLC_STATIC( 5, 294); 1108 AAC_INIT_VLC_STATIC( 6, 306); 1109 AAC_INIT_VLC_STATIC( 7, 268); 1110 AAC_INIT_VLC_STATIC( 8, 510); 1111 AAC_INIT_VLC_STATIC( 9, 366); 1112 AAC_INIT_VLC_STATIC(10, 462); 1113 1114 ff_aac_sbr_init(); 1115 1116 ff_fmt_convert_init(&ac->fmt_conv, avctx); 1117 avpriv_float_dsp_init(&ac->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); 1118 1119 ac->random_state = 0x1f2e3d4c; 1120 1121 ff_aac_tableinit(); 1122 1123 INIT_VLC_STATIC(&vlc_scalefactors, 7, 1124 FF_ARRAY_ELEMS(ff_aac_scalefactor_code), 1125 ff_aac_scalefactor_bits, 1126 sizeof(ff_aac_scalefactor_bits[0]), 1127 sizeof(ff_aac_scalefactor_bits[0]), 1128 ff_aac_scalefactor_code, 1129 sizeof(ff_aac_scalefactor_code[0]), 1130 sizeof(ff_aac_scalefactor_code[0]), 1131 352); 1132 1133 ff_mdct_init(&ac->mdct, 11, 1, 1.0 / (32768.0 * 1024.0)); 1134 ff_mdct_init(&ac->mdct_ld, 10, 1, 1.0 / (32768.0 * 512.0)); 1135 ff_mdct_init(&ac->mdct_small, 8, 1, 1.0 / (32768.0 * 128.0)); 1136 ff_mdct_init(&ac->mdct_ltp, 11, 0, -2.0 * 32768.0); 1137 // window initialization 1138 ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024); 1139 ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128); 1140 ff_init_ff_sine_windows(10); 1141 ff_init_ff_sine_windows( 9); 1142 ff_init_ff_sine_windows( 7); 1143 1144 cbrt_tableinit(); 1145 1146 return 0; 1147} 1148 1149/** 1150 * Skip data_stream_element; reference: table 4.10. 
1151 */ 1152static int skip_data_stream_element(AACContext *ac, GetBitContext *gb) 1153{ 1154 int byte_align = get_bits1(gb); 1155 int count = get_bits(gb, 8); 1156 if (count == 255) 1157 count += get_bits(gb, 8); 1158 if (byte_align) 1159 align_get_bits(gb); 1160 1161 if (get_bits_left(gb) < 8 * count) { 1162 av_log(ac->avctx, AV_LOG_ERROR, "skip_data_stream_element: "overread_err); 1163 return AVERROR_INVALIDDATA; 1164 } 1165 skip_bits_long(gb, 8 * count); 1166 return 0; 1167} 1168 1169static int decode_prediction(AACContext *ac, IndividualChannelStream *ics, 1170 GetBitContext *gb) 1171{ 1172 int sfb; 1173 if (get_bits1(gb)) { 1174 ics->predictor_reset_group = get_bits(gb, 5); 1175 if (ics->predictor_reset_group == 0 || 1176 ics->predictor_reset_group > 30) { 1177 av_log(ac->avctx, AV_LOG_ERROR, 1178 "Invalid Predictor Reset Group.\n"); 1179 return AVERROR_INVALIDDATA; 1180 } 1181 } 1182 for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]); sfb++) { 1183 ics->prediction_used[sfb] = get_bits1(gb); 1184 } 1185 return 0; 1186} 1187 1188/** 1189 * Decode Long Term Prediction data; reference: table 4.xx. 1190 */ 1191static void decode_ltp(LongTermPrediction *ltp, 1192 GetBitContext *gb, uint8_t max_sfb) 1193{ 1194 int sfb; 1195 1196 ltp->lag = get_bits(gb, 11); 1197 ltp->coef = ltp_coef[get_bits(gb, 3)]; 1198 for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++) 1199 ltp->used[sfb] = get_bits1(gb); 1200} 1201 1202/** 1203 * Decode Individual Channel Stream info; reference: table 4.6. 
1204 */ 1205static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics, 1206 GetBitContext *gb) 1207{ 1208 int aot = ac->oc[1].m4ac.object_type; 1209 if (aot != AOT_ER_AAC_ELD) { 1210 if (get_bits1(gb)) { 1211 av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n"); 1212 return AVERROR_INVALIDDATA; 1213 } 1214 ics->window_sequence[1] = ics->window_sequence[0]; 1215 ics->window_sequence[0] = get_bits(gb, 2); 1216 if (aot == AOT_ER_AAC_LD && 1217 ics->window_sequence[0] != ONLY_LONG_SEQUENCE) { 1218 av_log(ac->avctx, AV_LOG_ERROR, 1219 "AAC LD is only defined for ONLY_LONG_SEQUENCE but " 1220 "window sequence %d found.\n", ics->window_sequence[0]); 1221 ics->window_sequence[0] = ONLY_LONG_SEQUENCE; 1222 return AVERROR_INVALIDDATA; 1223 } 1224 ics->use_kb_window[1] = ics->use_kb_window[0]; 1225 ics->use_kb_window[0] = get_bits1(gb); 1226 } 1227 ics->num_window_groups = 1; 1228 ics->group_len[0] = 1; 1229 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 1230 int i; 1231 ics->max_sfb = get_bits(gb, 4); 1232 for (i = 0; i < 7; i++) { 1233 if (get_bits1(gb)) { 1234 ics->group_len[ics->num_window_groups - 1]++; 1235 } else { 1236 ics->num_window_groups++; 1237 ics->group_len[ics->num_window_groups - 1] = 1; 1238 } 1239 } 1240 ics->num_windows = 8; 1241 ics->swb_offset = ff_swb_offset_128[ac->oc[1].m4ac.sampling_index]; 1242 ics->num_swb = ff_aac_num_swb_128[ac->oc[1].m4ac.sampling_index]; 1243 ics->tns_max_bands = ff_tns_max_bands_128[ac->oc[1].m4ac.sampling_index]; 1244 ics->predictor_present = 0; 1245 } else { 1246 ics->max_sfb = get_bits(gb, 6); 1247 ics->num_windows = 1; 1248 if (aot == AOT_ER_AAC_LD || aot == AOT_ER_AAC_ELD) { 1249 ics->swb_offset = ff_swb_offset_512[ac->oc[1].m4ac.sampling_index]; 1250 ics->num_swb = ff_aac_num_swb_512[ac->oc[1].m4ac.sampling_index]; 1251 ics->tns_max_bands = ff_tns_max_bands_512[ac->oc[1].m4ac.sampling_index]; 1252 if (!ics->num_swb || !ics->swb_offset) 1253 return AVERROR_BUG; 1254 } else { 1255 ics->swb_offset = 
ff_swb_offset_1024[ac->oc[1].m4ac.sampling_index]; 1256 ics->num_swb = ff_aac_num_swb_1024[ac->oc[1].m4ac.sampling_index]; 1257 ics->tns_max_bands = ff_tns_max_bands_1024[ac->oc[1].m4ac.sampling_index]; 1258 } 1259 if (aot != AOT_ER_AAC_ELD) { 1260 ics->predictor_present = get_bits1(gb); 1261 ics->predictor_reset_group = 0; 1262 } 1263 if (ics->predictor_present) { 1264 if (aot == AOT_AAC_MAIN) { 1265 if (decode_prediction(ac, ics, gb)) { 1266 goto fail; 1267 } 1268 } else if (aot == AOT_AAC_LC || 1269 aot == AOT_ER_AAC_LC) { 1270 av_log(ac->avctx, AV_LOG_ERROR, 1271 "Prediction is not allowed in AAC-LC.\n"); 1272 goto fail; 1273 } else { 1274 if (aot == AOT_ER_AAC_LD) { 1275 av_log(ac->avctx, AV_LOG_ERROR, 1276 "LTP in ER AAC LD not yet implemented.\n"); 1277 return AVERROR_PATCHWELCOME; 1278 } 1279 if ((ics->ltp.present = get_bits(gb, 1))) 1280 decode_ltp(&ics->ltp, gb, ics->max_sfb); 1281 } 1282 } 1283 } 1284 1285 if (ics->max_sfb > ics->num_swb) { 1286 av_log(ac->avctx, AV_LOG_ERROR, 1287 "Number of scalefactor bands in group (%d) " 1288 "exceeds limit (%d).\n", 1289 ics->max_sfb, ics->num_swb); 1290 goto fail; 1291 } 1292 1293 return 0; 1294fail: 1295 ics->max_sfb = 0; 1296 return AVERROR_INVALIDDATA; 1297} 1298 1299/** 1300 * Decode band types (section_data payload); reference: table 4.46. 1301 * 1302 * @param band_type array of the used band type 1303 * @param band_type_run_end array of the last scalefactor band of a band type run 1304 * 1305 * @return Returns error status. 0 - OK, !0 - error 1306 */ 1307static int decode_band_types(AACContext *ac, enum BandType band_type[120], 1308 int band_type_run_end[120], GetBitContext *gb, 1309 IndividualChannelStream *ics) 1310{ 1311 int g, idx = 0; 1312 const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 
3 : 5; 1313 for (g = 0; g < ics->num_window_groups; g++) { 1314 int k = 0; 1315 while (k < ics->max_sfb) { 1316 uint8_t sect_end = k; 1317 int sect_len_incr; 1318 int sect_band_type = get_bits(gb, 4); 1319 if (sect_band_type == 12) { 1320 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n"); 1321 return AVERROR_INVALIDDATA; 1322 } 1323 do { 1324 sect_len_incr = get_bits(gb, bits); 1325 sect_end += sect_len_incr; 1326 if (get_bits_left(gb) < 0) { 1327 av_log(ac->avctx, AV_LOG_ERROR, "decode_band_types: "overread_err); 1328 return AVERROR_INVALIDDATA; 1329 } 1330 if (sect_end > ics->max_sfb) { 1331 av_log(ac->avctx, AV_LOG_ERROR, 1332 "Number of bands (%d) exceeds limit (%d).\n", 1333 sect_end, ics->max_sfb); 1334 return AVERROR_INVALIDDATA; 1335 } 1336 } while (sect_len_incr == (1 << bits) - 1); 1337 for (; k < sect_end; k++) { 1338 band_type [idx] = sect_band_type; 1339 band_type_run_end[idx++] = sect_end; 1340 } 1341 } 1342 } 1343 return 0; 1344} 1345 1346/** 1347 * Decode scalefactors; reference: table 4.47. 1348 * 1349 * @param global_gain first scalefactor value as scalefactors are differentially coded 1350 * @param band_type array of the used band type 1351 * @param band_type_run_end array of the last scalefactor band of a band type run 1352 * @param sf array of scalefactors or intensity stereo positions 1353 * 1354 * @return Returns error status. 
0 - OK, !0 - error 1355 */ 1356static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb, 1357 unsigned int global_gain, 1358 IndividualChannelStream *ics, 1359 enum BandType band_type[120], 1360 int band_type_run_end[120]) 1361{ 1362 int g, i, idx = 0; 1363 int offset[3] = { global_gain, global_gain - 90, 0 }; 1364 int clipped_offset; 1365 int noise_flag = 1; 1366 for (g = 0; g < ics->num_window_groups; g++) { 1367 for (i = 0; i < ics->max_sfb;) { 1368 int run_end = band_type_run_end[idx]; 1369 if (band_type[idx] == ZERO_BT) { 1370 for (; i < run_end; i++, idx++) 1371 sf[idx] = 0.0; 1372 } else if ((band_type[idx] == INTENSITY_BT) || 1373 (band_type[idx] == INTENSITY_BT2)) { 1374 for (; i < run_end; i++, idx++) { 1375 offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60; 1376 clipped_offset = av_clip(offset[2], -155, 100); 1377 if (offset[2] != clipped_offset) { 1378 avpriv_request_sample(ac->avctx, 1379 "If you heard an audible artifact, there may be a bug in the decoder. " 1380 "Clipped intensity stereo position (%d -> %d)", 1381 offset[2], clipped_offset); 1382 } 1383 sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + POW_SF2_ZERO]; 1384 } 1385 } else if (band_type[idx] == NOISE_BT) { 1386 for (; i < run_end; i++, idx++) { 1387 if (noise_flag-- > 0) 1388 offset[1] += get_bits(gb, 9) - 256; 1389 else 1390 offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60; 1391 clipped_offset = av_clip(offset[1], -100, 155); 1392 if (offset[1] != clipped_offset) { 1393 avpriv_request_sample(ac->avctx, 1394 "If you heard an audible artifact, there may be a bug in the decoder. 
" 1395 "Clipped noise gain (%d -> %d)", 1396 offset[1], clipped_offset); 1397 } 1398 sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + POW_SF2_ZERO]; 1399 } 1400 } else { 1401 for (; i < run_end; i++, idx++) { 1402 offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60; 1403 if (offset[0] > 255U) { 1404 av_log(ac->avctx, AV_LOG_ERROR, 1405 "Scalefactor (%d) out of range.\n", offset[0]); 1406 return AVERROR_INVALIDDATA; 1407 } 1408 sf[idx] = -ff_aac_pow2sf_tab[offset[0] - 100 + POW_SF2_ZERO]; 1409 } 1410 } 1411 } 1412 } 1413 return 0; 1414} 1415 1416/** 1417 * Decode pulse data; reference: table 4.7. 1418 */ 1419static int decode_pulses(Pulse *pulse, GetBitContext *gb, 1420 const uint16_t *swb_offset, int num_swb) 1421{ 1422 int i, pulse_swb; 1423 pulse->num_pulse = get_bits(gb, 2) + 1; 1424 pulse_swb = get_bits(gb, 6); 1425 if (pulse_swb >= num_swb) 1426 return -1; 1427 pulse->pos[0] = swb_offset[pulse_swb]; 1428 pulse->pos[0] += get_bits(gb, 5); 1429 if (pulse->pos[0] >= swb_offset[num_swb]) 1430 return -1; 1431 pulse->amp[0] = get_bits(gb, 4); 1432 for (i = 1; i < pulse->num_pulse; i++) { 1433 pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1]; 1434 if (pulse->pos[i] >= swb_offset[num_swb]) 1435 return -1; 1436 pulse->amp[i] = get_bits(gb, 4); 1437 } 1438 return 0; 1439} 1440 1441/** 1442 * Decode Temporal Noise Shaping data; reference: table 4.48. 1443 * 1444 * @return Returns error status. 0 - OK, !0 - error 1445 */ 1446static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns, 1447 GetBitContext *gb, const IndividualChannelStream *ics) 1448{ 1449 int w, filt, i, coef_len, coef_res, coef_compress; 1450 const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE; 1451 const int tns_max_order = is8 ? 7 : ac->oc[1].m4ac.object_type == AOT_AAC_MAIN ? 
20 : 12; 1452 for (w = 0; w < ics->num_windows; w++) { 1453 if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) { 1454 coef_res = get_bits1(gb); 1455 1456 for (filt = 0; filt < tns->n_filt[w]; filt++) { 1457 int tmp2_idx; 1458 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8); 1459 1460 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) { 1461 av_log(ac->avctx, AV_LOG_ERROR, 1462 "TNS filter order %d is greater than maximum %d.\n", 1463 tns->order[w][filt], tns_max_order); 1464 tns->order[w][filt] = 0; 1465 return AVERROR_INVALIDDATA; 1466 } 1467 if (tns->order[w][filt]) { 1468 tns->direction[w][filt] = get_bits1(gb); 1469 coef_compress = get_bits1(gb); 1470 coef_len = coef_res + 3 - coef_compress; 1471 tmp2_idx = 2 * coef_compress + coef_res; 1472 1473 for (i = 0; i < tns->order[w][filt]; i++) 1474 tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)]; 1475 } 1476 } 1477 } 1478 } 1479 return 0; 1480} 1481 1482/** 1483 * Decode Mid/Side data; reference: table 4.54. 1484 * 1485 * @param ms_present Indicates mid/side stereo presence. 
[0] mask is all 0s;
 *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
 *                      [3] reserved for scalable AAC
 *
 * Only the first num_window_groups * max_sfb entries of cpe->ms_mask are
 * written; for ms_present values other than 1 and 2 the mask is left untouched.
 */
static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
                                   int ms_present)
{
    int idx;
    if (ms_present == 1) {
        /* one flag per scalefactor band of every window group */
        for (idx = 0;
             idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb;
             idx++)
            cpe->ms_mask[idx] = get_bits1(gb);
    } else if (ms_present == 2) {
        memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask[0]) * cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb);
    }
}

#ifndef VMUL2
/**
 * Write two scaled codebook values to dst.
 *
 * The two 4-bit fields in bits 0-3 and 4-7 of idx select entries of v; each
 * entry is multiplied by *scale.
 *
 * @return  Returns dst advanced by 2
 */
static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    float s = *scale;
    *dst++ = v[idx    & 15] * s;
    *dst++ = v[idx>>4 & 15] * s;
    return dst;
}
#endif

#ifndef VMUL4
/**
 * Write four scaled codebook values to dst.
 *
 * The four 2-bit fields in bits 0-7 of idx select entries of v; each entry
 * is multiplied by *scale.
 *
 * @return  Returns dst advanced by 4
 */
static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    float s = *scale;
    *dst++ = v[idx    & 3] * s;
    *dst++ = v[idx>>2 & 3] * s;
    *dst++ = v[idx>>4 & 3] * s;
    *dst++ = v[idx>>6 & 3] * s;
    return dst;
}
#endif

#ifndef VMUL2S
/**
 * Like VMUL2, but with a sign applied to each value.
 *
 * Bit 1 of sign flips the sign of the first value and bit 0 that of the
 * second one, by XORing the sign bit of the float representation of *scale.
 *
 * @return  Returns dst advanced by 2
 */
static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
                            unsigned sign, const float *scale)
{
    union av_intfloat32 s0, s1;

    s0.f = s1.f = *scale;
    s0.i ^= sign >> 1 << 31;
    s1.i ^= sign      << 31;

    *dst++ = v[idx    & 15] * s0.f;
    *dst++ = v[idx>>4 & 15] * s1.f;

    return dst;
}
#endif

#ifndef VMUL4S
/**
 * Like VMUL4, but with a sign applied to each value.
 *
 * The sign for the current value is always taken from bit 31 of sign. After
 * each of the first three values, sign is shifted left by one only if the
 * corresponding bit of idx >> 12 is set, so the next sign bit is consumed
 * only for values flagged there.
 *
 * @return  Returns dst advanced by 4
 */
static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
                            unsigned sign, const float *scale)
{
    unsigned nz = idx >> 12;
    union av_intfloat32 s = { .f = *scale };
    union av_intfloat32 t;

    t.i = s.i ^ (sign & 1U<<31);
    *dst++ = v[idx    & 3] * t.f;

    sign <<= nz & 1; nz >>= 1;
    t.i = s.i ^ (sign & 1U<<31);
    *dst++ = v[idx>>2 & 3] * t.f;

    sign <<= nz & 1; nz >>= 1;
    t.i = s.i ^ (sign & 1U<<31);
    *dst++ = v[idx>>4 & 3] * t.f;

    sign <<= nz & 1;
    t.i = s.i ^ (sign & 1U<<31);
    *dst++ = v[idx>>6 & 3] * t.f;

    return dst;
}
#endif

/**
 * Decode spectral data; reference: table 4.50.
 * Dequantize and scale spectral data; reference: 4.6.3.3.
 *
 * @param   coef            array of dequantized, scaled spectral data
 * @param   sf              array of scalefactors or intensity stereo positions
 * @param   pulse_present   set if pulses are present
 * @param   pulse           pointer to pulse data struct
 * @param   band_type       array of the used band type
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                                       GetBitContext *gb, const float sf[120],
                                       int pulse_present, const Pulse *pulse,
                                       const IndividualChannelStream *ics,
                                       enum BandType band_type[120])
{
    int i, k, g, idx = 0;
    /* number of coefficients per window */
    const int c = 1024 / ics->num_windows;
    const uint16_t *offsets = ics->swb_offset;
    /* coef is advanced per window group below; keep the start for the pulse pass */
    float *coef_base = coef;

    /* zero everything above the last transmitted scalefactor band of each window */
    for (g = 0; g < ics->num_windows; g++)
        memset(coef + g * 128 + offsets[ics->max_sfb], 0,
               sizeof(float) * (c - offsets[ics->max_sfb]));

    for (g = 0; g < ics->num_window_groups; g++) {
        unsigned g_len = ics->group_len[g];

        for (i = 0; i < ics->max_sfb; i++, idx++) {
            /* unsigned on purpose: a band type of 0 wraps around to a huge
             * value and is caught by the first comparison below */
            const unsigned cbt_m1 = band_type[idx] - 1;
            float *cfo = coef + offsets[i];
            int off_len = offsets[i + 1] - offsets[i];
            int group;

            if (cbt_m1 >= INTENSITY_BT2 - 1) {
                /* no spectral data is read for these band types: clear the band */
                for (group = 0; group < g_len; group++, cfo+=128) {
                    memset(cfo, 0, off_len * sizeof(float));
                }
            } else if (cbt_m1 == NOISE_BT - 1) {
                /* fill the band with pseudorandom values, then rescale it by
                 * sf[idx] / sqrt(energy of the generated values) */
                for (group = 0; group < g_len; group++, cfo+=128) {
                    float scale;
                    float band_energy;

                    for (k = 0; k < off_len; k++) {
                        ac->random_state  = lcg_random(ac->random_state);
                        cfo[k] = ac->random_state;
                    }

                    band_energy = ac->fdsp.scalarproduct_float(cfo, cfo, off_len);
                    scale = sf[idx] / sqrtf(band_energy);
                    ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
                }
            } else {
                const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
                const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
                VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
                OPEN_READER(re, gb);

                /* cbt_m1 >> 1 selects the decoding scheme for a pair of codebooks */
                switch (cbt_m1 >> 1) {
                case 0:
                    /* 4 values per codeword, no separate sign bits */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned cb_idx;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
                            cf = VMUL4(cf, vq, cb_idx, sf + idx);
                        } while (len -= 4);
                    }
                    break;

                case 1:
                    /* 4 values per codeword, followed by nnz sign bits */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned nnz;
                            unsigned cb_idx;
                            uint32_t bits;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
                            nnz = cb_idx >> 8 & 15;
                            bits = nnz ? GET_CACHE(re, gb) : 0;
                            LAST_SKIP_BITS(re, gb, nnz);
                            cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
                        } while (len -= 4);
                    }
                    break;

                case 2:
                    /* 2 values per codeword, no separate sign bits */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned cb_idx;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
                            cf = VMUL2(cf, vq, cb_idx, sf + idx);
                        } while (len -= 2);
                    }
                    break;

                case 3:
                case 4:
                    /* 2 values per codeword, followed by nnz sign bits */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned nnz;
                            unsigned cb_idx;
                            unsigned sign;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
                            nnz = cb_idx >> 8 & 15;
                            sign = nnz ? SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12) : 0;
                            LAST_SKIP_BITS(re, gb, nnz);
                            cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
                        } while (len -= 2);
                    }
                    break;

                default:
                    /* 2 values per codeword with sign bits and optional escape
                     * sequences; values are assembled as raw 32-bit patterns
                     * through icf and scaled once per band afterwards */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        uint32_t *icf = (uint32_t *) cf;
                        int len = off_len;

                        do {
                            int code;
                            unsigned nzt, nnz;
                            unsigned cb_idx;
                            uint32_t bits;
                            int j;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);

                            /* code 0: both values are zero, nothing else to read */
                            if (!code) {
                                *icf++ = 0;
                                *icf++ = 0;
                                continue;
                            }

                            cb_idx = cb_vector_idx[code];
                            nnz = cb_idx >> 12;
                            nzt = cb_idx >> 8;
                            /* left-align the nnz sign bits so that the next
                             * one is always in bit 31 */
                            bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
                            LAST_SKIP_BITS(re, gb, nnz);

                            for (j = 0; j < 2; j++) {
                                if (nzt & 1<<j) {
                                    uint32_t b;
                                    int n;
                                    /* The total length of escape_sequence must be < 22 bits according
                                       to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
                                    UPDATE_CACHE(re, gb);
                                    b = GET_CACHE(re, gb);
                                    /* number of leading one bits in the cache */
                                    b = 31 - av_log2(~b);

                                    if (b > 8) {
                                        av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
                                        return AVERROR_INVALIDDATA;
                                    }

                                    SKIP_BITS(re, gb, b + 1);
                                    b += 4;
                                    n = (1 << b) + SHOW_UBITS(re, gb, b);
                                    LAST_SKIP_BITS(re, gb, b);
                                    *icf++ = cbrt_tab[n] | (bits & 1U<<31);
                                    bits <<= 1;
                                } else {
                                    unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
                                    *icf++ = (bits & 1U<<31) | v;
                                    /* a sign bit is consumed only for non-zero values */
                                    bits <<= !!v;
                                }
                                cb_idx >>= 4;
                            }
                        } while (len -= 2);

                        ac->fdsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
                    }
                }

                CLOSE_READER(re, gb);
            }
        }
        /* advance to the first window of the next group (128 coefficients per window) */
        coef += g_len << 7;
    }

    if (pulse_present) {
        idx = 0;
        for (i = 0; i < pulse->num_pulse; i++) {
            float co = coef_base[ pulse->pos[i] ];
            /* find the scalefactor band containing this pulse position */
            while (offsets[idx + 1] <= pulse->pos[i])
                idx++;
            if (band_type[idx] != NOISE_BT && sf[idx]) {
                float ico = -pulse->amp[i];
                if (co) {
                    /* undo the scaling and the |x|^(4/3) expansion, add the
                     * pulse amplitude away from zero */
                    co /= sf[idx];
                    ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
                }
                /* cbrtf(|x|) * x redoes the expansion, sf[idx] redoes the scaling */
                coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
            }
        }
    }
    return 0;
}

/**
 * Reduce a float to its upper 16 bits: half of the dropped range (0x8000) is
 * added to the bit pattern before the lower 16 bits are cleared.
 */
static av_always_inline float flt16_round(float pf)
{
    union av_intfloat32 tmp;
    tmp.f = pf;
    tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
    return tmp.f;
}

/**
 * Reduce a float to its upper 16 bits: 0x7FFF plus one extra bit is added to
 * the bit pattern before the lower 16 bits are cleared.
 *
 * NOTE(review): ">>" binds tighter than "&", so the extra term is evaluated
 * as tmp.i & (0x00010000U >> 16), i.e. tmp.i & 1 (bit 0 of the pattern).
 * If ties were meant to round to even on the kept 16 bits, the term would be
 * (tmp.i & 0x00010000U) >> 16 - confirm the intent before changing it, since
 * this affects decoder output.
 */
static av_always_inline float flt16_even(float pf)
{
    union av_intfloat32 tmp;
    tmp.f = pf;
    tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
    return tmp.f;
}

/**
 * Reduce a float to its upper 16 bits by clearing the lower 16 bits of the
 * bit pattern.
 */
static av_always_inline float flt16_trunc(float pf)
{
    union av_intfloat32 pun;
    pun.f = pf;
    pun.i &= 0xFFFF0000U;
    return pun.f;
}

/**
 * Run one step of a single predictor.
 *
 * The predicted value is added to *coef when output_enable is set; the
 * predictor state is updated from the resulting *coef in either case.
 *
 * @param   ps              state of the predictor, updated in place
 * @param   coef            spectral coefficient the predictor belongs to
 * @param   output_enable   non-zero to add the prediction to *coef
 */
static av_always_inline void predict(PredictorState *ps, float *coef,
                                     int output_enable)
{
    const float a     = 0.953125; // 61.0 / 64
    const float alpha = 0.90625;  // 29.0 / 32
    float e0, e1;
    float pv;
    float k1, k2;
    float   r0 = ps->r0,     r1 = ps->r1;
    float cor0 = ps->cor0, cor1 = ps->cor1;
    float var0 = ps->var0, var1 = ps->var1;

    /* the coefficients are forced to 0 while the variance is not above 1 */
    k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
    k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;

    pv = flt16_round(k1 * r0 + k2 * r1);
    if (output_enable)
        *coef += pv;

    e0 = *coef;
    e1 = e0 - k1 * r0;

    ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
    ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
    ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
    ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));

    ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
    ps->r0 = flt16_trunc(a * e0);
}

/**
 * Apply AAC-Main style frequency domain prediction.
1855 */ 1856static void apply_prediction(AACContext *ac, SingleChannelElement *sce) 1857{ 1858 int sfb, k; 1859 1860 if (!sce->ics.predictor_initialized) { 1861 reset_all_predictors(sce->predictor_state); 1862 sce->ics.predictor_initialized = 1; 1863 } 1864 1865 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) { 1866 for (sfb = 0; 1867 sfb < ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]; 1868 sfb++) { 1869 for (k = sce->ics.swb_offset[sfb]; 1870 k < sce->ics.swb_offset[sfb + 1]; 1871 k++) { 1872 predict(&sce->predictor_state[k], &sce->coeffs[k], 1873 sce->ics.predictor_present && 1874 sce->ics.prediction_used[sfb]); 1875 } 1876 } 1877 if (sce->ics.predictor_reset_group) 1878 reset_predictor_group(sce->predictor_state, 1879 sce->ics.predictor_reset_group); 1880 } else 1881 reset_all_predictors(sce->predictor_state); 1882} 1883 1884/** 1885 * Decode an individual_channel_stream payload; reference: table 4.44. 1886 * 1887 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information. 1888 * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.) 1889 * 1890 * @return Returns error status. 
0 - OK, !0 - error 1891 */ 1892static int decode_ics(AACContext *ac, SingleChannelElement *sce, 1893 GetBitContext *gb, int common_window, int scale_flag) 1894{ 1895 Pulse pulse; 1896 TemporalNoiseShaping *tns = &sce->tns; 1897 IndividualChannelStream *ics = &sce->ics; 1898 float *out = sce->coeffs; 1899 int global_gain, eld_syntax, er_syntax, pulse_present = 0; 1900 int ret; 1901 1902 eld_syntax = ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD; 1903 er_syntax = ac->oc[1].m4ac.object_type == AOT_ER_AAC_LC || 1904 ac->oc[1].m4ac.object_type == AOT_ER_AAC_LTP || 1905 ac->oc[1].m4ac.object_type == AOT_ER_AAC_LD || 1906 ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD; 1907 1908 /* This assignment is to silence a GCC warning about the variable being used 1909 * uninitialized when in fact it always is. 1910 */ 1911 pulse.num_pulse = 0; 1912 1913 global_gain = get_bits(gb, 8); 1914 1915 if (!common_window && !scale_flag) { 1916 if (decode_ics_info(ac, ics, gb) < 0) 1917 return AVERROR_INVALIDDATA; 1918 } 1919 1920 if ((ret = decode_band_types(ac, sce->band_type, 1921 sce->band_type_run_end, gb, ics)) < 0) 1922 return ret; 1923 if ((ret = decode_scalefactors(ac, sce->sf, gb, global_gain, ics, 1924 sce->band_type, sce->band_type_run_end)) < 0) 1925 return ret; 1926 1927 pulse_present = 0; 1928 if (!scale_flag) { 1929 if (!eld_syntax && (pulse_present = get_bits1(gb))) { 1930 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 1931 av_log(ac->avctx, AV_LOG_ERROR, 1932 "Pulse tool not allowed in eight short sequence.\n"); 1933 return AVERROR_INVALIDDATA; 1934 } 1935 if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) { 1936 av_log(ac->avctx, AV_LOG_ERROR, 1937 "Pulse data corrupt or invalid.\n"); 1938 return AVERROR_INVALIDDATA; 1939 } 1940 } 1941 tns->present = get_bits1(gb); 1942 if (tns->present && !er_syntax) 1943 if (decode_tns(ac, tns, gb, ics) < 0) 1944 return AVERROR_INVALIDDATA; 1945 if (!eld_syntax && get_bits1(gb)) { 1946 avpriv_request_sample(ac->avctx, 
"SSR"); 1947 return AVERROR_PATCHWELCOME; 1948 } 1949 // I see no textual basis in the spec for this occurring after SSR gain 1950 // control, but this is what both reference and real implmentations do 1951 if (tns->present && er_syntax) 1952 if (decode_tns(ac, tns, gb, ics) < 0) 1953 return AVERROR_INVALIDDATA; 1954 } 1955 1956 if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, 1957 &pulse, ics, sce->band_type) < 0) 1958 return AVERROR_INVALIDDATA; 1959 1960 if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN && !common_window) 1961 apply_prediction(ac, sce); 1962 1963 return 0; 1964} 1965 1966/** 1967 * Mid/Side stereo decoding; reference: 4.6.8.1.3. 1968 */ 1969static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe) 1970{ 1971 const IndividualChannelStream *ics = &cpe->ch[0].ics; 1972 float *ch0 = cpe->ch[0].coeffs; 1973 float *ch1 = cpe->ch[1].coeffs; 1974 int g, i, group, idx = 0; 1975 const uint16_t *offsets = ics->swb_offset; 1976 for (g = 0; g < ics->num_window_groups; g++) { 1977 for (i = 0; i < ics->max_sfb; i++, idx++) { 1978 if (cpe->ms_mask[idx] && 1979 cpe->ch[0].band_type[idx] < NOISE_BT && 1980 cpe->ch[1].band_type[idx] < NOISE_BT) { 1981 for (group = 0; group < ics->group_len[g]; group++) { 1982 ac->fdsp.butterflies_float(ch0 + group * 128 + offsets[i], 1983 ch1 + group * 128 + offsets[i], 1984 offsets[i+1] - offsets[i]); 1985 } 1986 } 1987 } 1988 ch0 += ics->group_len[g] * 128; 1989 ch1 += ics->group_len[g] * 128; 1990 } 1991} 1992 1993/** 1994 * intensity stereo decoding; reference: 4.6.8.2.3 1995 * 1996 * @param ms_present Indicates mid/side stereo presence. 
[0] mask is all 0s; 1997 * [1] mask is decoded from bitstream; [2] mask is all 1s; 1998 * [3] reserved for scalable AAC 1999 */ 2000static void apply_intensity_stereo(AACContext *ac, 2001 ChannelElement *cpe, int ms_present) 2002{ 2003 const IndividualChannelStream *ics = &cpe->ch[1].ics; 2004 SingleChannelElement *sce1 = &cpe->ch[1]; 2005 float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs; 2006 const uint16_t *offsets = ics->swb_offset; 2007 int g, group, i, idx = 0; 2008 int c; 2009 float scale; 2010 for (g = 0; g < ics->num_window_groups; g++) { 2011 for (i = 0; i < ics->max_sfb;) { 2012 if (sce1->band_type[idx] == INTENSITY_BT || 2013 sce1->band_type[idx] == INTENSITY_BT2) { 2014 const int bt_run_end = sce1->band_type_run_end[idx]; 2015 for (; i < bt_run_end; i++, idx++) { 2016 c = -1 + 2 * (sce1->band_type[idx] - 14); 2017 if (ms_present) 2018 c *= 1 - 2 * cpe->ms_mask[idx]; 2019 scale = c * sce1->sf[idx]; 2020 for (group = 0; group < ics->group_len[g]; group++) 2021 ac->fdsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i], 2022 coef0 + group * 128 + offsets[i], 2023 scale, 2024 offsets[i + 1] - offsets[i]); 2025 } 2026 } else { 2027 int bt_run_end = sce1->band_type_run_end[idx]; 2028 idx += bt_run_end - i; 2029 i = bt_run_end; 2030 } 2031 } 2032 coef0 += ics->group_len[g] * 128; 2033 coef1 += ics->group_len[g] * 128; 2034 } 2035} 2036 2037/** 2038 * Decode a channel_pair_element; reference: table 4.4. 2039 * 2040 * @return Returns error status. 
0 - OK, !0 - error 2041 */ 2042static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe) 2043{ 2044 int i, ret, common_window, ms_present = 0; 2045 int eld_syntax = ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD; 2046 2047 common_window = eld_syntax || get_bits1(gb); 2048 if (common_window) { 2049 if (decode_ics_info(ac, &cpe->ch[0].ics, gb)) 2050 return AVERROR_INVALIDDATA; 2051 i = cpe->ch[1].ics.use_kb_window[0]; 2052 cpe->ch[1].ics = cpe->ch[0].ics; 2053 cpe->ch[1].ics.use_kb_window[1] = i; 2054 if (cpe->ch[1].ics.predictor_present && 2055 (ac->oc[1].m4ac.object_type != AOT_AAC_MAIN)) 2056 if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1))) 2057 decode_ltp(&cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb); 2058 ms_present = get_bits(gb, 2); 2059 if (ms_present == 3) { 2060 av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n"); 2061 return AVERROR_INVALIDDATA; 2062 } else if (ms_present) 2063 decode_mid_side_stereo(cpe, gb, ms_present); 2064 } 2065 if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0))) 2066 return ret; 2067 if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0))) 2068 return ret; 2069 2070 if (common_window) { 2071 if (ms_present) 2072 apply_mid_side_stereo(ac, cpe); 2073 if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) { 2074 apply_prediction(ac, &cpe->ch[0]); 2075 apply_prediction(ac, &cpe->ch[1]); 2076 } 2077 } 2078 2079 apply_intensity_stereo(ac, cpe, ms_present); 2080 return 0; 2081} 2082 2083static const float cce_scale[] = { 2084 1.09050773266525765921, //2^(1/8) 2085 1.18920711500272106672, //2^(1/4) 2086 M_SQRT2, 2087 2, 2088}; 2089 2090/** 2091 * Decode coupling_channel_element; reference: table 4.8. 2092 * 2093 * @return Returns error status. 
0 - OK, !0 - error 2094 */ 2095static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che) 2096{ 2097 int num_gain = 0; 2098 int c, g, sfb, ret; 2099 int sign; 2100 float scale; 2101 SingleChannelElement *sce = &che->ch[0]; 2102 ChannelCoupling *coup = &che->coup; 2103 2104 coup->coupling_point = 2 * get_bits1(gb); 2105 coup->num_coupled = get_bits(gb, 3); 2106 for (c = 0; c <= coup->num_coupled; c++) { 2107 num_gain++; 2108 coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE; 2109 coup->id_select[c] = get_bits(gb, 4); 2110 if (coup->type[c] == TYPE_CPE) { 2111 coup->ch_select[c] = get_bits(gb, 2); 2112 if (coup->ch_select[c] == 3) 2113 num_gain++; 2114 } else 2115 coup->ch_select[c] = 2; 2116 } 2117 coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1); 2118 2119 sign = get_bits(gb, 1); 2120 scale = cce_scale[get_bits(gb, 2)]; 2121 2122 if ((ret = decode_ics(ac, sce, gb, 0, 0))) 2123 return ret; 2124 2125 for (c = 0; c < num_gain; c++) { 2126 int idx = 0; 2127 int cge = 1; 2128 int gain = 0; 2129 float gain_cache = 1.0; 2130 if (c) { 2131 cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb); 2132 gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0; 2133 gain_cache = powf(scale, -gain); 2134 } 2135 if (coup->coupling_point == AFTER_IMDCT) { 2136 coup->gain[c][0] = gain_cache; 2137 } else { 2138 for (g = 0; g < sce->ics.num_window_groups; g++) { 2139 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) { 2140 if (sce->band_type[idx] != ZERO_BT) { 2141 if (!cge) { 2142 int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60; 2143 if (t) { 2144 int s = 1; 2145 t = gain += t; 2146 if (sign) { 2147 s -= 2 * (t & 0x1); 2148 t >>= 1; 2149 } 2150 gain_cache = powf(scale, -t) * s; 2151 } 2152 } 2153 coup->gain[c][idx] = gain_cache; 2154 } 2155 } 2156 } 2157 } 2158 } 2159 return 0; 2160} 2161 2162/** 2163 * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53. 
2164 * 2165 * @return Returns number of bytes consumed. 2166 */ 2167static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc, 2168 GetBitContext *gb) 2169{ 2170 int i; 2171 int num_excl_chan = 0; 2172 2173 do { 2174 for (i = 0; i < 7; i++) 2175 che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb); 2176 } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb)); 2177 2178 return num_excl_chan / 7; 2179} 2180 2181/** 2182 * Decode dynamic range information; reference: table 4.52. 2183 * 2184 * @return Returns number of bytes consumed. 2185 */ 2186static int decode_dynamic_range(DynamicRangeControl *che_drc, 2187 GetBitContext *gb) 2188{ 2189 int n = 1; 2190 int drc_num_bands = 1; 2191 int i; 2192 2193 /* pce_tag_present? */ 2194 if (get_bits1(gb)) { 2195 che_drc->pce_instance_tag = get_bits(gb, 4); 2196 skip_bits(gb, 4); // tag_reserved_bits 2197 n++; 2198 } 2199 2200 /* excluded_chns_present? */ 2201 if (get_bits1(gb)) { 2202 n += decode_drc_channel_exclusions(che_drc, gb); 2203 } 2204 2205 /* drc_bands_present? */ 2206 if (get_bits1(gb)) { 2207 che_drc->band_incr = get_bits(gb, 4); 2208 che_drc->interpolation_scheme = get_bits(gb, 4); 2209 n++; 2210 drc_num_bands += che_drc->band_incr; 2211 for (i = 0; i < drc_num_bands; i++) { 2212 che_drc->band_top[i] = get_bits(gb, 8); 2213 n++; 2214 } 2215 } 2216 2217 /* prog_ref_level_present? 
*/ 2218 if (get_bits1(gb)) { 2219 che_drc->prog_ref_level = get_bits(gb, 7); 2220 skip_bits1(gb); // prog_ref_level_reserved_bits 2221 n++; 2222 } 2223 2224 for (i = 0; i < drc_num_bands; i++) { 2225 che_drc->dyn_rng_sgn[i] = get_bits1(gb); 2226 che_drc->dyn_rng_ctl[i] = get_bits(gb, 7); 2227 n++; 2228 } 2229 2230 return n; 2231} 2232 2233static int decode_fill(AACContext *ac, GetBitContext *gb, int len) { 2234 uint8_t buf[256]; 2235 int i, major, minor; 2236 2237 if (len < 13+7*8) 2238 goto unknown; 2239 2240 get_bits(gb, 13); len -= 13; 2241 2242 for(i=0; i+1<sizeof(buf) && len>=8; i++, len-=8) 2243 buf[i] = get_bits(gb, 8); 2244 2245 buf[i] = 0; 2246 if (ac->avctx->debug & FF_DEBUG_PICT_INFO) 2247 av_log(ac->avctx, AV_LOG_DEBUG, "FILL:%s\n", buf); 2248 2249 if (sscanf(buf, "libfaac %d.%d", &major, &minor) == 2){ 2250 ac->avctx->internal->skip_samples = 1024; 2251 } 2252 2253unknown: 2254 skip_bits_long(gb, len); 2255 2256 return 0; 2257} 2258 2259/** 2260 * Decode extension data (incomplete); reference: table 4.51. 
 *
 * The 4-bit extension type read first selects the handler; the remaining
 * 8 * cnt - 4 bits are either passed to that handler or skipped.
 *
 * @param che        channel element the SBR data applies to; NULL is
 *                   reported as an error and nothing further is read
 * @param elem_type  type of that element, forwarded to the SBR parser
 * @param cnt length of TYPE_FIL syntactic element in bytes
 *
 * @return Returns number of bytes consumed
 */
static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
                                    ChannelElement *che, enum RawDataBlockType elem_type)
{
    int crc_flag = 0;
    int res = cnt;
    switch (get_bits(gb, 4)) { // extension type
    case EXT_SBR_DATA_CRC:
        crc_flag++;
        /* fallthrough: same handling as EXT_SBR_DATA, with crc_flag set */
    case EXT_SBR_DATA:
        if (!che) {
            // NOTE(review): this path returns cnt without skipping the rest
            // of the payload bits - confirm that is intended.
            av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
            return res;
        } else if (!ac->oc[1].m4ac.sbr) {
            av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
            skip_bits_long(gb, 8 * cnt - 4);
            return res;
        } else if (ac->oc[1].m4ac.sbr == -1 && ac->oc[1].status == OC_LOCKED) {
            av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
            skip_bits_long(gb, 8 * cnt - 4);
            return res;
        } else if (ac->oc[1].m4ac.ps == -1 && ac->oc[1].status < OC_LOCKED && ac->avctx->channels == 1) {
            // PS still undecided (-1), configuration not locked, one channel:
            // enable SBR and PS together and reconfigure the output.
            ac->oc[1].m4ac.sbr = 1;
            ac->oc[1].m4ac.ps = 1;
            ac->avctx->profile = FF_PROFILE_AAC_HE_V2;
            output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
                             ac->oc[1].status, 1);
        } else {
            ac->oc[1].m4ac.sbr = 1;
            ac->avctx->profile = FF_PROFILE_AAC_HE;
        }
        res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
        break;
    case EXT_DYNAMIC_RANGE:
        res = decode_dynamic_range(&ac->che_drc, gb);
        break;
    case EXT_FILL:
        // return value ignored; res stays at cnt
        decode_fill(ac, gb, 8 * cnt - 4);
        break;
    case EXT_FILL_DATA:
    case EXT_DATA_ELEMENT:
    default:
        skip_bits_long(gb, 8 * cnt - 4);
        break;
    };
    return res;
}

/**
 * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
 *
 * @param   decode  1 if tool is used normally, 0 if tool is used in LTP.
 * @param   coef    spectral coefficients
 */
static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
                      IndividualChannelStream *ics, int decode)
{
    const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
    int w, filt, m, i;
    int bottom, top, order, start, end, size, inc;
    float lpc[TNS_MAX_ORDER];
    float tmp[TNS_MAX_ORDER+1];

    for (w = 0; w < ics->num_windows; w++) {
        // Filters are laid out from the top band downwards: each one starts
        // where the previous one ended.
        bottom = ics->num_swb;
        for (filt = 0; filt < tns->n_filt[w]; filt++) {
            top    = bottom;
            bottom = FFMAX(0, top - tns->length[w][filt]);
            order  = tns->order[w][filt];
            if (order == 0)
                continue;

            // tns_decode_coef
            compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);

            // Band limits are clamped to mmm before being turned into
            // coefficient offsets.
            start = ics->swb_offset[FFMIN(bottom, mmm)];
            end   = ics->swb_offset[FFMIN(   top, mmm)];
            if ((size = end - start) <= 0)
                continue;
            if (tns->direction[w][filt]) {
                // run from the last coefficient of the range backwards
                inc   = -1;
                start = end - 1;
            } else {
                inc = 1;
            }
            start += w * 128;

            if (decode) {
                // ar filter
                for (m = 0; m < size; m++, start += inc)
                    for (i = 1; i <= FFMIN(m, order); i++)
                        coef[start] -= coef[start - i * inc] * lpc[i - 1];
            } else {
                // ma filter
                // tmp[] is a delay line of the unfiltered inputs; only the
                // first min(m, order) entries are read on step m.
                for (m = 0; m < size; m++, start += inc) {
                    tmp[0] = coef[start];
                    for (i = 1; i <= FFMIN(m, order); i++)
                        coef[start] += tmp[i] * lpc[i - 1];
                    for (i = order; i > 0; i--)
                        tmp[i] = tmp[i - 1];
                }
            }
        }
    }
}

/**
 *  Apply windowing and MDCT to obtain the spectral
 *  coefficient from the predicted sample by LTP.
 *
 *  The 2048-sample buffer in is windowed in place (it is modified) before
 *  the forward transform writes to out.
 */
static void windowing_and_mdct_ltp(AACContext *ac, float *out,
                                   float *in, IndividualChannelStream *ics)
{
    const float *lwindow      = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
    const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;

    // first half: previous frame's window shape
    if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
        ac->fdsp.vector_fmul(in, in, lwindow_prev, 1024);
    } else {
        memset(in, 0, 448 * sizeof(float));
        ac->fdsp.vector_fmul(in + 448, in + 448, swindow_prev, 128);
    }
    // second half: current frame's window shape, applied reversed
    if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
        ac->fdsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
    } else {
        ac->fdsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
        memset(in + 1024 + 576, 0, 448 * sizeof(float));
    }
    ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in);
}

/**
 * Apply the long term prediction
 *
 * Nothing is done for EIGHT_SHORT_SEQUENCE. sce->ret and ac->buf_mdct are
 * used as scratch for the predicted time and frequency signals.
 */
static void apply_ltp(AACContext *ac, SingleChannelElement *sce)
{
    const LongTermPrediction *ltp = &sce->ics.ltp;
    const uint16_t *offsets = sce->ics.swb_offset;
    int i, sfb;

    if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
        float *predTime = sce->ret;
        float *predFreq = ac->buf_mdct;
        int16_t num_samples = 2048;

        // A lag below 1024 limits how many samples are predicted;
        // the remainder of the buffer is zeroed.
        if (ltp->lag < 1024)
            num_samples = ltp->lag + 1024;
        for (i = 0; i < num_samples; i++)
            predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
        memset(&predTime[i], 0, (2048 - i) * sizeof(float));

        ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);

        if (sce->tns.present)
            ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0);

        // add the prediction only in bands flagged as used
        for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
            if (ltp->used[sfb])
                for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
                    sce->coeffs[i] += predFreq[i];
    }
}

/**
 * Update the LTP buffer for next frame
 *
 * sce->coeffs is reused as scratch (saved_ltp) for the windowed overlap
 * part. ltp_state is then shifted by 1024 samples and refilled from
 * sce->ret and that scratch data.
 */
static void update_ltp(AACContext *ac, SingleChannelElement *sce)
{
    IndividualChannelStream *ics = &sce->ics;
    float *saved     = sce->saved;
    float *saved_ltp = sce->coeffs;
    const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
    int i;

    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
        memcpy(saved_ltp,       saved, 512 * sizeof(float));
        memset(saved_ltp + 576, 0,     448 * sizeof(float));
        ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
        for (i = 0; i < 64; i++)
            saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
    } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
        memcpy(saved_ltp,       ac->buf_mdct + 512, 448 * sizeof(float));
        memset(saved_ltp + 576, 0,                  448 * sizeof(float));
        ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
        for (i = 0; i < 64; i++)
            saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
    } else { // LONG_STOP or ONLY_LONG
        ac->fdsp.vector_fmul_reverse(saved_ltp,       ac->buf_mdct + 512,     &lwindow[512],     512);
        for (i = 0; i < 512; i++)
            saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i];
    }

    // slide the history: drop the oldest 1024 samples, append output and overlap
    memcpy(sce->ltp_state,      sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state));
    memcpy(sce->ltp_state+1024, sce->ret,            1024 * sizeof(*sce->ltp_state));
    memcpy(sce->ltp_state+2048, saved_ltp,           1024 * sizeof(*sce->ltp_state));
}

/**
 * Conduct IMDCT and windowing.
 *
 * Reads sce->coeffs, writes the frame to sce->ret and keeps the overlap for
 * the next call in sce->saved. ac->buf_mdct and ac->temp are scratch.
 */
static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
{
    IndividualChannelStream *ics = &sce->ics;
    float *in    = sce->coeffs;
    float *out   = sce->ret;
    float *saved = sce->saved;
    const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
    const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
    float *buf  = ac->buf_mdct;
    float *temp = ac->temp;
    int i;

    // imdct
    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
        for (i = 0; i < 1024; i += 128)
            ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
    } else
        ac->mdct.imdct_half(&ac->mdct, buf, in);

    /* window overlapping
     * NOTE: To simplify the overlapping code, all 'meaningless' short to long
     * and long to short transitions are considered to be short to short
     * transitions. This leaves just two cases (long to long and short to short)
     * with a little special sauce for EIGHT_SHORT_SEQUENCE.
     */
    if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
            (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
        ac->fdsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, 512);
    } else {
        memcpy(                         out,               saved,            448 * sizeof(float));

        if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
            ac->fdsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, 64);
            ac->fdsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      64);
            ac->fdsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      64);
            ac->fdsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      64);
            // The fifth short window straddles the frame boundary: its first
            // half goes to out here, its second half to saved below.
            ac->fdsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      64);
            memcpy(                     out + 448 + 4*128, temp, 64 * sizeof(float));
        } else {
            ac->fdsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, 64);
            memcpy(                     out + 576,         buf + 64,         448 * sizeof(float));
        }
    }

    // buffer update
    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
        memcpy(                     saved,       temp + 64,         64 * sizeof(float));
        ac->fdsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 64);
        ac->fdsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
        ac->fdsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
        memcpy(                     saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
    } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
        memcpy(                     saved,       buf + 512,        448 * sizeof(float));
        memcpy(                     saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
    } else { // LONG_STOP or ONLY_LONG
        memcpy(                     saved,       buf + 512,        512 * sizeof(float));
    }
}

/**
 * IMDCT and windowing variant selected for AOT_ER_AAC_LD.
 *
 * Same buffer roles as imdct_and_windowing(): sce->coeffs in, sce->ret out,
 * sce->saved carries 256 overlap samples.
 */
static void imdct_and_windowing_ld(AACContext *ac, SingleChannelElement *sce)
{
    IndividualChannelStream *ics = &sce->ics;
    float *in    = sce->coeffs;
    float *out   = sce->ret;
    float *saved = sce->saved;
    float *buf  = ac->buf_mdct;

    // imdct
    // NOTE(review): the function pointer is taken from ac->mdct while the
    // context passed is ac->mdct_ld - presumably both share one
    // implementation; confirm.
    ac->mdct.imdct_half(&ac->mdct_ld, buf, in);

    // window overlapping
    if (ics->use_kb_window[1]) {
        // AAC LD uses a low overlap sine window instead of a KBD window
        memcpy(out, saved, 192 * sizeof(float));
        ac->fdsp.vector_fmul_window(out + 192, saved + 192, buf, ff_sine_128, 64);
        memcpy(                     out + 320, buf + 64, 192 * sizeof(float));
    } else {
        ac->fdsp.vector_fmul_window(out, saved, buf, ff_sine_512, 256);
    }

    // buffer update
    memcpy(saved, buf + 256, 256 * sizeof(float));
}

/**
 * Inverse transform and windowing variant selected for AOT_ER_AAC_ELD.
 *
 * sce->coeffs is permuted in place before the transform. sce->saved keeps
 * three blocks of n = 512 samples of history, the newest first.
 */
static void imdct_and_windowing_eld(AACContext *ac, SingleChannelElement *sce)
{
    float *in    = sce->coeffs;
    float *out   = sce->ret;
    float *saved = sce->saved;
    const float *const window = ff_aac_eld_window;
    float *buf  = ac->buf_mdct;
    int i;
    const int n  = 512;
    const int n2 = n >> 1;
    const int n4 = n >> 2;

    // Inverse transform, mapped to the conventional IMDCT by
    // Chivukula, R.K.; Reznik, Y.A.; Devarajan, V.,
    // "Efficient algorithms for MPEG-4 AAC-ELD, AAC-LD and AAC-LC filterbanks,"
    // International Conference on Audio, Language and Image Processing, ICALIP 2008.
    // URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=4590245&isnumber=4589950
    for (i = 0; i < n2; i+=2) {
        float temp;
        temp =  in[i    ]; in[i    ] = -in[n - 1 - i]; in[n - 1 - i] = temp;
        temp = -in[i + 1]; in[i + 1] =  in[n - 2 - i]; in[n - 2 - i] = temp;
    }
    ac->mdct.imdct_half(&ac->mdct_ld, buf, in);
    for (i = 0; i < n; i+=2) {
        buf[i] = -buf[i];
    }
    // Like with the regular IMDCT at this point we still have the middle half
    // of a transform but with even symmetry on the left and odd symmetry on
    // the right

    // window overlapping
    // The spec says to use samples [0..511] but the reference decoder uses
    // samples [128..639].
    for (i = n4; i < n2; i ++) {
        out[i - n4] =    buf[n2 - 1 - i]       * window[i       - n4] +
                       saved[      i + n2]     * window[i +   n - n4] +
                      -saved[  n + n2 - 1 - i] * window[i + 2*n - n4] +
                      -saved[2*n + n2 + i]     * window[i + 3*n - n4];
    }
    for (i = 0; i < n2; i ++) {
        out[n4 + i] =    buf[i]               * window[i + n2       - n4] +
                      -saved[      n - 1 - i] * window[i + n2 +   n - n4] +
                      -saved[  n + i]         * window[i + n2 + 2*n - n4] +
                       saved[2*n + n - 1 - i] * window[i + n2 + 3*n - n4];
    }
    for (i = 0; i < n4; i ++) {
        out[n2 + n4 + i] =    buf[      i + n2]     * window[i +   n - n4] +
                           -saved[      n2 - 1 - i] * window[i + 2*n - n4] +
                           -saved[  n + n2 + i]     * window[i + 3*n - n4];
    }

    // buffer update
    // regions overlap, hence memmove: history moves back by one block
    memmove(saved + n, saved, 2 * n * sizeof(float));
    memcpy( saved,       buf,     n * sizeof(float));
}

/**
 * Apply dependent channel coupling (applied before IMDCT).
2611 * 2612 * @param index index into coupling gain array 2613 */ 2614static void apply_dependent_coupling(AACContext *ac, 2615 SingleChannelElement *target, 2616 ChannelElement *cce, int index) 2617{ 2618 IndividualChannelStream *ics = &cce->ch[0].ics; 2619 const uint16_t *offsets = ics->swb_offset; 2620 float *dest = target->coeffs; 2621 const float *src = cce->ch[0].coeffs; 2622 int g, i, group, k, idx = 0; 2623 if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) { 2624 av_log(ac->avctx, AV_LOG_ERROR, 2625 "Dependent coupling is not supported together with LTP\n"); 2626 return; 2627 } 2628 for (g = 0; g < ics->num_window_groups; g++) { 2629 for (i = 0; i < ics->max_sfb; i++, idx++) { 2630 if (cce->ch[0].band_type[idx] != ZERO_BT) { 2631 const float gain = cce->coup.gain[index][idx]; 2632 for (group = 0; group < ics->group_len[g]; group++) { 2633 for (k = offsets[i]; k < offsets[i + 1]; k++) { 2634 // FIXME: SIMDify 2635 dest[group * 128 + k] += gain * src[group * 128 + k]; 2636 } 2637 } 2638 } 2639 } 2640 dest += ics->group_len[g] * 128; 2641 src += ics->group_len[g] * 128; 2642 } 2643} 2644 2645/** 2646 * Apply independent channel coupling (applied after IMDCT). 
2647 * 2648 * @param index index into coupling gain array 2649 */ 2650static void apply_independent_coupling(AACContext *ac, 2651 SingleChannelElement *target, 2652 ChannelElement *cce, int index) 2653{ 2654 int i; 2655 const float gain = cce->coup.gain[index][0]; 2656 const float *src = cce->ch[0].ret; 2657 float *dest = target->ret; 2658 const int len = 1024 << (ac->oc[1].m4ac.sbr == 1); 2659 2660 for (i = 0; i < len; i++) 2661 dest[i] += gain * src[i]; 2662} 2663 2664/** 2665 * channel coupling transformation interface 2666 * 2667 * @param apply_coupling_method pointer to (in)dependent coupling function 2668 */ 2669static void apply_channel_coupling(AACContext *ac, ChannelElement *cc, 2670 enum RawDataBlockType type, int elem_id, 2671 enum CouplingPoint coupling_point, 2672 void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index)) 2673{ 2674 int i, c; 2675 2676 for (i = 0; i < MAX_ELEM_ID; i++) { 2677 ChannelElement *cce = ac->che[TYPE_CCE][i]; 2678 int index = 0; 2679 2680 if (cce && cce->coup.coupling_point == coupling_point) { 2681 ChannelCoupling *coup = &cce->coup; 2682 2683 for (c = 0; c <= coup->num_coupled; c++) { 2684 if (coup->type[c] == type && coup->id_select[c] == elem_id) { 2685 if (coup->ch_select[c] != 1) { 2686 apply_coupling_method(ac, &cc->ch[0], cce, index); 2687 if (coup->ch_select[c] != 0) 2688 index++; 2689 } 2690 if (coup->ch_select[c] != 2) 2691 apply_coupling_method(ac, &cc->ch[1], cce, index++); 2692 } else 2693 index += 1 + (coup->ch_select[c] == 3); 2694 } 2695 } 2696 } 2697} 2698 2699/** 2700 * Convert spectral data to float samples, applying all supported tools as appropriate. 
 */
static void spectral_to_sample(AACContext *ac)
{
    int i, type;
    void (*imdct_and_window)(AACContext *ac, SingleChannelElement *sce);
    // pick the filterbank once per frame from the audio object type
    switch (ac->oc[1].m4ac.object_type) {
    case AOT_ER_AAC_LD:
        imdct_and_window = imdct_and_windowing_ld;
        break;
    case AOT_ER_AAC_ELD:
        imdct_and_window = imdct_and_windowing_eld;
        break;
    default:
        imdct_and_window = ac->imdct_and_windowing;
    }
    // Element types are visited from 3 down to 0.
    // NOTE(review): presumably so that coupling elements are processed
    // before the SCE/CPE elements they couple into - confirm against the
    // RawDataBlockType enum order.
    for (type = 3; type >= 0; type--) {
        for (i = 0; i < MAX_ELEM_ID; i++) {
            ChannelElement *che = ac->che[type][i];
            if (che) {
                if (type <= TYPE_CPE)
                    apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
                if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
                    // the second channel is only considered for a CPE
                    if (che->ch[0].ics.predictor_present) {
                        if (che->ch[0].ics.ltp.present)
                            ac->apply_ltp(ac, &che->ch[0]);
                        if (che->ch[1].ics.ltp.present && type == TYPE_CPE)
                            ac->apply_ltp(ac, &che->ch[1]);
                    }
                }
                if (che->ch[0].tns.present)
                    ac->apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
                if (che->ch[1].tns.present)
                    ac->apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
                if (type <= TYPE_CPE)
                    apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
                // a coupling element is only transformed to the time domain
                // when it couples after the IMDCT
                if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
                    imdct_and_window(ac, &che->ch[0]);
                    if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
                        ac->update_ltp(ac, &che->ch[0]);
                    if (type == TYPE_CPE) {
                        imdct_and_window(ac, &che->ch[1]);
                        if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
                            ac->update_ltp(ac, &che->ch[1]);
                    }
                    if (ac->oc[1].m4ac.sbr > 0) {
                        ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
                    }
                }
                if (type <= TYPE_CCE)
                    apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
            }
        }
    }
}

static int
parse_adts_frame_header(AACContext *ac, GetBitContext *gb) 2757{ 2758 int size; 2759 AACADTSHeaderInfo hdr_info; 2760 uint8_t layout_map[MAX_ELEM_ID*4][3]; 2761 int layout_map_tags, ret; 2762 2763 size = avpriv_aac_parse_header(gb, &hdr_info); 2764 if (size > 0) { 2765 if (!ac->warned_num_aac_frames && hdr_info.num_aac_frames != 1) { 2766 // This is 2 for "VLB " audio in NSV files. 2767 // See samples/nsv/vlb_audio. 2768 avpriv_report_missing_feature(ac->avctx, 2769 "More than one AAC RDB per ADTS frame"); 2770 ac->warned_num_aac_frames = 1; 2771 } 2772 push_output_configuration(ac); 2773 if (hdr_info.chan_config) { 2774 ac->oc[1].m4ac.chan_config = hdr_info.chan_config; 2775 if ((ret = set_default_channel_config(ac->avctx, 2776 layout_map, 2777 &layout_map_tags, 2778 hdr_info.chan_config)) < 0) 2779 return ret; 2780 if ((ret = output_configure(ac, layout_map, layout_map_tags, 2781 FFMAX(ac->oc[1].status, 2782 OC_TRIAL_FRAME), 0)) < 0) 2783 return ret; 2784 } else { 2785 ac->oc[1].m4ac.chan_config = 0; 2786 /** 2787 * dual mono frames in Japanese DTV can have chan_config 0 2788 * WITHOUT specifying PCE. 2789 * thus, set dual mono as default. 
2790 */ 2791 if (ac->dmono_mode && ac->oc[0].status == OC_NONE) { 2792 layout_map_tags = 2; 2793 layout_map[0][0] = layout_map[1][0] = TYPE_SCE; 2794 layout_map[0][2] = layout_map[1][2] = AAC_CHANNEL_FRONT; 2795 layout_map[0][1] = 0; 2796 layout_map[1][1] = 1; 2797 if (output_configure(ac, layout_map, layout_map_tags, 2798 OC_TRIAL_FRAME, 0)) 2799 return -7; 2800 } 2801 } 2802 ac->oc[1].m4ac.sample_rate = hdr_info.sample_rate; 2803 ac->oc[1].m4ac.sampling_index = hdr_info.sampling_index; 2804 ac->oc[1].m4ac.object_type = hdr_info.object_type; 2805 if (ac->oc[0].status != OC_LOCKED || 2806 ac->oc[0].m4ac.chan_config != hdr_info.chan_config || 2807 ac->oc[0].m4ac.sample_rate != hdr_info.sample_rate) { 2808 ac->oc[1].m4ac.sbr = -1; 2809 ac->oc[1].m4ac.ps = -1; 2810 } 2811 if (!hdr_info.crc_absent) 2812 skip_bits(gb, 16); 2813 } 2814 return size; 2815} 2816 2817static int aac_decode_er_frame(AVCodecContext *avctx, void *data, 2818 int *got_frame_ptr, GetBitContext *gb) 2819{ 2820 AACContext *ac = avctx->priv_data; 2821 ChannelElement *che; 2822 int err, i; 2823 int samples = 1024; 2824 int chan_config = ac->oc[1].m4ac.chan_config; 2825 int aot = ac->oc[1].m4ac.object_type; 2826 2827 if (aot == AOT_ER_AAC_LD || aot == AOT_ER_AAC_ELD) 2828 samples >>= 1; 2829 2830 ac->frame = data; 2831 2832 if ((err = frame_configure_elements(avctx)) < 0) 2833 return err; 2834 2835 // The FF_PROFILE_AAC_* defines are all object_type - 1 2836 // This may lead to an undefined profile being signaled 2837 ac->avctx->profile = ac->oc[1].m4ac.object_type - 1; 2838 2839 ac->tags_mapped = 0; 2840 2841 if (chan_config < 0 || chan_config >= 8) { 2842 avpriv_request_sample(avctx, "Unknown ER channel configuration %d", 2843 ac->oc[1].m4ac.chan_config); 2844 return AVERROR_INVALIDDATA; 2845 } 2846 for (i = 0; i < tags_per_config[chan_config]; i++) { 2847 const int elem_type = aac_channel_layout_map[chan_config-1][i][0]; 2848 const int elem_id = aac_channel_layout_map[chan_config-1][i][1]; 2849 if 
(!(che=get_che(ac, elem_type, elem_id))) { 2850 av_log(ac->avctx, AV_LOG_ERROR, 2851 "channel element %d.%d is not allocated\n", 2852 elem_type, elem_id); 2853 return AVERROR_INVALIDDATA; 2854 } 2855 if (aot != AOT_ER_AAC_ELD) 2856 skip_bits(gb, 4); 2857 switch (elem_type) { 2858 case TYPE_SCE: 2859 err = decode_ics(ac, &che->ch[0], gb, 0, 0); 2860 break; 2861 case TYPE_CPE: 2862 err = decode_cpe(ac, gb, che); 2863 break; 2864 case TYPE_LFE: 2865 err = decode_ics(ac, &che->ch[0], gb, 0, 0); 2866 break; 2867 } 2868 if (err < 0) 2869 return err; 2870 } 2871 2872 spectral_to_sample(ac); 2873 2874 ac->frame->nb_samples = samples; 2875 ac->frame->sample_rate = avctx->sample_rate; 2876 *got_frame_ptr = 1; 2877 2878 skip_bits_long(gb, get_bits_left(gb)); 2879 return 0; 2880} 2881 2882static int aac_decode_frame_int(AVCodecContext *avctx, void *data, 2883 int *got_frame_ptr, GetBitContext *gb, AVPacket *avpkt) 2884{ 2885 AACContext *ac = avctx->priv_data; 2886 ChannelElement *che = NULL, *che_prev = NULL; 2887 enum RawDataBlockType elem_type, elem_type_prev = TYPE_END; 2888 int err, elem_id; 2889 int samples = 0, multiplier, audio_found = 0, pce_found = 0; 2890 int is_dmono, sce_count = 0; 2891 2892 ac->frame = data; 2893 2894 if (show_bits(gb, 12) == 0xfff) { 2895 if ((err = parse_adts_frame_header(ac, gb)) < 0) { 2896 av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n"); 2897 goto fail; 2898 } 2899 if (ac->oc[1].m4ac.sampling_index > 12) { 2900 av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->oc[1].m4ac.sampling_index); 2901 err = AVERROR_INVALIDDATA; 2902 goto fail; 2903 } 2904 } 2905 2906 if ((err = frame_configure_elements(avctx)) < 0) 2907 goto fail; 2908 2909 // The FF_PROFILE_AAC_* defines are all object_type - 1 2910 // This may lead to an undefined profile being signaled 2911 ac->avctx->profile = ac->oc[1].m4ac.object_type - 1; 2912 2913 ac->tags_mapped = 0; 2914 // parse 2915 while ((elem_type = get_bits(gb, 3)) != TYPE_END) { 
2916 elem_id = get_bits(gb, 4); 2917 2918 if (elem_type < TYPE_DSE) { 2919 if (!(che=get_che(ac, elem_type, elem_id))) { 2920 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n", 2921 elem_type, elem_id); 2922 err = AVERROR_INVALIDDATA; 2923 goto fail; 2924 } 2925 samples = 1024; 2926 } 2927 2928 switch (elem_type) { 2929 2930 case TYPE_SCE: 2931 err = decode_ics(ac, &che->ch[0], gb, 0, 0); 2932 audio_found = 1; 2933 sce_count++; 2934 break; 2935 2936 case TYPE_CPE: 2937 err = decode_cpe(ac, gb, che); 2938 audio_found = 1; 2939 break; 2940 2941 case TYPE_CCE: 2942 err = decode_cce(ac, gb, che); 2943 break; 2944 2945 case TYPE_LFE: 2946 err = decode_ics(ac, &che->ch[0], gb, 0, 0); 2947 audio_found = 1; 2948 break; 2949 2950 case TYPE_DSE: 2951 err = skip_data_stream_element(ac, gb); 2952 break; 2953 2954 case TYPE_PCE: { 2955 uint8_t layout_map[MAX_ELEM_ID*4][3]; 2956 int tags; 2957 push_output_configuration(ac); 2958 tags = decode_pce(avctx, &ac->oc[1].m4ac, layout_map, gb); 2959 if (tags < 0) { 2960 err = tags; 2961 break; 2962 } 2963 if (pce_found) { 2964 av_log(avctx, AV_LOG_ERROR, 2965 "Not evaluating a further program_config_element as this construct is dubious at best.\n"); 2966 } else { 2967 err = output_configure(ac, layout_map, tags, OC_TRIAL_PCE, 1); 2968 if (!err) 2969 ac->oc[1].m4ac.chan_config = 0; 2970 pce_found = 1; 2971 } 2972 break; 2973 } 2974 2975 case TYPE_FIL: 2976 if (elem_id == 15) 2977 elem_id += get_bits(gb, 8) - 1; 2978 if (get_bits_left(gb) < 8 * elem_id) { 2979 av_log(avctx, AV_LOG_ERROR, "TYPE_FIL: "overread_err); 2980 err = AVERROR_INVALIDDATA; 2981 goto fail; 2982 } 2983 while (elem_id > 0) 2984 elem_id -= decode_extension_payload(ac, gb, elem_id, che_prev, elem_type_prev); 2985 err = 0; /* FIXME */ 2986 break; 2987 2988 default: 2989 err = AVERROR_BUG; /* should not happen, but keeps compiler happy */ 2990 break; 2991 } 2992 2993 che_prev = che; 2994 elem_type_prev = elem_type; 2995 2996 if (err) 2997 goto 
fail; 2998 2999 if (get_bits_left(gb) < 3) { 3000 av_log(avctx, AV_LOG_ERROR, overread_err); 3001 err = AVERROR_INVALIDDATA; 3002 goto fail; 3003 } 3004 } 3005 3006 spectral_to_sample(ac); 3007 3008 multiplier = (ac->oc[1].m4ac.sbr == 1) ? ac->oc[1].m4ac.ext_sample_rate > ac->oc[1].m4ac.sample_rate : 0; 3009 samples <<= multiplier; 3010 3011 if (ac->oc[1].status && audio_found) { 3012 avctx->sample_rate = ac->oc[1].m4ac.sample_rate << multiplier; 3013 avctx->frame_size = samples; 3014 ac->oc[1].status = OC_LOCKED; 3015 } 3016 3017 if (multiplier) { 3018 int side_size; 3019 const uint8_t *side = av_packet_get_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, &side_size); 3020 if (side && side_size>=4) 3021 AV_WL32(side, 2*AV_RL32(side)); 3022 } 3023 3024 *got_frame_ptr = !!samples; 3025 if (samples) { 3026 ac->frame->nb_samples = samples; 3027 ac->frame->sample_rate = avctx->sample_rate; 3028 } else 3029 av_frame_unref(ac->frame); 3030 *got_frame_ptr = !!samples; 3031 3032 /* for dual-mono audio (SCE + SCE) */ 3033 is_dmono = ac->dmono_mode && sce_count == 2 && 3034 ac->oc[1].channel_layout == (AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT); 3035 if (is_dmono) { 3036 if (ac->dmono_mode == 1) 3037 ((AVFrame *)data)->data[1] =((AVFrame *)data)->data[0]; 3038 else if (ac->dmono_mode == 2) 3039 ((AVFrame *)data)->data[0] =((AVFrame *)data)->data[1]; 3040 } 3041 3042 return 0; 3043fail: 3044 pop_output_configuration(ac); 3045 return err; 3046} 3047 3048static int aac_decode_frame(AVCodecContext *avctx, void *data, 3049 int *got_frame_ptr, AVPacket *avpkt) 3050{ 3051 AACContext *ac = avctx->priv_data; 3052 const uint8_t *buf = avpkt->data; 3053 int buf_size = avpkt->size; 3054 GetBitContext gb; 3055 int buf_consumed; 3056 int buf_offset; 3057 int err; 3058 int new_extradata_size; 3059 const uint8_t *new_extradata = av_packet_get_side_data(avpkt, 3060 AV_PKT_DATA_NEW_EXTRADATA, 3061 &new_extradata_size); 3062 int jp_dualmono_size; 3063 const uint8_t *jp_dualmono = 
av_packet_get_side_data(avpkt, 3064 AV_PKT_DATA_JP_DUALMONO, 3065 &jp_dualmono_size); 3066 3067 if (new_extradata && 0) { 3068 av_free(avctx->extradata); 3069 avctx->extradata = av_mallocz(new_extradata_size + 3070 FF_INPUT_BUFFER_PADDING_SIZE); 3071 if (!avctx->extradata) 3072 return AVERROR(ENOMEM); 3073 avctx->extradata_size = new_extradata_size; 3074 memcpy(avctx->extradata, new_extradata, new_extradata_size); 3075 push_output_configuration(ac); 3076 if (decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac, 3077 avctx->extradata, 3078 avctx->extradata_size*8, 1) < 0) { 3079 pop_output_configuration(ac); 3080 return AVERROR_INVALIDDATA; 3081 } 3082 } 3083 3084 ac->dmono_mode = 0; 3085 if (jp_dualmono && jp_dualmono_size > 0) 3086 ac->dmono_mode = 1 + *jp_dualmono; 3087 if (ac->force_dmono_mode >= 0) 3088 ac->dmono_mode = ac->force_dmono_mode; 3089 3090 if (INT_MAX / 8 <= buf_size) 3091 return AVERROR_INVALIDDATA; 3092 3093 if ((err = init_get_bits(&gb, buf, buf_size * 8)) < 0) 3094 return err; 3095 3096 switch (ac->oc[1].m4ac.object_type) { 3097 case AOT_ER_AAC_LC: 3098 case AOT_ER_AAC_LTP: 3099 case AOT_ER_AAC_LD: 3100 case AOT_ER_AAC_ELD: 3101 err = aac_decode_er_frame(avctx, data, got_frame_ptr, &gb); 3102 break; 3103 default: 3104 err = aac_decode_frame_int(avctx, data, got_frame_ptr, &gb, avpkt); 3105 } 3106 if (err < 0) 3107 return err; 3108 3109 buf_consumed = (get_bits_count(&gb) + 7) >> 3; 3110 for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++) 3111 if (buf[buf_offset]) 3112 break; 3113 3114 return buf_size > buf_offset ? 
buf_consumed : buf_size; 3115} 3116 3117static av_cold int aac_decode_close(AVCodecContext *avctx) 3118{ 3119 AACContext *ac = avctx->priv_data; 3120 int i, type; 3121 3122 for (i = 0; i < MAX_ELEM_ID; i++) { 3123 for (type = 0; type < 4; type++) { 3124 if (ac->che[type][i]) 3125 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr); 3126 av_freep(&ac->che[type][i]); 3127 } 3128 } 3129 3130 ff_mdct_end(&ac->mdct); 3131 ff_mdct_end(&ac->mdct_small); 3132 ff_mdct_end(&ac->mdct_ld); 3133 ff_mdct_end(&ac->mdct_ltp); 3134 return 0; 3135} 3136 3137 3138#define LOAS_SYNC_WORD 0x2b7 ///< 11 bits LOAS sync word 3139 3140struct LATMContext { 3141 AACContext aac_ctx; ///< containing AACContext 3142 int initialized; ///< initialized after a valid extradata was seen 3143 3144 // parser data 3145 int audio_mux_version_A; ///< LATM syntax version 3146 int frame_length_type; ///< 0/1 variable/fixed frame length 3147 int frame_length; ///< frame length for fixed frame length 3148}; 3149 3150static inline uint32_t latm_get_value(GetBitContext *b) 3151{ 3152 int length = get_bits(b, 2); 3153 3154 return get_bits_long(b, (length+1)*8); 3155} 3156 3157static int latm_decode_audio_specific_config(struct LATMContext *latmctx, 3158 GetBitContext *gb, int asclen) 3159{ 3160 AACContext *ac = &latmctx->aac_ctx; 3161 AVCodecContext *avctx = ac->avctx; 3162 MPEG4AudioConfig m4ac = { 0 }; 3163 int config_start_bit = get_bits_count(gb); 3164 int sync_extension = 0; 3165 int bits_consumed, esize; 3166 3167 if (asclen) { 3168 sync_extension = 1; 3169 asclen = FFMIN(asclen, get_bits_left(gb)); 3170 } else 3171 asclen = get_bits_left(gb); 3172 3173 if (config_start_bit % 8) { 3174 avpriv_request_sample(latmctx->aac_ctx.avctx, 3175 "Non-byte-aligned audio-specific config"); 3176 return AVERROR_PATCHWELCOME; 3177 } 3178 if (asclen <= 0) 3179 return AVERROR_INVALIDDATA; 3180 bits_consumed = decode_audio_specific_config(NULL, avctx, &m4ac, 3181 gb->buffer + (config_start_bit / 8), 3182 asclen, sync_extension); 
3183 3184 if (bits_consumed < 0) 3185 return AVERROR_INVALIDDATA; 3186 3187 if (!latmctx->initialized || 3188 ac->oc[1].m4ac.sample_rate != m4ac.sample_rate || 3189 ac->oc[1].m4ac.chan_config != m4ac.chan_config) { 3190 3191 if(latmctx->initialized) { 3192 av_log(avctx, AV_LOG_INFO, "audio config changed\n"); 3193 } else { 3194 av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n"); 3195 } 3196 latmctx->initialized = 0; 3197 3198 esize = (bits_consumed+7) / 8; 3199 3200 if (avctx->extradata_size < esize) { 3201 av_free(avctx->extradata); 3202 avctx->extradata = av_malloc(esize + FF_INPUT_BUFFER_PADDING_SIZE); 3203 if (!avctx->extradata) 3204 return AVERROR(ENOMEM); 3205 } 3206 3207 avctx->extradata_size = esize; 3208 memcpy(avctx->extradata, gb->buffer + (config_start_bit/8), esize); 3209 memset(avctx->extradata+esize, 0, FF_INPUT_BUFFER_PADDING_SIZE); 3210 } 3211 skip_bits_long(gb, bits_consumed); 3212 3213 return bits_consumed; 3214} 3215 3216static int read_stream_mux_config(struct LATMContext *latmctx, 3217 GetBitContext *gb) 3218{ 3219 int ret, audio_mux_version = get_bits(gb, 1); 3220 3221 latmctx->audio_mux_version_A = 0; 3222 if (audio_mux_version) 3223 latmctx->audio_mux_version_A = get_bits(gb, 1); 3224 3225 if (!latmctx->audio_mux_version_A) { 3226 3227 if (audio_mux_version) 3228 latm_get_value(gb); // taraFullness 3229 3230 skip_bits(gb, 1); // allStreamSameTimeFraming 3231 skip_bits(gb, 6); // numSubFrames 3232 // numPrograms 3233 if (get_bits(gb, 4)) { // numPrograms 3234 avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple programs"); 3235 return AVERROR_PATCHWELCOME; 3236 } 3237 3238 // for each program (which there is only one in DVB) 3239 3240 // for each layer (which there is only one in DVB) 3241 if (get_bits(gb, 3)) { // numLayer 3242 avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple layers"); 3243 return AVERROR_PATCHWELCOME; 3244 } 3245 3246 // for all but first stream: use_same_config = get_bits(gb, 1); 3247 if (!audio_mux_version) 
{ 3248 if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0) 3249 return ret; 3250 } else { 3251 int ascLen = latm_get_value(gb); 3252 if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0) 3253 return ret; 3254 ascLen -= ret; 3255 skip_bits_long(gb, ascLen); 3256 } 3257 3258 latmctx->frame_length_type = get_bits(gb, 3); 3259 switch (latmctx->frame_length_type) { 3260 case 0: 3261 skip_bits(gb, 8); // latmBufferFullness 3262 break; 3263 case 1: 3264 latmctx->frame_length = get_bits(gb, 9); 3265 break; 3266 case 3: 3267 case 4: 3268 case 5: 3269 skip_bits(gb, 6); // CELP frame length table index 3270 break; 3271 case 6: 3272 case 7: 3273 skip_bits(gb, 1); // HVXC frame length table index 3274 break; 3275 } 3276 3277 if (get_bits(gb, 1)) { // other data 3278 if (audio_mux_version) { 3279 latm_get_value(gb); // other_data_bits 3280 } else { 3281 int esc; 3282 do { 3283 esc = get_bits(gb, 1); 3284 skip_bits(gb, 8); 3285 } while (esc); 3286 } 3287 } 3288 3289 if (get_bits(gb, 1)) // crc present 3290 skip_bits(gb, 8); // config_crc 3291 } 3292 3293 return 0; 3294} 3295 3296static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb) 3297{ 3298 uint8_t tmp; 3299 3300 if (ctx->frame_length_type == 0) { 3301 int mux_slot_length = 0; 3302 do { 3303 tmp = get_bits(gb, 8); 3304 mux_slot_length += tmp; 3305 } while (tmp == 255); 3306 return mux_slot_length; 3307 } else if (ctx->frame_length_type == 1) { 3308 return ctx->frame_length; 3309 } else if (ctx->frame_length_type == 3 || 3310 ctx->frame_length_type == 5 || 3311 ctx->frame_length_type == 7) { 3312 skip_bits(gb, 2); // mux_slot_length_coded 3313 } 3314 return 0; 3315} 3316 3317static int read_audio_mux_element(struct LATMContext *latmctx, 3318 GetBitContext *gb) 3319{ 3320 int err; 3321 uint8_t use_same_mux = get_bits(gb, 1); 3322 if (!use_same_mux) { 3323 if ((err = read_stream_mux_config(latmctx, gb)) < 0) 3324 return err; 3325 } else if 
(!latmctx->aac_ctx.avctx->extradata) { 3326 av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG, 3327 "no decoder config found\n"); 3328 return AVERROR(EAGAIN); 3329 } 3330 if (latmctx->audio_mux_version_A == 0) { 3331 int mux_slot_length_bytes = read_payload_length_info(latmctx, gb); 3332 if (mux_slot_length_bytes * 8 > get_bits_left(gb)) { 3333 av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n"); 3334 return AVERROR_INVALIDDATA; 3335 } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) { 3336 av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, 3337 "frame length mismatch %d << %d\n", 3338 mux_slot_length_bytes * 8, get_bits_left(gb)); 3339 return AVERROR_INVALIDDATA; 3340 } 3341 } 3342 return 0; 3343} 3344 3345 3346static int latm_decode_frame(AVCodecContext *avctx, void *out, 3347 int *got_frame_ptr, AVPacket *avpkt) 3348{ 3349 struct LATMContext *latmctx = avctx->priv_data; 3350 int muxlength, err; 3351 GetBitContext gb; 3352 3353 if ((err = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0) 3354 return err; 3355 3356 // check for LOAS sync word 3357 if (get_bits(&gb, 11) != LOAS_SYNC_WORD) 3358 return AVERROR_INVALIDDATA; 3359 3360 muxlength = get_bits(&gb, 13) + 3; 3361 // not enough data, the parser should have sorted this out 3362 if (muxlength > avpkt->size) 3363 return AVERROR_INVALIDDATA; 3364 3365 if ((err = read_audio_mux_element(latmctx, &gb)) < 0) 3366 return err; 3367 3368 if (!latmctx->initialized) { 3369 if (!avctx->extradata) { 3370 *got_frame_ptr = 0; 3371 return avpkt->size; 3372 } else { 3373 push_output_configuration(&latmctx->aac_ctx); 3374 if ((err = decode_audio_specific_config( 3375 &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac, 3376 avctx->extradata, avctx->extradata_size*8, 1)) < 0) { 3377 pop_output_configuration(&latmctx->aac_ctx); 3378 return err; 3379 } 3380 latmctx->initialized = 1; 3381 } 3382 } 3383 3384 if (show_bits(&gb, 12) == 0xfff) { 3385 av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, 3386 "ADTS header 
detected, probably as result of configuration " 3387 "misparsing\n"); 3388 return AVERROR_INVALIDDATA; 3389 } 3390 3391 if ((err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt)) < 0) 3392 return err; 3393 3394 return muxlength; 3395} 3396 3397static av_cold int latm_decode_init(AVCodecContext *avctx) 3398{ 3399 struct LATMContext *latmctx = avctx->priv_data; 3400 int ret = aac_decode_init(avctx); 3401 3402 if (avctx->extradata_size > 0) 3403 latmctx->initialized = !ret; 3404 3405 return ret; 3406} 3407 3408static void aacdec_init(AACContext *c) 3409{ 3410 c->imdct_and_windowing = imdct_and_windowing; 3411 c->apply_ltp = apply_ltp; 3412 c->apply_tns = apply_tns; 3413 c->windowing_and_mdct_ltp = windowing_and_mdct_ltp; 3414 c->update_ltp = update_ltp; 3415 3416 if(ARCH_MIPS) 3417 ff_aacdec_init_mips(c); 3418} 3419/** 3420 * AVOptions for Japanese DTV specific extensions (ADTS only) 3421 */ 3422#define AACDEC_FLAGS AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM 3423static const AVOption options[] = { 3424 {"dual_mono_mode", "Select the channel to decode for dual mono", 3425 offsetof(AACContext, force_dmono_mode), AV_OPT_TYPE_INT, {.i64=-1}, -1, 2, 3426 AACDEC_FLAGS, "dual_mono_mode"}, 3427 3428 {"auto", "autoselection", 0, AV_OPT_TYPE_CONST, {.i64=-1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"}, 3429 {"main", "Select Main/Left channel", 0, AV_OPT_TYPE_CONST, {.i64= 1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"}, 3430 {"sub" , "Select Sub/Right channel", 0, AV_OPT_TYPE_CONST, {.i64= 2}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"}, 3431 {"both", "Select both channels", 0, AV_OPT_TYPE_CONST, {.i64= 0}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"}, 3432 3433 {NULL}, 3434}; 3435 3436static const AVClass aac_decoder_class = { 3437 .class_name = "AAC decoder", 3438 .item_name = av_default_item_name, 3439 .option = options, 3440 .version = LIBAVUTIL_VERSION_INT, 3441}; 3442 3443AVCodec ff_aac_decoder = { 3444 .name = "aac", 3445 
.long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"), 3446 .type = AVMEDIA_TYPE_AUDIO, 3447 .id = AV_CODEC_ID_AAC, 3448 .priv_data_size = sizeof(AACContext), 3449 .init = aac_decode_init, 3450 .close = aac_decode_close, 3451 .decode = aac_decode_frame, 3452 .sample_fmts = (const enum AVSampleFormat[]) { 3453 AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE 3454 }, 3455 .capabilities = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1, 3456 .channel_layouts = aac_channel_layout, 3457 .flush = flush, 3458 .priv_class = &aac_decoder_class, 3459}; 3460 3461/* 3462 Note: This decoder filter is intended to decode LATM streams transferred 3463 in MPEG transport streams which only contain one program. 3464 To do a more complex LATM demuxing a separate LATM demuxer should be used. 3465*/ 3466AVCodec ff_aac_latm_decoder = { 3467 .name = "aac_latm", 3468 .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"), 3469 .type = AVMEDIA_TYPE_AUDIO, 3470 .id = AV_CODEC_ID_AAC_LATM, 3471 .priv_data_size = sizeof(struct LATMContext), 3472 .init = latm_decode_init, 3473 .close = aac_decode_close, 3474 .decode = latm_decode_frame, 3475 .sample_fmts = (const enum AVSampleFormat[]) { 3476 AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE 3477 }, 3478 .capabilities = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1, 3479 .channel_layouts = aac_channel_layout, 3480 .flush = flush, 3481}; 3482