1/* 2 * AAC encoder 3 * Copyright (C) 2008 Konstantin Shishkov 4 * 5 * This file is part of Libav. 6 * 7 * Libav is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * Libav is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with Libav; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23 * @file 24 * AAC encoder 25 */ 26 27/*********************************** 28 * TODOs: 29 * add sane pulse detection 30 * add temporal noise shaping 31 ***********************************/ 32 33#include "libavutil/opt.h" 34#include "avcodec.h" 35#include "put_bits.h" 36#include "dsputil.h" 37#include "mpeg4audio.h" 38#include "kbdwin.h" 39#include "sinewin.h" 40 41#include "aac.h" 42#include "aactab.h" 43#include "aacenc.h" 44 45#include "psymodel.h" 46 47#define AAC_MAX_CHANNELS 6 48 49static const uint8_t swb_size_1024_96[] = { 50 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 51 12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44, 52 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 53}; 54 55static const uint8_t swb_size_1024_64[] = { 56 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 57 12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36, 58 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40 59}; 60 61static const uint8_t swb_size_1024_48[] = { 62 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 63 12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28, 64 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 65 96 66}; 67 68static const uint8_t swb_size_1024_32[] = { 69 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 70 12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28, 71 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 72}; 73 74static const uint8_t swb_size_1024_24[] = { 75 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 76 12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28, 77 32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64 78}; 79 80static const uint8_t swb_size_1024_16[] = { 81 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 82 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28, 83 32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64 84}; 85 86static const uint8_t swb_size_1024_8[] = { 87 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 88 16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28, 89 32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80 90}; 91 92static const uint8_t *swb_size_1024[] = { 93 swb_size_1024_96, swb_size_1024_96, swb_size_1024_64, 94 swb_size_1024_48, swb_size_1024_48, swb_size_1024_32, 95 swb_size_1024_24, swb_size_1024_24, swb_size_1024_16, 96 swb_size_1024_16, swb_size_1024_16, swb_size_1024_8 97}; 98 99static const uint8_t swb_size_128_96[] = { 100 4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36 101}; 102 103static const uint8_t swb_size_128_48[] = { 104 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16 105}; 106 107static const uint8_t swb_size_128_24[] = { 108 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20 109}; 110 111static const uint8_t swb_size_128_16[] = { 112 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20 113}; 114 115static const uint8_t swb_size_128_8[] = { 116 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20 117}; 118 119static const uint8_t *swb_size_128[] = { 120 /* the last entry on the following row is swb_size_128_64 but is a 121 duplicate of swb_size_128_96 */ 122 swb_size_128_96, swb_size_128_96, swb_size_128_96, 123 swb_size_128_48, swb_size_128_48, swb_size_128_48, 124 swb_size_128_24, swb_size_128_24, swb_size_128_16, 125 swb_size_128_16, swb_size_128_16, swb_size_128_8 126}; 127 128/** default channel configurations */ 129static const uint8_t aac_chan_configs[6][5] = { 130 {1, TYPE_SCE}, // 1 channel - single channel element 131 {1, TYPE_CPE}, // 2 channels - channel pair 132 {2, TYPE_SCE, TYPE_CPE}, // 3 channels - center + stereo 133 {3, TYPE_SCE, TYPE_CPE, TYPE_SCE}, // 4 channels - front center + stereo + back center 134 {3, TYPE_SCE, TYPE_CPE, TYPE_CPE}, // 5 channels - front center + stereo + back stereo 135 {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE 136}; 137 138/** 139 * Make AAC audio config object. 140 * @see 1.6.2.1 "Syntax - AudioSpecificConfig" 141 */ 142static void put_audio_specific_config(AVCodecContext *avctx) 143{ 144 PutBitContext pb; 145 AACEncContext *s = avctx->priv_data; 146 147 init_put_bits(&pb, avctx->extradata, avctx->extradata_size*8); 148 put_bits(&pb, 5, 2); //object type - AAC-LC 149 put_bits(&pb, 4, s->samplerate_index); //sample rate index 150 put_bits(&pb, 4, avctx->channels); 151 //GASpecificConfig 152 put_bits(&pb, 1, 0); //frame length - 1024 samples 153 put_bits(&pb, 1, 0); //does not depend on core coder 154 put_bits(&pb, 1, 0); //is not extension 155 156 //Explicitly Mark SBR absent 157 put_bits(&pb, 11, 0x2b7); //sync extension 158 put_bits(&pb, 5, AOT_SBR); 159 put_bits(&pb, 1, 0); 160 flush_put_bits(&pb); 161} 162 163static av_cold int aac_encode_init(AVCodecContext *avctx) 164{ 165 AACEncContext *s = avctx->priv_data; 166 int i; 167 const uint8_t *sizes[2]; 168 uint8_t grouping[AAC_MAX_CHANNELS]; 169 int lengths[2]; 170 171 avctx->frame_size = 1024; 172 173 for (i = 0; i < 16; i++) 174 if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i]) 175 break; 176 if (i == 16) { 177 av_log(avctx, AV_LOG_ERROR, "Unsupported sample rate %d\n", avctx->sample_rate); 178 return -1; 179 } 180 if (avctx->channels > AAC_MAX_CHANNELS) { 181 av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %d\n", avctx->channels); 182 return -1; 183 } 184 if (avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW) { 185 av_log(avctx, AV_LOG_ERROR, "Unsupported profile %d\n", avctx->profile); 186 return -1; 187 } 188 if (1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * avctx->channels) { 189 av_log(avctx, AV_LOG_ERROR, "Too many bits per frame requested\n"); 190 return -1; 191 } 192 s->samplerate_index = i; 193 194 dsputil_init(&s->dsp, avctx); 195 ff_mdct_init(&s->mdct1024, 11, 0, 1.0); 196 ff_mdct_init(&s->mdct128, 8, 0, 1.0); 197 // window init 198 ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024); 199 ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128); 200 ff_init_ff_sine_windows(10); 201 ff_init_ff_sine_windows(7); 202 203 s->chan_map = aac_chan_configs[avctx->channels-1]; 204 s->samples = av_malloc(2 * 1024 * avctx->channels * sizeof(s->samples[0])); 205 s->cpe = av_mallocz(sizeof(ChannelElement) * s->chan_map[0]); 206 avctx->extradata = av_mallocz(5 + FF_INPUT_BUFFER_PADDING_SIZE); 207 avctx->extradata_size = 5; 208 put_audio_specific_config(avctx); 209 210 sizes[0] = swb_size_1024[i]; 211 sizes[1] = swb_size_128[i]; 212 lengths[0] = ff_aac_num_swb_1024[i]; 213 lengths[1] = ff_aac_num_swb_128[i]; 214 for (i = 0; i < s->chan_map[0]; i++) 215 grouping[i] = s->chan_map[i + 1] == TYPE_CPE; 216 ff_psy_init(&s->psy, avctx, 2, sizes, lengths, s->chan_map[0], grouping); 217 s->psypp = ff_psy_preprocess_init(avctx); 218 s->coder = &ff_aac_coders[2]; 219 220 s->lambda = avctx->global_quality ? avctx->global_quality : 120; 221 222 ff_aac_tableinit(); 223 224 return 0; 225} 226 227static void apply_window_and_mdct(AVCodecContext *avctx, AACEncContext *s, 228 SingleChannelElement *sce, short *audio) 229{ 230 int i, k; 231 const int chans = avctx->channels; 232 const float * lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024; 233 const float * swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; 234 const float * pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; 235 float *output = sce->ret; 236 237 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) { 238 memcpy(output, sce->saved, sizeof(float)*1024); 239 if (sce->ics.window_sequence[0] == LONG_STOP_SEQUENCE) { 240 memset(output, 0, sizeof(output[0]) * 448); 241 for (i = 448; i < 576; i++) 242 output[i] = sce->saved[i] * pwindow[i - 448]; 243 for (i = 576; i < 704; i++) 244 output[i] = sce->saved[i]; 245 } 246 if (sce->ics.window_sequence[0] != LONG_START_SEQUENCE) { 247 for (i = 0; i < 1024; i++) { 248 output[i+1024] = audio[i * chans] * lwindow[1024 - i - 1]; 249 sce->saved[i] = audio[i * chans] * lwindow[i]; 250 } 251 } else { 252 for (i = 0; i < 448; i++) 253 output[i+1024] = audio[i * chans]; 254 for (; i < 576; i++) 255 output[i+1024] = audio[i * chans] * swindow[576 - i - 1]; 256 memset(output+1024+576, 0, sizeof(output[0]) * 448); 257 for (i = 0; i < 1024; i++) 258 sce->saved[i] = audio[i * chans]; 259 } 260 s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output); 261 } else { 262 for (k = 0; k < 1024; k += 128) { 263 for (i = 448 + k; i < 448 + k + 256; i++) 264 output[i - 448 - k] = (i < 1024) 265 ? sce->saved[i] 266 : audio[(i-1024)*chans]; 267 s->dsp.vector_fmul (output, output, k ? swindow : pwindow, 128); 268 s->dsp.vector_fmul_reverse(output+128, output+128, swindow, 128); 269 s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + k, output); 270 } 271 for (i = 0; i < 1024; i++) 272 sce->saved[i] = audio[i * chans]; 273 } 274} 275 276/** 277 * Encode ics_info element. 278 * @see Table 4.6 (syntax of ics_info) 279 */ 280static void put_ics_info(AACEncContext *s, IndividualChannelStream *info) 281{ 282 int w; 283 284 put_bits(&s->pb, 1, 0); // ics_reserved bit 285 put_bits(&s->pb, 2, info->window_sequence[0]); 286 put_bits(&s->pb, 1, info->use_kb_window[0]); 287 if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) { 288 put_bits(&s->pb, 6, info->max_sfb); 289 put_bits(&s->pb, 1, 0); // no prediction 290 } else { 291 put_bits(&s->pb, 4, info->max_sfb); 292 for (w = 1; w < 8; w++) 293 put_bits(&s->pb, 1, !info->group_len[w]); 294 } 295} 296 297/** 298 * Encode MS data. 299 * @see 4.6.8.1 "Joint Coding - M/S Stereo" 300 */ 301static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe) 302{ 303 int i, w; 304 305 put_bits(pb, 2, cpe->ms_mode); 306 if (cpe->ms_mode == 1) 307 for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w]) 308 for (i = 0; i < cpe->ch[0].ics.max_sfb; i++) 309 put_bits(pb, 1, cpe->ms_mask[w*16 + i]); 310} 311 312/** 313 * Produce integer coefficients from scalefactors provided by the model. 314 */ 315static void adjust_frame_information(AACEncContext *apc, ChannelElement *cpe, int chans) 316{ 317 int i, w, w2, g, ch; 318 int start, maxsfb, cmaxsfb; 319 320 for (ch = 0; ch < chans; ch++) { 321 IndividualChannelStream *ics = &cpe->ch[ch].ics; 322 start = 0; 323 maxsfb = 0; 324 cpe->ch[ch].pulse.num_pulse = 0; 325 for (w = 0; w < ics->num_windows*16; w += 16) { 326 for (g = 0; g < ics->num_swb; g++) { 327 //apply M/S 328 if (cpe->common_window && !ch && cpe->ms_mask[w + g]) { 329 for (i = 0; i < ics->swb_sizes[g]; i++) { 330 cpe->ch[0].coeffs[start+i] = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) / 2.0; 331 cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i]; 332 } 333 } 334 start += ics->swb_sizes[g]; 335 } 336 for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w+cmaxsfb-1]; cmaxsfb--) 337 ; 338 maxsfb = FFMAX(maxsfb, cmaxsfb); 339 } 340 ics->max_sfb = maxsfb; 341 342 //adjust zero bands for window groups 343 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) { 344 for (g = 0; g < ics->max_sfb; g++) { 345 i = 1; 346 for (w2 = w; w2 < w + ics->group_len[w]; w2++) { 347 if (!cpe->ch[ch].zeroes[w2*16 + g]) { 348 i = 0; 349 break; 350 } 351 } 352 cpe->ch[ch].zeroes[w*16 + g] = i; 353 } 354 } 355 } 356 357 if (chans > 1 && cpe->common_window) { 358 IndividualChannelStream *ics0 = &cpe->ch[0].ics; 359 IndividualChannelStream *ics1 = &cpe->ch[1].ics; 360 int msc = 0; 361 ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb); 362 ics1->max_sfb = ics0->max_sfb; 363 for (w = 0; w < ics0->num_windows*16; w += 16) 364 for (i = 0; i < ics0->max_sfb; i++) 365 if (cpe->ms_mask[w+i]) 366 msc++; 367 if (msc == 0 || ics0->max_sfb == 0) 368 cpe->ms_mode = 0; 369 else 370 cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2; 371 } 372} 373 374/** 375 * Encode scalefactor band coding type. 376 */ 377static void encode_band_info(AACEncContext *s, SingleChannelElement *sce) 378{ 379 int w; 380 381 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) 382 s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda); 383} 384 385/** 386 * Encode scalefactors. 387 */ 388static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s, 389 SingleChannelElement *sce) 390{ 391 int off = sce->sf_idx[0], diff; 392 int i, w; 393 394 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { 395 for (i = 0; i < sce->ics.max_sfb; i++) { 396 if (!sce->zeroes[w*16 + i]) { 397 diff = sce->sf_idx[w*16 + i] - off + SCALE_DIFF_ZERO; 398 if (diff < 0 || diff > 120) 399 av_log(avctx, AV_LOG_ERROR, "Scalefactor difference is too big to be coded\n"); 400 off = sce->sf_idx[w*16 + i]; 401 put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]); 402 } 403 } 404 } 405} 406 407/** 408 * Encode pulse data. 409 */ 410static void encode_pulses(AACEncContext *s, Pulse *pulse) 411{ 412 int i; 413 414 put_bits(&s->pb, 1, !!pulse->num_pulse); 415 if (!pulse->num_pulse) 416 return; 417 418 put_bits(&s->pb, 2, pulse->num_pulse - 1); 419 put_bits(&s->pb, 6, pulse->start); 420 for (i = 0; i < pulse->num_pulse; i++) { 421 put_bits(&s->pb, 5, pulse->pos[i]); 422 put_bits(&s->pb, 4, pulse->amp[i]); 423 } 424} 425 426/** 427 * Encode spectral coefficients processed by psychoacoustic model. 428 */ 429static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce) 430{ 431 int start, i, w, w2; 432 433 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { 434 start = 0; 435 for (i = 0; i < sce->ics.max_sfb; i++) { 436 if (sce->zeroes[w*16 + i]) { 437 start += sce->ics.swb_sizes[i]; 438 continue; 439 } 440 for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++) 441 s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128, 442 sce->ics.swb_sizes[i], 443 sce->sf_idx[w*16 + i], 444 sce->band_type[w*16 + i], 445 s->lambda); 446 start += sce->ics.swb_sizes[i]; 447 } 448 } 449} 450 451/** 452 * Encode one channel of audio data. 453 */ 454static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s, 455 SingleChannelElement *sce, 456 int common_window) 457{ 458 put_bits(&s->pb, 8, sce->sf_idx[0]); 459 if (!common_window) 460 put_ics_info(s, &sce->ics); 461 encode_band_info(s, sce); 462 encode_scale_factors(avctx, s, sce); 463 encode_pulses(s, &sce->pulse); 464 put_bits(&s->pb, 1, 0); //tns 465 put_bits(&s->pb, 1, 0); //ssr 466 encode_spectral_coeffs(s, sce); 467 return 0; 468} 469 470/** 471 * Write some auxiliary information about the created AAC file. 472 */ 473static void put_bitstream_info(AVCodecContext *avctx, AACEncContext *s, 474 const char *name) 475{ 476 int i, namelen, padbits; 477 478 namelen = strlen(name) + 2; 479 put_bits(&s->pb, 3, TYPE_FIL); 480 put_bits(&s->pb, 4, FFMIN(namelen, 15)); 481 if (namelen >= 15) 482 put_bits(&s->pb, 8, namelen - 16); 483 put_bits(&s->pb, 4, 0); //extension type - filler 484 padbits = 8 - (put_bits_count(&s->pb) & 7); 485 avpriv_align_put_bits(&s->pb); 486 for (i = 0; i < namelen - 2; i++) 487 put_bits(&s->pb, 8, name[i]); 488 put_bits(&s->pb, 12 - padbits, 0); 489} 490 491static int aac_encode_frame(AVCodecContext *avctx, 492 uint8_t *frame, int buf_size, void *data) 493{ 494 AACEncContext *s = avctx->priv_data; 495 int16_t *samples = s->samples, *samples2, *la; 496 ChannelElement *cpe; 497 int i, ch, w, g, chans, tag, start_ch; 498 int chan_el_counter[4]; 499 FFPsyWindowInfo windows[AAC_MAX_CHANNELS]; 500 501 if (s->last_frame) 502 return 0; 503 if (data) { 504 if (!s->psypp) { 505 memcpy(s->samples + 1024 * avctx->channels, data, 506 1024 * avctx->channels * sizeof(s->samples[0])); 507 } else { 508 start_ch = 0; 509 samples2 = s->samples + 1024 * avctx->channels; 510 for (i = 0; i < s->chan_map[0]; i++) { 511 tag = s->chan_map[i+1]; 512 chans = tag == TYPE_CPE ? 2 : 1; 513 ff_psy_preprocess(s->psypp, (uint16_t*)data + start_ch, 514 samples2 + start_ch, start_ch, chans); 515 start_ch += chans; 516 } 517 } 518 } 519 if (!avctx->frame_number) { 520 memcpy(s->samples, s->samples + 1024 * avctx->channels, 521 1024 * avctx->channels * sizeof(s->samples[0])); 522 return 0; 523 } 524 525 start_ch = 0; 526 for (i = 0; i < s->chan_map[0]; i++) { 527 FFPsyWindowInfo* wi = windows + start_ch; 528 tag = s->chan_map[i+1]; 529 chans = tag == TYPE_CPE ? 2 : 1; 530 cpe = &s->cpe[i]; 531 for (ch = 0; ch < chans; ch++) { 532 IndividualChannelStream *ics = &cpe->ch[ch].ics; 533 int cur_channel = start_ch + ch; 534 samples2 = samples + cur_channel; 535 la = samples2 + (448+64) * avctx->channels; 536 if (!data) 537 la = NULL; 538 if (tag == TYPE_LFE) { 539 wi[ch].window_type[0] = ONLY_LONG_SEQUENCE; 540 wi[ch].window_shape = 0; 541 wi[ch].num_windows = 1; 542 wi[ch].grouping[0] = 1; 543 544 /* Only the lowest 12 coefficients are used in a LFE channel. 545 * The expression below results in only the bottom 8 coefficients 546 * being used for 11.025kHz to 16kHz sample rates. 547 */ 548 ics->num_swb = s->samplerate_index >= 8 ? 1 : 3; 549 } else { 550 wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel, 551 ics->window_sequence[0]); 552 } 553 ics->window_sequence[1] = ics->window_sequence[0]; 554 ics->window_sequence[0] = wi[ch].window_type[0]; 555 ics->use_kb_window[1] = ics->use_kb_window[0]; 556 ics->use_kb_window[0] = wi[ch].window_shape; 557 ics->num_windows = wi[ch].num_windows; 558 ics->swb_sizes = s->psy.bands [ics->num_windows == 8]; 559 ics->num_swb = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8]; 560 for (w = 0; w < ics->num_windows; w++) 561 ics->group_len[w] = wi[ch].grouping[w]; 562 563 apply_window_and_mdct(avctx, s, &cpe->ch[ch], samples2); 564 } 565 start_ch += chans; 566 } 567 do { 568 int frame_bits; 569 init_put_bits(&s->pb, frame, buf_size*8); 570 if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT)) 571 put_bitstream_info(avctx, s, LIBAVCODEC_IDENT); 572 start_ch = 0; 573 memset(chan_el_counter, 0, sizeof(chan_el_counter)); 574 for (i = 0; i < s->chan_map[0]; i++) { 575 FFPsyWindowInfo* wi = windows + start_ch; 576 const float *coeffs[2]; 577 tag = s->chan_map[i+1]; 578 chans = tag == TYPE_CPE ? 2 : 1; 579 cpe = &s->cpe[i]; 580 put_bits(&s->pb, 3, tag); 581 put_bits(&s->pb, 4, chan_el_counter[tag]++); 582 for (ch = 0; ch < chans; ch++) 583 coeffs[ch] = cpe->ch[ch].coeffs; 584 s->psy.model->analyze(&s->psy, start_ch, coeffs, wi); 585 for (ch = 0; ch < chans; ch++) { 586 s->cur_channel = start_ch * 2 + ch; 587 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda); 588 } 589 cpe->common_window = 0; 590 if (chans > 1 591 && wi[0].window_type[0] == wi[1].window_type[0] 592 && wi[0].window_shape == wi[1].window_shape) { 593 594 cpe->common_window = 1; 595 for (w = 0; w < wi[0].num_windows; w++) { 596 if (wi[0].grouping[w] != wi[1].grouping[w]) { 597 cpe->common_window = 0; 598 break; 599 } 600 } 601 } 602 s->cur_channel = start_ch * 2; 603 if (s->options.stereo_mode && cpe->common_window) { 604 if (s->options.stereo_mode > 0) { 605 IndividualChannelStream *ics = &cpe->ch[0].ics; 606 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) 607 for (g = 0; g < ics->num_swb; g++) 608 cpe->ms_mask[w*16+g] = 1; 609 } else if (s->coder->search_for_ms) { 610 s->coder->search_for_ms(s, cpe, s->lambda); 611 } 612 } 613 adjust_frame_information(s, cpe, chans); 614 if (chans == 2) { 615 put_bits(&s->pb, 1, cpe->common_window); 616 if (cpe->common_window) { 617 put_ics_info(s, &cpe->ch[0].ics); 618 encode_ms_info(&s->pb, cpe); 619 } 620 } 621 for (ch = 0; ch < chans; ch++) { 622 s->cur_channel = start_ch + ch; 623 encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window); 624 } 625 start_ch += chans; 626 } 627 628 frame_bits = put_bits_count(&s->pb); 629 if (frame_bits <= 6144 * avctx->channels - 3) { 630 s->psy.bitres.bits = frame_bits / avctx->channels; 631 break; 632 } 633 634 s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits; 635 636 } while (1); 637 638 put_bits(&s->pb, 3, TYPE_END); 639 flush_put_bits(&s->pb); 640 avctx->frame_bits = put_bits_count(&s->pb); 641 642 // rate control stuff 643 if (!(avctx->flags & CODEC_FLAG_QSCALE)) { 644 float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits; 645 s->lambda *= ratio; 646 s->lambda = FFMIN(s->lambda, 65536.f); 647 } 648 649 if (!data) 650 s->last_frame = 1; 651 memcpy(s->samples, s->samples + 1024 * avctx->channels, 652 1024 * avctx->channels * sizeof(s->samples[0])); 653 return put_bits_count(&s->pb)>>3; 654} 655 656static av_cold int aac_encode_end(AVCodecContext *avctx) 657{ 658 AACEncContext *s = avctx->priv_data; 659 660 ff_mdct_end(&s->mdct1024); 661 ff_mdct_end(&s->mdct128); 662 ff_psy_end(&s->psy); 663 ff_psy_preprocess_end(s->psypp); 664 av_freep(&s->samples); 665 av_freep(&s->cpe); 666 return 0; 667} 668 669#define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM 670static const AVOption aacenc_options[] = { 671 {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.dbl = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"}, 672 {"auto", "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"}, 673 {"ms_off", "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"}, 674 {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"}, 675 {NULL} 676}; 677 678static const AVClass aacenc_class = { 679 "AAC encoder", 680 av_default_item_name, 681 aacenc_options, 682 LIBAVUTIL_VERSION_INT, 683}; 684 685AVCodec ff_aac_encoder = { 686 .name = "aac", 687 .type = AVMEDIA_TYPE_AUDIO, 688 .id = CODEC_ID_AAC, 689 .priv_data_size = sizeof(AACEncContext), 690 .init = aac_encode_init, 691 .encode = aac_encode_frame, 692 .close = aac_encode_end, 693 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL, 694 .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE}, 695 .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"), 696 .priv_class = &aacenc_class, 697}; 698