1/* 2 * AAC encoder 3 * Copyright (C) 2008 Konstantin Shishkov 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23 * @file libavcodec/aacenc.c 24 * AAC encoder 25 */ 26 27/*********************************** 28 * TODOs: 29 * psy model selection with some option 30 * add sane pulse detection 31 * add temporal noise shaping 32 ***********************************/ 33 34#include "avcodec.h" 35#include "bitstream.h" 36#include "dsputil.h" 37#include "mpeg4audio.h" 38 39#include "aacpsy.h" 40#include "aac.h" 41#include "aactab.h" 42 43static const uint8_t swb_size_1024_96[] = { 44 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 45 12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44, 46 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 47}; 48 49static const uint8_t swb_size_1024_64[] = { 50 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 51 12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36, 52 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40 53}; 54 55static const uint8_t swb_size_1024_48[] = { 56 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 57 12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28, 58 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 59 96 60}; 61 62static const uint8_t swb_size_1024_32[] = { 63 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 64 12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28, 65 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 66}; 67 68static const uint8_t swb_size_1024_24[] = { 69 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 70 12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28, 71 32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64 72}; 73 74static const uint8_t swb_size_1024_16[] = { 75 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 76 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28, 77 32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64 78}; 79 80static const uint8_t swb_size_1024_8[] = { 81 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 82 16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28, 83 32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80 84}; 85 86static const uint8_t * const swb_size_1024[] = { 87 swb_size_1024_96, swb_size_1024_96, swb_size_1024_64, 88 swb_size_1024_48, swb_size_1024_48, swb_size_1024_32, 89 swb_size_1024_24, swb_size_1024_24, swb_size_1024_16, 90 swb_size_1024_16, swb_size_1024_16, swb_size_1024_8 91}; 92 93static const uint8_t swb_size_128_96[] = { 94 4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36 95}; 96 97static const uint8_t swb_size_128_48[] = { 98 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16 99}; 100 101static const uint8_t swb_size_128_24[] = { 102 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20 103}; 104 105static const uint8_t swb_size_128_16[] = { 106 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20 107}; 108 109static const uint8_t swb_size_128_8[] = { 110 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20 111}; 112 113static const uint8_t * const swb_size_128[] = { 114 /* the last entry on the following row is swb_size_128_64 but is a 115 duplicate of swb_size_128_96 */ 116 swb_size_128_96, swb_size_128_96, swb_size_128_96, 117 swb_size_128_48, swb_size_128_48, swb_size_128_48, 118 swb_size_128_24, swb_size_128_24, swb_size_128_16, 119 swb_size_128_16, swb_size_128_16, swb_size_128_8 120}; 121 122/** bits needed to code codebook run value for long windows */ 123static const uint8_t run_value_bits_long[64] = { 124 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 125 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 126 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 127 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15 128}; 129 130/** bits needed to code codebook run value for short windows */ 131static const uint8_t run_value_bits_short[16] = { 132 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9 133}; 134 135static const uint8_t* const run_value_bits[2] = { 136 run_value_bits_long, run_value_bits_short 137}; 138 139/** default channel configurations */ 140static const uint8_t aac_chan_configs[6][5] = { 141 {1, TYPE_SCE}, // 1 channel - single channel element 142 {1, TYPE_CPE}, // 2 channels - channel pair 143 {2, TYPE_SCE, TYPE_CPE}, // 3 channels - center + stereo 144 {3, TYPE_SCE, TYPE_CPE, TYPE_SCE}, // 4 channels - front center + stereo + back center 145 {3, TYPE_SCE, TYPE_CPE, TYPE_CPE}, // 5 channels - front center + stereo + back stereo 146 {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE 147}; 148 149/** 150 * structure used in optimal codebook search 151 */ 152typedef struct BandCodingPath { 153 int prev_idx; ///< pointer to the previous path point 154 int codebook; ///< codebook for coding band run 155 int bits; ///< number of bit needed to code given number of bands 156} BandCodingPath; 157 158/** 159 * AAC encoder context 160 */ 161typedef struct { 162 PutBitContext pb; 163 MDCTContext mdct1024; ///< long (1024 samples) frame transform context 164 MDCTContext mdct128; ///< short (128 samples) frame transform context 165 DSPContext dsp; 166 DECLARE_ALIGNED_16(FFTSample, output[2048]); ///< temporary buffer for MDCT input coefficients 167 int16_t* samples; ///< saved preprocessed input 168 169 int samplerate_index; ///< MPEG-4 samplerate index 170 171 ChannelElement *cpe; ///< channel elements 172 AACPsyContext psy; ///< psychoacoustic model context 173 int last_frame; 174} AACEncContext; 175 176/** 177 * Make AAC audio config object. 178 * @see 1.6.2.1 "Syntax - AudioSpecificConfig" 179 */ 180static void put_audio_specific_config(AVCodecContext *avctx) 181{ 182 PutBitContext pb; 183 AACEncContext *s = avctx->priv_data; 184 185 init_put_bits(&pb, avctx->extradata, avctx->extradata_size*8); 186 put_bits(&pb, 5, 2); //object type - AAC-LC 187 put_bits(&pb, 4, s->samplerate_index); //sample rate index 188 put_bits(&pb, 4, avctx->channels); 189 //GASpecificConfig 190 put_bits(&pb, 1, 0); //frame length - 1024 samples 191 put_bits(&pb, 1, 0); //does not depend on core coder 192 put_bits(&pb, 1, 0); //is not extension 193 flush_put_bits(&pb); 194} 195 196static av_cold int aac_encode_init(AVCodecContext *avctx) 197{ 198 AACEncContext *s = avctx->priv_data; 199 int i; 200 201 avctx->frame_size = 1024; 202 203 for(i = 0; i < 16; i++) 204 if(avctx->sample_rate == ff_mpeg4audio_sample_rates[i]) 205 break; 206 if(i == 16){ 207 av_log(avctx, AV_LOG_ERROR, "Unsupported sample rate %d\n", avctx->sample_rate); 208 return -1; 209 } 210 if(avctx->channels > 6){ 211 av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %d\n", avctx->channels); 212 return -1; 213 } 214 s->samplerate_index = i; 215 216 dsputil_init(&s->dsp, avctx); 217 ff_mdct_init(&s->mdct1024, 11, 0); 218 ff_mdct_init(&s->mdct128, 8, 0); 219 // window init 220 ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024); 221 ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128); 222 ff_sine_window_init(ff_sine_1024, 1024); 223 ff_sine_window_init(ff_sine_128, 128); 224 225 s->samples = av_malloc(2 * 1024 * avctx->channels * sizeof(s->samples[0])); 226 s->cpe = av_mallocz(sizeof(ChannelElement) * aac_chan_configs[avctx->channels-1][0]); 227 if(ff_aac_psy_init(&s->psy, avctx, AAC_PSY_3GPP, 228 aac_chan_configs[avctx->channels-1][0], 0, 229 swb_size_1024[i], ff_aac_num_swb_1024[i], swb_size_128[i], ff_aac_num_swb_128[i]) < 0){ 230 av_log(avctx, AV_LOG_ERROR, "Cannot initialize selected model.\n"); 231 return -1; 232 } 233 avctx->extradata = av_malloc(2); 234 avctx->extradata_size = 2; 235 put_audio_specific_config(avctx); 236 return 0; 237} 238 239/** 240 * Encode ics_info element. 241 * @see Table 4.6 (syntax of ics_info) 242 */ 243static void put_ics_info(AACEncContext *s, IndividualChannelStream *info) 244{ 245 int i; 246 247 put_bits(&s->pb, 1, 0); // ics_reserved bit 248 put_bits(&s->pb, 2, info->window_sequence[0]); 249 put_bits(&s->pb, 1, info->use_kb_window[0]); 250 if(info->window_sequence[0] != EIGHT_SHORT_SEQUENCE){ 251 put_bits(&s->pb, 6, info->max_sfb); 252 put_bits(&s->pb, 1, 0); // no prediction 253 }else{ 254 put_bits(&s->pb, 4, info->max_sfb); 255 for(i = 1; i < info->num_windows; i++) 256 put_bits(&s->pb, 1, info->group_len[i]); 257 } 258} 259 260/** 261 * Calculate the number of bits needed to code all coefficient signs in current band. 262 */ 263static int calculate_band_sign_bits(AACEncContext *s, SingleChannelElement *sce, 264 int group_len, int start, int size) 265{ 266 int bits = 0; 267 int i, w; 268 for(w = 0; w < group_len; w++){ 269 for(i = 0; i < size; i++){ 270 if(sce->icoefs[start + i]) 271 bits++; 272 } 273 start += 128; 274 } 275 return bits; 276} 277 278/** 279 * Encode pulse data. 280 */ 281static void encode_pulses(AACEncContext *s, Pulse *pulse) 282{ 283 int i; 284 285 put_bits(&s->pb, 1, !!pulse->num_pulse); 286 if(!pulse->num_pulse) return; 287 288 put_bits(&s->pb, 2, pulse->num_pulse - 1); 289 put_bits(&s->pb, 6, pulse->start); 290 for(i = 0; i < pulse->num_pulse; i++){ 291 put_bits(&s->pb, 5, pulse->pos[i]); 292 put_bits(&s->pb, 4, pulse->amp[i]); 293 } 294} 295 296/** 297 * Encode spectral coefficients processed by psychoacoustic model. 298 */ 299static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce) 300{ 301 int start, i, w, w2, wg; 302 303 w = 0; 304 for(wg = 0; wg < sce->ics.num_window_groups; wg++){ 305 start = 0; 306 for(i = 0; i < sce->ics.max_sfb; i++){ 307 if(sce->zeroes[w*16 + i]){ 308 start += sce->ics.swb_sizes[i]; 309 continue; 310 } 311 for(w2 = w; w2 < w + sce->ics.group_len[wg]; w2++){ 312 encode_band_coeffs(s, sce, start + w2*128, 313 sce->ics.swb_sizes[i], 314 sce->band_type[w*16 + i]); 315 } 316 start += sce->ics.swb_sizes[i]; 317 } 318 w += sce->ics.group_len[wg]; 319 } 320} 321 322/** 323 * Write some auxiliary information about the created AAC file. 324 */ 325static void put_bitstream_info(AVCodecContext *avctx, AACEncContext *s, const char *name) 326{ 327 int i, namelen, padbits; 328 329 namelen = strlen(name) + 2; 330 put_bits(&s->pb, 3, TYPE_FIL); 331 put_bits(&s->pb, 4, FFMIN(namelen, 15)); 332 if(namelen >= 15) 333 put_bits(&s->pb, 8, namelen - 16); 334 put_bits(&s->pb, 4, 0); //extension type - filler 335 padbits = 8 - (put_bits_count(&s->pb) & 7); 336 align_put_bits(&s->pb); 337 for(i = 0; i < namelen - 2; i++) 338 put_bits(&s->pb, 8, name[i]); 339 put_bits(&s->pb, 12 - padbits, 0); 340} 341 342static av_cold int aac_encode_end(AVCodecContext *avctx) 343{ 344 AACEncContext *s = avctx->priv_data; 345 346 ff_mdct_end(&s->mdct1024); 347 ff_mdct_end(&s->mdct128); 348 ff_aac_psy_end(&s->psy); 349 av_freep(&s->samples); 350 av_freep(&s->cpe); 351 return 0; 352} 353 354AVCodec aac_encoder = { 355 "aac", 356 CODEC_TYPE_AUDIO, 357 CODEC_ID_AAC, 358 sizeof(AACEncContext), 359 aac_encode_init, 360 aac_encode_frame, 361 aac_encode_end, 362 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY, 363 .sample_fmts = (enum SampleFormat[]){SAMPLE_FMT_S16,SAMPLE_FMT_NONE}, 364 .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"), 365}; 366