1/*
2 * AAC encoder
3 * Copyright (C) 2008 Konstantin Shishkov
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
23 * @file libavcodec/aacenc.c
24 * AAC encoder
25 */
26
27/***********************************
28 *              TODOs:
29 * psy model selection with some option
30 * add sane pulse detection
31 * add temporal noise shaping
32 ***********************************/
33
34#include "avcodec.h"
35#include "bitstream.h"
36#include "dsputil.h"
37#include "mpeg4audio.h"
38
39#include "aacpsy.h"
40#include "aac.h"
41#include "aactab.h"
42
/*
 * Scalefactor band widths, in spectral coefficients, for long (1024
 * coefficient) windows. The suffix of each table is the nominal sample
 * rate class; every table adds up to 1024 coefficients.
 */

static const uint8_t swb_size_1024_96[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,
    12, 12, 12, 12, 12,
    16, 16, 24, 28, 36, 44,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_64[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,
    12, 12, 12,
    16, 16, 16,
    20, 24, 24, 28, 36,
    40, 40, 40, 40, 40, 40, 40, 40, 40,
    40, 40, 40, 40, 40, 40, 40, 40, 40
};

static const uint8_t swb_size_1024_48[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12,
    16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 32, 32,
    96
};

static const uint8_t swb_size_1024_32[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12,
    16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
};

static const uint8_t swb_size_1024_24[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12,
    16, 16, 16,
    20, 20, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 52,
    64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_16[] = {
     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16,
    20, 20, 20,
    24, 24, 28, 28,
    32, 36, 40, 40, 44, 48, 52, 56, 60,
    64, 64, 64
};

static const uint8_t swb_size_1024_8[] = {
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 16, 16, 16,
    20, 20, 20, 20,
    24, 24, 24,
    28, 28,
    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
};

/**
 * Long-window band width tables, indexed by the sample rate index chosen
 * in aac_encode_init(). Only 12 indices are covered.
 */
static const uint8_t * const swb_size_1024[] = {
    swb_size_1024_96,
    swb_size_1024_96,
    swb_size_1024_64,
    swb_size_1024_48,
    swb_size_1024_48,
    swb_size_1024_32,
    swb_size_1024_24,
    swb_size_1024_24,
    swb_size_1024_16,
    swb_size_1024_16,
    swb_size_1024_16,
    swb_size_1024_8
};
92
/*
 * Scalefactor band widths, in spectral coefficients, for short (128
 * coefficient) windows. Every table adds up to 128 coefficients.
 */

static const uint8_t swb_size_128_96[] = {
     4,  4,  4,  4,  4,  4,
     8,  8,  8,
    16, 28, 36
};

static const uint8_t swb_size_128_48[] = {
     4,  4,  4,  4,  4,
     8,  8,  8,
    12, 12, 12,
    16, 16, 16
};

static const uint8_t swb_size_128_24[] = {
     4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,
    12, 12,
    16, 16,
    20
};

static const uint8_t swb_size_128_16[] = {
     4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,
    12, 12,
    16,
    20, 20
};

static const uint8_t swb_size_128_8[] = {
     4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,
    12,
    16,
    20, 20
};

/**
 * Short-window band width tables, indexed the same way as the long-window
 * tables (by sample rate index).
 */
static const uint8_t * const swb_size_128[] = {
    swb_size_128_96,
    swb_size_128_96,
    /* the next entry would be swb_size_128_64, but that table is a
       duplicate of swb_size_128_96 */
    swb_size_128_96,
    swb_size_128_48,
    swb_size_128_48,
    swb_size_128_48,
    swb_size_128_24,
    swb_size_128_24,
    swb_size_128_16,
    swb_size_128_16,
    swb_size_128_16,
    swb_size_128_8
};
121
/**
 * Bits needed to code a codebook run value for long windows.
 * Entries 0..30 cost 5 bits, 31..62 cost 10 bits and 63 costs 15 bits.
 */
static const uint8_t run_value_bits_long[64] = {
     5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5, 10,
    10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 15
};

/**
 * Bits needed to code a codebook run value for short windows.
 * Entries 0..6 cost 3 bits, 7..14 cost 6 bits and 15 costs 9 bits.
 */
static const uint8_t run_value_bits_short[16] = {
    3, 3, 3, 3, 3, 3, 3, 6,
    6, 6, 6, 6, 6, 6, 6, 9
};

/** Run value cost tables: [0] is for long windows, [1] for short windows. */
static const uint8_t* const run_value_bits[2] = {
    run_value_bits_long,
    run_value_bits_short
};
138
139/** default channel configurations */
140static const uint8_t aac_chan_configs[6][5] = {
141 {1, TYPE_SCE},                               // 1 channel  - single channel element
142 {1, TYPE_CPE},                               // 2 channels - channel pair
143 {2, TYPE_SCE, TYPE_CPE},                     // 3 channels - center + stereo
144 {3, TYPE_SCE, TYPE_CPE, TYPE_SCE},           // 4 channels - front center + stereo + back center
145 {3, TYPE_SCE, TYPE_CPE, TYPE_CPE},           // 5 channels - front center + stereo + back stereo
146 {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
147};
148
/**
 * One node of the path built during the optimal codebook search.
 */
typedef struct BandCodingPath {
    /** index of the previous point on the path */
    int prev_idx;
    /** codebook used to code the band run */
    int codebook;
    /** number of bits needed to code the given number of bands */
    int bits;
} BandCodingPath;
157
/**
 * AAC encoder context.
 * Private per-encoder state, reached through AVCodecContext.priv_data.
 */
typedef struct {
    PutBitContext pb;                            ///< bit writer that the element writers (ics_info, pulses, fill element, ...) emit into
    MDCTContext mdct1024;                        ///< long (1024 samples) frame transform context
    MDCTContext mdct128;                         ///< short (128 samples) frame transform context
    DSPContext  dsp;                             ///< DSP helpers, set up by dsputil_init() at encoder init
    DECLARE_ALIGNED_16(FFTSample, output[2048]); ///< temporary buffer for MDCT input coefficients
    int16_t* samples;                            ///< saved preprocessed input (2 * 1024 samples per channel are allocated)

    int samplerate_index;                        ///< MPEG-4 samplerate index

    ChannelElement *cpe;                         ///< channel elements (one per element of the selected channel configuration)
    AACPsyContext psy;                           ///< psychoacoustic model context
    int last_frame;                              ///< NOTE(review): not used by the code visible here; presumably marks the final frame - confirm
} AACEncContext;
175
176/**
177 * Make AAC audio config object.
178 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
179 */
180static void put_audio_specific_config(AVCodecContext *avctx)
181{
182    PutBitContext pb;
183    AACEncContext *s = avctx->priv_data;
184
185    init_put_bits(&pb, avctx->extradata, avctx->extradata_size*8);
186    put_bits(&pb, 5, 2); //object type - AAC-LC
187    put_bits(&pb, 4, s->samplerate_index); //sample rate index
188    put_bits(&pb, 4, avctx->channels);
189    //GASpecificConfig
190    put_bits(&pb, 1, 0); //frame length - 1024 samples
191    put_bits(&pb, 1, 0); //does not depend on core coder
192    put_bits(&pb, 1, 0); //is not extension
193    flush_put_bits(&pb);
194}
195
196static av_cold int aac_encode_init(AVCodecContext *avctx)
197{
198    AACEncContext *s = avctx->priv_data;
199    int i;
200
201    avctx->frame_size = 1024;
202
203    for(i = 0; i < 16; i++)
204        if(avctx->sample_rate == ff_mpeg4audio_sample_rates[i])
205            break;
206    if(i == 16){
207        av_log(avctx, AV_LOG_ERROR, "Unsupported sample rate %d\n", avctx->sample_rate);
208        return -1;
209    }
210    if(avctx->channels > 6){
211        av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %d\n", avctx->channels);
212        return -1;
213    }
214    s->samplerate_index = i;
215
216    dsputil_init(&s->dsp, avctx);
217    ff_mdct_init(&s->mdct1024, 11, 0);
218    ff_mdct_init(&s->mdct128,   8, 0);
219    // window init
220    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
221    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
222    ff_sine_window_init(ff_sine_1024, 1024);
223    ff_sine_window_init(ff_sine_128, 128);
224
225    s->samples = av_malloc(2 * 1024 * avctx->channels * sizeof(s->samples[0]));
226    s->cpe = av_mallocz(sizeof(ChannelElement) * aac_chan_configs[avctx->channels-1][0]);
227    if(ff_aac_psy_init(&s->psy, avctx, AAC_PSY_3GPP,
228                       aac_chan_configs[avctx->channels-1][0], 0,
229                       swb_size_1024[i], ff_aac_num_swb_1024[i], swb_size_128[i], ff_aac_num_swb_128[i]) < 0){
230        av_log(avctx, AV_LOG_ERROR, "Cannot initialize selected model.\n");
231        return -1;
232    }
233    avctx->extradata = av_malloc(2);
234    avctx->extradata_size = 2;
235    put_audio_specific_config(avctx);
236    return 0;
237}
238
239/**
240 * Encode ics_info element.
241 * @see Table 4.6 (syntax of ics_info)
242 */
243static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
244{
245    int i;
246
247    put_bits(&s->pb, 1, 0);                // ics_reserved bit
248    put_bits(&s->pb, 2, info->window_sequence[0]);
249    put_bits(&s->pb, 1, info->use_kb_window[0]);
250    if(info->window_sequence[0] != EIGHT_SHORT_SEQUENCE){
251        put_bits(&s->pb, 6, info->max_sfb);
252        put_bits(&s->pb, 1, 0);            // no prediction
253    }else{
254        put_bits(&s->pb, 4, info->max_sfb);
255        for(i = 1; i < info->num_windows; i++)
256            put_bits(&s->pb, 1, info->group_len[i]);
257    }
258}
259
260/**
261 * Calculate the number of bits needed to code all coefficient signs in current band.
262 */
263static int calculate_band_sign_bits(AACEncContext *s, SingleChannelElement *sce,
264                                    int group_len, int start, int size)
265{
266    int bits = 0;
267    int i, w;
268    for(w = 0; w < group_len; w++){
269        for(i = 0; i < size; i++){
270            if(sce->icoefs[start + i])
271                bits++;
272        }
273        start += 128;
274    }
275    return bits;
276}
277
278/**
279 * Encode pulse data.
280 */
281static void encode_pulses(AACEncContext *s, Pulse *pulse)
282{
283    int i;
284
285    put_bits(&s->pb, 1, !!pulse->num_pulse);
286    if(!pulse->num_pulse) return;
287
288    put_bits(&s->pb, 2, pulse->num_pulse - 1);
289    put_bits(&s->pb, 6, pulse->start);
290    for(i = 0; i < pulse->num_pulse; i++){
291        put_bits(&s->pb, 5, pulse->pos[i]);
292        put_bits(&s->pb, 4, pulse->amp[i]);
293    }
294}
295
296/**
297 * Encode spectral coefficients processed by psychoacoustic model.
298 */
299static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
300{
301    int start, i, w, w2, wg;
302
303    w = 0;
304    for(wg = 0; wg < sce->ics.num_window_groups; wg++){
305        start = 0;
306        for(i = 0; i < sce->ics.max_sfb; i++){
307            if(sce->zeroes[w*16 + i]){
308                start += sce->ics.swb_sizes[i];
309                continue;
310            }
311            for(w2 = w; w2 < w + sce->ics.group_len[wg]; w2++){
312                encode_band_coeffs(s, sce, start + w2*128,
313                                   sce->ics.swb_sizes[i],
314                                   sce->band_type[w*16 + i]);
315            }
316            start += sce->ics.swb_sizes[i];
317        }
318        w += sce->ics.group_len[wg];
319    }
320}
321
322/**
323 * Write some auxiliary information about the created AAC file.
324 */
325static void put_bitstream_info(AVCodecContext *avctx, AACEncContext *s, const char *name)
326{
327    int i, namelen, padbits;
328
329    namelen = strlen(name) + 2;
330    put_bits(&s->pb, 3, TYPE_FIL);
331    put_bits(&s->pb, 4, FFMIN(namelen, 15));
332    if(namelen >= 15)
333        put_bits(&s->pb, 8, namelen - 16);
334    put_bits(&s->pb, 4, 0); //extension type - filler
335    padbits = 8 - (put_bits_count(&s->pb) & 7);
336    align_put_bits(&s->pb);
337    for(i = 0; i < namelen - 2; i++)
338        put_bits(&s->pb, 8, name[i]);
339    put_bits(&s->pb, 12 - padbits, 0);
340}
341
342static av_cold int aac_encode_end(AVCodecContext *avctx)
343{
344    AACEncContext *s = avctx->priv_data;
345
346    ff_mdct_end(&s->mdct1024);
347    ff_mdct_end(&s->mdct128);
348    ff_aac_psy_end(&s->psy);
349    av_freep(&s->samples);
350    av_freep(&s->cpe);
351    return 0;
352}
353
354AVCodec aac_encoder = {
355    "aac",
356    CODEC_TYPE_AUDIO,
357    CODEC_ID_AAC,
358    sizeof(AACEncContext),
359    aac_encode_init,
360    aac_encode_frame,
361    aac_encode_end,
362    .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
363    .sample_fmts = (enum SampleFormat[]){SAMPLE_FMT_S16,SAMPLE_FMT_NONE},
364    .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
365};
366