1/*
2 * Nellymoser encoder
3 * This code is developed as part of Google Summer of Code 2008 Program.
4 *
5 * Copyright (c) 2008 Bartlomiej Wolowiec
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24/**
25 * @file
26 * Nellymoser encoder
27 * by Bartlomiej Wolowiec
28 *
29 * Generic codec information: libavcodec/nellymoserdec.c
30 *
31 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32 *                             (Copyright Joseph Artsimovich and UAB "DKD")
33 *
34 * for more information about nellymoser format, visit:
35 * http://wiki.multimedia.cx/index.php?title=Nellymoser
36 */
37
38#include "nellymoser.h"
39#include "avcodec.h"
40#include "dsputil.h"
41#include "fft.h"
42
43#define BITSTREAM_WRITER_LE
44#include "put_bits.h"
45
46#define POW_TABLE_SIZE (1<<11)
47#define POW_TABLE_OFFSET 3
48#define OPT_SIZE ((1<<15) + 3000)
49
50typedef struct NellyMoserEncodeContext {
51    AVCodecContext  *avctx;
52    int             last_frame;
53    int             bufsel;
54    int             have_saved;
55    DSPContext      dsp;
56    FFTContext      mdct_ctx;
57    DECLARE_ALIGNED(16, float, mdct_out)[NELLY_SAMPLES];
58    DECLARE_ALIGNED(16, float, in_buff)[NELLY_SAMPLES];
59    DECLARE_ALIGNED(16, float, buf)[2][3 * NELLY_BUF_LEN];     ///< sample buffer
60    float           (*opt )[NELLY_BANDS];
61    uint8_t         (*path)[NELLY_BANDS];
62} NellyMoserEncodeContext;
63
64static float pow_table[POW_TABLE_SIZE];     ///< -pow(2, -i / 2048.0 - 3.0);
65
66static const uint8_t sf_lut[96] = {
67     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
68     5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
69    15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
70    27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
71    41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
72    54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
73};
74
75static const uint8_t sf_delta_lut[78] = {
76     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
77     4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
78    13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
79    23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
80    28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
81};
82
83static const uint8_t quant_lut[230] = {
84     0,
85
86     0,  1,  2,
87
88     0,  1,  2,  3,  4,  5,  6,
89
90     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
91    12, 13, 13, 13, 14,
92
93     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
94     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
95    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
96    30,
97
98     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
99     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
100    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
101    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
102    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
103    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
104    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
105    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
106    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
107    61, 61, 61, 61, 62,
108};
109
110static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
111static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
112static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
113
114static void apply_mdct(NellyMoserEncodeContext *s)
115{
116    memcpy(s->in_buff, s->buf[s->bufsel], NELLY_BUF_LEN * sizeof(float));
117    s->dsp.vector_fmul(s->in_buff, ff_sine_128, NELLY_BUF_LEN);
118    s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
119                               NELLY_BUF_LEN);
120    ff_mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
121
122    s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128, NELLY_BUF_LEN);
123    s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
124                               NELLY_BUF_LEN);
125    ff_mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
126}
127
128static av_cold int encode_init(AVCodecContext *avctx)
129{
130    NellyMoserEncodeContext *s = avctx->priv_data;
131    int i;
132
133    if (avctx->channels != 1) {
134        av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
135        return -1;
136    }
137
138    if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
139        avctx->sample_rate != 11025 &&
140        avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
141        avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
142        av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
143        return -1;
144    }
145
146    avctx->frame_size = NELLY_SAMPLES;
147    s->avctx = avctx;
148    ff_mdct_init(&s->mdct_ctx, 8, 0, 1.0);
149    dsputil_init(&s->dsp, avctx);
150
151    /* Generate overlap window */
152    ff_sine_window_init(ff_sine_128, 128);
153    for (i = 0; i < POW_TABLE_SIZE; i++)
154        pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
155
156    if (s->avctx->trellis) {
157        s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
158        s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
159    }
160
161    return 0;
162}
163
164static av_cold int encode_end(AVCodecContext *avctx)
165{
166    NellyMoserEncodeContext *s = avctx->priv_data;
167
168    ff_mdct_end(&s->mdct_ctx);
169
170    if (s->avctx->trellis) {
171        av_free(s->opt);
172        av_free(s->path);
173    }
174
175    return 0;
176}
177
178#define find_best(val, table, LUT, LUT_add, LUT_size) \
179    best_idx = \
180        LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
181    if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
182        best_idx++;
183
184static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
185{
186    int band, best_idx, power_idx = 0;
187    float power_candidate;
188
189    //base exponent
190    find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
191    idx_table[0] = best_idx;
192    power_idx = ff_nelly_init_table[best_idx];
193
194    for (band = 1; band < NELLY_BANDS; band++) {
195        power_candidate = cand[band] - power_idx;
196        find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
197        idx_table[band] = best_idx;
198        power_idx += ff_nelly_delta_table[best_idx];
199    }
200}
201
202static inline float distance(float x, float y, int band)
203{
204    //return pow(fabs(x-y), 2.0);
205    float tmp = x - y;
206    return tmp * tmp;
207}
208
209static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
210{
211    int i, j, band, best_idx;
212    float power_candidate, best_val;
213
214    float  (*opt )[NELLY_BANDS] = s->opt ;
215    uint8_t(*path)[NELLY_BANDS] = s->path;
216
217    for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
218        opt[0][i] = INFINITY;
219    }
220
221    for (i = 0; i < 64; i++) {
222        opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
223        path[0][ff_nelly_init_table[i]] = i;
224    }
225
226    for (band = 1; band < NELLY_BANDS; band++) {
227        int q, c = 0;
228        float tmp;
229        int idx_min, idx_max, idx;
230        power_candidate = cand[band];
231        for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
232            idx_min = FFMAX(0, cand[band] - q);
233            idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
234            for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
235                if ( isinf(opt[band - 1][i]) )
236                    continue;
237                for (j = 0; j < 32; j++) {
238                    idx = i + ff_nelly_delta_table[j];
239                    if (idx > idx_max)
240                        break;
241                    if (idx >= idx_min) {
242                        tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
243                        if (opt[band][idx] > tmp) {
244                            opt[band][idx] = tmp;
245                            path[band][idx] = j;
246                            c = 1;
247                        }
248                    }
249                }
250            }
251        }
252        assert(c); //FIXME
253    }
254
255    best_val = INFINITY;
256    best_idx = -1;
257    band = NELLY_BANDS - 1;
258    for (i = 0; i < OPT_SIZE; i++) {
259        if (best_val > opt[band][i]) {
260            best_val = opt[band][i];
261            best_idx = i;
262        }
263    }
264    for (band = NELLY_BANDS - 1; band >= 0; band--) {
265        idx_table[band] = path[band][best_idx];
266        if (band) {
267            best_idx -= ff_nelly_delta_table[path[band][best_idx]];
268        }
269    }
270}
271
272/**
273 * Encodes NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
274 *  @param s               encoder context
275 *  @param output          output buffer
276 *  @param output_size     size of output buffer
277 */
278static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
279{
280    PutBitContext pb;
281    int i, j, band, block, best_idx, power_idx = 0;
282    float power_val, coeff, coeff_sum;
283    float pows[NELLY_FILL_LEN];
284    int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
285    float cand[NELLY_BANDS];
286
287    apply_mdct(s);
288
289    init_put_bits(&pb, output, output_size * 8);
290
291    i = 0;
292    for (band = 0; band < NELLY_BANDS; band++) {
293        coeff_sum = 0;
294        for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
295            coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
296                       + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
297        }
298        cand[band] =
299            log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
300    }
301
302    if (s->avctx->trellis) {
303        get_exponent_dynamic(s, cand, idx_table);
304    } else {
305        get_exponent_greedy(s, cand, idx_table);
306    }
307
308    i = 0;
309    for (band = 0; band < NELLY_BANDS; band++) {
310        if (band) {
311            power_idx += ff_nelly_delta_table[idx_table[band]];
312            put_bits(&pb, 5, idx_table[band]);
313        } else {
314            power_idx = ff_nelly_init_table[idx_table[0]];
315            put_bits(&pb, 6, idx_table[0]);
316        }
317        power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
318        for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
319            s->mdct_out[i] *= power_val;
320            s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
321            pows[i] = power_idx;
322        }
323    }
324
325    ff_nelly_get_sample_bits(pows, bits);
326
327    for (block = 0; block < 2; block++) {
328        for (i = 0; i < NELLY_FILL_LEN; i++) {
329            if (bits[i] > 0) {
330                const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
331                coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
332                best_idx =
333                    quant_lut[av_clip (
334                            coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
335                            quant_lut_offset[bits[i]],
336                            quant_lut_offset[bits[i]+1] - 1
337                            )];
338                if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
339                    best_idx++;
340
341                put_bits(&pb, bits[i], best_idx);
342            }
343        }
344        if (!block)
345            put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
346    }
347
348    flush_put_bits(&pb);
349}
350
351static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
352{
353    NellyMoserEncodeContext *s = avctx->priv_data;
354    int16_t *samples = data;
355    int i;
356
357    if (s->last_frame)
358        return 0;
359
360    if (data) {
361        for (i = 0; i < avctx->frame_size; i++) {
362            s->buf[s->bufsel][i] = samples[i];
363        }
364        for (; i < NELLY_SAMPLES; i++) {
365            s->buf[s->bufsel][i] = 0;
366        }
367        s->bufsel = 1 - s->bufsel;
368        if (!s->have_saved) {
369            s->have_saved = 1;
370            return 0;
371        }
372    } else {
373        memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
374        s->bufsel = 1 - s->bufsel;
375        s->last_frame = 1;
376    }
377
378    if (s->have_saved) {
379        encode_block(s, frame, buf_size);
380        return NELLY_BLOCK_LEN;
381    }
382    return 0;
383}
384
385AVCodec nellymoser_encoder = {
386    .name = "nellymoser",
387    .type = AVMEDIA_TYPE_AUDIO,
388    .id = CODEC_ID_NELLYMOSER,
389    .priv_data_size = sizeof(NellyMoserEncodeContext),
390    .init = encode_init,
391    .encode = encode_frame,
392    .close = encode_end,
393    .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
394    .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
395};
396