1/*
2 * Nellymoser encoder
3 * This code is developed as part of Google Summer of Code 2008 Program.
4 *
5 * Copyright (c) 2008 Bartlomiej Wolowiec
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24/**
25 * @file libavcodec/nellymoserenc.c
26 * Nellymoser encoder
27 * by Bartlomiej Wolowiec
28 *
29 * Generic codec information: libavcodec/nellymoserdec.c
30 *
31 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32 *                             (Copyright Joseph Artsimovich and UAB "DKD")
33 *
34 * for more information about nellymoser format, visit:
35 * http://wiki.multimedia.cx/index.php?title=Nellymoser
36 */
37
38#include "nellymoser.h"
39#include "avcodec.h"
40#include "dsputil.h"
41
42#define BITSTREAM_WRITER_LE
43#include "bitstream.h"
44
45#define POW_TABLE_SIZE (1<<11)
46#define POW_TABLE_OFFSET 3
47#define OPT_SIZE ((1<<15) + 3000)
48
49typedef struct NellyMoserEncodeContext {
50    AVCodecContext  *avctx;
51    int             last_frame;
52    int             bufsel;
53    int             have_saved;
54    DSPContext      dsp;
55    MDCTContext     mdct_ctx;
56    DECLARE_ALIGNED_16(float, mdct_out[NELLY_SAMPLES]);
57    DECLARE_ALIGNED_16(float, buf[2][3 * NELLY_BUF_LEN]);     ///< sample buffer
58    float           (*opt )[NELLY_BANDS];
59    uint8_t         (*path)[NELLY_BANDS];
60} NellyMoserEncodeContext;
61
62static float pow_table[POW_TABLE_SIZE];     ///< -pow(2, -i / 2048.0 - 3.0);
63
64static const uint8_t sf_lut[96] = {
65     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
66     5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
67    15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
68    27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
69    41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
70    54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
71};
72
73static const uint8_t sf_delta_lut[78] = {
74     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
75     4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
76    13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
77    23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
78    28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
79};
80
81static const uint8_t quant_lut[230] = {
82     0,
83
84     0,  1,  2,
85
86     0,  1,  2,  3,  4,  5,  6,
87
88     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
89    12, 13, 13, 13, 14,
90
91     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
92     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
93    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
94    30,
95
96     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
97     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
98    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
99    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
100    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
101    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
102    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
103    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
104    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
105    61, 61, 61, 61, 62,
106};
107
108static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
109static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
110static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
111
112void apply_mdct(NellyMoserEncodeContext *s)
113{
114    DECLARE_ALIGNED_16(float, in_buff[NELLY_SAMPLES]);
115
116    memcpy(in_buff, s->buf[s->bufsel], NELLY_BUF_LEN * sizeof(float));
117    s->dsp.vector_fmul(in_buff, ff_sine_128, NELLY_BUF_LEN);
118    s->dsp.vector_fmul_reverse(in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
119                               NELLY_BUF_LEN);
120    ff_mdct_calc(&s->mdct_ctx, s->mdct_out, in_buff);
121
122    s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128, NELLY_BUF_LEN);
123    s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
124                               NELLY_BUF_LEN);
125    ff_mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
126}
127
128static av_cold int encode_init(AVCodecContext *avctx)
129{
130    NellyMoserEncodeContext *s = avctx->priv_data;
131    int i;
132
133    if (avctx->channels != 1) {
134        av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
135        return -1;
136    }
137
138    if (avctx->sample_rate != 8000 && avctx->sample_rate != 11025 &&
139        avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
140        avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
141        av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 11025, 22050 and 44100 sample rate\n");
142        return -1;
143    }
144
145    avctx->frame_size = NELLY_SAMPLES;
146    s->avctx = avctx;
147    ff_mdct_init(&s->mdct_ctx, 8, 0);
148    dsputil_init(&s->dsp, avctx);
149
150    /* Generate overlap window */
151    ff_sine_window_init(ff_sine_128, 128);
152    for (i = 0; i < POW_TABLE_SIZE; i++)
153        pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
154
155    if (s->avctx->trellis) {
156        s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
157        s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
158    }
159
160    return 0;
161}
162
163static av_cold int encode_end(AVCodecContext *avctx)
164{
165    NellyMoserEncodeContext *s = avctx->priv_data;
166
167    ff_mdct_end(&s->mdct_ctx);
168
169    if (s->avctx->trellis) {
170        av_free(s->opt);
171        av_free(s->path);
172    }
173
174    return 0;
175}
176
177#define find_best(val, table, LUT, LUT_add, LUT_size) \
178    best_idx = \
179        LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
180    if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
181        best_idx++;
182
183static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
184{
185    int band, best_idx, power_idx = 0;
186    float power_candidate;
187
188    //base exponent
189    find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
190    idx_table[0] = best_idx;
191    power_idx = ff_nelly_init_table[best_idx];
192
193    for (band = 1; band < NELLY_BANDS; band++) {
194        power_candidate = cand[band] - power_idx;
195        find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
196        idx_table[band] = best_idx;
197        power_idx += ff_nelly_delta_table[best_idx];
198    }
199}
200
201static inline float distance(float x, float y, int band)
202{
203    //return pow(fabs(x-y), 2.0);
204    float tmp = x - y;
205    return tmp * tmp;
206}
207
208static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
209{
210    int i, j, band, best_idx;
211    float power_candidate, best_val;
212
213    float  (*opt )[NELLY_BANDS] = s->opt ;
214    uint8_t(*path)[NELLY_BANDS] = s->path;
215
216    for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
217        opt[0][i] = INFINITY;
218    }
219
220    for (i = 0; i < 64; i++) {
221        opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
222        path[0][ff_nelly_init_table[i]] = i;
223    }
224
225    for (band = 1; band < NELLY_BANDS; band++) {
226        int q, c = 0;
227        float tmp;
228        int idx_min, idx_max, idx;
229        power_candidate = cand[band];
230        for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
231            idx_min = FFMAX(0, cand[band] - q);
232            idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
233            for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
234                if ( isinf(opt[band - 1][i]) )
235                    continue;
236                for (j = 0; j < 32; j++) {
237                    idx = i + ff_nelly_delta_table[j];
238                    if (idx > idx_max)
239                        break;
240                    if (idx >= idx_min) {
241                        tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
242                        if (opt[band][idx] > tmp) {
243                            opt[band][idx] = tmp;
244                            path[band][idx] = j;
245                            c = 1;
246                        }
247                    }
248                }
249            }
250        }
251        assert(c); //FIXME
252    }
253
254    best_val = INFINITY;
255    best_idx = -1;
256    band = NELLY_BANDS - 1;
257    for (i = 0; i < OPT_SIZE; i++) {
258        if (best_val > opt[band][i]) {
259            best_val = opt[band][i];
260            best_idx = i;
261        }
262    }
263    for (band = NELLY_BANDS - 1; band >= 0; band--) {
264        idx_table[band] = path[band][best_idx];
265        if (band) {
266            best_idx -= ff_nelly_delta_table[path[band][best_idx]];
267        }
268    }
269}
270
271/**
272 * Encodes NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
273 *  @param s               encoder context
274 *  @param output          output buffer
275 *  @param output_size     size of output buffer
276 */
277static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
278{
279    PutBitContext pb;
280    int i, j, band, block, best_idx, power_idx = 0;
281    float power_val, coeff, coeff_sum;
282    float pows[NELLY_FILL_LEN];
283    int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
284    float cand[NELLY_BANDS];
285
286    apply_mdct(s);
287
288    init_put_bits(&pb, output, output_size * 8);
289
290    i = 0;
291    for (band = 0; band < NELLY_BANDS; band++) {
292        coeff_sum = 0;
293        for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
294            coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
295                       + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
296        }
297        cand[band] =
298            log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
299    }
300
301    if (s->avctx->trellis) {
302        get_exponent_dynamic(s, cand, idx_table);
303    } else {
304        get_exponent_greedy(s, cand, idx_table);
305    }
306
307    i = 0;
308    for (band = 0; band < NELLY_BANDS; band++) {
309        if (band) {
310            power_idx += ff_nelly_delta_table[idx_table[band]];
311            put_bits(&pb, 5, idx_table[band]);
312        } else {
313            power_idx = ff_nelly_init_table[idx_table[0]];
314            put_bits(&pb, 6, idx_table[0]);
315        }
316        power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
317        for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
318            s->mdct_out[i] *= power_val;
319            s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
320            pows[i] = power_idx;
321        }
322    }
323
324    ff_nelly_get_sample_bits(pows, bits);
325
326    for (block = 0; block < 2; block++) {
327        for (i = 0; i < NELLY_FILL_LEN; i++) {
328            if (bits[i] > 0) {
329                const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
330                coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
331                best_idx =
332                    quant_lut[av_clip (
333                            coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
334                            quant_lut_offset[bits[i]],
335                            quant_lut_offset[bits[i]+1] - 1
336                            )];
337                if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
338                    best_idx++;
339
340                put_bits(&pb, bits[i], best_idx);
341            }
342        }
343        if (!block)
344            put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
345    }
346
347    flush_put_bits(&pb);
348}
349
350static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
351{
352    NellyMoserEncodeContext *s = avctx->priv_data;
353    int16_t *samples = data;
354    int i;
355
356    if (s->last_frame)
357        return 0;
358
359    if (data) {
360        for (i = 0; i < avctx->frame_size; i++) {
361            s->buf[s->bufsel][i] = samples[i];
362        }
363        for (; i < NELLY_SAMPLES; i++) {
364            s->buf[s->bufsel][i] = 0;
365        }
366        s->bufsel = 1 - s->bufsel;
367        if (!s->have_saved) {
368            s->have_saved = 1;
369            return 0;
370        }
371    } else {
372        memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
373        s->bufsel = 1 - s->bufsel;
374        s->last_frame = 1;
375    }
376
377    if (s->have_saved) {
378        encode_block(s, frame, buf_size);
379        return NELLY_BLOCK_LEN;
380    }
381    return 0;
382}
383
384AVCodec nellymoser_encoder = {
385    .name = "nellymoser",
386    .type = CODEC_TYPE_AUDIO,
387    .id = CODEC_ID_NELLYMOSER,
388    .priv_data_size = sizeof(NellyMoserEncodeContext),
389    .init = encode_init,
390    .encode = encode_frame,
391    .close = encode_end,
392    .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
393    .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
394};
395