1/*
2 * Nellymoser encoder
3 * This code is developed as part of Google Summer of Code 2008 Program.
4 *
5 * Copyright (c) 2008 Bartlomiej Wolowiec
6 *
7 * This file is part of Libav.
8 *
9 * Libav is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * Libav is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with Libav; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24/**
25 * @file
26 * Nellymoser encoder
27 * by Bartlomiej Wolowiec
28 *
29 * Generic codec information: libavcodec/nellymoserdec.c
30 *
31 * Some information also from: http://samples.libav.org/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32 *                             (Copyright Joseph Artsimovich and UAB "DKD")
33 *
34 * for more information about nellymoser format, visit:
35 * http://wiki.multimedia.cx/index.php?title=Nellymoser
36 */
37
38#include "libavutil/mathematics.h"
39#include "nellymoser.h"
40#include "avcodec.h"
41#include "dsputil.h"
42#include "fft.h"
43#include "sinewin.h"
44
45#define BITSTREAM_WRITER_LE
46#include "put_bits.h"
47
48#define POW_TABLE_SIZE (1<<11)
49#define POW_TABLE_OFFSET 3
50#define OPT_SIZE ((1<<15) + 3000)
51
52typedef struct NellyMoserEncodeContext {
53    AVCodecContext  *avctx;
54    int             last_frame;
55    int             bufsel;
56    int             have_saved;
57    DSPContext      dsp;
58    FFTContext      mdct_ctx;
59    DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
60    DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
61    DECLARE_ALIGNED(32, float, buf)[2][3 * NELLY_BUF_LEN];     ///< sample buffer
62    float           (*opt )[NELLY_BANDS];
63    uint8_t         (*path)[NELLY_BANDS];
64} NellyMoserEncodeContext;
65
66static float pow_table[POW_TABLE_SIZE];     ///< -pow(2, -i / 2048.0 - 3.0);
67
68static const uint8_t sf_lut[96] = {
69     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
70     5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
71    15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
72    27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
73    41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
74    54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
75};
76
77static const uint8_t sf_delta_lut[78] = {
78     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
79     4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
80    13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
81    23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
82    28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
83};
84
85static const uint8_t quant_lut[230] = {
86     0,
87
88     0,  1,  2,
89
90     0,  1,  2,  3,  4,  5,  6,
91
92     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
93    12, 13, 13, 13, 14,
94
95     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
96     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
97    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
98    30,
99
100     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
101     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
102    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
103    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
104    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
105    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
106    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
107    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
108    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
109    61, 61, 61, 61, 62,
110};
111
112static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
113static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
114static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
115
116static void apply_mdct(NellyMoserEncodeContext *s)
117{
118    s->dsp.vector_fmul(s->in_buff, s->buf[s->bufsel], ff_sine_128, NELLY_BUF_LEN);
119    s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
120                               NELLY_BUF_LEN);
121    s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
122
123    s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN,
124                       ff_sine_128, NELLY_BUF_LEN);
125    s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
126                               NELLY_BUF_LEN);
127    s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
128}
129
130static av_cold int encode_init(AVCodecContext *avctx)
131{
132    NellyMoserEncodeContext *s = avctx->priv_data;
133    int i;
134
135    if (avctx->channels != 1) {
136        av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
137        return -1;
138    }
139
140    if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
141        avctx->sample_rate != 11025 &&
142        avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
143        avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
144        av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
145        return -1;
146    }
147
148    avctx->frame_size = NELLY_SAMPLES;
149    s->avctx = avctx;
150    ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0);
151    dsputil_init(&s->dsp, avctx);
152
153    /* Generate overlap window */
154    ff_sine_window_init(ff_sine_128, 128);
155    for (i = 0; i < POW_TABLE_SIZE; i++)
156        pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
157
158    if (s->avctx->trellis) {
159        s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
160        s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
161    }
162
163    return 0;
164}
165
166static av_cold int encode_end(AVCodecContext *avctx)
167{
168    NellyMoserEncodeContext *s = avctx->priv_data;
169
170    ff_mdct_end(&s->mdct_ctx);
171
172    if (s->avctx->trellis) {
173        av_free(s->opt);
174        av_free(s->path);
175    }
176
177    return 0;
178}
179
180#define find_best(val, table, LUT, LUT_add, LUT_size) \
181    best_idx = \
182        LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
183    if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
184        best_idx++;
185
186static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
187{
188    int band, best_idx, power_idx = 0;
189    float power_candidate;
190
191    //base exponent
192    find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
193    idx_table[0] = best_idx;
194    power_idx = ff_nelly_init_table[best_idx];
195
196    for (band = 1; band < NELLY_BANDS; band++) {
197        power_candidate = cand[band] - power_idx;
198        find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
199        idx_table[band] = best_idx;
200        power_idx += ff_nelly_delta_table[best_idx];
201    }
202}
203
204static inline float distance(float x, float y, int band)
205{
206    //return pow(fabs(x-y), 2.0);
207    float tmp = x - y;
208    return tmp * tmp;
209}
210
211static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
212{
213    int i, j, band, best_idx;
214    float power_candidate, best_val;
215
216    float  (*opt )[NELLY_BANDS] = s->opt ;
217    uint8_t(*path)[NELLY_BANDS] = s->path;
218
219    for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
220        opt[0][i] = INFINITY;
221    }
222
223    for (i = 0; i < 64; i++) {
224        opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
225        path[0][ff_nelly_init_table[i]] = i;
226    }
227
228    for (band = 1; band < NELLY_BANDS; band++) {
229        int q, c = 0;
230        float tmp;
231        int idx_min, idx_max, idx;
232        power_candidate = cand[band];
233        for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
234            idx_min = FFMAX(0, cand[band] - q);
235            idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
236            for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
237                if ( isinf(opt[band - 1][i]) )
238                    continue;
239                for (j = 0; j < 32; j++) {
240                    idx = i + ff_nelly_delta_table[j];
241                    if (idx > idx_max)
242                        break;
243                    if (idx >= idx_min) {
244                        tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
245                        if (opt[band][idx] > tmp) {
246                            opt[band][idx] = tmp;
247                            path[band][idx] = j;
248                            c = 1;
249                        }
250                    }
251                }
252            }
253        }
254        assert(c); //FIXME
255    }
256
257    best_val = INFINITY;
258    best_idx = -1;
259    band = NELLY_BANDS - 1;
260    for (i = 0; i < OPT_SIZE; i++) {
261        if (best_val > opt[band][i]) {
262            best_val = opt[band][i];
263            best_idx = i;
264        }
265    }
266    for (band = NELLY_BANDS - 1; band >= 0; band--) {
267        idx_table[band] = path[band][best_idx];
268        if (band) {
269            best_idx -= ff_nelly_delta_table[path[band][best_idx]];
270        }
271    }
272}
273
274/**
275 * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
276 *  @param s               encoder context
277 *  @param output          output buffer
278 *  @param output_size     size of output buffer
279 */
280static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
281{
282    PutBitContext pb;
283    int i, j, band, block, best_idx, power_idx = 0;
284    float power_val, coeff, coeff_sum;
285    float pows[NELLY_FILL_LEN];
286    int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
287    float cand[NELLY_BANDS];
288
289    apply_mdct(s);
290
291    init_put_bits(&pb, output, output_size * 8);
292
293    i = 0;
294    for (band = 0; band < NELLY_BANDS; band++) {
295        coeff_sum = 0;
296        for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
297            coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
298                       + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
299        }
300        cand[band] =
301            log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
302    }
303
304    if (s->avctx->trellis) {
305        get_exponent_dynamic(s, cand, idx_table);
306    } else {
307        get_exponent_greedy(s, cand, idx_table);
308    }
309
310    i = 0;
311    for (band = 0; band < NELLY_BANDS; band++) {
312        if (band) {
313            power_idx += ff_nelly_delta_table[idx_table[band]];
314            put_bits(&pb, 5, idx_table[band]);
315        } else {
316            power_idx = ff_nelly_init_table[idx_table[0]];
317            put_bits(&pb, 6, idx_table[0]);
318        }
319        power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
320        for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
321            s->mdct_out[i] *= power_val;
322            s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
323            pows[i] = power_idx;
324        }
325    }
326
327    ff_nelly_get_sample_bits(pows, bits);
328
329    for (block = 0; block < 2; block++) {
330        for (i = 0; i < NELLY_FILL_LEN; i++) {
331            if (bits[i] > 0) {
332                const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
333                coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
334                best_idx =
335                    quant_lut[av_clip (
336                            coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
337                            quant_lut_offset[bits[i]],
338                            quant_lut_offset[bits[i]+1] - 1
339                            )];
340                if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
341                    best_idx++;
342
343                put_bits(&pb, bits[i], best_idx);
344            }
345        }
346        if (!block)
347            put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
348    }
349
350    flush_put_bits(&pb);
351}
352
353static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
354{
355    NellyMoserEncodeContext *s = avctx->priv_data;
356    const float *samples = data;
357    int i;
358
359    if (s->last_frame)
360        return 0;
361
362    if (data) {
363        memcpy(s->buf[s->bufsel], samples, avctx->frame_size * sizeof(*samples));
364        for (i = avctx->frame_size; i < NELLY_SAMPLES; i++) {
365            s->buf[s->bufsel][i] = 0;
366        }
367        s->bufsel = 1 - s->bufsel;
368        if (!s->have_saved) {
369            s->have_saved = 1;
370            return 0;
371        }
372    } else {
373        memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
374        s->bufsel = 1 - s->bufsel;
375        s->last_frame = 1;
376    }
377
378    if (s->have_saved) {
379        encode_block(s, frame, buf_size);
380        return NELLY_BLOCK_LEN;
381    }
382    return 0;
383}
384
385AVCodec ff_nellymoser_encoder = {
386    .name = "nellymoser",
387    .type = AVMEDIA_TYPE_AUDIO,
388    .id = CODEC_ID_NELLYMOSER,
389    .priv_data_size = sizeof(NellyMoserEncodeContext),
390    .init = encode_init,
391    .encode = encode_frame,
392    .close = encode_end,
393    .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
394    .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
395    .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE},
396};
397