1/*
2 * Nellymoser encoder
3 * This code is developed as part of Google Summer of Code 2008 Program.
4 *
5 * Copyright (c) 2008 Bartlomiej Wolowiec
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24/**
25 * @file
26 * Nellymoser encoder
27 * by Bartlomiej Wolowiec
28 *
29 * Generic codec information: libavcodec/nellymoserdec.c
30 *
31 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32 *                             (Copyright Joseph Artsimovich and UAB "DKD")
33 *
34 * for more information about nellymoser format, visit:
35 * http://wiki.multimedia.cx/index.php?title=Nellymoser
36 */
37
38#include "libavutil/common.h"
39#include "libavutil/float_dsp.h"
40#include "libavutil/mathematics.h"
41
42#include "audio_frame_queue.h"
43#include "avcodec.h"
44#include "fft.h"
45#include "internal.h"
46#include "nellymoser.h"
47#include "sinewin.h"
48
49#define BITSTREAM_WRITER_LE
50#include "put_bits.h"
51
52#define POW_TABLE_SIZE (1<<11)
53#define POW_TABLE_OFFSET 3
54#define OPT_SIZE ((1<<15) + 3000)
55
56typedef struct NellyMoserEncodeContext {
57    AVCodecContext  *avctx;
58    int             last_frame;
59    AVFloatDSPContext fdsp;
60    FFTContext      mdct_ctx;
61    AudioFrameQueue afq;
62    DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
63    DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
64    DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN];     ///< sample buffer
65    float           (*opt )[OPT_SIZE];
66    uint8_t         (*path)[OPT_SIZE];
67} NellyMoserEncodeContext;
68
69static float pow_table[POW_TABLE_SIZE];     ///< -pow(2, -i / 2048.0 - 3.0);
70
71static const uint8_t sf_lut[96] = {
72     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
73     5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
74    15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
75    27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
76    41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
77    54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
78};
79
80static const uint8_t sf_delta_lut[78] = {
81     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
82     4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
83    13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
84    23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
85    28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
86};
87
88static const uint8_t quant_lut[230] = {
89     0,
90
91     0,  1,  2,
92
93     0,  1,  2,  3,  4,  5,  6,
94
95     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
96    12, 13, 13, 13, 14,
97
98     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
99     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
100    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
101    30,
102
103     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
104     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
105    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
106    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
107    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
108    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
109    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
110    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
111    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
112    61, 61, 61, 61, 62,
113};
114
115static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
116static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
117static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
118
119static void apply_mdct(NellyMoserEncodeContext *s)
120{
121    float *in0 = s->buf;
122    float *in1 = s->buf + NELLY_BUF_LEN;
123    float *in2 = s->buf + 2 * NELLY_BUF_LEN;
124
125    s->fdsp.vector_fmul        (s->in_buff,                 in0, ff_sine_128, NELLY_BUF_LEN);
126    s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
127    s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
128
129    s->fdsp.vector_fmul        (s->in_buff,                 in1, ff_sine_128, NELLY_BUF_LEN);
130    s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
131    s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
132}
133
134static av_cold int encode_end(AVCodecContext *avctx)
135{
136    NellyMoserEncodeContext *s = avctx->priv_data;
137
138    ff_mdct_end(&s->mdct_ctx);
139
140    if (s->avctx->trellis) {
141        av_free(s->opt);
142        av_free(s->path);
143    }
144    ff_af_queue_close(&s->afq);
145
146    return 0;
147}
148
149static av_cold int encode_init(AVCodecContext *avctx)
150{
151    NellyMoserEncodeContext *s = avctx->priv_data;
152    int i, ret;
153
154    if (avctx->channels != 1) {
155        av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
156        return AVERROR(EINVAL);
157    }
158
159    if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
160        avctx->sample_rate != 11025 &&
161        avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
162        avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
163        av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
164        return AVERROR(EINVAL);
165    }
166
167    avctx->frame_size = NELLY_SAMPLES;
168    avctx->delay      = NELLY_BUF_LEN;
169    ff_af_queue_init(avctx, &s->afq);
170    s->avctx = avctx;
171    if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
172        goto error;
173    avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
174
175    /* Generate overlap window */
176    ff_init_ff_sine_windows(7);
177    for (i = 0; i < POW_TABLE_SIZE; i++)
178        pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
179
180    if (s->avctx->trellis) {
181        s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
182        s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
183        if (!s->opt || !s->path) {
184            ret = AVERROR(ENOMEM);
185            goto error;
186        }
187    }
188
189    return 0;
190error:
191    encode_end(avctx);
192    return ret;
193}
194
195#define find_best(val, table, LUT, LUT_add, LUT_size) \
196    best_idx = \
197        LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
198    if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
199        best_idx++;
200
201static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
202{
203    int band, best_idx, power_idx = 0;
204    float power_candidate;
205
206    //base exponent
207    find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
208    idx_table[0] = best_idx;
209    power_idx = ff_nelly_init_table[best_idx];
210
211    for (band = 1; band < NELLY_BANDS; band++) {
212        power_candidate = cand[band] - power_idx;
213        find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
214        idx_table[band] = best_idx;
215        power_idx += ff_nelly_delta_table[best_idx];
216    }
217}
218
219static inline float distance(float x, float y, int band)
220{
221    //return pow(fabs(x-y), 2.0);
222    float tmp = x - y;
223    return tmp * tmp;
224}
225
226static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
227{
228    int i, j, band, best_idx;
229    float power_candidate, best_val;
230
231    float  (*opt )[OPT_SIZE] = s->opt ;
232    uint8_t(*path)[OPT_SIZE] = s->path;
233
234    for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
235        opt[0][i] = INFINITY;
236    }
237
238    for (i = 0; i < 64; i++) {
239        opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
240        path[0][ff_nelly_init_table[i]] = i;
241    }
242
243    for (band = 1; band < NELLY_BANDS; band++) {
244        int q, c = 0;
245        float tmp;
246        int idx_min, idx_max, idx;
247        power_candidate = cand[band];
248        for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
249            idx_min = FFMAX(0, cand[band] - q);
250            idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
251            for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
252                if ( isinf(opt[band - 1][i]) )
253                    continue;
254                for (j = 0; j < 32; j++) {
255                    idx = i + ff_nelly_delta_table[j];
256                    if (idx > idx_max)
257                        break;
258                    if (idx >= idx_min) {
259                        tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
260                        if (opt[band][idx] > tmp) {
261                            opt[band][idx] = tmp;
262                            path[band][idx] = j;
263                            c = 1;
264                        }
265                    }
266                }
267            }
268        }
269        assert(c); //FIXME
270    }
271
272    best_val = INFINITY;
273    best_idx = -1;
274    band = NELLY_BANDS - 1;
275    for (i = 0; i < OPT_SIZE; i++) {
276        if (best_val > opt[band][i]) {
277            best_val = opt[band][i];
278            best_idx = i;
279        }
280    }
281    for (band = NELLY_BANDS - 1; band >= 0; band--) {
282        idx_table[band] = path[band][best_idx];
283        if (band) {
284            best_idx -= ff_nelly_delta_table[path[band][best_idx]];
285        }
286    }
287}
288
289/**
290 * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
291 *  @param s               encoder context
292 *  @param output          output buffer
293 *  @param output_size     size of output buffer
294 */
295static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
296{
297    PutBitContext pb;
298    int i, j, band, block, best_idx, power_idx = 0;
299    float power_val, coeff, coeff_sum;
300    float pows[NELLY_FILL_LEN];
301    int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
302    float cand[NELLY_BANDS];
303
304    apply_mdct(s);
305
306    init_put_bits(&pb, output, output_size * 8);
307
308    i = 0;
309    for (band = 0; band < NELLY_BANDS; band++) {
310        coeff_sum = 0;
311        for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
312            coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
313                       + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
314        }
315        cand[band] =
316            log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
317    }
318
319    if (s->avctx->trellis) {
320        get_exponent_dynamic(s, cand, idx_table);
321    } else {
322        get_exponent_greedy(s, cand, idx_table);
323    }
324
325    i = 0;
326    for (band = 0; band < NELLY_BANDS; band++) {
327        if (band) {
328            power_idx += ff_nelly_delta_table[idx_table[band]];
329            put_bits(&pb, 5, idx_table[band]);
330        } else {
331            power_idx = ff_nelly_init_table[idx_table[0]];
332            put_bits(&pb, 6, idx_table[0]);
333        }
334        power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
335        for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
336            s->mdct_out[i] *= power_val;
337            s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
338            pows[i] = power_idx;
339        }
340    }
341
342    ff_nelly_get_sample_bits(pows, bits);
343
344    for (block = 0; block < 2; block++) {
345        for (i = 0; i < NELLY_FILL_LEN; i++) {
346            if (bits[i] > 0) {
347                const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
348                coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
349                best_idx =
350                    quant_lut[av_clip (
351                            coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
352                            quant_lut_offset[bits[i]],
353                            quant_lut_offset[bits[i]+1] - 1
354                            )];
355                if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
356                    best_idx++;
357
358                put_bits(&pb, bits[i], best_idx);
359            }
360        }
361        if (!block)
362            put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
363    }
364
365    flush_put_bits(&pb);
366    memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
367}
368
369static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
370                        const AVFrame *frame, int *got_packet_ptr)
371{
372    NellyMoserEncodeContext *s = avctx->priv_data;
373    int ret;
374
375    if (s->last_frame)
376        return 0;
377
378    memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
379    if (frame) {
380        memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
381               frame->nb_samples * sizeof(*s->buf));
382        if (frame->nb_samples < NELLY_SAMPLES) {
383            memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
384                   (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
385            if (frame->nb_samples >= NELLY_BUF_LEN)
386                s->last_frame = 1;
387        }
388        if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
389            return ret;
390    } else {
391        memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
392        s->last_frame = 1;
393    }
394
395    if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN)) < 0)
396        return ret;
397    encode_block(s, avpkt->data, avpkt->size);
398
399    /* Get the next frame pts/duration */
400    ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
401                       &avpkt->duration);
402
403    *got_packet_ptr = 1;
404    return 0;
405}
406
407AVCodec ff_nellymoser_encoder = {
408    .name           = "nellymoser",
409    .long_name      = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
410    .type           = AVMEDIA_TYPE_AUDIO,
411    .id             = AV_CODEC_ID_NELLYMOSER,
412    .priv_data_size = sizeof(NellyMoserEncodeContext),
413    .init           = encode_init,
414    .encode2        = encode_frame,
415    .close          = encode_end,
416    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
417    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
418                                                     AV_SAMPLE_FMT_NONE },
419};
420