1/*
2 * Copyright (c) CMU 1993 Computer Science, Speech Group
3 *                        Chengxiang Lu and Alex Hauptmann
4 * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5 * Copyright (c) 2009 Kenan Gillet
6 * Copyright (c) 2010 Martin Storsjo
7 *
8 * This file is part of FFmpeg.
9 *
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25/**
26 * @file
27 * G.722 ADPCM audio encoder
28 */
29
30#include "libavutil/avassert.h"
31#include "avcodec.h"
32#include "internal.h"
33#include "g722.h"
34#include "libavutil/common.h"
35
36#define FREEZE_INTERVAL 128
37
38/* This is an arbitrary value. Allowing insanely large values leads to strange
39   problems, so we limit it to a reasonable value */
40#define MAX_FRAME_SIZE 32768
41
42/* We clip the value of avctx->trellis to prevent data type overflows and
43   undefined behavior. Using larger values is insanely slow anyway. */
44#define MIN_TRELLIS 0
45#define MAX_TRELLIS 16
46
47static av_cold int g722_encode_close(AVCodecContext *avctx)
48{
49    G722Context *c = avctx->priv_data;
50    int i;
51    for (i = 0; i < 2; i++) {
52        av_freep(&c->paths[i]);
53        av_freep(&c->node_buf[i]);
54        av_freep(&c->nodep_buf[i]);
55    }
56    return 0;
57}
58
59static av_cold int g722_encode_init(AVCodecContext * avctx)
60{
61    G722Context *c = avctx->priv_data;
62    int ret;
63
64    if (avctx->channels != 1) {
65        av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
66        return AVERROR_INVALIDDATA;
67    }
68
69    c->band[0].scale_factor = 8;
70    c->band[1].scale_factor = 2;
71    c->prev_samples_pos = 22;
72
73    if (avctx->trellis) {
74        int frontier = 1 << avctx->trellis;
75        int max_paths = frontier * FREEZE_INTERVAL;
76        int i;
77        for (i = 0; i < 2; i++) {
78            c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
79            c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
80            c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
81            if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
82                ret = AVERROR(ENOMEM);
83                goto error;
84            }
85        }
86    }
87
88    if (avctx->frame_size) {
89        /* validate frame size */
90        if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
91            int new_frame_size;
92
93            if (avctx->frame_size == 1)
94                new_frame_size = 2;
95            else if (avctx->frame_size > MAX_FRAME_SIZE)
96                new_frame_size = MAX_FRAME_SIZE;
97            else
98                new_frame_size = avctx->frame_size - 1;
99
100            av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
101                   "allowed. Using %d instead of %d\n", new_frame_size,
102                   avctx->frame_size);
103            avctx->frame_size = new_frame_size;
104        }
105    } else {
106        /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
107           a common packet size for VoIP applications */
108        avctx->frame_size = 320;
109    }
110    avctx->delay = 22;
111
112    if (avctx->trellis) {
113        /* validate trellis */
114        if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
115            int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
116            av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
117                   "allowed. Using %d instead of %d\n", new_trellis,
118                   avctx->trellis);
119            avctx->trellis = new_trellis;
120        }
121    }
122
123    return 0;
124error:
125    g722_encode_close(avctx);
126    return ret;
127}
128
129static const int16_t low_quant[33] = {
130      35,   72,  110,  150,  190,  233,  276,  323,
131     370,  422,  473,  530,  587,  650,  714,  786,
132     858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
133    1765, 1980, 2195, 2557, 2919
134};
135
136static inline void filter_samples(G722Context *c, const int16_t *samples,
137                                  int *xlow, int *xhigh)
138{
139    int xout1, xout2;
140    c->prev_samples[c->prev_samples_pos++] = samples[0];
141    c->prev_samples[c->prev_samples_pos++] = samples[1];
142    ff_g722_apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
143    *xlow  = xout1 + xout2 >> 14;
144    *xhigh = xout1 - xout2 >> 14;
145    if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
146        memmove(c->prev_samples,
147                c->prev_samples + c->prev_samples_pos - 22,
148                22 * sizeof(c->prev_samples[0]));
149        c->prev_samples_pos = 22;
150    }
151}
152
153static inline int encode_high(const struct G722Band *state, int xhigh)
154{
155    int diff = av_clip_int16(xhigh - state->s_predictor);
156    int pred = 141 * state->scale_factor >> 8;
157           /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
158    return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
159}
160
161static inline int encode_low(const struct G722Band* state, int xlow)
162{
163    int diff  = av_clip_int16(xlow - state->s_predictor);
164           /* = diff >= 0 ? diff : -(diff + 1) */
165    int limit = diff ^ (diff >> (sizeof(diff)*8-1));
166    int i = 0;
167    limit = limit + 1 << 10;
168    if (limit > low_quant[8] * state->scale_factor)
169        i = 9;
170    while (i < 29 && limit > low_quant[i] * state->scale_factor)
171        i++;
172    return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
173}
174
175static void g722_encode_trellis(G722Context *c, int trellis,
176                                uint8_t *dst, int nb_samples,
177                                const int16_t *samples)
178{
179    int i, j, k;
180    int frontier = 1 << trellis;
181    struct TrellisNode **nodes[2];
182    struct TrellisNode **nodes_next[2];
183    int pathn[2] = {0, 0}, froze = -1;
184    struct TrellisPath *p[2];
185
186    for (i = 0; i < 2; i++) {
187        nodes[i] = c->nodep_buf[i];
188        nodes_next[i] = c->nodep_buf[i] + frontier;
189        memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
190        nodes[i][0] = c->node_buf[i] + frontier;
191        nodes[i][0]->ssd = 0;
192        nodes[i][0]->path = 0;
193        nodes[i][0]->state = c->band[i];
194    }
195
196    for (i = 0; i < nb_samples >> 1; i++) {
197        int xlow, xhigh;
198        struct TrellisNode *next[2];
199        int heap_pos[2] = {0, 0};
200
201        for (j = 0; j < 2; j++) {
202            next[j] = c->node_buf[j] + frontier*(i & 1);
203            memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
204        }
205
206        filter_samples(c, &samples[2*i], &xlow, &xhigh);
207
208        for (j = 0; j < frontier && nodes[0][j]; j++) {
209            /* Only k >> 2 affects the future adaptive state, therefore testing
210             * small steps that don't change k >> 2 is useless, the original
211             * value from encode_low is better than them. Since we step k
212             * in steps of 4, make sure range is a multiple of 4, so that
213             * we don't miss the original value from encode_low. */
214            int range = j < frontier/2 ? 4 : 0;
215            struct TrellisNode *cur_node = nodes[0][j];
216
217            int ilow = encode_low(&cur_node->state, xlow);
218
219            for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
220                int decoded, dec_diff, pos;
221                uint32_t ssd;
222                struct TrellisNode* node;
223
224                if (k < 0)
225                    continue;
226
227                decoded = av_clip((cur_node->state.scale_factor *
228                                  ff_g722_low_inv_quant6[k] >> 10)
229                                + cur_node->state.s_predictor, -16384, 16383);
230                dec_diff = xlow - decoded;
231
232#define STORE_NODE(index, UPDATE, VALUE)\
233                ssd = cur_node->ssd + dec_diff*dec_diff;\
234                /* Check for wraparound. Using 64 bit ssd counters would \
235                 * be simpler, but is slower on x86 32 bit. */\
236                if (ssd < cur_node->ssd)\
237                    continue;\
238                if (heap_pos[index] < frontier) {\
239                    pos = heap_pos[index]++;\
240                    av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
241                    node = nodes_next[index][pos] = next[index]++;\
242                    node->path = pathn[index]++;\
243                } else {\
244                    /* Try to replace one of the leaf nodes with the new \
245                     * one, but not always testing the same leaf position */\
246                    pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
247                    if (ssd >= nodes_next[index][pos]->ssd)\
248                        continue;\
249                    heap_pos[index]++;\
250                    node = nodes_next[index][pos];\
251                }\
252                node->ssd = ssd;\
253                node->state = cur_node->state;\
254                UPDATE;\
255                c->paths[index][node->path].value = VALUE;\
256                c->paths[index][node->path].prev = cur_node->path;\
257                /* Sift the newly inserted node up in the heap to restore \
258                 * the heap property */\
259                while (pos > 0) {\
260                    int parent = (pos - 1) >> 1;\
261                    if (nodes_next[index][parent]->ssd <= ssd)\
262                        break;\
263                    FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
264                                                nodes_next[index][pos]);\
265                    pos = parent;\
266                }
267                STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
268            }
269        }
270
271        for (j = 0; j < frontier && nodes[1][j]; j++) {
272            int ihigh;
273            struct TrellisNode *cur_node = nodes[1][j];
274
275            /* We don't try to get any initial guess for ihigh via
276             * encode_high - since there's only 4 possible values, test
277             * them all. Testing all of these gives a much, much larger
278             * gain than testing a larger range around ilow. */
279            for (ihigh = 0; ihigh < 4; ihigh++) {
280                int dhigh, decoded, dec_diff, pos;
281                uint32_t ssd;
282                struct TrellisNode* node;
283
284                dhigh = cur_node->state.scale_factor *
285                        ff_g722_high_inv_quant[ihigh] >> 10;
286                decoded = av_clip(dhigh + cur_node->state.s_predictor,
287                                  -16384, 16383);
288                dec_diff = xhigh - decoded;
289
290                STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
291            }
292        }
293
294        for (j = 0; j < 2; j++) {
295            FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
296
297            if (nodes[j][0]->ssd > (1 << 16)) {
298                for (k = 1; k < frontier && nodes[j][k]; k++)
299                    nodes[j][k]->ssd -= nodes[j][0]->ssd;
300                nodes[j][0]->ssd = 0;
301            }
302        }
303
304        if (i == froze + FREEZE_INTERVAL) {
305            p[0] = &c->paths[0][nodes[0][0]->path];
306            p[1] = &c->paths[1][nodes[1][0]->path];
307            for (j = i; j > froze; j--) {
308                dst[j] = p[1]->value << 6 | p[0]->value;
309                p[0] = &c->paths[0][p[0]->prev];
310                p[1] = &c->paths[1][p[1]->prev];
311            }
312            froze = i;
313            pathn[0] = pathn[1] = 0;
314            memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
315            memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
316        }
317    }
318
319    p[0] = &c->paths[0][nodes[0][0]->path];
320    p[1] = &c->paths[1][nodes[1][0]->path];
321    for (j = i; j > froze; j--) {
322        dst[j] = p[1]->value << 6 | p[0]->value;
323        p[0] = &c->paths[0][p[0]->prev];
324        p[1] = &c->paths[1][p[1]->prev];
325    }
326    c->band[0] = nodes[0][0]->state;
327    c->band[1] = nodes[1][0]->state;
328}
329
330static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
331                                         const int16_t *samples)
332{
333    int xlow, xhigh, ilow, ihigh;
334    filter_samples(c, samples, &xlow, &xhigh);
335    ihigh = encode_high(&c->band[1], xhigh);
336    ilow  = encode_low (&c->band[0], xlow);
337    ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
338                                ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
339    ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
340    *dst = ihigh << 6 | ilow;
341}
342
343static void g722_encode_no_trellis(G722Context *c,
344                                   uint8_t *dst, int nb_samples,
345                                   const int16_t *samples)
346{
347    int i;
348    for (i = 0; i < nb_samples; i += 2)
349        encode_byte(c, dst++, &samples[i]);
350}
351
352static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
353                             const AVFrame *frame, int *got_packet_ptr)
354{
355    G722Context *c = avctx->priv_data;
356    const int16_t *samples = (const int16_t *)frame->data[0];
357    int nb_samples, out_size, ret;
358
359    out_size = (frame->nb_samples + 1) / 2;
360    if ((ret = ff_alloc_packet2(avctx, avpkt, out_size)) < 0)
361        return ret;
362
363    nb_samples = frame->nb_samples - (frame->nb_samples & 1);
364
365    if (avctx->trellis)
366        g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
367    else
368        g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
369
370    /* handle last frame with odd frame_size */
371    if (nb_samples < frame->nb_samples) {
372        int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
373        encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
374    }
375
376    if (frame->pts != AV_NOPTS_VALUE)
377        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
378    *got_packet_ptr = 1;
379    return 0;
380}
381
382AVCodec ff_adpcm_g722_encoder = {
383    .name           = "g722",
384    .long_name      = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
385    .type           = AVMEDIA_TYPE_AUDIO,
386    .id             = AV_CODEC_ID_ADPCM_G722,
387    .priv_data_size = sizeof(G722Context),
388    .init           = g722_encode_init,
389    .close          = g722_encode_close,
390    .encode2        = g722_encode_frame,
391    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME,
392    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
393                                                     AV_SAMPLE_FMT_NONE },
394};
395