1/*
2 * TTA (The Lossless True Audio) decoder
3 * Copyright (c) 2006 Alex Beregszaszi
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
23 * @file
24 * TTA (The Lossless True Audio) decoder
25 * (www.true-audio.com or tta.corecodec.org)
26 * @author Alex Beregszaszi
27 *
28 */
29
30#define ALT_BITSTREAM_READER_LE
31//#define DEBUG
32#include <limits.h>
33#include "avcodec.h"
34#include "get_bits.h"
35
/* Values of the 16-bit format field stored in the TTA1 stream header. */
#define FORMAT_INT   1  /* integer PCM samples */
#define FORMAT_FLOAT 3  /* floating point samples (rejected by tta_decode_init) */
38
39typedef struct TTAContext {
40    AVCodecContext *avctx;
41    GetBitContext gb;
42
43    int flags, channels, bps, is_float, data_length;
44    int frame_length, last_frame_length, total_frames;
45
46    int32_t *decode_buffer;
47} TTAContext;
48
/*
 * Saturating power-of-two helpers:
 *   shift_1[i]  == 1 << i   for i < 32, 0x80000000 above that
 *   shift_16[i] == 16 << i  for i < 28, 0x80000000 above that
 * The table form is the one that is compiled; the function form is kept
 * disabled for reference.
 */
#if 0
static inline int shift_1(int i)
{
    return (i < 32) ? 1 << i : 0x80000000; // saturated, i.e. 1 << 31
}

static inline int shift_16(int i)
{
    return (i < 28) ? 16 << i : 0x80000000; // saturated, i.e. 16 << 27
}
#else
static const uint32_t shift_1[] = {
    /*  0.. 7 */ 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020, 0x00000040, 0x00000080,
    /*  8..15 */ 0x00000100, 0x00000200, 0x00000400, 0x00000800, 0x00001000, 0x00002000, 0x00004000, 0x00008000,
    /* 16..23 */ 0x00010000, 0x00020000, 0x00040000, 0x00080000, 0x00100000, 0x00200000, 0x00400000, 0x00800000,
    /* 24..31 */ 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000,
    /* 32..39 */ 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000
};

/* 16 << i is 1 << (i + 4), so the second table is a view 4 entries into the first. */
static const uint32_t * const shift_16 = shift_1 + 4;
#endif
81
#define MAX_ORDER 16

/**
 * State of one per-channel adaptive filter.
 * ttafilter_process() works on entries 0..8 of the three arrays.
 */
typedef struct TTAFilter {
    int32_t shift;          ///< right shift applied to the prediction sum
    int32_t round;          ///< rounding term the sum starts from, shift_1[shift - 1]
    int32_t error;          ///< residual of the previous sample; its sign steers the weight update
    int32_t mode;           ///< when non-zero, the newest history entries are stored as differences
    int32_t qm[MAX_ORDER];  ///< filter weights
    int32_t dx[MAX_ORDER];  ///< per-tap adaptation steps added to / subtracted from qm
    int32_t dl[MAX_ORDER];  ///< sample history the weights are applied to
} TTAFilter;
89
/*
 * Filter parameters as { shift, mode } pairs.
 * tta_decode_frame() selects the row with index (bytes per sample - 1).
 */
static const int32_t ttafilter_configs[4][2] = {
    [0] = { 10, 1 },
    [1] = {  9, 1 },
    [2] = { 10, 1 },
    [3] = { 12, 0 },
};
96
97static void ttafilter_init(TTAFilter *c, int32_t shift, int32_t mode) {
98    memset(c, 0, sizeof(TTAFilter));
99    c->shift = shift;
100   c->round = shift_1[shift-1];
101//    c->round = 1 << (shift - 1);
102    c->mode = mode;
103}
104
/**
 * Copy eight consecutive int32_t values from b to a, lowest index first.
 *
 * The filter calls this with b == a + 1 to slide a 9-entry window down by
 * one slot; the ascending copy order is what makes that overlap safe.
 */
static inline void memshl(register int32_t *a, register int32_t *b) {
    int n;

    for (n = 0; n < 8; n++)
        a[n] = b[n];
}
116
117// FIXME: copy paste from original
118// mode=1 encoder, mode=0 decoder
119static inline void ttafilter_process(TTAFilter *c, int32_t *in, int32_t mode) {
120    register int32_t *dl = c->dl, *qm = c->qm, *dx = c->dx, sum = c->round;
121
122    if (!c->error) {
123        sum += *dl++ * *qm, qm++;
124        sum += *dl++ * *qm, qm++;
125        sum += *dl++ * *qm, qm++;
126        sum += *dl++ * *qm, qm++;
127        sum += *dl++ * *qm, qm++;
128        sum += *dl++ * *qm, qm++;
129        sum += *dl++ * *qm, qm++;
130        sum += *dl++ * *qm, qm++;
131        dx += 8;
132    } else if(c->error < 0) {
133        sum += *dl++ * (*qm -= *dx++), qm++;
134        sum += *dl++ * (*qm -= *dx++), qm++;
135        sum += *dl++ * (*qm -= *dx++), qm++;
136        sum += *dl++ * (*qm -= *dx++), qm++;
137        sum += *dl++ * (*qm -= *dx++), qm++;
138        sum += *dl++ * (*qm -= *dx++), qm++;
139        sum += *dl++ * (*qm -= *dx++), qm++;
140        sum += *dl++ * (*qm -= *dx++), qm++;
141    } else {
142        sum += *dl++ * (*qm += *dx++), qm++;
143        sum += *dl++ * (*qm += *dx++), qm++;
144        sum += *dl++ * (*qm += *dx++), qm++;
145        sum += *dl++ * (*qm += *dx++), qm++;
146        sum += *dl++ * (*qm += *dx++), qm++;
147        sum += *dl++ * (*qm += *dx++), qm++;
148        sum += *dl++ * (*qm += *dx++), qm++;
149        sum += *dl++ * (*qm += *dx++), qm++;
150    }
151
152    *(dx-0) = ((*(dl-1) >> 30) | 1) << 2;
153    *(dx-1) = ((*(dl-2) >> 30) | 1) << 1;
154    *(dx-2) = ((*(dl-3) >> 30) | 1) << 1;
155    *(dx-3) = ((*(dl-4) >> 30) | 1);
156
157    // compress
158    if (mode) {
159        *dl = *in;
160        *in -= (sum >> c->shift);
161        c->error = *in;
162    } else {
163        c->error = *in;
164        *in += (sum >> c->shift);
165        *dl = *in;
166    }
167
168    if (c->mode) {
169        *(dl-1) = *dl - *(dl-1);
170        *(dl-2) = *(dl-1) - *(dl-2);
171        *(dl-3) = *(dl-2) - *(dl-3);
172    }
173
174    memshl(c->dl, c->dl + 1);
175    memshl(c->dx, c->dx + 1);
176}
177
/**
 * Adaptive Rice coder state for one channel.
 * Index 0 is used for codes with an empty unary prefix, index 1 for the rest.
 */
typedef struct TTARice {
    uint32_t k0;    ///< current Rice parameter, level 0
    uint32_t k1;    ///< current Rice parameter, level 1
    uint32_t sum0;  ///< running sum that drives the adaptation of k0
    uint32_t sum1;  ///< running sum that drives the adaptation of k1
} TTARice;
181
182static void rice_init(TTARice *c, uint32_t k0, uint32_t k1)
183{
184    c->k0 = k0;
185    c->k1 = k1;
186    c->sum0 = shift_16[k0];
187    c->sum1 = shift_16[k1];
188}
189
190static int tta_get_unary(GetBitContext *gb)
191{
192    int ret = 0;
193
194    // count ones
195    while(get_bits1(gb))
196        ret++;
197    return ret;
198}
199
200static av_cold int tta_decode_init(AVCodecContext * avctx)
201{
202    TTAContext *s = avctx->priv_data;
203    int i;
204
205    s->avctx = avctx;
206
207    // 30bytes includes a seektable with one frame
208    if (avctx->extradata_size < 30)
209        return -1;
210
211    init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size);
212    if (show_bits_long(&s->gb, 32) == AV_RL32("TTA1"))
213    {
214        /* signature */
215        skip_bits(&s->gb, 32);
216//        if (get_bits_long(&s->gb, 32) != bswap_32(AV_RL32("TTA1"))) {
217//            av_log(s->avctx, AV_LOG_ERROR, "Missing magic\n");
218//            return -1;
219//        }
220
221        s->flags = get_bits(&s->gb, 16);
222        if (s->flags != 1 && s->flags != 3)
223        {
224            av_log(s->avctx, AV_LOG_ERROR, "Invalid flags\n");
225            return -1;
226        }
227        s->is_float = (s->flags == FORMAT_FLOAT);
228        avctx->channels = s->channels = get_bits(&s->gb, 16);
229        avctx->bits_per_coded_sample = get_bits(&s->gb, 16);
230        s->bps = (avctx->bits_per_coded_sample + 7) / 8;
231        avctx->sample_rate = get_bits_long(&s->gb, 32);
232        if(avctx->sample_rate > 1000000){ //prevent FRAME_TIME * avctx->sample_rate from overflowing and sanity check
233            av_log(avctx, AV_LOG_ERROR, "sample_rate too large\n");
234            return -1;
235        }
236        s->data_length = get_bits_long(&s->gb, 32);
237        skip_bits(&s->gb, 32); // CRC32 of header
238
239        if (s->is_float)
240        {
241            avctx->sample_fmt = SAMPLE_FMT_FLT;
242            av_log(s->avctx, AV_LOG_ERROR, "Unsupported sample format. Please contact the developers.\n");
243            return -1;
244        }
245        else switch(s->bps) {
246//            case 1: avctx->sample_fmt = SAMPLE_FMT_U8; break;
247            case 2: avctx->sample_fmt = SAMPLE_FMT_S16; break;
248//            case 3: avctx->sample_fmt = SAMPLE_FMT_S24; break;
249            case 4: avctx->sample_fmt = SAMPLE_FMT_S32; break;
250            default:
251                av_log(s->avctx, AV_LOG_ERROR, "Invalid/unsupported sample format. Please contact the developers.\n");
252                return -1;
253        }
254
255        // FIXME: horribly broken, but directly from reference source
256#define FRAME_TIME 1.04489795918367346939
257        s->frame_length = (int)(FRAME_TIME * avctx->sample_rate);
258
259        s->last_frame_length = s->data_length % s->frame_length;
260        s->total_frames = s->data_length / s->frame_length +
261                        (s->last_frame_length ? 1 : 0);
262
263        av_log(s->avctx, AV_LOG_DEBUG, "flags: %x chans: %d bps: %d rate: %d block: %d\n",
264            s->flags, avctx->channels, avctx->bits_per_coded_sample, avctx->sample_rate,
265            avctx->block_align);
266        av_log(s->avctx, AV_LOG_DEBUG, "data_length: %d frame_length: %d last: %d total: %d\n",
267            s->data_length, s->frame_length, s->last_frame_length, s->total_frames);
268
269        // FIXME: seek table
270        for (i = 0; i < s->total_frames; i++)
271            skip_bits(&s->gb, 32);
272        skip_bits(&s->gb, 32); // CRC32 of seektable
273
274        if(s->frame_length >= UINT_MAX / (s->channels * sizeof(int32_t))){
275            av_log(avctx, AV_LOG_ERROR, "frame_length too large\n");
276            return -1;
277        }
278
279        s->decode_buffer = av_mallocz(sizeof(int32_t)*s->frame_length*s->channels);
280    } else {
281        av_log(avctx, AV_LOG_ERROR, "Wrong extradata present\n");
282        return -1;
283    }
284
285    return 0;
286}
287
288static int tta_decode_frame(AVCodecContext *avctx,
289        void *data, int *data_size,
290        AVPacket *avpkt)
291{
292    const uint8_t *buf = avpkt->data;
293    int buf_size = avpkt->size;
294    TTAContext *s = avctx->priv_data;
295    int i;
296
297    init_get_bits(&s->gb, buf, buf_size*8);
298    {
299        int32_t predictors[s->channels];
300        TTAFilter filters[s->channels];
301        TTARice rices[s->channels];
302        int cur_chan = 0, framelen = s->frame_length;
303        int32_t *p;
304
305        if (*data_size < (framelen * s->channels * 2)) {
306            av_log(avctx, AV_LOG_ERROR, "Output buffer size is too small.\n");
307            return -1;
308        }
309        // FIXME: seeking
310        s->total_frames--;
311        if (!s->total_frames && s->last_frame_length)
312            framelen = s->last_frame_length;
313
314        // init per channel states
315        for (i = 0; i < s->channels; i++) {
316            predictors[i] = 0;
317            ttafilter_init(&(filters[i]), ttafilter_configs[s->bps-1][0], ttafilter_configs[s->bps-1][1]);
318            rice_init(&(rices[i]), 10, 10);
319        }
320
321        for (p = s->decode_buffer; p < s->decode_buffer + (framelen * s->channels); p++) {
322            int32_t *predictor = &(predictors[cur_chan]);
323            TTAFilter *filter = &(filters[cur_chan]);
324            TTARice *rice = &(rices[cur_chan]);
325            uint32_t unary, depth, k;
326            int32_t value;
327
328            unary = tta_get_unary(&s->gb);
329
330            if (unary == 0) {
331                depth = 0;
332                k = rice->k0;
333            } else {
334                depth = 1;
335                k = rice->k1;
336                unary--;
337            }
338
339            if (get_bits_left(&s->gb) < k)
340                return -1;
341
342            if (k) {
343                if (k > MIN_CACHE_BITS)
344                    return -1;
345                value = (unary << k) + get_bits(&s->gb, k);
346            } else
347                value = unary;
348
349            // FIXME: copy paste from original
350            switch (depth) {
351            case 1:
352                rice->sum1 += value - (rice->sum1 >> 4);
353                if (rice->k1 > 0 && rice->sum1 < shift_16[rice->k1])
354                    rice->k1--;
355                else if(rice->sum1 > shift_16[rice->k1 + 1])
356                    rice->k1++;
357                value += shift_1[rice->k0];
358            default:
359                rice->sum0 += value - (rice->sum0 >> 4);
360                if (rice->k0 > 0 && rice->sum0 < shift_16[rice->k0])
361                    rice->k0--;
362                else if(rice->sum0 > shift_16[rice->k0 + 1])
363                    rice->k0++;
364            }
365
366            // extract coded value
367#define UNFOLD(x) (((x)&1) ? (++(x)>>1) : (-(x)>>1))
368            *p = UNFOLD(value);
369
370            // run hybrid filter
371            ttafilter_process(filter, p, 0);
372
373            // fixed order prediction
374#define PRED(x, k) (int32_t)((((uint64_t)x << k) - x) >> k)
375            switch (s->bps) {
376                case 1: *p += PRED(*predictor, 4); break;
377                case 2:
378                case 3: *p += PRED(*predictor, 5); break;
379                case 4: *p += *predictor; break;
380            }
381            *predictor = *p;
382
383#if 0
384            // extract 32bit float from last two int samples
385            if (s->is_float && ((p - data) & 1)) {
386                uint32_t neg = *p & 0x80000000;
387                uint32_t hi = *(p - 1);
388                uint32_t lo = abs(*p) - 1;
389
390                hi += (hi || lo) ? 0x3f80 : 0;
391                // SWAP16: swap all the 16 bits
392                *(p - 1) = (hi << 16) | SWAP16(lo) | neg;
393            }
394#endif
395
396            /*if ((get_bits_count(&s->gb)+7)/8 > buf_size)
397            {
398                av_log(NULL, AV_LOG_INFO, "overread!!\n");
399                break;
400            }*/
401
402            // flip channels
403            if (cur_chan < (s->channels-1))
404                cur_chan++;
405            else {
406                // decorrelate in case of stereo integer
407                if (!s->is_float && (s->channels > 1)) {
408                    int32_t *r = p - 1;
409                    for (*p += *r / 2; r > p - s->channels; r--)
410                        *r = *(r + 1) - *r;
411                }
412                cur_chan = 0;
413            }
414        }
415
416        if (get_bits_left(&s->gb) < 32)
417            return -1;
418        skip_bits(&s->gb, 32); // frame crc
419
420        // convert to output buffer
421        switch(s->bps) {
422            case 2: {
423                uint16_t *samples = data;
424                for (p = s->decode_buffer; p < s->decode_buffer + (framelen * s->channels); p++) {
425//                    *samples++ = (unsigned char)*p;
426//                    *samples++ = (unsigned char)(*p >> 8);
427                    *samples++ = *p;
428                }
429                *data_size = (uint8_t *)samples - (uint8_t *)data;
430                break;
431            }
432            default:
433                av_log(s->avctx, AV_LOG_ERROR, "Error, only 16bit samples supported!\n");
434        }
435    }
436
437//    return get_bits_count(&s->gb)+7)/8;
438    return buf_size;
439}
440
441static av_cold int tta_decode_close(AVCodecContext *avctx) {
442    TTAContext *s = avctx->priv_data;
443
444    if (s->decode_buffer)
445        av_free(s->decode_buffer);
446
447    return 0;
448}
449
450AVCodec tta_decoder = {
451    "tta",
452    AVMEDIA_TYPE_AUDIO,
453    CODEC_ID_TTA,
454    sizeof(TTAContext),
455    tta_decode_init,
456    NULL,
457    tta_decode_close,
458    tta_decode_frame,
459    .long_name = NULL_IF_CONFIG_SMALL("True Audio (TTA)"),
460};
461