1/*
2 * AAC decoder
3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
5 * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
6 *
7 * AAC LATM decoder
8 * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
9 * Copyright (c) 2010      Janne Grunau <janne-libav@jannau.net>
10 *
11 * This file is part of FFmpeg.
12 *
13 * FFmpeg is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Lesser General Public
15 * License as published by the Free Software Foundation; either
16 * version 2.1 of the License, or (at your option) any later version.
17 *
18 * FFmpeg is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21 * Lesser General Public License for more details.
22 *
23 * You should have received a copy of the GNU Lesser General Public
24 * License along with FFmpeg; if not, write to the Free Software
25 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 */
27
28/**
29 * @file
30 * AAC decoder
31 * @author Oded Shimon  ( ods15 ods15 dyndns org )
32 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
33 */
34
35/*
36 * supported tools
37 *
38 * Support?             Name
39 * N (code in SoC repo) gain control
40 * Y                    block switching
41 * Y                    window shapes - standard
42 * N                    window shapes - Low Delay
43 * Y                    filterbank - standard
44 * N (code in SoC repo) filterbank - Scalable Sample Rate
45 * Y                    Temporal Noise Shaping
46 * Y                    Long Term Prediction
47 * Y                    intensity stereo
48 * Y                    channel coupling
49 * Y                    frequency domain prediction
50 * Y                    Perceptual Noise Substitution
51 * Y                    Mid/Side stereo
52 * N                    Scalable Inverse AAC Quantization
53 * N                    Frequency Selective Switch
54 * N                    upsampling filter
55 * Y                    quantization & coding - AAC
56 * N                    quantization & coding - TwinVQ
57 * N                    quantization & coding - BSAC
58 * N                    AAC Error Resilience tools
59 * N                    Error Resilience payload syntax
60 * N                    Error Protection tool
61 * N                    CELP
62 * N                    Silence Compression
63 * N                    HVXC
64 * N                    HVXC 4kbits/s VR
65 * N                    Structured Audio tools
66 * N                    Structured Audio Sample Bank Format
67 * N                    MIDI
68 * N                    Harmonic and Individual Lines plus Noise
69 * N                    Text-To-Speech Interface
70 * Y                    Spectral Band Replication
71 * Y (not in this code) Layer-1
72 * Y (not in this code) Layer-2
73 * Y (not in this code) Layer-3
74 * N                    SinuSoidal Coding (Transient, Sinusoid, Noise)
75 * Y                    Parametric Stereo
76 * N                    Direct Stream Transfer
77 * Y                    Enhanced AAC Low Delay (ER AAC ELD)
78 *
79 * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
80 *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
 *         Parametric Stereo.
82 */
83
84#include "libavutil/float_dsp.h"
85#include "libavutil/opt.h"
86#include "avcodec.h"
87#include "internal.h"
88#include "get_bits.h"
89#include "fft.h"
90#include "fmtconvert.h"
91#include "lpc.h"
92#include "kbdwin.h"
93#include "sinewin.h"
94
95#include "aac.h"
96#include "aactab.h"
97#include "aacdectab.h"
98#include "cbrt_tablegen.h"
99#include "sbr.h"
100#include "aacsbr.h"
101#include "mpeg4audio.h"
102#include "aacadtsdec.h"
103#include "libavutil/intfloat.h"
104
105#include <assert.h>
106#include <errno.h>
107#include <math.h>
108#include <stdint.h>
109#include <string.h>
110
111#if ARCH_ARM
112#   include "arm/aac.h"
113#elif ARCH_MIPS
114#   include "mips/aacdec_mips.h"
115#endif
116
/* VLC for scalefactors (going by the name); it is only declared here and is
 * set up and used outside this part of the file. */
static VLC vlc_scalefactors;
/* Eleven VLCs for spectral data; presumably one per spectral codebook -
 * TODO confirm against the table initialisation. */
static VLC vlc_spectral[11];

/* Forward declaration: pop_output_configuration() calls output_configure()
 * ahead of its definition further down. */
static int output_configure(AACContext *ac,
                            uint8_t layout_map[MAX_ELEM_ID*4][3], int tags,
                            enum OCStatus oc_type, int get_new_frame);

/* Log message shared by the overread checks. It is a string literal macro so
 * that it can be concatenated with a prefix, as in "decode_pce: " overread_err. */
#define overread_err "Input buffer exhausted before END element found\n"
125
126static int count_channels(uint8_t (*layout)[3], int tags)
127{
128    int i, sum = 0;
129    for (i = 0; i < tags; i++) {
130        int syn_ele = layout[i][0];
131        int pos     = layout[i][2];
132        sum += (1 + (syn_ele == TYPE_CPE)) *
133               (pos != AAC_CHANNEL_OFF && pos != AAC_CHANNEL_CC);
134    }
135    return sum;
136}
137
138/**
139 * Check for the channel element in the current channel position configuration.
140 * If it exists, make sure the appropriate element is allocated and map the
141 * channel order to match the internal FFmpeg channel layout.
142 *
143 * @param   che_pos current channel position configuration
144 * @param   type channel element type
145 * @param   id channel element id
146 * @param   channels count of the number of channels in the configuration
147 *
148 * @return  Returns error status. 0 - OK, !0 - error
149 */
150static av_cold int che_configure(AACContext *ac,
151                                 enum ChannelPosition che_pos,
152                                 int type, int id, int *channels)
153{
154    if (*channels >= MAX_CHANNELS)
155        return AVERROR_INVALIDDATA;
156    if (che_pos) {
157        if (!ac->che[type][id]) {
158            if (!(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
159                return AVERROR(ENOMEM);
160            ff_aac_sbr_ctx_init(ac, &ac->che[type][id]->sbr);
161        }
162        if (type != TYPE_CCE) {
163            if (*channels >= MAX_CHANNELS - (type == TYPE_CPE || (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1))) {
164                av_log(ac->avctx, AV_LOG_ERROR, "Too many channels\n");
165                return AVERROR_INVALIDDATA;
166            }
167            ac->output_element[(*channels)++] = &ac->che[type][id]->ch[0];
168            if (type == TYPE_CPE ||
169                (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1)) {
170                ac->output_element[(*channels)++] = &ac->che[type][id]->ch[1];
171            }
172        }
173    } else {
174        if (ac->che[type][id])
175            ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
176        av_freep(&ac->che[type][id]);
177    }
178    return 0;
179}
180
181static int frame_configure_elements(AVCodecContext *avctx)
182{
183    AACContext *ac = avctx->priv_data;
184    int type, id, ch, ret;
185
186    /* set channel pointers to internal buffers by default */
187    for (type = 0; type < 4; type++) {
188        for (id = 0; id < MAX_ELEM_ID; id++) {
189            ChannelElement *che = ac->che[type][id];
190            if (che) {
191                che->ch[0].ret = che->ch[0].ret_buf;
192                che->ch[1].ret = che->ch[1].ret_buf;
193            }
194        }
195    }
196
197    /* get output buffer */
198    av_frame_unref(ac->frame);
199    if (!avctx->channels)
200        return 1;
201
202    ac->frame->nb_samples = 2048;
203    if ((ret = ff_get_buffer(avctx, ac->frame, 0)) < 0)
204        return ret;
205
206    /* map output channel pointers to AVFrame data */
207    for (ch = 0; ch < avctx->channels; ch++) {
208        if (ac->output_element[ch])
209            ac->output_element[ch]->ret = (float *)ac->frame->extended_data[ch];
210    }
211
212    return 0;
213}
214
/* One layout map entry together with the output position that
 * sniff_channel_order() assigns to it; the entries are sorted by
 * av_position before being written back to the layout map. */
struct elem_to_channel {
    uint64_t av_position;  /* AV_CH_* bit(s), or UINT64_MAX when no position was assigned */
    uint8_t syn_ele;       /* syntax element type (TYPE_SCE, TYPE_CPE or TYPE_LFE) */
    uint8_t elem_id;       /* element id taken from the layout map entry */
    uint8_t aac_position;  /* AAC_CHANNEL_* value written back as the entry's position */
};
221
222static int assign_pair(struct elem_to_channel e2c_vec[MAX_ELEM_ID],
223                       uint8_t (*layout_map)[3], int offset, uint64_t left,
224                       uint64_t right, int pos)
225{
226    if (layout_map[offset][0] == TYPE_CPE) {
227        e2c_vec[offset] = (struct elem_to_channel) {
228            .av_position  = left | right,
229            .syn_ele      = TYPE_CPE,
230            .elem_id      = layout_map[offset][1],
231            .aac_position = pos
232        };
233        return 1;
234    } else {
235        e2c_vec[offset] = (struct elem_to_channel) {
236            .av_position  = left,
237            .syn_ele      = TYPE_SCE,
238            .elem_id      = layout_map[offset][1],
239            .aac_position = pos
240        };
241        e2c_vec[offset + 1] = (struct elem_to_channel) {
242            .av_position  = right,
243            .syn_ele      = TYPE_SCE,
244            .elem_id      = layout_map[offset + 1][1],
245            .aac_position = pos
246        };
247        return 2;
248    }
249}
250
251static int count_paired_channels(uint8_t (*layout_map)[3], int tags, int pos,
252                                 int *current)
253{
254    int num_pos_channels = 0;
255    int first_cpe        = 0;
256    int sce_parity       = 0;
257    int i;
258    for (i = *current; i < tags; i++) {
259        if (layout_map[i][2] != pos)
260            break;
261        if (layout_map[i][0] == TYPE_CPE) {
262            if (sce_parity) {
263                if (pos == AAC_CHANNEL_FRONT && !first_cpe) {
264                    sce_parity = 0;
265                } else {
266                    return -1;
267                }
268            }
269            num_pos_channels += 2;
270            first_cpe         = 1;
271        } else {
272            num_pos_channels++;
273            sce_parity ^= 1;
274        }
275    }
276    if (sce_parity &&
277        ((pos == AAC_CHANNEL_FRONT && first_cpe) || pos == AAC_CHANNEL_SIDE))
278        return -1;
279    *current = i;
280    return num_pos_channels;
281}
282
283static uint64_t sniff_channel_order(uint8_t (*layout_map)[3], int tags)
284{
285    int i, n, total_non_cc_elements;
286    struct elem_to_channel e2c_vec[4 * MAX_ELEM_ID] = { { 0 } };
287    int num_front_channels, num_side_channels, num_back_channels;
288    uint64_t layout;
289
290    if (FF_ARRAY_ELEMS(e2c_vec) < tags)
291        return 0;
292
293    i = 0;
294    num_front_channels =
295        count_paired_channels(layout_map, tags, AAC_CHANNEL_FRONT, &i);
296    if (num_front_channels < 0)
297        return 0;
298    num_side_channels =
299        count_paired_channels(layout_map, tags, AAC_CHANNEL_SIDE, &i);
300    if (num_side_channels < 0)
301        return 0;
302    num_back_channels =
303        count_paired_channels(layout_map, tags, AAC_CHANNEL_BACK, &i);
304    if (num_back_channels < 0)
305        return 0;
306
307    i = 0;
308    if (num_front_channels & 1) {
309        e2c_vec[i] = (struct elem_to_channel) {
310            .av_position  = AV_CH_FRONT_CENTER,
311            .syn_ele      = TYPE_SCE,
312            .elem_id      = layout_map[i][1],
313            .aac_position = AAC_CHANNEL_FRONT
314        };
315        i++;
316        num_front_channels--;
317    }
318    if (num_front_channels >= 4) {
319        i += assign_pair(e2c_vec, layout_map, i,
320                         AV_CH_FRONT_LEFT_OF_CENTER,
321                         AV_CH_FRONT_RIGHT_OF_CENTER,
322                         AAC_CHANNEL_FRONT);
323        num_front_channels -= 2;
324    }
325    if (num_front_channels >= 2) {
326        i += assign_pair(e2c_vec, layout_map, i,
327                         AV_CH_FRONT_LEFT,
328                         AV_CH_FRONT_RIGHT,
329                         AAC_CHANNEL_FRONT);
330        num_front_channels -= 2;
331    }
332    while (num_front_channels >= 2) {
333        i += assign_pair(e2c_vec, layout_map, i,
334                         UINT64_MAX,
335                         UINT64_MAX,
336                         AAC_CHANNEL_FRONT);
337        num_front_channels -= 2;
338    }
339
340    if (num_side_channels >= 2) {
341        i += assign_pair(e2c_vec, layout_map, i,
342                         AV_CH_SIDE_LEFT,
343                         AV_CH_SIDE_RIGHT,
344                         AAC_CHANNEL_FRONT);
345        num_side_channels -= 2;
346    }
347    while (num_side_channels >= 2) {
348        i += assign_pair(e2c_vec, layout_map, i,
349                         UINT64_MAX,
350                         UINT64_MAX,
351                         AAC_CHANNEL_SIDE);
352        num_side_channels -= 2;
353    }
354
355    while (num_back_channels >= 4) {
356        i += assign_pair(e2c_vec, layout_map, i,
357                         UINT64_MAX,
358                         UINT64_MAX,
359                         AAC_CHANNEL_BACK);
360        num_back_channels -= 2;
361    }
362    if (num_back_channels >= 2) {
363        i += assign_pair(e2c_vec, layout_map, i,
364                         AV_CH_BACK_LEFT,
365                         AV_CH_BACK_RIGHT,
366                         AAC_CHANNEL_BACK);
367        num_back_channels -= 2;
368    }
369    if (num_back_channels) {
370        e2c_vec[i] = (struct elem_to_channel) {
371            .av_position  = AV_CH_BACK_CENTER,
372            .syn_ele      = TYPE_SCE,
373            .elem_id      = layout_map[i][1],
374            .aac_position = AAC_CHANNEL_BACK
375        };
376        i++;
377        num_back_channels--;
378    }
379
380    if (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
381        e2c_vec[i] = (struct elem_to_channel) {
382            .av_position  = AV_CH_LOW_FREQUENCY,
383            .syn_ele      = TYPE_LFE,
384            .elem_id      = layout_map[i][1],
385            .aac_position = AAC_CHANNEL_LFE
386        };
387        i++;
388    }
389    while (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
390        e2c_vec[i] = (struct elem_to_channel) {
391            .av_position  = UINT64_MAX,
392            .syn_ele      = TYPE_LFE,
393            .elem_id      = layout_map[i][1],
394            .aac_position = AAC_CHANNEL_LFE
395        };
396        i++;
397    }
398
399    // Must choose a stable sort
400    total_non_cc_elements = n = i;
401    do {
402        int next_n = 0;
403        for (i = 1; i < n; i++)
404            if (e2c_vec[i - 1].av_position > e2c_vec[i].av_position) {
405                FFSWAP(struct elem_to_channel, e2c_vec[i - 1], e2c_vec[i]);
406                next_n = i;
407            }
408        n = next_n;
409    } while (n > 0);
410
411    layout = 0;
412    for (i = 0; i < total_non_cc_elements; i++) {
413        layout_map[i][0] = e2c_vec[i].syn_ele;
414        layout_map[i][1] = e2c_vec[i].elem_id;
415        layout_map[i][2] = e2c_vec[i].aac_position;
416        if (e2c_vec[i].av_position != UINT64_MAX) {
417            layout |= e2c_vec[i].av_position;
418        }
419    }
420
421    return layout;
422}
423
424/**
425 * Save current output configuration if and only if it has been locked.
426 */
427static void push_output_configuration(AACContext *ac) {
428    if (ac->oc[1].status == OC_LOCKED) {
429        ac->oc[0] = ac->oc[1];
430    }
431    ac->oc[1].status = OC_NONE;
432}
433
434/**
435 * Restore the previous output configuration if and only if the current
436 * configuration is unlocked.
437 */
438static void pop_output_configuration(AACContext *ac) {
439    if (ac->oc[1].status != OC_LOCKED && ac->oc[0].status != OC_NONE) {
440        ac->oc[1] = ac->oc[0];
441        ac->avctx->channels = ac->oc[1].channels;
442        ac->avctx->channel_layout = ac->oc[1].channel_layout;
443        output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
444                         ac->oc[1].status, 0);
445    }
446}
447
448/**
449 * Configure output channel order based on the current program
450 * configuration element.
451 *
452 * @return  Returns error status. 0 - OK, !0 - error
453 */
454static int output_configure(AACContext *ac,
455                            uint8_t layout_map[MAX_ELEM_ID * 4][3], int tags,
456                            enum OCStatus oc_type, int get_new_frame)
457{
458    AVCodecContext *avctx = ac->avctx;
459    int i, channels = 0, ret;
460    uint64_t layout = 0;
461
462    if (ac->oc[1].layout_map != layout_map) {
463        memcpy(ac->oc[1].layout_map, layout_map, tags * sizeof(layout_map[0]));
464        ac->oc[1].layout_map_tags = tags;
465    }
466
467    // Try to sniff a reasonable channel order, otherwise output the
468    // channels in the order the PCE declared them.
469    if (avctx->request_channel_layout != AV_CH_LAYOUT_NATIVE)
470        layout = sniff_channel_order(layout_map, tags);
471    for (i = 0; i < tags; i++) {
472        int type =     layout_map[i][0];
473        int id =       layout_map[i][1];
474        int position = layout_map[i][2];
475        // Allocate or free elements depending on if they are in the
476        // current program configuration.
477        ret = che_configure(ac, position, type, id, &channels);
478        if (ret < 0)
479            return ret;
480    }
481    if (ac->oc[1].m4ac.ps == 1 && channels == 2) {
482        if (layout == AV_CH_FRONT_CENTER) {
483            layout = AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT;
484        } else {
485            layout = 0;
486        }
487    }
488
489    memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
490    if (layout) avctx->channel_layout = layout;
491                            ac->oc[1].channel_layout = layout;
492    avctx->channels       = ac->oc[1].channels       = channels;
493    ac->oc[1].status = oc_type;
494
495    if (get_new_frame) {
496        if ((ret = frame_configure_elements(ac->avctx)) < 0)
497            return ret;
498    }
499
500    return 0;
501}
502
503static void flush(AVCodecContext *avctx)
504{
505    AACContext *ac= avctx->priv_data;
506    int type, i, j;
507
508    for (type = 3; type >= 0; type--) {
509        for (i = 0; i < MAX_ELEM_ID; i++) {
510            ChannelElement *che = ac->che[type][i];
511            if (che) {
512                for (j = 0; j <= 1; j++) {
513                    memset(che->ch[j].saved, 0, sizeof(che->ch[j].saved));
514                }
515            }
516        }
517    }
518}
519
520/**
521 * Set up channel positions based on a default channel configuration
522 * as specified in table 1.17.
523 *
524 * @return  Returns error status. 0 - OK, !0 - error
525 */
526static int set_default_channel_config(AVCodecContext *avctx,
527                                      uint8_t (*layout_map)[3],
528                                      int *tags,
529                                      int channel_config)
530{
531    if (channel_config < 1 || channel_config > 7) {
532        av_log(avctx, AV_LOG_ERROR,
533               "invalid default channel configuration (%d)\n",
534               channel_config);
535        return AVERROR_INVALIDDATA;
536    }
537    *tags = tags_per_config[channel_config];
538    memcpy(layout_map, aac_channel_layout_map[channel_config - 1],
539           *tags * sizeof(*layout_map));
540
541    /*
542     * AAC specification has 7.1(wide) as a default layout for 8-channel streams.
543     * However, at least Nero AAC encoder encodes 7.1 streams using the default
544     * channel config 7, mapping the side channels of the original audio stream
545     * to the second AAC_CHANNEL_FRONT pair in the AAC stream. Similarly, e.g. FAAD
546     * decodes the second AAC_CHANNEL_FRONT pair as side channels, therefore decoding
547     * the incorrect streams as if they were correct (and as the encoder intended).
548     *
549     * As actual intended 7.1(wide) streams are very rare, default to assuming a
550     * 7.1 layout was intended.
551     */
552    if (channel_config == 7 && avctx->strict_std_compliance < FF_COMPLIANCE_STRICT) {
553        av_log(avctx, AV_LOG_INFO, "Assuming an incorrectly encoded 7.1 channel layout"
554               " instead of a spec-compliant 7.1(wide) layout, use -strict %d to decode"
555               " according to the specification instead.\n", FF_COMPLIANCE_STRICT);
556        layout_map[2][2] = AAC_CHANNEL_SIDE;
557    }
558
559    return 0;
560}
561
562static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
563{
564    /* For PCE based channel configurations map the channels solely based
565     * on tags. */
566    if (!ac->oc[1].m4ac.chan_config) {
567        return ac->tag_che_map[type][elem_id];
568    }
569    // Allow single CPE stereo files to be signalled with mono configuration.
570    if (!ac->tags_mapped && type == TYPE_CPE &&
571        ac->oc[1].m4ac.chan_config == 1) {
572        uint8_t layout_map[MAX_ELEM_ID*4][3];
573        int layout_map_tags;
574        push_output_configuration(ac);
575
576        av_log(ac->avctx, AV_LOG_DEBUG, "mono with CPE\n");
577
578        if (set_default_channel_config(ac->avctx, layout_map,
579                                       &layout_map_tags, 2) < 0)
580            return NULL;
581        if (output_configure(ac, layout_map, layout_map_tags,
582                             OC_TRIAL_FRAME, 1) < 0)
583            return NULL;
584
585        ac->oc[1].m4ac.chan_config = 2;
586        ac->oc[1].m4ac.ps = 0;
587    }
588    // And vice-versa
589    if (!ac->tags_mapped && type == TYPE_SCE &&
590        ac->oc[1].m4ac.chan_config == 2) {
591        uint8_t layout_map[MAX_ELEM_ID * 4][3];
592        int layout_map_tags;
593        push_output_configuration(ac);
594
595        av_log(ac->avctx, AV_LOG_DEBUG, "stereo with SCE\n");
596
597        if (set_default_channel_config(ac->avctx, layout_map,
598                                       &layout_map_tags, 1) < 0)
599            return NULL;
600        if (output_configure(ac, layout_map, layout_map_tags,
601                             OC_TRIAL_FRAME, 1) < 0)
602            return NULL;
603
604        ac->oc[1].m4ac.chan_config = 1;
605        if (ac->oc[1].m4ac.sbr)
606            ac->oc[1].m4ac.ps = -1;
607    }
608    /* For indexed channel configurations map the channels solely based
609     * on position. */
610    switch (ac->oc[1].m4ac.chan_config) {
611    case 7:
612        if (ac->tags_mapped == 3 && type == TYPE_CPE) {
613            ac->tags_mapped++;
614            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
615        }
616    case 6:
617        /* Some streams incorrectly code 5.1 audio as
618         * SCE[0] CPE[0] CPE[1] SCE[1]
619         * instead of
620         * SCE[0] CPE[0] CPE[1] LFE[0].
621         * If we seem to have encountered such a stream, transfer
622         * the LFE[0] element to the SCE[1]'s mapping */
623        if (ac->tags_mapped == tags_per_config[ac->oc[1].m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
624            ac->tags_mapped++;
625            return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
626        }
627    case 5:
628        if (ac->tags_mapped == 2 && type == TYPE_CPE) {
629            ac->tags_mapped++;
630            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
631        }
632    case 4:
633        if (ac->tags_mapped == 2 &&
634            ac->oc[1].m4ac.chan_config == 4 &&
635            type == TYPE_SCE) {
636            ac->tags_mapped++;
637            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
638        }
639    case 3:
640    case 2:
641        if (ac->tags_mapped == (ac->oc[1].m4ac.chan_config != 2) &&
642            type == TYPE_CPE) {
643            ac->tags_mapped++;
644            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
645        } else if (ac->oc[1].m4ac.chan_config == 2) {
646            return NULL;
647        }
648    case 1:
649        if (!ac->tags_mapped && type == TYPE_SCE) {
650            ac->tags_mapped++;
651            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
652        }
653    default:
654        return NULL;
655    }
656}
657
658/**
659 * Decode an array of 4 bit element IDs, optionally interleaved with a
660 * stereo/mono switching bit.
661 *
662 * @param type speaker type/position for these channels
663 */
664static void decode_channel_map(uint8_t layout_map[][3],
665                               enum ChannelPosition type,
666                               GetBitContext *gb, int n)
667{
668    while (n--) {
669        enum RawDataBlockType syn_ele;
670        switch (type) {
671        case AAC_CHANNEL_FRONT:
672        case AAC_CHANNEL_BACK:
673        case AAC_CHANNEL_SIDE:
674            syn_ele = get_bits1(gb);
675            break;
676        case AAC_CHANNEL_CC:
677            skip_bits1(gb);
678            syn_ele = TYPE_CCE;
679            break;
680        case AAC_CHANNEL_LFE:
681            syn_ele = TYPE_LFE;
682            break;
683        default:
684            av_assert0(0);
685        }
686        layout_map[0][0] = syn_ele;
687        layout_map[0][1] = get_bits(gb, 4);
688        layout_map[0][2] = type;
689        layout_map++;
690    }
691}
692
693/**
694 * Decode program configuration element; reference: table 4.2.
695 *
696 * @return  Returns error status. 0 - OK, !0 - error
697 */
698static int decode_pce(AVCodecContext *avctx, MPEG4AudioConfig *m4ac,
699                      uint8_t (*layout_map)[3],
700                      GetBitContext *gb)
701{
702    int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc;
703    int sampling_index;
704    int comment_len;
705    int tags;
706
707    skip_bits(gb, 2);  // object_type
708
709    sampling_index = get_bits(gb, 4);
710    if (m4ac->sampling_index != sampling_index)
711        av_log(avctx, AV_LOG_WARNING,
712               "Sample rate index in program config element does not "
713               "match the sample rate index configured by the container.\n");
714
715    num_front       = get_bits(gb, 4);
716    num_side        = get_bits(gb, 4);
717    num_back        = get_bits(gb, 4);
718    num_lfe         = get_bits(gb, 2);
719    num_assoc_data  = get_bits(gb, 3);
720    num_cc          = get_bits(gb, 4);
721
722    if (get_bits1(gb))
723        skip_bits(gb, 4); // mono_mixdown_tag
724    if (get_bits1(gb))
725        skip_bits(gb, 4); // stereo_mixdown_tag
726
727    if (get_bits1(gb))
728        skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
729
730    if (get_bits_left(gb) < 4 * (num_front + num_side + num_back + num_lfe + num_assoc_data + num_cc)) {
731        av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
732        return -1;
733    }
734    decode_channel_map(layout_map       , AAC_CHANNEL_FRONT, gb, num_front);
735    tags = num_front;
736    decode_channel_map(layout_map + tags, AAC_CHANNEL_SIDE,  gb, num_side);
737    tags += num_side;
738    decode_channel_map(layout_map + tags, AAC_CHANNEL_BACK,  gb, num_back);
739    tags += num_back;
740    decode_channel_map(layout_map + tags, AAC_CHANNEL_LFE,   gb, num_lfe);
741    tags += num_lfe;
742
743    skip_bits_long(gb, 4 * num_assoc_data);
744
745    decode_channel_map(layout_map + tags, AAC_CHANNEL_CC,    gb, num_cc);
746    tags += num_cc;
747
748    align_get_bits(gb);
749
750    /* comment field, first byte is length */
751    comment_len = get_bits(gb, 8) * 8;
752    if (get_bits_left(gb) < comment_len) {
753        av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
754        return AVERROR_INVALIDDATA;
755    }
756    skip_bits_long(gb, comment_len);
757    return tags;
758}
759
760/**
761 * Decode GA "General Audio" specific configuration; reference: table 4.1.
762 *
763 * @param   ac          pointer to AACContext, may be null
764 * @param   avctx       pointer to AVCCodecContext, used for logging
765 *
766 * @return  Returns error status. 0 - OK, !0 - error
767 */
768static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx,
769                                     GetBitContext *gb,
770                                     MPEG4AudioConfig *m4ac,
771                                     int channel_config)
772{
773    int extension_flag, ret, ep_config, res_flags;
774    uint8_t layout_map[MAX_ELEM_ID*4][3];
775    int tags = 0;
776
777    if (get_bits1(gb)) { // frameLengthFlag
778        avpriv_request_sample(avctx, "960/120 MDCT window");
779        return AVERROR_PATCHWELCOME;
780    }
781
782    if (get_bits1(gb))       // dependsOnCoreCoder
783        skip_bits(gb, 14);   // coreCoderDelay
784    extension_flag = get_bits1(gb);
785
786    if (m4ac->object_type == AOT_AAC_SCALABLE ||
787        m4ac->object_type == AOT_ER_AAC_SCALABLE)
788        skip_bits(gb, 3);     // layerNr
789
790    if (channel_config == 0) {
791        skip_bits(gb, 4);  // element_instance_tag
792        tags = decode_pce(avctx, m4ac, layout_map, gb);
793        if (tags < 0)
794            return tags;
795    } else {
796        if ((ret = set_default_channel_config(avctx, layout_map,
797                                              &tags, channel_config)))
798            return ret;
799    }
800
801    if (count_channels(layout_map, tags) > 1) {
802        m4ac->ps = 0;
803    } else if (m4ac->sbr == 1 && m4ac->ps == -1)
804        m4ac->ps = 1;
805
806    if (ac && (ret = output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0)))
807        return ret;
808
809    if (extension_flag) {
810        switch (m4ac->object_type) {
811        case AOT_ER_BSAC:
812            skip_bits(gb, 5);    // numOfSubFrame
813            skip_bits(gb, 11);   // layer_length
814            break;
815        case AOT_ER_AAC_LC:
816        case AOT_ER_AAC_LTP:
817        case AOT_ER_AAC_SCALABLE:
818        case AOT_ER_AAC_LD:
819            res_flags = get_bits(gb, 3);
820            if (res_flags) {
821                avpriv_report_missing_feature(avctx,
822                                              "AAC data resilience (flags %x)",
823                                              res_flags);
824                return AVERROR_PATCHWELCOME;
825            }
826            break;
827        }
828        skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
829    }
830    switch (m4ac->object_type) {
831    case AOT_ER_AAC_LC:
832    case AOT_ER_AAC_LTP:
833    case AOT_ER_AAC_SCALABLE:
834    case AOT_ER_AAC_LD:
835        ep_config = get_bits(gb, 2);
836        if (ep_config) {
837            avpriv_report_missing_feature(avctx,
838                                          "epConfig %d", ep_config);
839            return AVERROR_PATCHWELCOME;
840        }
841    }
842    return 0;
843}
844
845static int decode_eld_specific_config(AACContext *ac, AVCodecContext *avctx,
846                                     GetBitContext *gb,
847                                     MPEG4AudioConfig *m4ac,
848                                     int channel_config)
849{
850    int ret, ep_config, res_flags;
851    uint8_t layout_map[MAX_ELEM_ID*4][3];
852    int tags = 0;
853    const int ELDEXT_TERM = 0;
854
855    m4ac->ps  = 0;
856    m4ac->sbr = 0;
857
858    if (get_bits1(gb)) { // frameLengthFlag
859        avpriv_request_sample(avctx, "960/120 MDCT window");
860        return AVERROR_PATCHWELCOME;
861    }
862
863    res_flags = get_bits(gb, 3);
864    if (res_flags) {
865        avpriv_report_missing_feature(avctx,
866                                      "AAC data resilience (flags %x)",
867                                      res_flags);
868        return AVERROR_PATCHWELCOME;
869    }
870
871    if (get_bits1(gb)) { // ldSbrPresentFlag
872        avpriv_report_missing_feature(avctx,
873                                      "Low Delay SBR");
874        return AVERROR_PATCHWELCOME;
875    }
876
877    while (get_bits(gb, 4) != ELDEXT_TERM) {
878        int len = get_bits(gb, 4);
879        if (len == 15)
880            len += get_bits(gb, 8);
881        if (len == 15 + 255)
882            len += get_bits(gb, 16);
883        if (get_bits_left(gb) < len * 8 + 4) {
884            av_log(ac->avctx, AV_LOG_ERROR, overread_err);
885            return AVERROR_INVALIDDATA;
886        }
887        skip_bits_long(gb, 8 * len);
888    }
889
890    if ((ret = set_default_channel_config(avctx, layout_map,
891                                          &tags, channel_config)))
892        return ret;
893
894    if (ac && (ret = output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0)))
895        return ret;
896
897    ep_config = get_bits(gb, 2);
898    if (ep_config) {
899        avpriv_report_missing_feature(avctx,
900                                      "epConfig %d", ep_config);
901        return AVERROR_PATCHWELCOME;
902    }
903    return 0;
904}
905
906/**
907 * Decode audio specific configuration; reference: table 1.13.
908 *
909 * @param   ac          pointer to AACContext, may be null
910 * @param   avctx       pointer to AVCCodecContext, used for logging
911 * @param   m4ac        pointer to MPEG4AudioConfig, used for parsing
912 * @param   data        pointer to buffer holding an audio specific config
913 * @param   bit_size    size of audio specific config or data in bits
914 * @param   sync_extension look for an appended sync extension
915 *
916 * @return  Returns error status or number of consumed bits. <0 - error
917 */
918static int decode_audio_specific_config(AACContext *ac,
919                                        AVCodecContext *avctx,
920                                        MPEG4AudioConfig *m4ac,
921                                        const uint8_t *data, int bit_size,
922                                        int sync_extension)
923{
924    GetBitContext gb;
925    int i, ret;
926
927    av_dlog(avctx, "audio specific config size %d\n", bit_size >> 3);
928    for (i = 0; i < bit_size >> 3; i++)
929        av_dlog(avctx, "%02x ", data[i]);
930    av_dlog(avctx, "\n");
931
932    if ((ret = init_get_bits(&gb, data, bit_size)) < 0)
933        return ret;
934
935    if ((i = avpriv_mpeg4audio_get_config(m4ac, data, bit_size,
936                                          sync_extension)) < 0)
937        return AVERROR_INVALIDDATA;
938    if (m4ac->sampling_index > 12) {
939        av_log(avctx, AV_LOG_ERROR,
940               "invalid sampling rate index %d\n",
941               m4ac->sampling_index);
942        return AVERROR_INVALIDDATA;
943    }
944    if (m4ac->object_type == AOT_ER_AAC_LD &&
945        (m4ac->sampling_index < 3 || m4ac->sampling_index > 7)) {
946        av_log(avctx, AV_LOG_ERROR,
947               "invalid low delay sampling rate index %d\n",
948               m4ac->sampling_index);
949        return AVERROR_INVALIDDATA;
950    }
951
952    skip_bits_long(&gb, i);
953
954    switch (m4ac->object_type) {
955    case AOT_AAC_MAIN:
956    case AOT_AAC_LC:
957    case AOT_AAC_LTP:
958    case AOT_ER_AAC_LC:
959    case AOT_ER_AAC_LD:
960        if ((ret = decode_ga_specific_config(ac, avctx, &gb,
961                                            m4ac, m4ac->chan_config)) < 0)
962            return ret;
963        break;
964    case AOT_ER_AAC_ELD:
965        if ((ret = decode_eld_specific_config(ac, avctx, &gb,
966                                              m4ac, m4ac->chan_config)) < 0)
967            return ret;
968        break;
969    default:
970        avpriv_report_missing_feature(avctx,
971                                      "Audio object type %s%d",
972                                      m4ac->sbr == 1 ? "SBR+" : "",
973                                      m4ac->object_type);
974        return AVERROR(ENOSYS);
975    }
976
977    av_dlog(avctx,
978            "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n",
979            m4ac->object_type, m4ac->chan_config, m4ac->sampling_index,
980            m4ac->sample_rate, m4ac->sbr,
981            m4ac->ps);
982
983    return get_bits_count(&gb);
984}
985
986/**
987 * linear congruential pseudorandom number generator
988 *
989 * @param   previous_val    pointer to the current state of the generator
990 *
991 * @return  Returns a 32-bit pseudorandom integer
992 */
993static av_always_inline int lcg_random(unsigned previous_val)
994{
995    union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 };
996    return v.s;
997}
998
999static av_always_inline void reset_predict_state(PredictorState *ps)
1000{
1001    ps->r0   = 0.0f;
1002    ps->r1   = 0.0f;
1003    ps->cor0 = 0.0f;
1004    ps->cor1 = 0.0f;
1005    ps->var0 = 1.0f;
1006    ps->var1 = 1.0f;
1007}
1008
1009static void reset_all_predictors(PredictorState *ps)
1010{
1011    int i;
1012    for (i = 0; i < MAX_PREDICTORS; i++)
1013        reset_predict_state(&ps[i]);
1014}
1015
/**
 * Map a sample rate in Hz to an index in the range 0..11.
 *
 * Index i is returned for the first threshold in the table that is less
 * than or equal to the rate; rates below the last threshold give 11.
 */
static int sample_rate_idx (int rate)
{
    static const int lower_bound[] = {
        92017, 75132, 55426, 46009, 37566, 27713,
        23004, 18783, 13856, 11502,  9391
    };
    const int count = sizeof(lower_bound) / sizeof(lower_bound[0]);
    int idx;

    for (idx = 0; idx < count; idx++) {
        if (rate >= lower_bound[idx])
            return idx;
    }
    return count;
}
1031
1032static void reset_predictor_group(PredictorState *ps, int group_num)
1033{
1034    int i;
1035    for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
1036        reset_predict_state(&ps[i]);
1037}
1038
/**
 * Initialize vlc_spectral[cb] through INIT_VLC_STATIC from the spectral
 * bit-length and code tables of codebook cb. static_size is forwarded as
 * the last INIT_VLC_STATIC argument. The expansion already ends in a
 * semicolon.
 */
#define AAC_INIT_VLC_STATIC(cb, static_size)                               \
    INIT_VLC_STATIC(&vlc_spectral[cb], 8, ff_aac_spectral_sizes[cb],       \
                    ff_aac_spectral_bits[cb],                              \
                    sizeof(ff_aac_spectral_bits[cb][0]),                   \
                    sizeof(ff_aac_spectral_bits[cb][0]),                   \
                    ff_aac_spectral_codes[cb],                             \
                    sizeof(ff_aac_spectral_codes[cb][0]),                  \
                    sizeof(ff_aac_spectral_codes[cb][0]),                  \
                    static_size);
1046
1047static void aacdec_init(AACContext *ac);
1048
1049static av_cold int aac_decode_init(AVCodecContext *avctx)
1050{
1051    AACContext *ac = avctx->priv_data;
1052    int ret;
1053
1054    ac->avctx = avctx;
1055    ac->oc[1].m4ac.sample_rate = avctx->sample_rate;
1056
1057    aacdec_init(ac);
1058
1059    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
1060
1061    if (avctx->extradata_size > 0) {
1062        if ((ret = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
1063                                                avctx->extradata,
1064                                                avctx->extradata_size * 8,
1065                                                1)) < 0)
1066            return ret;
1067    } else {
1068        int sr, i;
1069        uint8_t layout_map[MAX_ELEM_ID*4][3];
1070        int layout_map_tags;
1071
1072        sr = sample_rate_idx(avctx->sample_rate);
1073        ac->oc[1].m4ac.sampling_index = sr;
1074        ac->oc[1].m4ac.channels = avctx->channels;
1075        ac->oc[1].m4ac.sbr = -1;
1076        ac->oc[1].m4ac.ps = -1;
1077
1078        for (i = 0; i < FF_ARRAY_ELEMS(ff_mpeg4audio_channels); i++)
1079            if (ff_mpeg4audio_channels[i] == avctx->channels)
1080                break;
1081        if (i == FF_ARRAY_ELEMS(ff_mpeg4audio_channels)) {
1082            i = 0;
1083        }
1084        ac->oc[1].m4ac.chan_config = i;
1085
1086        if (ac->oc[1].m4ac.chan_config) {
1087            int ret = set_default_channel_config(avctx, layout_map,
1088                &layout_map_tags, ac->oc[1].m4ac.chan_config);
1089            if (!ret)
1090                output_configure(ac, layout_map, layout_map_tags,
1091                                 OC_GLOBAL_HDR, 0);
1092            else if (avctx->err_recognition & AV_EF_EXPLODE)
1093                return AVERROR_INVALIDDATA;
1094        }
1095    }
1096
1097    if (avctx->channels > MAX_CHANNELS) {
1098        av_log(avctx, AV_LOG_ERROR, "Too many channels\n");
1099        return AVERROR_INVALIDDATA;
1100    }
1101
1102    AAC_INIT_VLC_STATIC( 0, 304);
1103    AAC_INIT_VLC_STATIC( 1, 270);
1104    AAC_INIT_VLC_STATIC( 2, 550);
1105    AAC_INIT_VLC_STATIC( 3, 300);
1106    AAC_INIT_VLC_STATIC( 4, 328);
1107    AAC_INIT_VLC_STATIC( 5, 294);
1108    AAC_INIT_VLC_STATIC( 6, 306);
1109    AAC_INIT_VLC_STATIC( 7, 268);
1110    AAC_INIT_VLC_STATIC( 8, 510);
1111    AAC_INIT_VLC_STATIC( 9, 366);
1112    AAC_INIT_VLC_STATIC(10, 462);
1113
1114    ff_aac_sbr_init();
1115
1116    ff_fmt_convert_init(&ac->fmt_conv, avctx);
1117    avpriv_float_dsp_init(&ac->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
1118
1119    ac->random_state = 0x1f2e3d4c;
1120
1121    ff_aac_tableinit();
1122
1123    INIT_VLC_STATIC(&vlc_scalefactors, 7,
1124                    FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
1125                    ff_aac_scalefactor_bits,
1126                    sizeof(ff_aac_scalefactor_bits[0]),
1127                    sizeof(ff_aac_scalefactor_bits[0]),
1128                    ff_aac_scalefactor_code,
1129                    sizeof(ff_aac_scalefactor_code[0]),
1130                    sizeof(ff_aac_scalefactor_code[0]),
1131                    352);
1132
1133    ff_mdct_init(&ac->mdct,       11, 1, 1.0 / (32768.0 * 1024.0));
1134    ff_mdct_init(&ac->mdct_ld,    10, 1, 1.0 / (32768.0 * 512.0));
1135    ff_mdct_init(&ac->mdct_small,  8, 1, 1.0 / (32768.0 * 128.0));
1136    ff_mdct_init(&ac->mdct_ltp,   11, 0, -2.0 * 32768.0);
1137    // window initialization
1138    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
1139    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
1140    ff_init_ff_sine_windows(10);
1141    ff_init_ff_sine_windows( 9);
1142    ff_init_ff_sine_windows( 7);
1143
1144    cbrt_tableinit();
1145
1146    return 0;
1147}
1148
1149/**
1150 * Skip data_stream_element; reference: table 4.10.
1151 */
1152static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
1153{
1154    int byte_align = get_bits1(gb);
1155    int count = get_bits(gb, 8);
1156    if (count == 255)
1157        count += get_bits(gb, 8);
1158    if (byte_align)
1159        align_get_bits(gb);
1160
1161    if (get_bits_left(gb) < 8 * count) {
1162        av_log(ac->avctx, AV_LOG_ERROR, "skip_data_stream_element: "overread_err);
1163        return AVERROR_INVALIDDATA;
1164    }
1165    skip_bits_long(gb, 8 * count);
1166    return 0;
1167}
1168
1169static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
1170                             GetBitContext *gb)
1171{
1172    int sfb;
1173    if (get_bits1(gb)) {
1174        ics->predictor_reset_group = get_bits(gb, 5);
1175        if (ics->predictor_reset_group == 0 ||
1176            ics->predictor_reset_group > 30) {
1177            av_log(ac->avctx, AV_LOG_ERROR,
1178                   "Invalid Predictor Reset Group.\n");
1179            return AVERROR_INVALIDDATA;
1180        }
1181    }
1182    for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]); sfb++) {
1183        ics->prediction_used[sfb] = get_bits1(gb);
1184    }
1185    return 0;
1186}
1187
1188/**
1189 * Decode Long Term Prediction data; reference: table 4.xx.
1190 */
1191static void decode_ltp(LongTermPrediction *ltp,
1192                       GetBitContext *gb, uint8_t max_sfb)
1193{
1194    int sfb;
1195
1196    ltp->lag  = get_bits(gb, 11);
1197    ltp->coef = ltp_coef[get_bits(gb, 3)];
1198    for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++)
1199        ltp->used[sfb] = get_bits1(gb);
1200}
1201
1202/**
1203 * Decode Individual Channel Stream info; reference: table 4.6.
1204 */
1205static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
1206                           GetBitContext *gb)
1207{
1208    int aot = ac->oc[1].m4ac.object_type;
1209    if (aot != AOT_ER_AAC_ELD) {
1210        if (get_bits1(gb)) {
1211            av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
1212            return AVERROR_INVALIDDATA;
1213        }
1214        ics->window_sequence[1] = ics->window_sequence[0];
1215        ics->window_sequence[0] = get_bits(gb, 2);
1216        if (aot == AOT_ER_AAC_LD &&
1217            ics->window_sequence[0] != ONLY_LONG_SEQUENCE) {
1218            av_log(ac->avctx, AV_LOG_ERROR,
1219                   "AAC LD is only defined for ONLY_LONG_SEQUENCE but "
1220                   "window sequence %d found.\n", ics->window_sequence[0]);
1221            ics->window_sequence[0] = ONLY_LONG_SEQUENCE;
1222            return AVERROR_INVALIDDATA;
1223        }
1224        ics->use_kb_window[1]   = ics->use_kb_window[0];
1225        ics->use_kb_window[0]   = get_bits1(gb);
1226    }
1227    ics->num_window_groups  = 1;
1228    ics->group_len[0]       = 1;
1229    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1230        int i;
1231        ics->max_sfb = get_bits(gb, 4);
1232        for (i = 0; i < 7; i++) {
1233            if (get_bits1(gb)) {
1234                ics->group_len[ics->num_window_groups - 1]++;
1235            } else {
1236                ics->num_window_groups++;
1237                ics->group_len[ics->num_window_groups - 1] = 1;
1238            }
1239        }
1240        ics->num_windows       = 8;
1241        ics->swb_offset        =    ff_swb_offset_128[ac->oc[1].m4ac.sampling_index];
1242        ics->num_swb           =   ff_aac_num_swb_128[ac->oc[1].m4ac.sampling_index];
1243        ics->tns_max_bands     = ff_tns_max_bands_128[ac->oc[1].m4ac.sampling_index];
1244        ics->predictor_present = 0;
1245    } else {
1246        ics->max_sfb               = get_bits(gb, 6);
1247        ics->num_windows           = 1;
1248        if (aot == AOT_ER_AAC_LD || aot == AOT_ER_AAC_ELD) {
1249            ics->swb_offset        =     ff_swb_offset_512[ac->oc[1].m4ac.sampling_index];
1250            ics->num_swb           =    ff_aac_num_swb_512[ac->oc[1].m4ac.sampling_index];
1251            ics->tns_max_bands     =  ff_tns_max_bands_512[ac->oc[1].m4ac.sampling_index];
1252            if (!ics->num_swb || !ics->swb_offset)
1253                return AVERROR_BUG;
1254        } else {
1255            ics->swb_offset        =    ff_swb_offset_1024[ac->oc[1].m4ac.sampling_index];
1256            ics->num_swb           =   ff_aac_num_swb_1024[ac->oc[1].m4ac.sampling_index];
1257            ics->tns_max_bands     = ff_tns_max_bands_1024[ac->oc[1].m4ac.sampling_index];
1258        }
1259        if (aot != AOT_ER_AAC_ELD) {
1260            ics->predictor_present     = get_bits1(gb);
1261            ics->predictor_reset_group = 0;
1262        }
1263        if (ics->predictor_present) {
1264            if (aot == AOT_AAC_MAIN) {
1265                if (decode_prediction(ac, ics, gb)) {
1266                    goto fail;
1267                }
1268            } else if (aot == AOT_AAC_LC ||
1269                       aot == AOT_ER_AAC_LC) {
1270                av_log(ac->avctx, AV_LOG_ERROR,
1271                       "Prediction is not allowed in AAC-LC.\n");
1272                goto fail;
1273            } else {
1274                if (aot == AOT_ER_AAC_LD) {
1275                    av_log(ac->avctx, AV_LOG_ERROR,
1276                           "LTP in ER AAC LD not yet implemented.\n");
1277                    return AVERROR_PATCHWELCOME;
1278                }
1279                if ((ics->ltp.present = get_bits(gb, 1)))
1280                    decode_ltp(&ics->ltp, gb, ics->max_sfb);
1281            }
1282        }
1283    }
1284
1285    if (ics->max_sfb > ics->num_swb) {
1286        av_log(ac->avctx, AV_LOG_ERROR,
1287               "Number of scalefactor bands in group (%d) "
1288               "exceeds limit (%d).\n",
1289               ics->max_sfb, ics->num_swb);
1290        goto fail;
1291    }
1292
1293    return 0;
1294fail:
1295    ics->max_sfb = 0;
1296    return AVERROR_INVALIDDATA;
1297}
1298
1299/**
1300 * Decode band types (section_data payload); reference: table 4.46.
1301 *
1302 * @param   band_type           array of the used band type
1303 * @param   band_type_run_end   array of the last scalefactor band of a band type run
1304 *
1305 * @return  Returns error status. 0 - OK, !0 - error
1306 */
1307static int decode_band_types(AACContext *ac, enum BandType band_type[120],
1308                             int band_type_run_end[120], GetBitContext *gb,
1309                             IndividualChannelStream *ics)
1310{
1311    int g, idx = 0;
1312    const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
1313    for (g = 0; g < ics->num_window_groups; g++) {
1314        int k = 0;
1315        while (k < ics->max_sfb) {
1316            uint8_t sect_end = k;
1317            int sect_len_incr;
1318            int sect_band_type = get_bits(gb, 4);
1319            if (sect_band_type == 12) {
1320                av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
1321                return AVERROR_INVALIDDATA;
1322            }
1323            do {
1324                sect_len_incr = get_bits(gb, bits);
1325                sect_end += sect_len_incr;
1326                if (get_bits_left(gb) < 0) {
1327                    av_log(ac->avctx, AV_LOG_ERROR, "decode_band_types: "overread_err);
1328                    return AVERROR_INVALIDDATA;
1329                }
1330                if (sect_end > ics->max_sfb) {
1331                    av_log(ac->avctx, AV_LOG_ERROR,
1332                           "Number of bands (%d) exceeds limit (%d).\n",
1333                           sect_end, ics->max_sfb);
1334                    return AVERROR_INVALIDDATA;
1335                }
1336            } while (sect_len_incr == (1 << bits) - 1);
1337            for (; k < sect_end; k++) {
1338                band_type        [idx]   = sect_band_type;
1339                band_type_run_end[idx++] = sect_end;
1340            }
1341        }
1342    }
1343    return 0;
1344}
1345
1346/**
1347 * Decode scalefactors; reference: table 4.47.
1348 *
1349 * @param   global_gain         first scalefactor value as scalefactors are differentially coded
1350 * @param   band_type           array of the used band type
1351 * @param   band_type_run_end   array of the last scalefactor band of a band type run
1352 * @param   sf                  array of scalefactors or intensity stereo positions
1353 *
1354 * @return  Returns error status. 0 - OK, !0 - error
1355 */
1356static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
1357                               unsigned int global_gain,
1358                               IndividualChannelStream *ics,
1359                               enum BandType band_type[120],
1360                               int band_type_run_end[120])
1361{
1362    int g, i, idx = 0;
1363    int offset[3] = { global_gain, global_gain - 90, 0 };
1364    int clipped_offset;
1365    int noise_flag = 1;
1366    for (g = 0; g < ics->num_window_groups; g++) {
1367        for (i = 0; i < ics->max_sfb;) {
1368            int run_end = band_type_run_end[idx];
1369            if (band_type[idx] == ZERO_BT) {
1370                for (; i < run_end; i++, idx++)
1371                    sf[idx] = 0.0;
1372            } else if ((band_type[idx] == INTENSITY_BT) ||
1373                       (band_type[idx] == INTENSITY_BT2)) {
1374                for (; i < run_end; i++, idx++) {
1375                    offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1376                    clipped_offset = av_clip(offset[2], -155, 100);
1377                    if (offset[2] != clipped_offset) {
1378                        avpriv_request_sample(ac->avctx,
1379                                              "If you heard an audible artifact, there may be a bug in the decoder. "
1380                                              "Clipped intensity stereo position (%d -> %d)",
1381                                              offset[2], clipped_offset);
1382                    }
1383                    sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + POW_SF2_ZERO];
1384                }
1385            } else if (band_type[idx] == NOISE_BT) {
1386                for (; i < run_end; i++, idx++) {
1387                    if (noise_flag-- > 0)
1388                        offset[1] += get_bits(gb, 9) - 256;
1389                    else
1390                        offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1391                    clipped_offset = av_clip(offset[1], -100, 155);
1392                    if (offset[1] != clipped_offset) {
1393                        avpriv_request_sample(ac->avctx,
1394                                              "If you heard an audible artifact, there may be a bug in the decoder. "
1395                                              "Clipped noise gain (%d -> %d)",
1396                                              offset[1], clipped_offset);
1397                    }
1398                    sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + POW_SF2_ZERO];
1399                }
1400            } else {
1401                for (; i < run_end; i++, idx++) {
1402                    offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1403                    if (offset[0] > 255U) {
1404                        av_log(ac->avctx, AV_LOG_ERROR,
1405                               "Scalefactor (%d) out of range.\n", offset[0]);
1406                        return AVERROR_INVALIDDATA;
1407                    }
1408                    sf[idx] = -ff_aac_pow2sf_tab[offset[0] - 100 + POW_SF2_ZERO];
1409                }
1410            }
1411        }
1412    }
1413    return 0;
1414}
1415
1416/**
1417 * Decode pulse data; reference: table 4.7.
1418 */
1419static int decode_pulses(Pulse *pulse, GetBitContext *gb,
1420                         const uint16_t *swb_offset, int num_swb)
1421{
1422    int i, pulse_swb;
1423    pulse->num_pulse = get_bits(gb, 2) + 1;
1424    pulse_swb        = get_bits(gb, 6);
1425    if (pulse_swb >= num_swb)
1426        return -1;
1427    pulse->pos[0]    = swb_offset[pulse_swb];
1428    pulse->pos[0]   += get_bits(gb, 5);
1429    if (pulse->pos[0] >= swb_offset[num_swb])
1430        return -1;
1431    pulse->amp[0]    = get_bits(gb, 4);
1432    for (i = 1; i < pulse->num_pulse; i++) {
1433        pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
1434        if (pulse->pos[i] >= swb_offset[num_swb])
1435            return -1;
1436        pulse->amp[i] = get_bits(gb, 4);
1437    }
1438    return 0;
1439}
1440
1441/**
1442 * Decode Temporal Noise Shaping data; reference: table 4.48.
1443 *
1444 * @return  Returns error status. 0 - OK, !0 - error
1445 */
1446static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
1447                      GetBitContext *gb, const IndividualChannelStream *ics)
1448{
1449    int w, filt, i, coef_len, coef_res, coef_compress;
1450    const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
1451    const int tns_max_order = is8 ? 7 : ac->oc[1].m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
1452    for (w = 0; w < ics->num_windows; w++) {
1453        if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
1454            coef_res = get_bits1(gb);
1455
1456            for (filt = 0; filt < tns->n_filt[w]; filt++) {
1457                int tmp2_idx;
1458                tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
1459
1460                if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
1461                    av_log(ac->avctx, AV_LOG_ERROR,
1462                           "TNS filter order %d is greater than maximum %d.\n",
1463                           tns->order[w][filt], tns_max_order);
1464                    tns->order[w][filt] = 0;
1465                    return AVERROR_INVALIDDATA;
1466                }
1467                if (tns->order[w][filt]) {
1468                    tns->direction[w][filt] = get_bits1(gb);
1469                    coef_compress = get_bits1(gb);
1470                    coef_len = coef_res + 3 - coef_compress;
1471                    tmp2_idx = 2 * coef_compress + coef_res;
1472
1473                    for (i = 0; i < tns->order[w][filt]; i++)
1474                        tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
1475                }
1476            }
1477        }
1478    }
1479    return 0;
1480}
1481
1482/**
1483 * Decode Mid/Side data; reference: table 4.54.
1484 *
1485 * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
1486 *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
1487 *                      [3] reserved for scalable AAC
1488 */
1489static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
1490                                   int ms_present)
1491{
1492    int idx;
1493    if (ms_present == 1) {
1494        for (idx = 0;
1495             idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb;
1496             idx++)
1497            cpe->ms_mask[idx] = get_bits1(gb);
1498    } else if (ms_present == 2) {
1499        memset(cpe->ms_mask, 1,  sizeof(cpe->ms_mask[0]) * cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb);
1500    }
1501}
1502
#ifndef VMUL2
/**
 * Write two scaled table values to dst.
 *
 * The two table indexes are the low two 4 bit fields of idx.
 *
 * @return  Returns dst advanced past the two values written
 */
static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain      = *scale;
    const unsigned first  = idx & 15;
    const unsigned second = (idx >> 4) & 15;

    dst[0] = v[first]  * gain;
    dst[1] = v[second] * gain;
    return dst + 2;
}
#endif
1513
#ifndef VMUL4
/**
 * Write four scaled table values to dst.
 *
 * The four table indexes are the low four 2 bit fields of idx.
 *
 * @return  Returns dst advanced past the four values written
 */
static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = *scale;

    dst[0] = v[ idx       & 3] * gain;
    dst[1] = v[(idx >> 2) & 3] * gain;
    dst[2] = v[(idx >> 4) & 3] * gain;
    dst[3] = v[(idx >> 6) & 3] * gain;
    return dst + 4;
}
#endif
1526
1527#ifndef VMUL2S
1528static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
1529                            unsigned sign, const float *scale)
1530{
1531    union av_intfloat32 s0, s1;
1532
1533    s0.f = s1.f = *scale;
1534    s0.i ^= sign >> 1 << 31;
1535    s1.i ^= sign      << 31;
1536
1537    *dst++ = v[idx    & 15] * s0.f;
1538    *dst++ = v[idx>>4 & 15] * s1.f;
1539
1540    return dst;
1541}
1542#endif
1543
1544#ifndef VMUL4S
1545static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
1546                            unsigned sign, const float *scale)
1547{
1548    unsigned nz = idx >> 12;
1549    union av_intfloat32 s = { .f = *scale };
1550    union av_intfloat32 t;
1551
1552    t.i = s.i ^ (sign & 1U<<31);
1553    *dst++ = v[idx    & 3] * t.f;
1554
1555    sign <<= nz & 1; nz >>= 1;
1556    t.i = s.i ^ (sign & 1U<<31);
1557    *dst++ = v[idx>>2 & 3] * t.f;
1558
1559    sign <<= nz & 1; nz >>= 1;
1560    t.i = s.i ^ (sign & 1U<<31);
1561    *dst++ = v[idx>>4 & 3] * t.f;
1562
1563    sign <<= nz & 1;
1564    t.i = s.i ^ (sign & 1U<<31);
1565    *dst++ = v[idx>>6 & 3] * t.f;
1566
1567    return dst;
1568}
1569#endif
1570
1571/**
1572 * Decode spectral data; reference: table 4.50.
1573 * Dequantize and scale spectral data; reference: 4.6.3.3.
1574 *
1575 * @param   coef            array of dequantized, scaled spectral data
1576 * @param   sf              array of scalefactors or intensity stereo positions
1577 * @param   pulse_present   set if pulses are present
1578 * @param   pulse           pointer to pulse data struct
1579 * @param   band_type       array of the used band type
1580 *
1581 * @return  Returns error status. 0 - OK, !0 - error
1582 */
1583static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
1584                                       GetBitContext *gb, const float sf[120],
1585                                       int pulse_present, const Pulse *pulse,
1586                                       const IndividualChannelStream *ics,
1587                                       enum BandType band_type[120])
1588{
1589    int i, k, g, idx = 0;
1590    const int c = 1024 / ics->num_windows;
1591    const uint16_t *offsets = ics->swb_offset;
1592    float *coef_base = coef;
1593
1594    for (g = 0; g < ics->num_windows; g++)
1595        memset(coef + g * 128 + offsets[ics->max_sfb], 0,
1596               sizeof(float) * (c - offsets[ics->max_sfb]));
1597
1598    for (g = 0; g < ics->num_window_groups; g++) {
1599        unsigned g_len = ics->group_len[g];
1600
1601        for (i = 0; i < ics->max_sfb; i++, idx++) {
1602            const unsigned cbt_m1 = band_type[idx] - 1;
1603            float *cfo = coef + offsets[i];
1604            int off_len = offsets[i + 1] - offsets[i];
1605            int group;
1606
1607            if (cbt_m1 >= INTENSITY_BT2 - 1) {
1608                for (group = 0; group < g_len; group++, cfo+=128) {
1609                    memset(cfo, 0, off_len * sizeof(float));
1610                }
1611            } else if (cbt_m1 == NOISE_BT - 1) {
1612                for (group = 0; group < g_len; group++, cfo+=128) {
1613                    float scale;
1614                    float band_energy;
1615
1616                    for (k = 0; k < off_len; k++) {
1617                        ac->random_state  = lcg_random(ac->random_state);
1618                        cfo[k] = ac->random_state;
1619                    }
1620
1621                    band_energy = ac->fdsp.scalarproduct_float(cfo, cfo, off_len);
1622                    scale = sf[idx] / sqrtf(band_energy);
1623                    ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
1624                }
1625            } else {
1626                const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1627                const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1628                VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1629                OPEN_READER(re, gb);
1630
1631                switch (cbt_m1 >> 1) {
1632                case 0:
1633                    for (group = 0; group < g_len; group++, cfo+=128) {
1634                        float *cf = cfo;
1635                        int len = off_len;
1636
1637                        do {
1638                            int code;
1639                            unsigned cb_idx;
1640
1641                            UPDATE_CACHE(re, gb);
1642                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
1643                            cb_idx = cb_vector_idx[code];
1644                            cf = VMUL4(cf, vq, cb_idx, sf + idx);
1645                        } while (len -= 4);
1646                    }
1647                    break;
1648
1649                case 1:
1650                    for (group = 0; group < g_len; group++, cfo+=128) {
1651                        float *cf = cfo;
1652                        int len = off_len;
1653
1654                        do {
1655                            int code;
1656                            unsigned nnz;
1657                            unsigned cb_idx;
1658                            uint32_t bits;
1659
1660                            UPDATE_CACHE(re, gb);
1661                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
1662                            cb_idx = cb_vector_idx[code];
1663                            nnz = cb_idx >> 8 & 15;
1664                            bits = nnz ? GET_CACHE(re, gb) : 0;
1665                            LAST_SKIP_BITS(re, gb, nnz);
1666                            cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1667                        } while (len -= 4);
1668                    }
1669                    break;
1670
1671                case 2:
1672                    for (group = 0; group < g_len; group++, cfo+=128) {
1673                        float *cf = cfo;
1674                        int len = off_len;
1675
1676                        do {
1677                            int code;
1678                            unsigned cb_idx;
1679
1680                            UPDATE_CACHE(re, gb);
1681                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
1682                            cb_idx = cb_vector_idx[code];
1683                            cf = VMUL2(cf, vq, cb_idx, sf + idx);
1684                        } while (len -= 2);
1685                    }
1686                    break;
1687
1688                case 3:
1689                case 4:
1690                    for (group = 0; group < g_len; group++, cfo+=128) {
1691                        float *cf = cfo;
1692                        int len = off_len;
1693
1694                        do {
1695                            int code;
1696                            unsigned nnz;
1697                            unsigned cb_idx;
1698                            unsigned sign;
1699
1700                            UPDATE_CACHE(re, gb);
1701                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
1702                            cb_idx = cb_vector_idx[code];
1703                            nnz = cb_idx >> 8 & 15;
1704                            sign = nnz ? SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12) : 0;
1705                            LAST_SKIP_BITS(re, gb, nnz);
1706                            cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1707                        } while (len -= 2);
1708                    }
1709                    break;
1710
1711                default:
1712                    for (group = 0; group < g_len; group++, cfo+=128) {
1713                        float *cf = cfo;
1714                        uint32_t *icf = (uint32_t *) cf;
1715                        int len = off_len;
1716
1717                        do {
1718                            int code;
1719                            unsigned nzt, nnz;
1720                            unsigned cb_idx;
1721                            uint32_t bits;
1722                            int j;
1723
1724                            UPDATE_CACHE(re, gb);
1725                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
1726
1727                            if (!code) {
1728                                *icf++ = 0;
1729                                *icf++ = 0;
1730                                continue;
1731                            }
1732
1733                            cb_idx = cb_vector_idx[code];
1734                            nnz = cb_idx >> 12;
1735                            nzt = cb_idx >> 8;
1736                            bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1737                            LAST_SKIP_BITS(re, gb, nnz);
1738
1739                            for (j = 0; j < 2; j++) {
1740                                if (nzt & 1<<j) {
1741                                    uint32_t b;
1742                                    int n;
1743                                    /* The total length of escape_sequence must be < 22 bits according
1744                                       to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1745                                    UPDATE_CACHE(re, gb);
1746                                    b = GET_CACHE(re, gb);
1747                                    b = 31 - av_log2(~b);
1748
1749                                    if (b > 8) {
1750                                        av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1751                                        return AVERROR_INVALIDDATA;
1752                                    }
1753
1754                                    SKIP_BITS(re, gb, b + 1);
1755                                    b += 4;
1756                                    n = (1 << b) + SHOW_UBITS(re, gb, b);
1757                                    LAST_SKIP_BITS(re, gb, b);
1758                                    *icf++ = cbrt_tab[n] | (bits & 1U<<31);
1759                                    bits <<= 1;
1760                                } else {
1761                                    unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1762                                    *icf++ = (bits & 1U<<31) | v;
1763                                    bits <<= !!v;
1764                                }
1765                                cb_idx >>= 4;
1766                            }
1767                        } while (len -= 2);
1768
1769                        ac->fdsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1770                    }
1771                }
1772
1773                CLOSE_READER(re, gb);
1774            }
1775        }
1776        coef += g_len << 7;
1777    }
1778
1779    if (pulse_present) {
1780        idx = 0;
1781        for (i = 0; i < pulse->num_pulse; i++) {
1782            float co = coef_base[ pulse->pos[i] ];
1783            while (offsets[idx + 1] <= pulse->pos[i])
1784                idx++;
1785            if (band_type[idx] != NOISE_BT && sf[idx]) {
1786                float ico = -pulse->amp[i];
1787                if (co) {
1788                    co /= sf[idx];
1789                    ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1790                }
1791                coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1792            }
1793        }
1794    }
1795    return 0;
1796}
1797
1798static av_always_inline float flt16_round(float pf)
1799{
1800    union av_intfloat32 tmp;
1801    tmp.f = pf;
1802    tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1803    return tmp.f;
1804}
1805
1806static av_always_inline float flt16_even(float pf)
1807{
1808    union av_intfloat32 tmp;
1809    tmp.f = pf;
1810    tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1811    return tmp.f;
1812}
1813
1814static av_always_inline float flt16_trunc(float pf)
1815{
1816    union av_intfloat32 pun;
1817    pun.f = pf;
1818    pun.i &= 0xFFFF0000U;
1819    return pun.f;
1820}
1821
1822static av_always_inline void predict(PredictorState *ps, float *coef,
1823                                     int output_enable)
1824{
1825    const float a     = 0.953125; // 61.0 / 64
1826    const float alpha = 0.90625;  // 29.0 / 32
1827    float e0, e1;
1828    float pv;
1829    float k1, k2;
1830    float   r0 = ps->r0,     r1 = ps->r1;
1831    float cor0 = ps->cor0, cor1 = ps->cor1;
1832    float var0 = ps->var0, var1 = ps->var1;
1833
1834    k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
1835    k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
1836
1837    pv = flt16_round(k1 * r0 + k2 * r1);
1838    if (output_enable)
1839        *coef += pv;
1840
1841    e0 = *coef;
1842    e1 = e0 - k1 * r0;
1843
1844    ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
1845    ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
1846    ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
1847    ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
1848
1849    ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
1850    ps->r0 = flt16_trunc(a * e0);
1851}
1852
1853/**
1854 * Apply AAC-Main style frequency domain prediction.
1855 */
1856static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
1857{
1858    int sfb, k;
1859
1860    if (!sce->ics.predictor_initialized) {
1861        reset_all_predictors(sce->predictor_state);
1862        sce->ics.predictor_initialized = 1;
1863    }
1864
1865    if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1866        for (sfb = 0;
1867             sfb < ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index];
1868             sfb++) {
1869            for (k = sce->ics.swb_offset[sfb];
1870                 k < sce->ics.swb_offset[sfb + 1];
1871                 k++) {
1872                predict(&sce->predictor_state[k], &sce->coeffs[k],
1873                        sce->ics.predictor_present &&
1874                        sce->ics.prediction_used[sfb]);
1875            }
1876        }
1877        if (sce->ics.predictor_reset_group)
1878            reset_predictor_group(sce->predictor_state,
1879                                  sce->ics.predictor_reset_group);
1880    } else
1881        reset_all_predictors(sce->predictor_state);
1882}
1883
1884/**
1885 * Decode an individual_channel_stream payload; reference: table 4.44.
1886 *
1887 * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
1888 * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1889 *
1890 * @return  Returns error status. 0 - OK, !0 - error
1891 */
1892static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1893                      GetBitContext *gb, int common_window, int scale_flag)
1894{
1895    Pulse pulse;
1896    TemporalNoiseShaping    *tns = &sce->tns;
1897    IndividualChannelStream *ics = &sce->ics;
1898    float *out = sce->coeffs;
1899    int global_gain, eld_syntax, er_syntax, pulse_present = 0;
1900    int ret;
1901
1902    eld_syntax = ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD;
1903    er_syntax  = ac->oc[1].m4ac.object_type == AOT_ER_AAC_LC ||
1904                 ac->oc[1].m4ac.object_type == AOT_ER_AAC_LTP ||
1905                 ac->oc[1].m4ac.object_type == AOT_ER_AAC_LD ||
1906                 ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD;
1907
1908    /* This assignment is to silence a GCC warning about the variable being used
1909     * uninitialized when in fact it always is.
1910     */
1911    pulse.num_pulse = 0;
1912
1913    global_gain = get_bits(gb, 8);
1914
1915    if (!common_window && !scale_flag) {
1916        if (decode_ics_info(ac, ics, gb) < 0)
1917            return AVERROR_INVALIDDATA;
1918    }
1919
1920    if ((ret = decode_band_types(ac, sce->band_type,
1921                                 sce->band_type_run_end, gb, ics)) < 0)
1922        return ret;
1923    if ((ret = decode_scalefactors(ac, sce->sf, gb, global_gain, ics,
1924                                  sce->band_type, sce->band_type_run_end)) < 0)
1925        return ret;
1926
1927    pulse_present = 0;
1928    if (!scale_flag) {
1929        if (!eld_syntax && (pulse_present = get_bits1(gb))) {
1930            if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1931                av_log(ac->avctx, AV_LOG_ERROR,
1932                       "Pulse tool not allowed in eight short sequence.\n");
1933                return AVERROR_INVALIDDATA;
1934            }
1935            if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1936                av_log(ac->avctx, AV_LOG_ERROR,
1937                       "Pulse data corrupt or invalid.\n");
1938                return AVERROR_INVALIDDATA;
1939            }
1940        }
1941        tns->present = get_bits1(gb);
1942        if (tns->present && !er_syntax)
1943            if (decode_tns(ac, tns, gb, ics) < 0)
1944                return AVERROR_INVALIDDATA;
1945        if (!eld_syntax && get_bits1(gb)) {
1946            avpriv_request_sample(ac->avctx, "SSR");
1947            return AVERROR_PATCHWELCOME;
1948        }
1949        // I see no textual basis in the spec for this occurring after SSR gain
1950        // control, but this is what both reference and real implmentations do
1951        if (tns->present && er_syntax)
1952            if (decode_tns(ac, tns, gb, ics) < 0)
1953                return AVERROR_INVALIDDATA;
1954    }
1955
1956    if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present,
1957                                    &pulse, ics, sce->band_type) < 0)
1958        return AVERROR_INVALIDDATA;
1959
1960    if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN && !common_window)
1961        apply_prediction(ac, sce);
1962
1963    return 0;
1964}
1965
1966/**
1967 * Mid/Side stereo decoding; reference: 4.6.8.1.3.
1968 */
1969static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1970{
1971    const IndividualChannelStream *ics = &cpe->ch[0].ics;
1972    float *ch0 = cpe->ch[0].coeffs;
1973    float *ch1 = cpe->ch[1].coeffs;
1974    int g, i, group, idx = 0;
1975    const uint16_t *offsets = ics->swb_offset;
1976    for (g = 0; g < ics->num_window_groups; g++) {
1977        for (i = 0; i < ics->max_sfb; i++, idx++) {
1978            if (cpe->ms_mask[idx] &&
1979                cpe->ch[0].band_type[idx] < NOISE_BT &&
1980                cpe->ch[1].band_type[idx] < NOISE_BT) {
1981                for (group = 0; group < ics->group_len[g]; group++) {
1982                    ac->fdsp.butterflies_float(ch0 + group * 128 + offsets[i],
1983                                               ch1 + group * 128 + offsets[i],
1984                                               offsets[i+1] - offsets[i]);
1985                }
1986            }
1987        }
1988        ch0 += ics->group_len[g] * 128;
1989        ch1 += ics->group_len[g] * 128;
1990    }
1991}
1992
1993/**
1994 * intensity stereo decoding; reference: 4.6.8.2.3
1995 *
1996 * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
1997 *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
1998 *                      [3] reserved for scalable AAC
1999 */
2000static void apply_intensity_stereo(AACContext *ac,
2001                                   ChannelElement *cpe, int ms_present)
2002{
2003    const IndividualChannelStream *ics = &cpe->ch[1].ics;
2004    SingleChannelElement         *sce1 = &cpe->ch[1];
2005    float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
2006    const uint16_t *offsets = ics->swb_offset;
2007    int g, group, i, idx = 0;
2008    int c;
2009    float scale;
2010    for (g = 0; g < ics->num_window_groups; g++) {
2011        for (i = 0; i < ics->max_sfb;) {
2012            if (sce1->band_type[idx] == INTENSITY_BT ||
2013                sce1->band_type[idx] == INTENSITY_BT2) {
2014                const int bt_run_end = sce1->band_type_run_end[idx];
2015                for (; i < bt_run_end; i++, idx++) {
2016                    c = -1 + 2 * (sce1->band_type[idx] - 14);
2017                    if (ms_present)
2018                        c *= 1 - 2 * cpe->ms_mask[idx];
2019                    scale = c * sce1->sf[idx];
2020                    for (group = 0; group < ics->group_len[g]; group++)
2021                        ac->fdsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i],
2022                                                    coef0 + group * 128 + offsets[i],
2023                                                    scale,
2024                                                    offsets[i + 1] - offsets[i]);
2025                }
2026            } else {
2027                int bt_run_end = sce1->band_type_run_end[idx];
2028                idx += bt_run_end - i;
2029                i    = bt_run_end;
2030            }
2031        }
2032        coef0 += ics->group_len[g] * 128;
2033        coef1 += ics->group_len[g] * 128;
2034    }
2035}
2036
2037/**
2038 * Decode a channel_pair_element; reference: table 4.4.
2039 *
2040 * @return  Returns error status. 0 - OK, !0 - error
2041 */
2042static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
2043{
2044    int i, ret, common_window, ms_present = 0;
2045    int eld_syntax = ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD;
2046
2047    common_window = eld_syntax || get_bits1(gb);
2048    if (common_window) {
2049        if (decode_ics_info(ac, &cpe->ch[0].ics, gb))
2050            return AVERROR_INVALIDDATA;
2051        i = cpe->ch[1].ics.use_kb_window[0];
2052        cpe->ch[1].ics = cpe->ch[0].ics;
2053        cpe->ch[1].ics.use_kb_window[1] = i;
2054        if (cpe->ch[1].ics.predictor_present &&
2055            (ac->oc[1].m4ac.object_type != AOT_AAC_MAIN))
2056            if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1)))
2057                decode_ltp(&cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb);
2058        ms_present = get_bits(gb, 2);
2059        if (ms_present == 3) {
2060            av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
2061            return AVERROR_INVALIDDATA;
2062        } else if (ms_present)
2063            decode_mid_side_stereo(cpe, gb, ms_present);
2064    }
2065    if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
2066        return ret;
2067    if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
2068        return ret;
2069
2070    if (common_window) {
2071        if (ms_present)
2072            apply_mid_side_stereo(ac, cpe);
2073        if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) {
2074            apply_prediction(ac, &cpe->ch[0]);
2075            apply_prediction(ac, &cpe->ch[1]);
2076        }
2077    }
2078
2079    apply_intensity_stereo(ac, cpe, ms_present);
2080    return 0;
2081}
2082
2083static const float cce_scale[] = {
2084    1.09050773266525765921, //2^(1/8)
2085    1.18920711500272106672, //2^(1/4)
2086    M_SQRT2,
2087    2,
2088};
2089
2090/**
2091 * Decode coupling_channel_element; reference: table 4.8.
2092 *
2093 * @return  Returns error status. 0 - OK, !0 - error
2094 */
2095static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
2096{
2097    int num_gain = 0;
2098    int c, g, sfb, ret;
2099    int sign;
2100    float scale;
2101    SingleChannelElement *sce = &che->ch[0];
2102    ChannelCoupling     *coup = &che->coup;
2103
2104    coup->coupling_point = 2 * get_bits1(gb);
2105    coup->num_coupled = get_bits(gb, 3);
2106    for (c = 0; c <= coup->num_coupled; c++) {
2107        num_gain++;
2108        coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
2109        coup->id_select[c] = get_bits(gb, 4);
2110        if (coup->type[c] == TYPE_CPE) {
2111            coup->ch_select[c] = get_bits(gb, 2);
2112            if (coup->ch_select[c] == 3)
2113                num_gain++;
2114        } else
2115            coup->ch_select[c] = 2;
2116    }
2117    coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
2118
2119    sign  = get_bits(gb, 1);
2120    scale = cce_scale[get_bits(gb, 2)];
2121
2122    if ((ret = decode_ics(ac, sce, gb, 0, 0)))
2123        return ret;
2124
2125    for (c = 0; c < num_gain; c++) {
2126        int idx  = 0;
2127        int cge  = 1;
2128        int gain = 0;
2129        float gain_cache = 1.0;
2130        if (c) {
2131            cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
2132            gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
2133            gain_cache = powf(scale, -gain);
2134        }
2135        if (coup->coupling_point == AFTER_IMDCT) {
2136            coup->gain[c][0] = gain_cache;
2137        } else {
2138            for (g = 0; g < sce->ics.num_window_groups; g++) {
2139                for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
2140                    if (sce->band_type[idx] != ZERO_BT) {
2141                        if (!cge) {
2142                            int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
2143                            if (t) {
2144                                int s = 1;
2145                                t = gain += t;
2146                                if (sign) {
2147                                    s  -= 2 * (t & 0x1);
2148                                    t >>= 1;
2149                                }
2150                                gain_cache = powf(scale, -t) * s;
2151                            }
2152                        }
2153                        coup->gain[c][idx] = gain_cache;
2154                    }
2155                }
2156            }
2157        }
2158    }
2159    return 0;
2160}
2161
2162/**
2163 * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
2164 *
2165 * @return  Returns number of bytes consumed.
2166 */
2167static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
2168                                         GetBitContext *gb)
2169{
2170    int i;
2171    int num_excl_chan = 0;
2172
2173    do {
2174        for (i = 0; i < 7; i++)
2175            che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
2176    } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
2177
2178    return num_excl_chan / 7;
2179}
2180
2181/**
2182 * Decode dynamic range information; reference: table 4.52.
2183 *
2184 * @return  Returns number of bytes consumed.
2185 */
2186static int decode_dynamic_range(DynamicRangeControl *che_drc,
2187                                GetBitContext *gb)
2188{
2189    int n             = 1;
2190    int drc_num_bands = 1;
2191    int i;
2192
2193    /* pce_tag_present? */
2194    if (get_bits1(gb)) {
2195        che_drc->pce_instance_tag  = get_bits(gb, 4);
2196        skip_bits(gb, 4); // tag_reserved_bits
2197        n++;
2198    }
2199
2200    /* excluded_chns_present? */
2201    if (get_bits1(gb)) {
2202        n += decode_drc_channel_exclusions(che_drc, gb);
2203    }
2204
2205    /* drc_bands_present? */
2206    if (get_bits1(gb)) {
2207        che_drc->band_incr            = get_bits(gb, 4);
2208        che_drc->interpolation_scheme = get_bits(gb, 4);
2209        n++;
2210        drc_num_bands += che_drc->band_incr;
2211        for (i = 0; i < drc_num_bands; i++) {
2212            che_drc->band_top[i] = get_bits(gb, 8);
2213            n++;
2214        }
2215    }
2216
2217    /* prog_ref_level_present? */
2218    if (get_bits1(gb)) {
2219        che_drc->prog_ref_level = get_bits(gb, 7);
2220        skip_bits1(gb); // prog_ref_level_reserved_bits
2221        n++;
2222    }
2223
2224    for (i = 0; i < drc_num_bands; i++) {
2225        che_drc->dyn_rng_sgn[i] = get_bits1(gb);
2226        che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
2227        n++;
2228    }
2229
2230    return n;
2231}
2232
2233static int decode_fill(AACContext *ac, GetBitContext *gb, int len) {
2234    uint8_t buf[256];
2235    int i, major, minor;
2236
2237    if (len < 13+7*8)
2238        goto unknown;
2239
2240    get_bits(gb, 13); len -= 13;
2241
2242    for(i=0; i+1<sizeof(buf) && len>=8; i++, len-=8)
2243        buf[i] = get_bits(gb, 8);
2244
2245    buf[i] = 0;
2246    if (ac->avctx->debug & FF_DEBUG_PICT_INFO)
2247        av_log(ac->avctx, AV_LOG_DEBUG, "FILL:%s\n", buf);
2248
2249    if (sscanf(buf, "libfaac %d.%d", &major, &minor) == 2){
2250        ac->avctx->internal->skip_samples = 1024;
2251    }
2252
2253unknown:
2254    skip_bits_long(gb, len);
2255
2256    return 0;
2257}
2258
2259/**
2260 * Decode extension data (incomplete); reference: table 4.51.
2261 *
2262 * @param   cnt length of TYPE_FIL syntactic element in bytes
2263 *
2264 * @return Returns number of bytes consumed
2265 */
2266static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
2267                                    ChannelElement *che, enum RawDataBlockType elem_type)
2268{
2269    int crc_flag = 0;
2270    int res = cnt;
2271    switch (get_bits(gb, 4)) { // extension type
2272    case EXT_SBR_DATA_CRC:
2273        crc_flag++;
2274    case EXT_SBR_DATA:
2275        if (!che) {
2276            av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
2277            return res;
2278        } else if (!ac->oc[1].m4ac.sbr) {
2279            av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
2280            skip_bits_long(gb, 8 * cnt - 4);
2281            return res;
2282        } else if (ac->oc[1].m4ac.sbr == -1 && ac->oc[1].status == OC_LOCKED) {
2283            av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
2284            skip_bits_long(gb, 8 * cnt - 4);
2285            return res;
2286        } else if (ac->oc[1].m4ac.ps == -1 && ac->oc[1].status < OC_LOCKED && ac->avctx->channels == 1) {
2287            ac->oc[1].m4ac.sbr = 1;
2288            ac->oc[1].m4ac.ps = 1;
2289            ac->avctx->profile = FF_PROFILE_AAC_HE_V2;
2290            output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
2291                             ac->oc[1].status, 1);
2292        } else {
2293            ac->oc[1].m4ac.sbr = 1;
2294            ac->avctx->profile = FF_PROFILE_AAC_HE;
2295        }
2296        res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
2297        break;
2298    case EXT_DYNAMIC_RANGE:
2299        res = decode_dynamic_range(&ac->che_drc, gb);
2300        break;
2301    case EXT_FILL:
2302        decode_fill(ac, gb, 8 * cnt - 4);
2303        break;
2304    case EXT_FILL_DATA:
2305    case EXT_DATA_ELEMENT:
2306    default:
2307        skip_bits_long(gb, 8 * cnt - 4);
2308        break;
2309    };
2310    return res;
2311}
2312
2313/**
2314 * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
2315 *
2316 * @param   decode  1 if tool is used normally, 0 if tool is used in LTP.
2317 * @param   coef    spectral coefficients
2318 */
2319static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
2320                      IndividualChannelStream *ics, int decode)
2321{
2322    const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
2323    int w, filt, m, i;
2324    int bottom, top, order, start, end, size, inc;
2325    float lpc[TNS_MAX_ORDER];
2326    float tmp[TNS_MAX_ORDER+1];
2327
2328    for (w = 0; w < ics->num_windows; w++) {
2329        bottom = ics->num_swb;
2330        for (filt = 0; filt < tns->n_filt[w]; filt++) {
2331            top    = bottom;
2332            bottom = FFMAX(0, top - tns->length[w][filt]);
2333            order  = tns->order[w][filt];
2334            if (order == 0)
2335                continue;
2336
2337            // tns_decode_coef
2338            compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
2339
2340            start = ics->swb_offset[FFMIN(bottom, mmm)];
2341            end   = ics->swb_offset[FFMIN(   top, mmm)];
2342            if ((size = end - start) <= 0)
2343                continue;
2344            if (tns->direction[w][filt]) {
2345                inc = -1;
2346                start = end - 1;
2347            } else {
2348                inc = 1;
2349            }
2350            start += w * 128;
2351
2352            if (decode) {
2353                // ar filter
2354                for (m = 0; m < size; m++, start += inc)
2355                    for (i = 1; i <= FFMIN(m, order); i++)
2356                        coef[start] -= coef[start - i * inc] * lpc[i - 1];
2357            } else {
2358                // ma filter
2359                for (m = 0; m < size; m++, start += inc) {
2360                    tmp[0] = coef[start];
2361                    for (i = 1; i <= FFMIN(m, order); i++)
2362                        coef[start] += tmp[i] * lpc[i - 1];
2363                    for (i = order; i > 0; i--)
2364                        tmp[i] = tmp[i - 1];
2365                }
2366            }
2367        }
2368    }
2369}
2370
2371/**
2372 *  Apply windowing and MDCT to obtain the spectral
2373 *  coefficient from the predicted sample by LTP.
2374 */
2375static void windowing_and_mdct_ltp(AACContext *ac, float *out,
2376                                   float *in, IndividualChannelStream *ics)
2377{
2378    const float *lwindow      = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2379    const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2380    const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2381    const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
2382
2383    if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
2384        ac->fdsp.vector_fmul(in, in, lwindow_prev, 1024);
2385    } else {
2386        memset(in, 0, 448 * sizeof(float));
2387        ac->fdsp.vector_fmul(in + 448, in + 448, swindow_prev, 128);
2388    }
2389    if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
2390        ac->fdsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
2391    } else {
2392        ac->fdsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
2393        memset(in + 1024 + 576, 0, 448 * sizeof(float));
2394    }
2395    ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in);
2396}
2397
2398/**
2399 * Apply the long term prediction
2400 */
2401static void apply_ltp(AACContext *ac, SingleChannelElement *sce)
2402{
2403    const LongTermPrediction *ltp = &sce->ics.ltp;
2404    const uint16_t *offsets = sce->ics.swb_offset;
2405    int i, sfb;
2406
2407    if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
2408        float *predTime = sce->ret;
2409        float *predFreq = ac->buf_mdct;
2410        int16_t num_samples = 2048;
2411
2412        if (ltp->lag < 1024)
2413            num_samples = ltp->lag + 1024;
2414        for (i = 0; i < num_samples; i++)
2415            predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
2416        memset(&predTime[i], 0, (2048 - i) * sizeof(float));
2417
2418        ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
2419
2420        if (sce->tns.present)
2421            ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0);
2422
2423        for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
2424            if (ltp->used[sfb])
2425                for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
2426                    sce->coeffs[i] += predFreq[i];
2427    }
2428}
2429
2430/**
2431 * Update the LTP buffer for next frame
2432 */
2433static void update_ltp(AACContext *ac, SingleChannelElement *sce)
2434{
2435    IndividualChannelStream *ics = &sce->ics;
2436    float *saved     = sce->saved;
2437    float *saved_ltp = sce->coeffs;
2438    const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2439    const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2440    int i;
2441
2442    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2443        memcpy(saved_ltp,       saved, 512 * sizeof(float));
2444        memset(saved_ltp + 576, 0,     448 * sizeof(float));
2445        ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
2446        for (i = 0; i < 64; i++)
2447            saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
2448    } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
2449        memcpy(saved_ltp,       ac->buf_mdct + 512, 448 * sizeof(float));
2450        memset(saved_ltp + 576, 0,                  448 * sizeof(float));
2451        ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
2452        for (i = 0; i < 64; i++)
2453            saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
2454    } else { // LONG_STOP or ONLY_LONG
2455        ac->fdsp.vector_fmul_reverse(saved_ltp,       ac->buf_mdct + 512,     &lwindow[512],     512);
2456        for (i = 0; i < 512; i++)
2457            saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i];
2458    }
2459
2460    memcpy(sce->ltp_state,      sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state));
2461    memcpy(sce->ltp_state+1024, sce->ret,            1024 * sizeof(*sce->ltp_state));
2462    memcpy(sce->ltp_state+2048, saved_ltp,           1024 * sizeof(*sce->ltp_state));
2463}
2464
2465/**
2466 * Conduct IMDCT and windowing.
2467 */
2468static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
2469{
2470    IndividualChannelStream *ics = &sce->ics;
2471    float *in    = sce->coeffs;
2472    float *out   = sce->ret;
2473    float *saved = sce->saved;
2474    const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2475    const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2476    const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
2477    float *buf  = ac->buf_mdct;
2478    float *temp = ac->temp;
2479    int i;
2480
2481    // imdct
2482    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2483        for (i = 0; i < 1024; i += 128)
2484            ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
2485    } else
2486        ac->mdct.imdct_half(&ac->mdct, buf, in);
2487
2488    /* window overlapping
2489     * NOTE: To simplify the overlapping code, all 'meaningless' short to long
2490     * and long to short transitions are considered to be short to short
2491     * transitions. This leaves just two cases (long to long and short to short)
2492     * with a little special sauce for EIGHT_SHORT_SEQUENCE.
2493     */
2494    if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
2495            (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
2496        ac->fdsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, 512);
2497    } else {
2498        memcpy(                         out,               saved,            448 * sizeof(float));
2499
2500        if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2501            ac->fdsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, 64);
2502            ac->fdsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      64);
2503            ac->fdsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      64);
2504            ac->fdsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      64);
2505            ac->fdsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      64);
2506            memcpy(                     out + 448 + 4*128, temp, 64 * sizeof(float));
2507        } else {
2508            ac->fdsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, 64);
2509            memcpy(                     out + 576,         buf + 64,         448 * sizeof(float));
2510        }
2511    }
2512
2513    // buffer update
2514    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2515        memcpy(                     saved,       temp + 64,         64 * sizeof(float));
2516        ac->fdsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 64);
2517        ac->fdsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
2518        ac->fdsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
2519        memcpy(                     saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
2520    } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
2521        memcpy(                     saved,       buf + 512,        448 * sizeof(float));
2522        memcpy(                     saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
2523    } else { // LONG_STOP or ONLY_LONG
2524        memcpy(                     saved,       buf + 512,        512 * sizeof(float));
2525    }
2526}
2527
2528static void imdct_and_windowing_ld(AACContext *ac, SingleChannelElement *sce)
2529{
2530    IndividualChannelStream *ics = &sce->ics;
2531    float *in    = sce->coeffs;
2532    float *out   = sce->ret;
2533    float *saved = sce->saved;
2534    float *buf  = ac->buf_mdct;
2535
2536    // imdct
2537    ac->mdct.imdct_half(&ac->mdct_ld, buf, in);
2538
2539    // window overlapping
2540    if (ics->use_kb_window[1]) {
2541        // AAC LD uses a low overlap sine window instead of a KBD window
2542        memcpy(out, saved, 192 * sizeof(float));
2543        ac->fdsp.vector_fmul_window(out + 192, saved + 192, buf, ff_sine_128, 64);
2544        memcpy(                     out + 320, buf + 64, 192 * sizeof(float));
2545    } else {
2546        ac->fdsp.vector_fmul_window(out, saved, buf, ff_sine_512, 256);
2547    }
2548
2549    // buffer update
2550    memcpy(saved, buf + 256, 256 * sizeof(float));
2551}
2552
2553static void imdct_and_windowing_eld(AACContext *ac, SingleChannelElement *sce)
2554{
2555    float *in    = sce->coeffs;
2556    float *out   = sce->ret;
2557    float *saved = sce->saved;
2558    const float *const window = ff_aac_eld_window;
2559    float *buf  = ac->buf_mdct;
2560    int i;
2561    const int n  = 512;
2562    const int n2 = n >> 1;
2563    const int n4 = n >> 2;
2564
2565    // Inverse transform, mapped to the conventional IMDCT by
2566    // Chivukula, R.K.; Reznik, Y.A.; Devarajan, V.,
2567    // "Efficient algorithms for MPEG-4 AAC-ELD, AAC-LD and AAC-LC filterbanks,"
2568    // International Conference on Audio, Language and Image Processing, ICALIP 2008.
2569    // URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=4590245&isnumber=4589950
2570    for (i = 0; i < n2; i+=2) {
2571        float temp;
2572        temp =  in[i    ]; in[i    ] = -in[n - 1 - i]; in[n - 1 - i] = temp;
2573        temp = -in[i + 1]; in[i + 1] =  in[n - 2 - i]; in[n - 2 - i] = temp;
2574    }
2575    ac->mdct.imdct_half(&ac->mdct_ld, buf, in);
2576    for (i = 0; i < n; i+=2) {
2577        buf[i] = -buf[i];
2578    }
2579    // Like with the regular IMDCT at this point we still have the middle half
2580    // of a transform but with even symmetry on the left and odd symmetry on
2581    // the right
2582
2583    // window overlapping
2584    // The spec says to use samples [0..511] but the reference decoder uses
2585    // samples [128..639].
2586    for (i = n4; i < n2; i ++) {
2587        out[i - n4] =    buf[n2 - 1 - i]       * window[i       - n4] +
2588                       saved[      i + n2]     * window[i +   n - n4] +
2589                      -saved[  n + n2 - 1 - i] * window[i + 2*n - n4] +
2590                      -saved[2*n + n2 + i]     * window[i + 3*n - n4];
2591    }
2592    for (i = 0; i < n2; i ++) {
2593        out[n4 + i] =    buf[i]               * window[i + n2       - n4] +
2594                      -saved[      n - 1 - i] * window[i + n2 +   n - n4] +
2595                      -saved[  n + i]         * window[i + n2 + 2*n - n4] +
2596                       saved[2*n + n - 1 - i] * window[i + n2 + 3*n - n4];
2597    }
2598    for (i = 0; i < n4; i ++) {
2599        out[n2 + n4 + i] =    buf[      i + n2]     * window[i +   n - n4] +
2600                           -saved[      n2 - 1 - i] * window[i + 2*n - n4] +
2601                           -saved[  n + n2 + i]     * window[i + 3*n - n4];
2602    }
2603
2604    // buffer update
2605    memmove(saved + n, saved, 2 * n * sizeof(float));
2606    memcpy( saved,       buf,     n * sizeof(float));
2607}
2608
2609/**
2610 * Apply dependent channel coupling (applied before IMDCT).
2611 *
2612 * @param   index   index into coupling gain array
2613 */
2614static void apply_dependent_coupling(AACContext *ac,
2615                                     SingleChannelElement *target,
2616                                     ChannelElement *cce, int index)
2617{
2618    IndividualChannelStream *ics = &cce->ch[0].ics;
2619    const uint16_t *offsets = ics->swb_offset;
2620    float *dest = target->coeffs;
2621    const float *src = cce->ch[0].coeffs;
2622    int g, i, group, k, idx = 0;
2623    if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
2624        av_log(ac->avctx, AV_LOG_ERROR,
2625               "Dependent coupling is not supported together with LTP\n");
2626        return;
2627    }
2628    for (g = 0; g < ics->num_window_groups; g++) {
2629        for (i = 0; i < ics->max_sfb; i++, idx++) {
2630            if (cce->ch[0].band_type[idx] != ZERO_BT) {
2631                const float gain = cce->coup.gain[index][idx];
2632                for (group = 0; group < ics->group_len[g]; group++) {
2633                    for (k = offsets[i]; k < offsets[i + 1]; k++) {
2634                        // FIXME: SIMDify
2635                        dest[group * 128 + k] += gain * src[group * 128 + k];
2636                    }
2637                }
2638            }
2639        }
2640        dest += ics->group_len[g] * 128;
2641        src  += ics->group_len[g] * 128;
2642    }
2643}
2644
2645/**
2646 * Apply independent channel coupling (applied after IMDCT).
2647 *
2648 * @param   index   index into coupling gain array
2649 */
2650static void apply_independent_coupling(AACContext *ac,
2651                                       SingleChannelElement *target,
2652                                       ChannelElement *cce, int index)
2653{
2654    int i;
2655    const float gain = cce->coup.gain[index][0];
2656    const float *src = cce->ch[0].ret;
2657    float *dest = target->ret;
2658    const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
2659
2660    for (i = 0; i < len; i++)
2661        dest[i] += gain * src[i];
2662}
2663
2664/**
2665 * channel coupling transformation interface
2666 *
2667 * @param   apply_coupling_method   pointer to (in)dependent coupling function
2668 */
2669static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
2670                                   enum RawDataBlockType type, int elem_id,
2671                                   enum CouplingPoint coupling_point,
2672                                   void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
2673{
2674    int i, c;
2675
2676    for (i = 0; i < MAX_ELEM_ID; i++) {
2677        ChannelElement *cce = ac->che[TYPE_CCE][i];
2678        int index = 0;
2679
2680        if (cce && cce->coup.coupling_point == coupling_point) {
2681            ChannelCoupling *coup = &cce->coup;
2682
2683            for (c = 0; c <= coup->num_coupled; c++) {
2684                if (coup->type[c] == type && coup->id_select[c] == elem_id) {
2685                    if (coup->ch_select[c] != 1) {
2686                        apply_coupling_method(ac, &cc->ch[0], cce, index);
2687                        if (coup->ch_select[c] != 0)
2688                            index++;
2689                    }
2690                    if (coup->ch_select[c] != 2)
2691                        apply_coupling_method(ac, &cc->ch[1], cce, index++);
2692                } else
2693                    index += 1 + (coup->ch_select[c] == 3);
2694            }
2695        }
2696    }
2697}
2698
2699/**
2700 * Convert spectral data to float samples, applying all supported tools as appropriate.
2701 */
2702static void spectral_to_sample(AACContext *ac)
2703{
2704    int i, type;
2705    void (*imdct_and_window)(AACContext *ac, SingleChannelElement *sce);
2706    switch (ac->oc[1].m4ac.object_type) {
2707    case AOT_ER_AAC_LD:
2708        imdct_and_window = imdct_and_windowing_ld;
2709        break;
2710    case AOT_ER_AAC_ELD:
2711        imdct_and_window = imdct_and_windowing_eld;
2712        break;
2713    default:
2714        imdct_and_window = ac->imdct_and_windowing;
2715    }
2716    for (type = 3; type >= 0; type--) {
2717        for (i = 0; i < MAX_ELEM_ID; i++) {
2718            ChannelElement *che = ac->che[type][i];
2719            if (che) {
2720                if (type <= TYPE_CPE)
2721                    apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
2722                if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
2723                    if (che->ch[0].ics.predictor_present) {
2724                        if (che->ch[0].ics.ltp.present)
2725                            ac->apply_ltp(ac, &che->ch[0]);
2726                        if (che->ch[1].ics.ltp.present && type == TYPE_CPE)
2727                            ac->apply_ltp(ac, &che->ch[1]);
2728                    }
2729                }
2730                if (che->ch[0].tns.present)
2731                    ac->apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
2732                if (che->ch[1].tns.present)
2733                    ac->apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
2734                if (type <= TYPE_CPE)
2735                    apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
2736                if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
2737                    imdct_and_window(ac, &che->ch[0]);
2738                    if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
2739                        ac->update_ltp(ac, &che->ch[0]);
2740                    if (type == TYPE_CPE) {
2741                        imdct_and_window(ac, &che->ch[1]);
2742                        if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
2743                            ac->update_ltp(ac, &che->ch[1]);
2744                    }
2745                    if (ac->oc[1].m4ac.sbr > 0) {
2746                        ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
2747                    }
2748                }
2749                if (type <= TYPE_CCE)
2750                    apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
2751            }
2752        }
2753    }
2754}
2755
2756static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
2757{
2758    int size;
2759    AACADTSHeaderInfo hdr_info;
2760    uint8_t layout_map[MAX_ELEM_ID*4][3];
2761    int layout_map_tags, ret;
2762
2763    size = avpriv_aac_parse_header(gb, &hdr_info);
2764    if (size > 0) {
2765        if (!ac->warned_num_aac_frames && hdr_info.num_aac_frames != 1) {
2766            // This is 2 for "VLB " audio in NSV files.
2767            // See samples/nsv/vlb_audio.
2768            avpriv_report_missing_feature(ac->avctx,
2769                                          "More than one AAC RDB per ADTS frame");
2770            ac->warned_num_aac_frames = 1;
2771        }
2772        push_output_configuration(ac);
2773        if (hdr_info.chan_config) {
2774            ac->oc[1].m4ac.chan_config = hdr_info.chan_config;
2775            if ((ret = set_default_channel_config(ac->avctx,
2776                                                  layout_map,
2777                                                  &layout_map_tags,
2778                                                  hdr_info.chan_config)) < 0)
2779                return ret;
2780            if ((ret = output_configure(ac, layout_map, layout_map_tags,
2781                                        FFMAX(ac->oc[1].status,
2782                                              OC_TRIAL_FRAME), 0)) < 0)
2783                return ret;
2784        } else {
2785            ac->oc[1].m4ac.chan_config = 0;
2786            /**
2787             * dual mono frames in Japanese DTV can have chan_config 0
2788             * WITHOUT specifying PCE.
2789             *  thus, set dual mono as default.
2790             */
2791            if (ac->dmono_mode && ac->oc[0].status == OC_NONE) {
2792                layout_map_tags = 2;
2793                layout_map[0][0] = layout_map[1][0] = TYPE_SCE;
2794                layout_map[0][2] = layout_map[1][2] = AAC_CHANNEL_FRONT;
2795                layout_map[0][1] = 0;
2796                layout_map[1][1] = 1;
2797                if (output_configure(ac, layout_map, layout_map_tags,
2798                                     OC_TRIAL_FRAME, 0))
2799                    return -7;
2800            }
2801        }
2802        ac->oc[1].m4ac.sample_rate     = hdr_info.sample_rate;
2803        ac->oc[1].m4ac.sampling_index  = hdr_info.sampling_index;
2804        ac->oc[1].m4ac.object_type     = hdr_info.object_type;
2805        if (ac->oc[0].status != OC_LOCKED ||
2806            ac->oc[0].m4ac.chan_config != hdr_info.chan_config ||
2807            ac->oc[0].m4ac.sample_rate != hdr_info.sample_rate) {
2808            ac->oc[1].m4ac.sbr = -1;
2809            ac->oc[1].m4ac.ps  = -1;
2810        }
2811        if (!hdr_info.crc_absent)
2812            skip_bits(gb, 16);
2813    }
2814    return size;
2815}
2816
2817static int aac_decode_er_frame(AVCodecContext *avctx, void *data,
2818                               int *got_frame_ptr, GetBitContext *gb)
2819{
2820    AACContext *ac = avctx->priv_data;
2821    ChannelElement *che;
2822    int err, i;
2823    int samples = 1024;
2824    int chan_config = ac->oc[1].m4ac.chan_config;
2825    int aot = ac->oc[1].m4ac.object_type;
2826
2827    if (aot == AOT_ER_AAC_LD || aot == AOT_ER_AAC_ELD)
2828        samples >>= 1;
2829
2830    ac->frame = data;
2831
2832    if ((err = frame_configure_elements(avctx)) < 0)
2833        return err;
2834
2835    // The FF_PROFILE_AAC_* defines are all object_type - 1
2836    // This may lead to an undefined profile being signaled
2837    ac->avctx->profile = ac->oc[1].m4ac.object_type - 1;
2838
2839    ac->tags_mapped = 0;
2840
2841    if (chan_config < 0 || chan_config >= 8) {
2842        avpriv_request_sample(avctx, "Unknown ER channel configuration %d",
2843                              ac->oc[1].m4ac.chan_config);
2844        return AVERROR_INVALIDDATA;
2845    }
2846    for (i = 0; i < tags_per_config[chan_config]; i++) {
2847        const int elem_type = aac_channel_layout_map[chan_config-1][i][0];
2848        const int elem_id   = aac_channel_layout_map[chan_config-1][i][1];
2849        if (!(che=get_che(ac, elem_type, elem_id))) {
2850            av_log(ac->avctx, AV_LOG_ERROR,
2851                   "channel element %d.%d is not allocated\n",
2852                   elem_type, elem_id);
2853            return AVERROR_INVALIDDATA;
2854        }
2855        if (aot != AOT_ER_AAC_ELD)
2856            skip_bits(gb, 4);
2857        switch (elem_type) {
2858        case TYPE_SCE:
2859            err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2860            break;
2861        case TYPE_CPE:
2862            err = decode_cpe(ac, gb, che);
2863            break;
2864        case TYPE_LFE:
2865            err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2866            break;
2867        }
2868        if (err < 0)
2869            return err;
2870    }
2871
2872    spectral_to_sample(ac);
2873
2874    ac->frame->nb_samples = samples;
2875    ac->frame->sample_rate = avctx->sample_rate;
2876    *got_frame_ptr = 1;
2877
2878    skip_bits_long(gb, get_bits_left(gb));
2879    return 0;
2880}
2881
2882static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
2883                                int *got_frame_ptr, GetBitContext *gb, AVPacket *avpkt)
2884{
2885    AACContext *ac = avctx->priv_data;
2886    ChannelElement *che = NULL, *che_prev = NULL;
2887    enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
2888    int err, elem_id;
2889    int samples = 0, multiplier, audio_found = 0, pce_found = 0;
2890    int is_dmono, sce_count = 0;
2891
2892    ac->frame = data;
2893
2894    if (show_bits(gb, 12) == 0xfff) {
2895        if ((err = parse_adts_frame_header(ac, gb)) < 0) {
2896            av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
2897            goto fail;
2898        }
2899        if (ac->oc[1].m4ac.sampling_index > 12) {
2900            av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->oc[1].m4ac.sampling_index);
2901            err = AVERROR_INVALIDDATA;
2902            goto fail;
2903        }
2904    }
2905
2906    if ((err = frame_configure_elements(avctx)) < 0)
2907        goto fail;
2908
2909    // The FF_PROFILE_AAC_* defines are all object_type - 1
2910    // This may lead to an undefined profile being signaled
2911    ac->avctx->profile = ac->oc[1].m4ac.object_type - 1;
2912
2913    ac->tags_mapped = 0;
2914    // parse
2915    while ((elem_type = get_bits(gb, 3)) != TYPE_END) {
2916        elem_id = get_bits(gb, 4);
2917
2918        if (elem_type < TYPE_DSE) {
2919            if (!(che=get_che(ac, elem_type, elem_id))) {
2920                av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
2921                       elem_type, elem_id);
2922                err = AVERROR_INVALIDDATA;
2923                goto fail;
2924            }
2925            samples = 1024;
2926        }
2927
2928        switch (elem_type) {
2929
2930        case TYPE_SCE:
2931            err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2932            audio_found = 1;
2933            sce_count++;
2934            break;
2935
2936        case TYPE_CPE:
2937            err = decode_cpe(ac, gb, che);
2938            audio_found = 1;
2939            break;
2940
2941        case TYPE_CCE:
2942            err = decode_cce(ac, gb, che);
2943            break;
2944
2945        case TYPE_LFE:
2946            err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2947            audio_found = 1;
2948            break;
2949
2950        case TYPE_DSE:
2951            err = skip_data_stream_element(ac, gb);
2952            break;
2953
2954        case TYPE_PCE: {
2955            uint8_t layout_map[MAX_ELEM_ID*4][3];
2956            int tags;
2957            push_output_configuration(ac);
2958            tags = decode_pce(avctx, &ac->oc[1].m4ac, layout_map, gb);
2959            if (tags < 0) {
2960                err = tags;
2961                break;
2962            }
2963            if (pce_found) {
2964                av_log(avctx, AV_LOG_ERROR,
2965                       "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2966            } else {
2967                err = output_configure(ac, layout_map, tags, OC_TRIAL_PCE, 1);
2968                if (!err)
2969                    ac->oc[1].m4ac.chan_config = 0;
2970                pce_found = 1;
2971            }
2972            break;
2973        }
2974
2975        case TYPE_FIL:
2976            if (elem_id == 15)
2977                elem_id += get_bits(gb, 8) - 1;
2978            if (get_bits_left(gb) < 8 * elem_id) {
2979                    av_log(avctx, AV_LOG_ERROR, "TYPE_FIL: "overread_err);
2980                    err = AVERROR_INVALIDDATA;
2981                    goto fail;
2982            }
2983            while (elem_id > 0)
2984                elem_id -= decode_extension_payload(ac, gb, elem_id, che_prev, elem_type_prev);
2985            err = 0; /* FIXME */
2986            break;
2987
2988        default:
2989            err = AVERROR_BUG; /* should not happen, but keeps compiler happy */
2990            break;
2991        }
2992
2993        che_prev       = che;
2994        elem_type_prev = elem_type;
2995
2996        if (err)
2997            goto fail;
2998
2999        if (get_bits_left(gb) < 3) {
3000            av_log(avctx, AV_LOG_ERROR, overread_err);
3001            err = AVERROR_INVALIDDATA;
3002            goto fail;
3003        }
3004    }
3005
3006    spectral_to_sample(ac);
3007
3008    multiplier = (ac->oc[1].m4ac.sbr == 1) ? ac->oc[1].m4ac.ext_sample_rate > ac->oc[1].m4ac.sample_rate : 0;
3009    samples <<= multiplier;
3010
3011    if (ac->oc[1].status && audio_found) {
3012        avctx->sample_rate = ac->oc[1].m4ac.sample_rate << multiplier;
3013        avctx->frame_size = samples;
3014        ac->oc[1].status = OC_LOCKED;
3015    }
3016
3017    if (multiplier) {
3018        int side_size;
3019        const uint8_t *side = av_packet_get_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, &side_size);
3020        if (side && side_size>=4)
3021            AV_WL32(side, 2*AV_RL32(side));
3022    }
3023
3024    *got_frame_ptr = !!samples;
3025    if (samples) {
3026        ac->frame->nb_samples = samples;
3027        ac->frame->sample_rate = avctx->sample_rate;
3028    } else
3029        av_frame_unref(ac->frame);
3030    *got_frame_ptr = !!samples;
3031
3032    /* for dual-mono audio (SCE + SCE) */
3033    is_dmono = ac->dmono_mode && sce_count == 2 &&
3034               ac->oc[1].channel_layout == (AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT);
3035    if (is_dmono) {
3036        if (ac->dmono_mode == 1)
3037            ((AVFrame *)data)->data[1] =((AVFrame *)data)->data[0];
3038        else if (ac->dmono_mode == 2)
3039            ((AVFrame *)data)->data[0] =((AVFrame *)data)->data[1];
3040    }
3041
3042    return 0;
3043fail:
3044    pop_output_configuration(ac);
3045    return err;
3046}
3047
3048static int aac_decode_frame(AVCodecContext *avctx, void *data,
3049                            int *got_frame_ptr, AVPacket *avpkt)
3050{
3051    AACContext *ac = avctx->priv_data;
3052    const uint8_t *buf = avpkt->data;
3053    int buf_size = avpkt->size;
3054    GetBitContext gb;
3055    int buf_consumed;
3056    int buf_offset;
3057    int err;
3058    int new_extradata_size;
3059    const uint8_t *new_extradata = av_packet_get_side_data(avpkt,
3060                                       AV_PKT_DATA_NEW_EXTRADATA,
3061                                       &new_extradata_size);
3062    int jp_dualmono_size;
3063    const uint8_t *jp_dualmono   = av_packet_get_side_data(avpkt,
3064                                       AV_PKT_DATA_JP_DUALMONO,
3065                                       &jp_dualmono_size);
3066
3067    if (new_extradata && 0) {
3068        av_free(avctx->extradata);
3069        avctx->extradata = av_mallocz(new_extradata_size +
3070                                      FF_INPUT_BUFFER_PADDING_SIZE);
3071        if (!avctx->extradata)
3072            return AVERROR(ENOMEM);
3073        avctx->extradata_size = new_extradata_size;
3074        memcpy(avctx->extradata, new_extradata, new_extradata_size);
3075        push_output_configuration(ac);
3076        if (decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
3077                                         avctx->extradata,
3078                                         avctx->extradata_size*8, 1) < 0) {
3079            pop_output_configuration(ac);
3080            return AVERROR_INVALIDDATA;
3081        }
3082    }
3083
3084    ac->dmono_mode = 0;
3085    if (jp_dualmono && jp_dualmono_size > 0)
3086        ac->dmono_mode =  1 + *jp_dualmono;
3087    if (ac->force_dmono_mode >= 0)
3088        ac->dmono_mode = ac->force_dmono_mode;
3089
3090    if (INT_MAX / 8 <= buf_size)
3091        return AVERROR_INVALIDDATA;
3092
3093    if ((err = init_get_bits(&gb, buf, buf_size * 8)) < 0)
3094        return err;
3095
3096    switch (ac->oc[1].m4ac.object_type) {
3097    case AOT_ER_AAC_LC:
3098    case AOT_ER_AAC_LTP:
3099    case AOT_ER_AAC_LD:
3100    case AOT_ER_AAC_ELD:
3101        err = aac_decode_er_frame(avctx, data, got_frame_ptr, &gb);
3102        break;
3103    default:
3104        err = aac_decode_frame_int(avctx, data, got_frame_ptr, &gb, avpkt);
3105    }
3106    if (err < 0)
3107        return err;
3108
3109    buf_consumed = (get_bits_count(&gb) + 7) >> 3;
3110    for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
3111        if (buf[buf_offset])
3112            break;
3113
3114    return buf_size > buf_offset ? buf_consumed : buf_size;
3115}
3116
3117static av_cold int aac_decode_close(AVCodecContext *avctx)
3118{
3119    AACContext *ac = avctx->priv_data;
3120    int i, type;
3121
3122    for (i = 0; i < MAX_ELEM_ID; i++) {
3123        for (type = 0; type < 4; type++) {
3124            if (ac->che[type][i])
3125                ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
3126            av_freep(&ac->che[type][i]);
3127        }
3128    }
3129
3130    ff_mdct_end(&ac->mdct);
3131    ff_mdct_end(&ac->mdct_small);
3132    ff_mdct_end(&ac->mdct_ld);
3133    ff_mdct_end(&ac->mdct_ltp);
3134    return 0;
3135}
3136
3137
3138#define LOAS_SYNC_WORD   0x2b7       ///< 11 bits LOAS sync word
3139
3140struct LATMContext {
3141    AACContext aac_ctx;     ///< containing AACContext
3142    int initialized;        ///< initialized after a valid extradata was seen
3143
3144    // parser data
3145    int audio_mux_version_A; ///< LATM syntax version
3146    int frame_length_type;   ///< 0/1 variable/fixed frame length
3147    int frame_length;        ///< frame length for fixed frame length
3148};
3149
3150static inline uint32_t latm_get_value(GetBitContext *b)
3151{
3152    int length = get_bits(b, 2);
3153
3154    return get_bits_long(b, (length+1)*8);
3155}
3156
3157static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
3158                                             GetBitContext *gb, int asclen)
3159{
3160    AACContext *ac        = &latmctx->aac_ctx;
3161    AVCodecContext *avctx = ac->avctx;
3162    MPEG4AudioConfig m4ac = { 0 };
3163    int config_start_bit  = get_bits_count(gb);
3164    int sync_extension    = 0;
3165    int bits_consumed, esize;
3166
3167    if (asclen) {
3168        sync_extension = 1;
3169        asclen         = FFMIN(asclen, get_bits_left(gb));
3170    } else
3171        asclen         = get_bits_left(gb);
3172
3173    if (config_start_bit % 8) {
3174        avpriv_request_sample(latmctx->aac_ctx.avctx,
3175                              "Non-byte-aligned audio-specific config");
3176        return AVERROR_PATCHWELCOME;
3177    }
3178    if (asclen <= 0)
3179        return AVERROR_INVALIDDATA;
3180    bits_consumed = decode_audio_specific_config(NULL, avctx, &m4ac,
3181                                         gb->buffer + (config_start_bit / 8),
3182                                         asclen, sync_extension);
3183
3184    if (bits_consumed < 0)
3185        return AVERROR_INVALIDDATA;
3186
3187    if (!latmctx->initialized ||
3188        ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
3189        ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
3190
3191        if(latmctx->initialized) {
3192            av_log(avctx, AV_LOG_INFO, "audio config changed\n");
3193        } else {
3194            av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
3195        }
3196        latmctx->initialized = 0;
3197
3198        esize = (bits_consumed+7) / 8;
3199
3200        if (avctx->extradata_size < esize) {
3201            av_free(avctx->extradata);
3202            avctx->extradata = av_malloc(esize + FF_INPUT_BUFFER_PADDING_SIZE);
3203            if (!avctx->extradata)
3204                return AVERROR(ENOMEM);
3205        }
3206
3207        avctx->extradata_size = esize;
3208        memcpy(avctx->extradata, gb->buffer + (config_start_bit/8), esize);
3209        memset(avctx->extradata+esize, 0, FF_INPUT_BUFFER_PADDING_SIZE);
3210    }
3211    skip_bits_long(gb, bits_consumed);
3212
3213    return bits_consumed;
3214}
3215
3216static int read_stream_mux_config(struct LATMContext *latmctx,
3217                                  GetBitContext *gb)
3218{
3219    int ret, audio_mux_version = get_bits(gb, 1);
3220
3221    latmctx->audio_mux_version_A = 0;
3222    if (audio_mux_version)
3223        latmctx->audio_mux_version_A = get_bits(gb, 1);
3224
3225    if (!latmctx->audio_mux_version_A) {
3226
3227        if (audio_mux_version)
3228            latm_get_value(gb);                 // taraFullness
3229
3230        skip_bits(gb, 1);                       // allStreamSameTimeFraming
3231        skip_bits(gb, 6);                       // numSubFrames
3232        // numPrograms
3233        if (get_bits(gb, 4)) {                  // numPrograms
3234            avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple programs");
3235            return AVERROR_PATCHWELCOME;
3236        }
3237
3238        // for each program (which there is only one in DVB)
3239
3240        // for each layer (which there is only one in DVB)
3241        if (get_bits(gb, 3)) {                   // numLayer
3242            avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple layers");
3243            return AVERROR_PATCHWELCOME;
3244        }
3245
3246        // for all but first stream: use_same_config = get_bits(gb, 1);
3247        if (!audio_mux_version) {
3248            if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0)
3249                return ret;
3250        } else {
3251            int ascLen = latm_get_value(gb);
3252            if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0)
3253                return ret;
3254            ascLen -= ret;
3255            skip_bits_long(gb, ascLen);
3256        }
3257
3258        latmctx->frame_length_type = get_bits(gb, 3);
3259        switch (latmctx->frame_length_type) {
3260        case 0:
3261            skip_bits(gb, 8);       // latmBufferFullness
3262            break;
3263        case 1:
3264            latmctx->frame_length = get_bits(gb, 9);
3265            break;
3266        case 3:
3267        case 4:
3268        case 5:
3269            skip_bits(gb, 6);       // CELP frame length table index
3270            break;
3271        case 6:
3272        case 7:
3273            skip_bits(gb, 1);       // HVXC frame length table index
3274            break;
3275        }
3276
3277        if (get_bits(gb, 1)) {                  // other data
3278            if (audio_mux_version) {
3279                latm_get_value(gb);             // other_data_bits
3280            } else {
3281                int esc;
3282                do {
3283                    esc = get_bits(gb, 1);
3284                    skip_bits(gb, 8);
3285                } while (esc);
3286            }
3287        }
3288
3289        if (get_bits(gb, 1))                     // crc present
3290            skip_bits(gb, 8);                    // config_crc
3291    }
3292
3293    return 0;
3294}
3295
3296static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
3297{
3298    uint8_t tmp;
3299
3300    if (ctx->frame_length_type == 0) {
3301        int mux_slot_length = 0;
3302        do {
3303            tmp = get_bits(gb, 8);
3304            mux_slot_length += tmp;
3305        } while (tmp == 255);
3306        return mux_slot_length;
3307    } else if (ctx->frame_length_type == 1) {
3308        return ctx->frame_length;
3309    } else if (ctx->frame_length_type == 3 ||
3310               ctx->frame_length_type == 5 ||
3311               ctx->frame_length_type == 7) {
3312        skip_bits(gb, 2);          // mux_slot_length_coded
3313    }
3314    return 0;
3315}
3316
3317static int read_audio_mux_element(struct LATMContext *latmctx,
3318                                  GetBitContext *gb)
3319{
3320    int err;
3321    uint8_t use_same_mux = get_bits(gb, 1);
3322    if (!use_same_mux) {
3323        if ((err = read_stream_mux_config(latmctx, gb)) < 0)
3324            return err;
3325    } else if (!latmctx->aac_ctx.avctx->extradata) {
3326        av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
3327               "no decoder config found\n");
3328        return AVERROR(EAGAIN);
3329    }
3330    if (latmctx->audio_mux_version_A == 0) {
3331        int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
3332        if (mux_slot_length_bytes * 8 > get_bits_left(gb)) {
3333            av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
3334            return AVERROR_INVALIDDATA;
3335        } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) {
3336            av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
3337                   "frame length mismatch %d << %d\n",
3338                   mux_slot_length_bytes * 8, get_bits_left(gb));
3339            return AVERROR_INVALIDDATA;
3340        }
3341    }
3342    return 0;
3343}
3344
3345
3346static int latm_decode_frame(AVCodecContext *avctx, void *out,
3347                             int *got_frame_ptr, AVPacket *avpkt)
3348{
3349    struct LATMContext *latmctx = avctx->priv_data;
3350    int                 muxlength, err;
3351    GetBitContext       gb;
3352
3353    if ((err = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0)
3354        return err;
3355
3356    // check for LOAS sync word
3357    if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
3358        return AVERROR_INVALIDDATA;
3359
3360    muxlength = get_bits(&gb, 13) + 3;
3361    // not enough data, the parser should have sorted this out
3362    if (muxlength > avpkt->size)
3363        return AVERROR_INVALIDDATA;
3364
3365    if ((err = read_audio_mux_element(latmctx, &gb)) < 0)
3366        return err;
3367
3368    if (!latmctx->initialized) {
3369        if (!avctx->extradata) {
3370            *got_frame_ptr = 0;
3371            return avpkt->size;
3372        } else {
3373            push_output_configuration(&latmctx->aac_ctx);
3374            if ((err = decode_audio_specific_config(
3375                    &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
3376                    avctx->extradata, avctx->extradata_size*8, 1)) < 0) {
3377                pop_output_configuration(&latmctx->aac_ctx);
3378                return err;
3379            }
3380            latmctx->initialized = 1;
3381        }
3382    }
3383
3384    if (show_bits(&gb, 12) == 0xfff) {
3385        av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
3386               "ADTS header detected, probably as result of configuration "
3387               "misparsing\n");
3388        return AVERROR_INVALIDDATA;
3389    }
3390
3391    if ((err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt)) < 0)
3392        return err;
3393
3394    return muxlength;
3395}
3396
3397static av_cold int latm_decode_init(AVCodecContext *avctx)
3398{
3399    struct LATMContext *latmctx = avctx->priv_data;
3400    int ret = aac_decode_init(avctx);
3401
3402    if (avctx->extradata_size > 0)
3403        latmctx->initialized = !ret;
3404
3405    return ret;
3406}
3407
3408static void aacdec_init(AACContext *c)
3409{
3410    c->imdct_and_windowing                      = imdct_and_windowing;
3411    c->apply_ltp                                = apply_ltp;
3412    c->apply_tns                                = apply_tns;
3413    c->windowing_and_mdct_ltp                   = windowing_and_mdct_ltp;
3414    c->update_ltp                               = update_ltp;
3415
3416    if(ARCH_MIPS)
3417        ff_aacdec_init_mips(c);
3418}
3419/**
3420 * AVOptions for Japanese DTV specific extensions (ADTS only)
3421 */
3422#define AACDEC_FLAGS AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
3423static const AVOption options[] = {
3424    {"dual_mono_mode", "Select the channel to decode for dual mono",
3425     offsetof(AACContext, force_dmono_mode), AV_OPT_TYPE_INT, {.i64=-1}, -1, 2,
3426     AACDEC_FLAGS, "dual_mono_mode"},
3427
3428    {"auto", "autoselection",            0, AV_OPT_TYPE_CONST, {.i64=-1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
3429    {"main", "Select Main/Left channel", 0, AV_OPT_TYPE_CONST, {.i64= 1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
3430    {"sub" , "Select Sub/Right channel", 0, AV_OPT_TYPE_CONST, {.i64= 2}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
3431    {"both", "Select both channels",     0, AV_OPT_TYPE_CONST, {.i64= 0}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
3432
3433    {NULL},
3434};
3435
3436static const AVClass aac_decoder_class = {
3437    .class_name = "AAC decoder",
3438    .item_name  = av_default_item_name,
3439    .option     = options,
3440    .version    = LIBAVUTIL_VERSION_INT,
3441};
3442
3443AVCodec ff_aac_decoder = {
3444    .name            = "aac",
3445    .long_name       = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
3446    .type            = AVMEDIA_TYPE_AUDIO,
3447    .id              = AV_CODEC_ID_AAC,
3448    .priv_data_size  = sizeof(AACContext),
3449    .init            = aac_decode_init,
3450    .close           = aac_decode_close,
3451    .decode          = aac_decode_frame,
3452    .sample_fmts     = (const enum AVSampleFormat[]) {
3453        AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
3454    },
3455    .capabilities    = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
3456    .channel_layouts = aac_channel_layout,
3457    .flush = flush,
3458    .priv_class      = &aac_decoder_class,
3459};
3460
3461/*
3462    Note: This decoder filter is intended to decode LATM streams transferred
3463    in MPEG transport streams which only contain one program.
3464    To do a more complex LATM demuxing a separate LATM demuxer should be used.
3465*/
3466AVCodec ff_aac_latm_decoder = {
3467    .name            = "aac_latm",
3468    .long_name       = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
3469    .type            = AVMEDIA_TYPE_AUDIO,
3470    .id              = AV_CODEC_ID_AAC_LATM,
3471    .priv_data_size  = sizeof(struct LATMContext),
3472    .init            = latm_decode_init,
3473    .close           = aac_decode_close,
3474    .decode          = latm_decode_frame,
3475    .sample_fmts     = (const enum AVSampleFormat[]) {
3476        AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
3477    },
3478    .capabilities    = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
3479    .channel_layouts = aac_channel_layout,
3480    .flush = flush,
3481};
3482