1/*
2 * Copyright (c) 2012 Andrew D'Addesio
3 * Copyright (c) 2013-2014 Mozilla Corporation
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
23 * @file
24 * Opus CELT decoder
25 */
26
27#include <stdint.h>
28
29#include "libavutil/float_dsp.h"
30
31#include "opus.h"
32#include "opus_imdct.h"
33
34enum CeltSpread {
35    CELT_SPREAD_NONE,
36    CELT_SPREAD_LIGHT,
37    CELT_SPREAD_NORMAL,
38    CELT_SPREAD_AGGRESSIVE
39};
40
41typedef struct CeltFrame {
42    float energy[CELT_MAX_BANDS];
43    float prev_energy[2][CELT_MAX_BANDS];
44
45    uint8_t collapse_masks[CELT_MAX_BANDS];
46
47    /* buffer for mdct output + postfilter */
48    DECLARE_ALIGNED(32, float, buf)[2048];
49
50    /* postfilter parameters */
51    int pf_period_new;
52    float pf_gains_new[3];
53    int pf_period;
54    float pf_gains[3];
55    int pf_period_old;
56    float pf_gains_old[3];
57
58    float deemph_coeff;
59} CeltFrame;
60
61struct CeltContext {
62    // constant values that do not change during context lifetime
63    AVCodecContext    *avctx;
64    CeltIMDCTContext  *imdct[4];
65    AVFloatDSPContext  dsp;
66    int output_channels;
67
68    // values that have inter-frame effect and must be reset on flush
69    CeltFrame frame[2];
70    uint32_t seed;
71    int flushed;
72
73    // values that only affect a single frame
74    int coded_channels;
75    int framebits;
76    int duration;
77
78    /* number of iMDCT blocks in the frame */
79    int blocks;
80    /* size of each block */
81    int blocksize;
82
83    int startband;
84    int endband;
85    int codedbands;
86
87    int anticollapse_bit;
88
89    int intensitystereo;
90    int dualstereo;
91    enum CeltSpread spread;
92
93    int remaining;
94    int remaining2;
95    int fine_bits    [CELT_MAX_BANDS];
96    int fine_priority[CELT_MAX_BANDS];
97    int pulses       [CELT_MAX_BANDS];
98    int tf_change    [CELT_MAX_BANDS];
99
100    DECLARE_ALIGNED(32, float, coeffs)[2][CELT_MAX_FRAME_SIZE];
101    DECLARE_ALIGNED(32, float, scratch)[22 * 8]; // MAX(celt_freq_range) * 1<<CELT_MAX_LOG_BLOCKS
102};
103
104static const uint16_t celt_model_tapset[] = { 4, 2, 3, 4 };
105
106static const uint16_t celt_model_spread[] = { 32, 7, 9, 30, 32 };
107
108static const uint16_t celt_model_alloc_trim[] = {
109    128,   2,   4,   9,  19,  41,  87, 109, 119, 124, 126, 128
110};
111
112static const uint16_t celt_model_energy_small[] = { 4, 2, 3, 4 };
113
114static const uint8_t celt_freq_bands[] = { /* in steps of 200Hz */
115    0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
116};
117
118static const uint8_t celt_freq_range[] = {
119    1,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  4,  4,  4,  6,  6,  8, 12, 18, 22
120};
121
122static const uint8_t celt_log_freq_range[] = {
123    0,  0,  0,  0,  0,  0,  0,  0,  8,  8,  8,  8, 16, 16, 16, 21, 21, 24, 29, 34, 36
124};
125
126static const int8_t celt_tf_select[4][2][2][2] = {
127    { { { 0, -1 }, { 0, -1 } }, { { 0, -1 }, { 0, -1 } } },
128    { { { 0, -1 }, { 0, -2 } }, { { 1,  0 }, { 1, -1 } } },
129    { { { 0, -2 }, { 0, -3 } }, { { 2,  0 }, { 1, -1 } } },
130    { { { 0, -2 }, { 0, -3 } }, { { 3,  0 }, { 1, -1 } } }
131};
132
133static const float celt_mean_energy[] = {
134    6.437500f, 6.250000f, 5.750000f, 5.312500f, 5.062500f,
135    4.812500f, 4.500000f, 4.375000f, 4.875000f, 4.687500f,
136    4.562500f, 4.437500f, 4.875000f, 4.625000f, 4.312500f,
137    4.500000f, 4.375000f, 4.625000f, 4.750000f, 4.437500f,
138    3.750000f, 3.750000f, 3.750000f, 3.750000f, 3.750000f
139};
140
141static const float celt_alpha_coef[] = {
142    29440.0f/32768.0f,    26112.0f/32768.0f,    21248.0f/32768.0f,    16384.0f/32768.0f
143};
144
145static const float celt_beta_coef[] = { /* TODO: precompute 1 minus this if the code ends up neater */
146    30147.0f/32768.0f,    22282.0f/32768.0f,    12124.0f/32768.0f,     6554.0f/32768.0f
147};
148
149static const uint8_t celt_coarse_energy_dist[4][2][42] = {
150    {
151        {       // 120-sample inter
152             72, 127,  65, 129,  66, 128,  65, 128,  64, 128,  62, 128,  64, 128,
153             64, 128,  92,  78,  92,  79,  92,  78,  90,  79, 116,  41, 115,  40,
154            114,  40, 132,  26, 132,  26, 145,  17, 161,  12, 176,  10, 177,  11
155        }, {    // 120-sample intra
156             24, 179,  48, 138,  54, 135,  54, 132,  53, 134,  56, 133,  55, 132,
157             55, 132,  61, 114,  70,  96,  74,  88,  75,  88,  87,  74,  89,  66,
158             91,  67, 100,  59, 108,  50, 120,  40, 122,  37,  97,  43,  78,  50
159        }
160    }, {
161        {       // 240-sample inter
162             83,  78,  84,  81,  88,  75,  86,  74,  87,  71,  90,  73,  93,  74,
163             93,  74, 109,  40, 114,  36, 117,  34, 117,  34, 143,  17, 145,  18,
164            146,  19, 162,  12, 165,  10, 178,   7, 189,   6, 190,   8, 177,   9
165        }, {    // 240-sample intra
166             23, 178,  54, 115,  63, 102,  66,  98,  69,  99,  74,  89,  71,  91,
167             73,  91,  78,  89,  86,  80,  92,  66,  93,  64, 102,  59, 103,  60,
168            104,  60, 117,  52, 123,  44, 138,  35, 133,  31,  97,  38,  77,  45
169        }
170    }, {
171        {       // 480-sample inter
172             61,  90,  93,  60, 105,  42, 107,  41, 110,  45, 116,  38, 113,  38,
173            112,  38, 124,  26, 132,  27, 136,  19, 140,  20, 155,  14, 159,  16,
174            158,  18, 170,  13, 177,  10, 187,   8, 192,   6, 175,   9, 159,  10
175        }, {    // 480-sample intra
176             21, 178,  59, 110,  71,  86,  75,  85,  84,  83,  91,  66,  88,  73,
177             87,  72,  92,  75,  98,  72, 105,  58, 107,  54, 115,  52, 114,  55,
178            112,  56, 129,  51, 132,  40, 150,  33, 140,  29,  98,  35,  77,  42
179        }
180    }, {
181        {       // 960-sample inter
182             42, 121,  96,  66, 108,  43, 111,  40, 117,  44, 123,  32, 120,  36,
183            119,  33, 127,  33, 134,  34, 139,  21, 147,  23, 152,  20, 158,  25,
184            154,  26, 166,  21, 173,  16, 184,  13, 184,  10, 150,  13, 139,  15
185        }, {    // 960-sample intra
186             22, 178,  63, 114,  74,  82,  84,  83,  92,  82, 103,  62,  96,  72,
187             96,  67, 101,  73, 107,  72, 113,  55, 118,  52, 125,  52, 118,  52,
188            117,  55, 135,  49, 137,  39, 157,  32, 145,  29,  97,  33,  77,  40
189        }
190    }
191};
192
193static const uint8_t celt_static_alloc[11][21] = {  /* 1/32 bit/sample */
194    {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0 },
195    {  90,  80,  75,  69,  63,  56,  49,  40,  34,  29,  20,  18,  10,   0,   0,   0,   0,   0,   0,   0,   0 },
196    { 110, 100,  90,  84,  78,  71,  65,  58,  51,  45,  39,  32,  26,  20,  12,   0,   0,   0,   0,   0,   0 },
197    { 118, 110, 103,  93,  86,  80,  75,  70,  65,  59,  53,  47,  40,  31,  23,  15,   4,   0,   0,   0,   0 },
198    { 126, 119, 112, 104,  95,  89,  83,  78,  72,  66,  60,  54,  47,  39,  32,  25,  17,  12,   1,   0,   0 },
199    { 134, 127, 120, 114, 103,  97,  91,  85,  78,  72,  66,  60,  54,  47,  41,  35,  29,  23,  16,  10,   1 },
200    { 144, 137, 130, 124, 113, 107, 101,  95,  88,  82,  76,  70,  64,  57,  51,  45,  39,  33,  26,  15,   1 },
201    { 152, 145, 138, 132, 123, 117, 111, 105,  98,  92,  86,  80,  74,  67,  61,  55,  49,  43,  36,  20,   1 },
202    { 162, 155, 148, 142, 133, 127, 121, 115, 108, 102,  96,  90,  84,  77,  71,  65,  59,  53,  46,  30,   1 },
203    { 172, 165, 158, 152, 143, 137, 131, 125, 118, 112, 106, 100,  94,  87,  81,  75,  69,  63,  56,  45,  20 },
204    { 200, 200, 200, 200, 200, 200, 200, 200, 198, 193, 188, 183, 178, 173, 168, 163, 158, 153, 148, 129, 104 }
205};
206
207static const uint8_t celt_static_caps[4][2][21] = {
208    {       // 120-sample
209        {224, 224, 224, 224, 224, 224, 224, 224, 160, 160,
210         160, 160, 185, 185, 185, 178, 178, 168, 134,  61,  37},
211        {224, 224, 224, 224, 224, 224, 224, 224, 240, 240,
212         240, 240, 207, 207, 207, 198, 198, 183, 144,  66,  40},
213    }, {    // 240-sample
214        {160, 160, 160, 160, 160, 160, 160, 160, 185, 185,
215         185, 185, 193, 193, 193, 183, 183, 172, 138,  64,  38},
216        {240, 240, 240, 240, 240, 240, 240, 240, 207, 207,
217         207, 207, 204, 204, 204, 193, 193, 180, 143,  66,  40},
218    }, {    // 480-sample
219        {185, 185, 185, 185, 185, 185, 185, 185, 193, 193,
220         193, 193, 193, 193, 193, 183, 183, 172, 138,  65,  39},
221        {207, 207, 207, 207, 207, 207, 207, 207, 204, 204,
222         204, 204, 201, 201, 201, 188, 188, 176, 141,  66,  40},
223    }, {    // 960-sample
224        {193, 193, 193, 193, 193, 193, 193, 193, 193, 193,
225         193, 193, 194, 194, 194, 184, 184, 173, 139,  65,  39},
226        {204, 204, 204, 204, 204, 204, 204, 204, 201, 201,
227         201, 201, 198, 198, 198, 187, 187, 175, 140,  66,  40}
228    }
229};
230
231static const uint8_t celt_cache_bits[392] = {
232    40, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
233    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
234    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 40, 15, 23, 28,
235    31, 34, 36, 38, 39, 41, 42, 43, 44, 45, 46, 47, 47, 49, 50,
236    51, 52, 53, 54, 55, 55, 57, 58, 59, 60, 61, 62, 63, 63, 65,
237    66, 67, 68, 69, 70, 71, 71, 40, 20, 33, 41, 48, 53, 57, 61,
238    64, 66, 69, 71, 73, 75, 76, 78, 80, 82, 85, 87, 89, 91, 92,
239    94, 96, 98, 101, 103, 105, 107, 108, 110, 112, 114, 117, 119, 121, 123,
240    124, 126, 128, 40, 23, 39, 51, 60, 67, 73, 79, 83, 87, 91, 94,
241    97, 100, 102, 105, 107, 111, 115, 118, 121, 124, 126, 129, 131, 135, 139,
242    142, 145, 148, 150, 153, 155, 159, 163, 166, 169, 172, 174, 177, 179, 35,
243    28, 49, 65, 78, 89, 99, 107, 114, 120, 126, 132, 136, 141, 145, 149,
244    153, 159, 165, 171, 176, 180, 185, 189, 192, 199, 205, 211, 216, 220, 225,
245    229, 232, 239, 245, 251, 21, 33, 58, 79, 97, 112, 125, 137, 148, 157,
246    166, 174, 182, 189, 195, 201, 207, 217, 227, 235, 243, 251, 17, 35, 63,
247    86, 106, 123, 139, 152, 165, 177, 187, 197, 206, 214, 222, 230, 237, 250,
248    25, 31, 55, 75, 91, 105, 117, 128, 138, 146, 154, 161, 168, 174, 180,
249    185, 190, 200, 208, 215, 222, 229, 235, 240, 245, 255, 16, 36, 65, 89,
250    110, 128, 144, 159, 173, 185, 196, 207, 217, 226, 234, 242, 250, 11, 41,
251    74, 103, 128, 151, 172, 191, 209, 225, 241, 255, 9, 43, 79, 110, 138,
252    163, 186, 207, 227, 246, 12, 39, 71, 99, 123, 144, 164, 182, 198, 214,
253    228, 241, 253, 9, 44, 81, 113, 142, 168, 192, 214, 235, 255, 7, 49,
254    90, 127, 160, 191, 220, 247, 6, 51, 95, 134, 170, 203, 234, 7, 47,
255    87, 123, 155, 184, 212, 237, 6, 52, 97, 137, 174, 208, 240, 5, 57,
256    106, 151, 192, 231, 5, 59, 111, 158, 202, 243, 5, 55, 103, 147, 187,
257    224, 5, 60, 113, 161, 206, 248, 4, 65, 122, 175, 224, 4, 67, 127,
258    182, 234
259};
260
261static const int16_t celt_cache_index[105] = {
262    -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 41, 41, 41,
263    82, 82, 123, 164, 200, 222, 0, 0, 0, 0, 0, 0, 0, 0, 41,
264    41, 41, 41, 123, 123, 123, 164, 164, 240, 266, 283, 295, 41, 41, 41,
265    41, 41, 41, 41, 41, 123, 123, 123, 123, 240, 240, 240, 266, 266, 305,
266    318, 328, 336, 123, 123, 123, 123, 123, 123, 123, 123, 240, 240, 240, 240,
267    305, 305, 305, 318, 318, 343, 351, 358, 364, 240, 240, 240, 240, 240, 240,
268    240, 240, 305, 305, 305, 305, 343, 343, 343, 351, 351, 370, 376, 382, 387,
269};
270
271static const uint8_t celt_log2_frac[] = {
272    0, 8, 13, 16, 19, 21, 23, 24, 26, 27, 28, 29, 30, 31, 32, 32, 33, 34, 34, 35, 36, 36, 37, 37
273};
274
275static const uint8_t celt_bit_interleave[] = {
276    0, 1, 1, 1, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3
277};
278
279static const uint8_t celt_bit_deinterleave[] = {
280    0x00, 0x03, 0x0C, 0x0F, 0x30, 0x33, 0x3C, 0x3F,
281    0xC0, 0xC3, 0xCC, 0xCF, 0xF0, 0xF3, 0xFC, 0xFF
282};
283
284static const uint8_t celt_hadamard_ordery[] = {
285    1,   0,
286    3,   0,  2,  1,
287    7,   0,  4,  3,  6,  1,  5,  2,
288    15,  0,  8,  7, 12,  3, 11,  4, 14,  1,  9,  6, 13,  2, 10,  5
289};
290
291static const uint16_t celt_qn_exp2[] = {
292    16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048
293};
294
295static const uint32_t celt_pvq_u[1272] = {
296    /* N = 0, K = 0...176 */
297    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
298    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
300    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
301    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
302    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
303    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
304    /* N = 1, K = 1...176 */
305    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
306    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
307    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
308    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
309    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
310    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
311    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
312    /* N = 2, K = 2...176 */
313    3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41,
314    43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
315    81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113,
316    115, 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139, 141, 143,
317    145, 147, 149, 151, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171, 173,
318    175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 203,
319    205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233,
320    235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263,
321    265, 267, 269, 271, 273, 275, 277, 279, 281, 283, 285, 287, 289, 291, 293,
322    295, 297, 299, 301, 303, 305, 307, 309, 311, 313, 315, 317, 319, 321, 323,
323    325, 327, 329, 331, 333, 335, 337, 339, 341, 343, 345, 347, 349, 351,
324    /* N = 3, K = 3...176 */
325    13, 25, 41, 61, 85, 113, 145, 181, 221, 265, 313, 365, 421, 481, 545, 613,
326    685, 761, 841, 925, 1013, 1105, 1201, 1301, 1405, 1513, 1625, 1741, 1861,
327    1985, 2113, 2245, 2381, 2521, 2665, 2813, 2965, 3121, 3281, 3445, 3613, 3785,
328    3961, 4141, 4325, 4513, 4705, 4901, 5101, 5305, 5513, 5725, 5941, 6161, 6385,
329    6613, 6845, 7081, 7321, 7565, 7813, 8065, 8321, 8581, 8845, 9113, 9385, 9661,
330    9941, 10225, 10513, 10805, 11101, 11401, 11705, 12013, 12325, 12641, 12961,
331    13285, 13613, 13945, 14281, 14621, 14965, 15313, 15665, 16021, 16381, 16745,
332    17113, 17485, 17861, 18241, 18625, 19013, 19405, 19801, 20201, 20605, 21013,
333    21425, 21841, 22261, 22685, 23113, 23545, 23981, 24421, 24865, 25313, 25765,
334    26221, 26681, 27145, 27613, 28085, 28561, 29041, 29525, 30013, 30505, 31001,
335    31501, 32005, 32513, 33025, 33541, 34061, 34585, 35113, 35645, 36181, 36721,
336    37265, 37813, 38365, 38921, 39481, 40045, 40613, 41185, 41761, 42341, 42925,
337    43513, 44105, 44701, 45301, 45905, 46513, 47125, 47741, 48361, 48985, 49613,
338    50245, 50881, 51521, 52165, 52813, 53465, 54121, 54781, 55445, 56113, 56785,
339    57461, 58141, 58825, 59513, 60205, 60901, 61601,
340    /* N = 4, K = 4...176 */
341    63, 129, 231, 377, 575, 833, 1159, 1561, 2047, 2625, 3303, 4089, 4991, 6017,
342    7175, 8473, 9919, 11521, 13287, 15225, 17343, 19649, 22151, 24857, 27775,
343    30913, 34279, 37881, 41727, 45825, 50183, 54809, 59711, 64897, 70375, 76153,
344    82239, 88641, 95367, 102425, 109823, 117569, 125671, 134137, 142975, 152193,
345    161799, 171801, 182207, 193025, 204263, 215929, 228031, 240577, 253575,
346    267033, 280959, 295361, 310247, 325625, 341503, 357889, 374791, 392217,
347    410175, 428673, 447719, 467321, 487487, 508225, 529543, 551449, 573951,
348    597057, 620775, 645113, 670079, 695681, 721927, 748825, 776383, 804609,
349    833511, 863097, 893375, 924353, 956039, 988441, 1021567, 1055425, 1090023,
350    1125369, 1161471, 1198337, 1235975, 1274393, 1313599, 1353601, 1394407,
351    1436025, 1478463, 1521729, 1565831, 1610777, 1656575, 1703233, 1750759,
352    1799161, 1848447, 1898625, 1949703, 2001689, 2054591, 2108417, 2163175,
353    2218873, 2275519, 2333121, 2391687, 2451225, 2511743, 2573249, 2635751,
354    2699257, 2763775, 2829313, 2895879, 2963481, 3032127, 3101825, 3172583,
355    3244409, 3317311, 3391297, 3466375, 3542553, 3619839, 3698241, 3777767,
356    3858425, 3940223, 4023169, 4107271, 4192537, 4278975, 4366593, 4455399,
357    4545401, 4636607, 4729025, 4822663, 4917529, 5013631, 5110977, 5209575,
358    5309433, 5410559, 5512961, 5616647, 5721625, 5827903, 5935489, 6044391,
359    6154617, 6266175, 6379073, 6493319, 6608921, 6725887, 6844225, 6963943,
360    7085049, 7207551,
361    /* N = 5, K = 5...176 */
362    321, 681, 1289, 2241, 3649, 5641, 8361, 11969, 16641, 22569, 29961, 39041,
363    50049, 63241, 78889, 97281, 118721, 143529, 172041, 204609, 241601, 283401,
364    330409, 383041, 441729, 506921, 579081, 658689, 746241, 842249, 947241,
365    1061761, 1186369, 1321641, 1468169, 1626561, 1797441, 1981449, 2179241,
366    2391489, 2618881, 2862121, 3121929, 3399041, 3694209, 4008201, 4341801,
367    4695809, 5071041, 5468329, 5888521, 6332481, 6801089, 7295241, 7815849,
368    8363841, 8940161, 9545769, 10181641, 10848769, 11548161, 12280841, 13047849,
369    13850241, 14689089, 15565481, 16480521, 17435329, 18431041, 19468809,
370    20549801, 21675201, 22846209, 24064041, 25329929, 26645121, 28010881,
371    29428489, 30899241, 32424449, 34005441, 35643561, 37340169, 39096641,
372    40914369, 42794761, 44739241, 46749249, 48826241, 50971689, 53187081,
373    55473921, 57833729, 60268041, 62778409, 65366401, 68033601, 70781609,
374    73612041, 76526529, 79526721, 82614281, 85790889, 89058241, 92418049,
375    95872041, 99421961, 103069569, 106816641, 110664969, 114616361, 118672641,
376    122835649, 127107241, 131489289, 135983681, 140592321, 145317129, 150160041,
377    155123009, 160208001, 165417001, 170752009, 176215041, 181808129, 187533321,
378    193392681, 199388289, 205522241, 211796649, 218213641, 224775361, 231483969,
379    238341641, 245350569, 252512961, 259831041, 267307049, 274943241, 282741889,
380    290705281, 298835721, 307135529, 315607041, 324252609, 333074601, 342075401,
381    351257409, 360623041, 370174729, 379914921, 389846081, 399970689, 410291241,
382    420810249, 431530241, 442453761, 453583369, 464921641, 476471169, 488234561,
383    500214441, 512413449, 524834241, 537479489, 550351881, 563454121, 576788929,
384    590359041, 604167209, 618216201, 632508801,
385    /* N = 6, K = 6...96 (technically V(109,5) fits in 32 bits, but that can't be
386     achieved by splitting an Opus band) */
387    1683, 3653, 7183, 13073, 22363, 36365, 56695, 85305, 124515, 177045, 246047,
388    335137, 448427, 590557, 766727, 982729, 1244979, 1560549, 1937199, 2383409,
389    2908411, 3522221, 4235671, 5060441, 6009091, 7095093, 8332863, 9737793,
390    11326283, 13115773, 15124775, 17372905, 19880915, 22670725, 25765455,
391    29189457, 32968347, 37129037, 41699767, 46710137, 52191139, 58175189,
392    64696159, 71789409, 79491819, 87841821, 96879431, 106646281, 117185651,
393    128542501, 140763503, 153897073, 167993403, 183104493, 199284183, 216588185,
394    235074115, 254801525, 275831935, 298228865, 322057867, 347386557, 374284647,
395    402823977, 433078547, 465124549, 499040399, 534906769, 572806619, 612825229,
396    655050231, 699571641, 746481891, 795875861, 847850911, 902506913, 959946283,
397    1020274013, 1083597703, 1150027593, 1219676595, 1292660325, 1369097135,
398    1449108145, 1532817275, 1620351277, 1711839767, 1807415257, 1907213187,
399    2011371957, 2120032959,
400    /* N = 7, K = 7...54 (technically V(60,6) fits in 32 bits, but that can't be
401     achieved by splitting an Opus band) */
402    8989, 19825, 40081, 75517, 134245, 227305, 369305, 579125, 880685, 1303777,
403    1884961, 2668525, 3707509, 5064793, 6814249, 9041957, 11847485, 15345233,
404    19665841, 24957661, 31388293, 39146185, 48442297, 59511829, 72616013,
405    88043969, 106114625, 127178701, 151620757, 179861305, 212358985, 249612805,
406    292164445, 340600625, 395555537, 457713341, 527810725, 606639529, 695049433,
407    793950709, 904317037, 1027188385, 1163673953, 1314955181, 1482288821,
408    1667010073, 1870535785, 2094367717,
409    /* N = 8, K = 8...37 (technically V(40,7) fits in 32 bits, but that can't be
410     achieved by splitting an Opus band) */
411    48639, 108545, 224143, 433905, 795455, 1392065, 2340495, 3800305, 5984767,
412    9173505, 13726991, 20103025, 28875327, 40754369, 56610575, 77500017,
413    104692735, 139703809, 184327311, 240673265, 311207743, 398796225, 506750351,
414    638878193, 799538175, 993696769, 1226990095, 1505789553, 1837271615,
415    2229491905,
416    /* N = 9, K = 9...28 (technically V(29,8) fits in 32 bits, but that can't be
417     achieved by splitting an Opus band) */
418    265729, 598417, 1256465, 2485825, 4673345, 8405905, 14546705, 24331777,
419    39490049, 62390545, 96220561, 145198913, 214828609, 312193553, 446304145,
420    628496897, 872893441, 1196924561, 1621925137, 2173806145,
421    /* N = 10, K = 10...24 */
422    1462563, 3317445, 7059735, 14218905, 27298155, 50250765, 89129247, 152951073,
423    254831667, 413442773, 654862247, 1014889769, 1541911931, 2300409629,
424    3375210671,
425    /* N = 11, K = 11...19 (technically V(20,10) fits in 32 bits, but that can't be
426     achieved by splitting an Opus band) */
427    8097453, 18474633, 39753273, 81270333, 158819253, 298199265, 540279585,
428    948062325, 1616336765,
429    /* N = 12, K = 12...18 */
430    45046719, 103274625, 224298231, 464387817, 921406335, 1759885185,
431    3248227095,
432    /* N = 13, K = 13...16 */
433    251595969, 579168825, 1267854873, 2653649025,
434    /* N = 14, K = 14 */
435    1409933619
436};
437
438DECLARE_ALIGNED(32, static const float, celt_window)[120] = {
439    6.7286966e-05f, 0.00060551348f, 0.0016815970f, 0.0032947962f, 0.0054439943f,
440    0.0081276923f, 0.011344001f, 0.015090633f, 0.019364886f, 0.024163635f,
441    0.029483315f, 0.035319905f, 0.041668911f, 0.048525347f, 0.055883718f,
442    0.063737999f, 0.072081616f, 0.080907428f, 0.090207705f, 0.099974111f,
443    0.11019769f, 0.12086883f, 0.13197729f, 0.14351214f, 0.15546177f,
444    0.16781389f, 0.18055550f, 0.19367290f, 0.20715171f, 0.22097682f,
445    0.23513243f, 0.24960208f, 0.26436860f, 0.27941419f, 0.29472040f,
446    0.31026818f, 0.32603788f, 0.34200931f, 0.35816177f, 0.37447407f,
447    0.39092462f, 0.40749142f, 0.42415215f, 0.44088423f, 0.45766484f,
448    0.47447104f, 0.49127978f, 0.50806798f, 0.52481261f, 0.54149077f,
449    0.55807973f, 0.57455701f, 0.59090049f, 0.60708841f, 0.62309951f,
450    0.63891306f, 0.65450896f, 0.66986776f, 0.68497077f, 0.69980010f,
451    0.71433873f, 0.72857055f, 0.74248043f, 0.75605424f, 0.76927895f,
452    0.78214257f, 0.79463430f, 0.80674445f, 0.81846456f, 0.82978733f,
453    0.84070669f, 0.85121779f, 0.86131698f, 0.87100183f, 0.88027111f,
454    0.88912479f, 0.89756398f, 0.90559094f, 0.91320904f, 0.92042270f,
455    0.92723738f, 0.93365955f, 0.93969656f, 0.94535671f, 0.95064907f,
456    0.95558353f, 0.96017067f, 0.96442171f, 0.96834849f, 0.97196334f,
457    0.97527906f, 0.97830883f, 0.98106616f, 0.98356480f, 0.98581869f,
458    0.98784191f, 0.98964856f, 0.99125274f, 0.99266849f, 0.99390969f,
459    0.99499004f, 0.99592297f, 0.99672162f, 0.99739874f, 0.99796667f,
460    0.99843728f, 0.99882195f, 0.99913147f, 0.99937606f, 0.99956527f,
461    0.99970802f, 0.99981248f, 0.99988613f, 0.99993565f, 0.99996697f,
462    0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.0000000f,
463};
464
465/* square of the window, used for the postfilter */
466const float ff_celt_window2[120] = {
467    4.5275357e-09f, 3.66647e-07f, 2.82777e-06f, 1.08557e-05f, 2.96371e-05f, 6.60594e-05f,
468    0.000128686f, 0.000227727f, 0.000374999f, 0.000583881f, 0.000869266f, 0.0012475f,
469    0.0017363f, 0.00235471f, 0.00312299f, 0.00406253f, 0.00519576f, 0.00654601f,
470    0.00813743f, 0.00999482f, 0.0121435f, 0.0146093f, 0.017418f, 0.0205957f, 0.0241684f,
471    0.0281615f, 0.0326003f, 0.0375092f, 0.0429118f, 0.0488308f, 0.0552873f, 0.0623012f,
472    0.0698908f, 0.0780723f, 0.0868601f, 0.0962664f, 0.106301f, 0.11697f, 0.12828f,
473    0.140231f, 0.152822f, 0.166049f, 0.179905f, 0.194379f, 0.209457f, 0.225123f, 0.241356f,
474    0.258133f, 0.275428f, 0.293212f, 0.311453f, 0.330116f, 0.349163f, 0.368556f, 0.388253f,
475    0.40821f, 0.428382f, 0.448723f, 0.469185f, 0.48972f, 0.51028f, 0.530815f, 0.551277f,
476    0.571618f, 0.59179f, 0.611747f, 0.631444f, 0.650837f, 0.669884f, 0.688547f, 0.706788f,
477    0.724572f, 0.741867f, 0.758644f, 0.774877f, 0.790543f, 0.805621f, 0.820095f, 0.833951f,
478    0.847178f, 0.859769f, 0.87172f, 0.88303f, 0.893699f, 0.903734f, 0.91314f, 0.921928f,
479    0.930109f, 0.937699f, 0.944713f, 0.951169f, 0.957088f, 0.962491f, 0.9674f, 0.971838f,
480    0.975832f, 0.979404f, 0.982582f, 0.985391f, 0.987857f, 0.990005f, 0.991863f, 0.993454f,
481    0.994804f, 0.995937f, 0.996877f, 0.997645f, 0.998264f, 0.998753f, 0.999131f, 0.999416f,
482    0.999625f, 0.999772f, 0.999871f, 0.999934f, 0.99997f, 0.999989f, 0.999997f, 0.99999964f, 1.0f,
483};
484
485static const uint32_t * const celt_pvq_u_row[15] = {
486    celt_pvq_u +    0, celt_pvq_u +  176, celt_pvq_u +  351,
487    celt_pvq_u +  525, celt_pvq_u +  698, celt_pvq_u +  870,
488    celt_pvq_u + 1041, celt_pvq_u + 1131, celt_pvq_u + 1178,
489    celt_pvq_u + 1207, celt_pvq_u + 1226, celt_pvq_u + 1240,
490    celt_pvq_u + 1248, celt_pvq_u + 1254, celt_pvq_u + 1257
491};
492
493static inline int16_t celt_cos(int16_t x)
494{
495    x = (MUL16(x, x) + 4096) >> 13;
496    x = (32767-x) + ROUND_MUL16(x, (-7651 + ROUND_MUL16(x, (8277 + ROUND_MUL16(-626, x)))));
497    return 1+x;
498}
499
500static inline int celt_log2tan(int isin, int icos)
501{
502    int lc, ls;
503    lc = opus_ilog(icos);
504    ls = opus_ilog(isin);
505    icos <<= 15 - lc;
506    isin <<= 15 - ls;
507    return (ls << 11) - (lc << 11) +
508           ROUND_MUL16(isin, ROUND_MUL16(isin, -2597) + 7932) -
509           ROUND_MUL16(icos, ROUND_MUL16(icos, -2597) + 7932);
510}
511
512static inline uint32_t celt_rng(CeltContext *s)
513{
514    s->seed = 1664525 * s->seed + 1013904223;
515    return s->seed;
516}
517
518static void celt_decode_coarse_energy(CeltContext *s, OpusRangeCoder *rc)
519{
520    int i, j;
521    float prev[2] = {0};
522    float alpha, beta;
523    const uint8_t *model;
524
525    /* use the 2D z-transform to apply prediction in both */
526    /* the time domain (alpha) and the frequency domain (beta) */
527
528    if (opus_rc_tell(rc)+3 <= s->framebits && opus_rc_p2model(rc, 3)) {
529        /* intra frame */
530        alpha = 0;
531        beta  = 1.0f - 4915.0f/32768.0f;
532        model = celt_coarse_energy_dist[s->duration][1];
533    } else {
534        alpha = celt_alpha_coef[s->duration];
535        beta  = 1.0f - celt_beta_coef[s->duration];
536        model = celt_coarse_energy_dist[s->duration][0];
537    }
538
539    for (i = 0; i < CELT_MAX_BANDS; i++) {
540        for (j = 0; j < s->coded_channels; j++) {
541            CeltFrame *frame = &s->frame[j];
542            float value;
543            int available;
544
545            if (i < s->startband || i >= s->endband) {
546                frame->energy[i] = 0.0;
547                continue;
548            }
549
550            available = s->framebits - opus_rc_tell(rc);
551            if (available >= 15) {
552                /* decode using a Laplace distribution */
553                int k = FFMIN(i, 20) << 1;
554                value = opus_rc_laplace(rc, model[k] << 7, model[k+1] << 6);
555            } else if (available >= 2) {
556                int x = opus_rc_getsymbol(rc, celt_model_energy_small);
557                value = (x>>1) ^ -(x&1);
558            } else if (available >= 1) {
559                value = -(float)opus_rc_p2model(rc, 1);
560            } else value = -1;
561
562            frame->energy[i] = FFMAX(-9.0f, frame->energy[i]) * alpha + prev[j] + value;
563            prev[j] += beta * value;
564        }
565    }
566}
567
568static void celt_decode_fine_energy(CeltContext *s, OpusRangeCoder *rc)
569{
570    int i;
571    for (i = s->startband; i < s->endband; i++) {
572        int j;
573        if (!s->fine_bits[i])
574            continue;
575
576        for (j = 0; j < s->coded_channels; j++) {
577            CeltFrame *frame = &s->frame[j];
578            int q2;
579            float offset;
580            q2 = opus_getrawbits(rc, s->fine_bits[i]);
581            offset = (q2 + 0.5f) * (1 << (14 - s->fine_bits[i])) / 16384.0f - 0.5f;
582            frame->energy[i] += offset;
583        }
584    }
585}
586
587static void celt_decode_final_energy(CeltContext *s, OpusRangeCoder *rc,
588                                     int bits_left)
589{
590    int priority, i, j;
591
592    for (priority = 0; priority < 2; priority++) {
593        for (i = s->startband; i < s->endband && bits_left >= s->coded_channels; i++) {
594            if (s->fine_priority[i] != priority || s->fine_bits[i] >= CELT_MAX_FINE_BITS)
595                continue;
596
597            for (j = 0; j < s->coded_channels; j++) {
598                int q2;
599                float offset;
600                q2 = opus_getrawbits(rc, 1);
601                offset = (q2 - 0.5f) * (1 << (14 - s->fine_bits[i] - 1)) / 16384.0f;
602                s->frame[j].energy[i] += offset;
603                bits_left--;
604            }
605        }
606    }
607}
608
609static void celt_decode_tf_changes(CeltContext *s, OpusRangeCoder *rc,
610                                   int transient)
611{
612    int i, diff = 0, tf_select = 0, tf_changed = 0, tf_select_bit;
613    int consumed, bits = transient ? 2 : 4;
614
615    consumed = opus_rc_tell(rc);
616    tf_select_bit = (s->duration != 0 && consumed+bits+1 <= s->framebits);
617
618    for (i = s->startband; i < s->endband; i++) {
619        if (consumed+bits+tf_select_bit <= s->framebits) {
620            diff ^= opus_rc_p2model(rc, bits);
621            consumed = opus_rc_tell(rc);
622            tf_changed |= diff;
623        }
624        s->tf_change[i] = diff;
625        bits = transient ? 4 : 5;
626    }
627
628    if (tf_select_bit && celt_tf_select[s->duration][transient][0][tf_changed] !=
629                         celt_tf_select[s->duration][transient][1][tf_changed])
630        tf_select = opus_rc_p2model(rc, 1);
631
632    for (i = s->startband; i < s->endband; i++) {
633        s->tf_change[i] = celt_tf_select[s->duration][transient][tf_select][s->tf_change[i]];
634    }
635}
636
637static void celt_decode_allocation(CeltContext *s, OpusRangeCoder *rc)
638{
639    // approx. maximum bit allocation for each band before boost/trim
640    int cap[CELT_MAX_BANDS];
641    int boost[CELT_MAX_BANDS];
642    int threshold[CELT_MAX_BANDS];
643    int bits1[CELT_MAX_BANDS];
644    int bits2[CELT_MAX_BANDS];
645    int trim_offset[CELT_MAX_BANDS];
646
647    int skip_startband = s->startband;
648    int dynalloc       = 6;
649    int alloctrim      = 5;
650    int extrabits      = 0;
651
652    int skip_bit            = 0;
653    int intensitystereo_bit = 0;
654    int dualstereo_bit      = 0;
655
656    int remaining, bandbits;
657    int low, high, total, done;
658    int totalbits;
659    int consumed;
660    int i, j;
661
662    consumed = opus_rc_tell(rc);
663
664    /* obtain spread flag */
665    s->spread = CELT_SPREAD_NORMAL;
666    if (consumed + 4 <= s->framebits)
667        s->spread = opus_rc_getsymbol(rc, celt_model_spread);
668
669    /* generate static allocation caps */
670    for (i = 0; i < CELT_MAX_BANDS; i++) {
671        cap[i] = (celt_static_caps[s->duration][s->coded_channels - 1][i] + 64)
672                 * celt_freq_range[i] << (s->coded_channels - 1) << s->duration >> 2;
673    }
674
675    /* obtain band boost */
676    totalbits = s->framebits << 3; // convert to 1/8 bits
677    consumed = opus_rc_tell_frac(rc);
678    for (i = s->startband; i < s->endband; i++) {
679        int quanta, band_dynalloc;
680
681        boost[i] = 0;
682
683        quanta = celt_freq_range[i] << (s->coded_channels - 1) << s->duration;
684        quanta = FFMIN(quanta << 3, FFMAX(6 << 3, quanta));
685        band_dynalloc = dynalloc;
686        while (consumed + (band_dynalloc<<3) < totalbits && boost[i] < cap[i]) {
687            int add = opus_rc_p2model(rc, band_dynalloc);
688            consumed = opus_rc_tell_frac(rc);
689            if (!add)
690                break;
691
692            boost[i]     += quanta;
693            totalbits    -= quanta;
694            band_dynalloc = 1;
695        }
696        /* dynalloc is more likely to occur if it's already been used for earlier bands */
697        if (boost[i])
698            dynalloc = FFMAX(2, dynalloc - 1);
699    }
700
701    /* obtain allocation trim */
702    if (consumed + (6 << 3) <= totalbits)
703        alloctrim = opus_rc_getsymbol(rc, celt_model_alloc_trim);
704
705    /* anti-collapse bit reservation */
706    totalbits = (s->framebits << 3) - opus_rc_tell_frac(rc) - 1;
707    s->anticollapse_bit = 0;
708    if (s->blocks > 1 && s->duration >= 2 &&
709        totalbits >= ((s->duration + 2) << 3))
710        s->anticollapse_bit = 1 << 3;
711    totalbits -= s->anticollapse_bit;
712
713    /* band skip bit reservation */
714    if (totalbits >= 1 << 3)
715        skip_bit = 1 << 3;
716    totalbits -= skip_bit;
717
718    /* intensity/dual stereo bit reservation */
719    if (s->coded_channels == 2) {
720        intensitystereo_bit = celt_log2_frac[s->endband - s->startband];
721        if (intensitystereo_bit <= totalbits) {
722            totalbits -= intensitystereo_bit;
723            if (totalbits >= 1 << 3) {
724                dualstereo_bit = 1 << 3;
725                totalbits -= 1 << 3;
726            }
727        } else
728            intensitystereo_bit = 0;
729    }
730
731    for (i = s->startband; i < s->endband; i++) {
732        int trim     = alloctrim - 5 - s->duration;
733        int band     = celt_freq_range[i] * (s->endband - i - 1);
734        int duration = s->duration + 3;
735        int scale    = duration + s->coded_channels - 1;
736
737        /* PVQ minimum allocation threshold, below this value the band is
738         * skipped */
739        threshold[i] = FFMAX(3 * celt_freq_range[i] << duration >> 4,
740                             s->coded_channels << 3);
741
742        trim_offset[i] = trim * (band << scale) >> 6;
743
744        if (celt_freq_range[i] << s->duration == 1)
745            trim_offset[i] -= s->coded_channels << 3;
746    }
747
748    /* bisection */
749    low  = 1;
750    high = CELT_VECTORS - 1;
751    while (low <= high) {
752        int center = (low + high) >> 1;
753        done = total = 0;
754
755        for (i = s->endband - 1; i >= s->startband; i--) {
756            bandbits = celt_freq_range[i] * celt_static_alloc[center][i]
757                       << (s->coded_channels - 1) << s->duration >> 2;
758
759            if (bandbits)
760                bandbits = FFMAX(0, bandbits + trim_offset[i]);
761            bandbits += boost[i];
762
763            if (bandbits >= threshold[i] || done) {
764                done = 1;
765                total += FFMIN(bandbits, cap[i]);
766            } else if (bandbits >= s->coded_channels << 3)
767                total += s->coded_channels << 3;
768        }
769
770        if (total > totalbits)
771            high = center - 1;
772        else
773            low = center + 1;
774    }
775    high = low--;
776
777    for (i = s->startband; i < s->endband; i++) {
778        bits1[i] = celt_freq_range[i] * celt_static_alloc[low][i]
779                   << (s->coded_channels - 1) << s->duration >> 2;
780        bits2[i] = high >= CELT_VECTORS ? cap[i] :
781                   celt_freq_range[i] * celt_static_alloc[high][i]
782                   << (s->coded_channels - 1) << s->duration >> 2;
783
784        if (bits1[i])
785            bits1[i] = FFMAX(0, bits1[i] + trim_offset[i]);
786        if (bits2[i])
787            bits2[i] = FFMAX(0, bits2[i] + trim_offset[i]);
788        if (low)
789            bits1[i] += boost[i];
790        bits2[i] += boost[i];
791
792        if (boost[i])
793            skip_startband = i;
794        bits2[i] = FFMAX(0, bits2[i] - bits1[i]);
795    }
796
797    /* bisection */
798    low  = 0;
799    high = 1 << CELT_ALLOC_STEPS;
800    for (i = 0; i < CELT_ALLOC_STEPS; i++) {
801        int center = (low + high) >> 1;
802        done = total = 0;
803
804        for (j = s->endband - 1; j >= s->startband; j--) {
805            bandbits = bits1[j] + (center * bits2[j] >> CELT_ALLOC_STEPS);
806
807            if (bandbits >= threshold[j] || done) {
808                done = 1;
809                total += FFMIN(bandbits, cap[j]);
810            } else if (bandbits >= s->coded_channels << 3)
811                total += s->coded_channels << 3;
812        }
813        if (total > totalbits)
814            high = center;
815        else
816            low = center;
817    }
818
819    done = total = 0;
820    for (i = s->endband - 1; i >= s->startband; i--) {
821        bandbits = bits1[i] + (low * bits2[i] >> CELT_ALLOC_STEPS);
822
823        if (bandbits >= threshold[i] || done)
824            done = 1;
825        else
826            bandbits = (bandbits >= s->coded_channels << 3) ?
827                       s->coded_channels << 3 : 0;
828
829        bandbits     = FFMIN(bandbits, cap[i]);
830        s->pulses[i] = bandbits;
831        total      += bandbits;
832    }
833
834    /* band skipping */
835    for (s->codedbands = s->endband; ; s->codedbands--) {
836        int allocation;
837        j = s->codedbands - 1;
838
839        if (j == skip_startband) {
840            /* all remaining bands are not skipped */
841            totalbits += skip_bit;
842            break;
843        }
844
845        /* determine the number of bits available for coding "do not skip" markers */
846        remaining   = totalbits - total;
847        bandbits    = remaining / (celt_freq_bands[j+1] - celt_freq_bands[s->startband]);
848        remaining  -= bandbits  * (celt_freq_bands[j+1] - celt_freq_bands[s->startband]);
849        allocation  = s->pulses[j] + bandbits * celt_freq_range[j]
850                      + FFMAX(0, remaining - (celt_freq_bands[j] - celt_freq_bands[s->startband]));
851
852        /* a "do not skip" marker is only coded if the allocation is
853           above the chosen threshold */
854        if (allocation >= FFMAX(threshold[j], (s->coded_channels + 1) <<3 )) {
855            if (opus_rc_p2model(rc, 1))
856                break;
857
858            total      += 1 << 3;
859            allocation -= 1 << 3;
860        }
861
862        /* the band is skipped, so reclaim its bits */
863        total -= s->pulses[j];
864        if (intensitystereo_bit) {
865            total -= intensitystereo_bit;
866            intensitystereo_bit = celt_log2_frac[j - s->startband];
867            total += intensitystereo_bit;
868        }
869
870        total += s->pulses[j] = (allocation >= s->coded_channels << 3) ?
871                              s->coded_channels << 3 : 0;
872    }
873
874    /* obtain stereo flags */
875    s->intensitystereo = 0;
876    s->dualstereo      = 0;
877    if (intensitystereo_bit)
878        s->intensitystereo = s->startband +
879                          opus_rc_unimodel(rc, s->codedbands + 1 - s->startband);
880    if (s->intensitystereo <= s->startband)
881        totalbits += dualstereo_bit; /* no intensity stereo means no dual stereo */
882    else if (dualstereo_bit)
883        s->dualstereo = opus_rc_p2model(rc, 1);
884
885    /* supply the remaining bits in this frame to lower bands */
886    remaining = totalbits - total;
887    bandbits  = remaining / (celt_freq_bands[s->codedbands] - celt_freq_bands[s->startband]);
888    remaining -= bandbits * (celt_freq_bands[s->codedbands] - celt_freq_bands[s->startband]);
889    for (i = s->startband; i < s->codedbands; i++) {
890        int bits = FFMIN(remaining, celt_freq_range[i]);
891
892        s->pulses[i] += bits + bandbits * celt_freq_range[i];
893        remaining    -= bits;
894    }
895
896    for (i = s->startband; i < s->codedbands; i++) {
897        int N = celt_freq_range[i] << s->duration;
898        int prev_extra = extrabits;
899        s->pulses[i] += extrabits;
900
901        if (N > 1) {
902            int dof;        // degrees of freedom
903            int temp;       // dof * channels * log(dof)
904            int offset;     // fine energy quantization offset, i.e.
905                            // extra bits assigned over the standard
906                            // totalbits/dof
907            int fine_bits, max_bits;
908
909            extrabits = FFMAX(0, s->pulses[i] - cap[i]);
910            s->pulses[i] -= extrabits;
911
912            /* intensity stereo makes use of an extra degree of freedom */
913            dof = N * s->coded_channels
914                  + (s->coded_channels == 2 && N > 2 && !s->dualstereo && i < s->intensitystereo);
915            temp = dof * (celt_log_freq_range[i] + (s->duration<<3));
916            offset = (temp >> 1) - dof * CELT_FINE_OFFSET;
917            if (N == 2) /* dof=2 is the only case that doesn't fit the model */
918                offset += dof<<1;
919
920            /* grant an additional bias for the first and second pulses */
921            if (s->pulses[i] + offset < 2 * (dof << 3))
922                offset += temp >> 2;
923            else if (s->pulses[i] + offset < 3 * (dof << 3))
924                offset += temp >> 3;
925
926            fine_bits = (s->pulses[i] + offset + (dof << 2)) / (dof << 3);
927            max_bits  = FFMIN((s->pulses[i]>>3) >> (s->coded_channels - 1),
928                              CELT_MAX_FINE_BITS);
929
930            max_bits  = FFMAX(max_bits, 0);
931
932            s->fine_bits[i] = av_clip(fine_bits, 0, max_bits);
933
934            /* if fine_bits was rounded down or capped,
935               give priority for the final fine energy pass */
936            s->fine_priority[i] = (s->fine_bits[i] * (dof<<3) >= s->pulses[i] + offset);
937
938            /* the remaining bits are assigned to PVQ */
939            s->pulses[i] -= s->fine_bits[i] << (s->coded_channels - 1) << 3;
940        } else {
941            /* all bits go to fine energy except for the sign bit */
942            extrabits = FFMAX(0, s->pulses[i] - (s->coded_channels << 3));
943            s->pulses[i] -= extrabits;
944            s->fine_bits[i] = 0;
945            s->fine_priority[i] = 1;
946        }
947
948        /* hand back a limited number of extra fine energy bits to this band */
949        if (extrabits > 0) {
950            int fineextra = FFMIN(extrabits >> (s->coded_channels + 2),
951                                  CELT_MAX_FINE_BITS - s->fine_bits[i]);
952            s->fine_bits[i] += fineextra;
953
954            fineextra <<= s->coded_channels + 2;
955            s->fine_priority[i] = (fineextra >= extrabits - prev_extra);
956            extrabits -= fineextra;
957        }
958    }
959    s->remaining = extrabits;
960
961    /* skipped bands dedicate all of their bits for fine energy */
962    for (; i < s->endband; i++) {
963        s->fine_bits[i]     = s->pulses[i] >> (s->coded_channels - 1) >> 3;
964        s->pulses[i]        = 0;
965        s->fine_priority[i] = s->fine_bits[i] < 1;
966    }
967}
968
969static inline int celt_bits2pulses(const uint8_t *cache, int bits)
970{
971    // TODO: Find the size of cache and make it into an array in the parameters list
972    int i, low = 0, high;
973
974    high = cache[0];
975    bits--;
976
977    for (i = 0; i < 6; i++) {
978        int center = (low + high + 1) >> 1;
979        if (cache[center] >= bits)
980            high = center;
981        else
982            low = center;
983    }
984
985    return (bits - (low == 0 ? -1 : cache[low]) <= cache[high] - bits) ? low : high;
986}
987
988static inline int celt_pulses2bits(const uint8_t *cache, int pulses)
989{
990    // TODO: Find the size of cache and make it into an array in the parameters list
991   return (pulses == 0) ? 0 : cache[pulses] + 1;
992}
993
994static inline void celt_normalize_residual(const int * av_restrict iy, float * av_restrict X,
995                                           int N, float g)
996{
997    int i;
998    for (i = 0; i < N; i++)
999        X[i] = g * iy[i];
1000}
1001
1002static void celt_exp_rotation1(float *X, unsigned int len, unsigned int stride,
1003                               float c, float s)
1004{
1005    float *Xptr;
1006    int i;
1007
1008    Xptr = X;
1009    for (i = 0; i < len - stride; i++) {
1010        float x1, x2;
1011        x1           = Xptr[0];
1012        x2           = Xptr[stride];
1013        Xptr[stride] = c * x2 + s * x1;
1014        *Xptr++      = c * x1 - s * x2;
1015    }
1016
1017    Xptr = &X[len - 2 * stride - 1];
1018    for (i = len - 2 * stride - 1; i >= 0; i--) {
1019        float x1, x2;
1020        x1           = Xptr[0];
1021        x2           = Xptr[stride];
1022        Xptr[stride] = c * x2 + s * x1;
1023        *Xptr--      = c * x1 - s * x2;
1024    }
1025}
1026
1027static inline void celt_exp_rotation(float *X, unsigned int len,
1028                                     unsigned int stride, unsigned int K,
1029                                     enum CeltSpread spread)
1030{
1031    unsigned int stride2 = 0;
1032    float c, s;
1033    float gain, theta;
1034    int i;
1035
1036    if (2*K >= len || spread == CELT_SPREAD_NONE)
1037        return;
1038
1039    gain = (float)len / (len + (20 - 5*spread) * K);
1040    theta = M_PI * gain * gain / 4;
1041
1042    c = cos(theta);
1043    s = sin(theta);
1044
1045    if (len >= stride << 3) {
1046        stride2 = 1;
1047        /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
1048        It's basically incrementing long as (stride2+0.5)^2 < len/stride. */
1049        while ((stride2 * stride2 + stride2) * stride + (stride >> 2) < len)
1050            stride2++;
1051    }
1052
1053    /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
1054    extract_collapse_mask().*/
1055    len /= stride;
1056    for (i = 0; i < stride; i++) {
1057        if (stride2)
1058            celt_exp_rotation1(X + i * len, len, stride2, s, c);
1059        celt_exp_rotation1(X + i * len, len, 1, c, s);
1060    }
1061}
1062
1063static inline unsigned int celt_extract_collapse_mask(const int *iy,
1064                                                      unsigned int N,
1065                                                      unsigned int B)
1066{
1067    unsigned int collapse_mask;
1068    int N0;
1069    int i, j;
1070
1071    if (B <= 1)
1072        return 1;
1073
1074    /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
1075    exp_rotation().*/
1076    N0 = N/B;
1077    collapse_mask = 0;
1078    for (i = 0; i < B; i++)
1079        for (j = 0; j < N0; j++)
1080            collapse_mask |= (iy[i*N0+j]!=0)<<i;
1081    return collapse_mask;
1082}
1083
1084static inline void celt_renormalize_vector(float *X, int N, float gain)
1085{
1086    int i;
1087    float g = 1e-15f;
1088    for (i = 0; i < N; i++)
1089        g += X[i] * X[i];
1090    g = gain / sqrtf(g);
1091
1092    for (i = 0; i < N; i++)
1093        X[i] *= g;
1094}
1095
1096static inline void celt_stereo_merge(float *X, float *Y, float mid, int N)
1097{
1098    int i;
1099    float xp = 0, side = 0;
1100    float E[2];
1101    float mid2;
1102    float t, gain[2];
1103
1104    /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
1105    for (i = 0; i < N; i++) {
1106        xp   += X[i] * Y[i];
1107        side += Y[i] * Y[i];
1108    }
1109
1110    /* Compensating for the mid normalization */
1111    xp *= mid;
1112    mid2 = mid;
1113    E[0] = mid2 * mid2 + side - 2 * xp;
1114    E[1] = mid2 * mid2 + side + 2 * xp;
1115    if (E[0] < 6e-4f || E[1] < 6e-4f) {
1116        for (i = 0; i < N; i++)
1117            Y[i] = X[i];
1118        return;
1119    }
1120
1121    t = E[0];
1122    gain[0] = 1.0f / sqrtf(t);
1123    t = E[1];
1124    gain[1] = 1.0f / sqrtf(t);
1125
1126    for (i = 0; i < N; i++) {
1127        float value[2];
1128        /* Apply mid scaling (side is already scaled) */
1129        value[0] = mid * X[i];
1130        value[1] = Y[i];
1131        X[i] = gain[0] * (value[0] - value[1]);
1132        Y[i] = gain[1] * (value[0] + value[1]);
1133    }
1134}
1135
1136static void celt_interleave_hadamard(float *tmp, float *X, int N0,
1137                                     int stride, int hadamard)
1138{
1139    int i, j;
1140    int N = N0*stride;
1141
1142    if (hadamard) {
1143        const uint8_t *ordery = celt_hadamard_ordery + stride - 2;
1144        for (i = 0; i < stride; i++)
1145            for (j = 0; j < N0; j++)
1146                tmp[j*stride+i] = X[ordery[i]*N0+j];
1147    } else {
1148        for (i = 0; i < stride; i++)
1149            for (j = 0; j < N0; j++)
1150                tmp[j*stride+i] = X[i*N0+j];
1151    }
1152
1153    for (i = 0; i < N; i++)
1154        X[i] = tmp[i];
1155}
1156
1157static void celt_deinterleave_hadamard(float *tmp, float *X, int N0,
1158                                       int stride, int hadamard)
1159{
1160    int i, j;
1161    int N = N0*stride;
1162
1163    if (hadamard) {
1164        const uint8_t *ordery = celt_hadamard_ordery + stride - 2;
1165        for (i = 0; i < stride; i++)
1166            for (j = 0; j < N0; j++)
1167                tmp[ordery[i]*N0+j] = X[j*stride+i];
1168    } else {
1169        for (i = 0; i < stride; i++)
1170            for (j = 0; j < N0; j++)
1171                tmp[i*N0+j] = X[j*stride+i];
1172    }
1173
1174    for (i = 0; i < N; i++)
1175        X[i] = tmp[i];
1176}
1177
1178static void celt_haar1(float *X, int N0, int stride)
1179{
1180    int i, j;
1181    N0 >>= 1;
1182    for (i = 0; i < stride; i++) {
1183        for (j = 0; j < N0; j++) {
1184            float x0 = X[stride * (2 * j + 0) + i];
1185            float x1 = X[stride * (2 * j + 1) + i];
1186            X[stride * (2 * j + 0) + i] = (x0 + x1) * M_SQRT1_2;
1187            X[stride * (2 * j + 1) + i] = (x0 - x1) * M_SQRT1_2;
1188        }
1189    }
1190}
1191
1192static inline int celt_compute_qn(int N, int b, int offset, int pulse_cap,
1193                                  int dualstereo)
1194{
1195    int qn, qb;
1196    int N2 = 2 * N - 1;
1197    if (dualstereo && N == 2)
1198        N2--;
1199
1200    /* The upper limit ensures that in a stereo split with itheta==16384, we'll
1201     * always have enough bits left over to code at least one pulse in the
1202     * side; otherwise it would collapse, since it doesn't get folded. */
1203    qb = FFMIN3(b - pulse_cap - (4 << 3), (b + N2 * offset) / N2, 8 << 3);
1204    qn = (qb < (1 << 3 >> 1)) ? 1 : ((celt_qn_exp2[qb & 0x7] >> (14 - (qb >> 3))) + 1) >> 1 << 1;
1205    return qn;
1206}
1207
1208// this code was adapted from libopus
1209static inline uint64_t celt_cwrsi(unsigned int N, unsigned int K, unsigned int i, int *y)
1210{
1211    uint64_t norm = 0;
1212    uint32_t p;
1213    int s, val;
1214    int k0;
1215
1216    while (N > 2) {
1217        uint32_t q;
1218
1219        /*Lots of pulses case:*/
1220        if (K >= N) {
1221            const uint32_t *row = celt_pvq_u_row[N];
1222
1223            /* Are the pulses in this dimension negative? */
1224            p  = row[K + 1];
1225            s  = -(i >= p);
1226            i -= p & s;
1227
1228            /*Count how many pulses were placed in this dimension.*/
1229            k0 = K;
1230            q = row[N];
1231            if (q > i) {
1232                K = N;
1233                do {
1234                    p = celt_pvq_u_row[--K][N];
1235                } while (p > i);
1236            } else
1237                for (p = row[K]; p > i; p = row[K])
1238                    K--;
1239
1240            i    -= p;
1241            val   = (k0 - K + s) ^ s;
1242            norm += val * val;
1243            *y++  = val;
1244        } else { /*Lots of dimensions case:*/
1245            /*Are there any pulses in this dimension at all?*/
1246            p = celt_pvq_u_row[K    ][N];
1247            q = celt_pvq_u_row[K + 1][N];
1248
1249            if (p <= i && i < q) {
1250                i -= p;
1251                *y++ = 0;
1252            } else {
1253                /*Are the pulses in this dimension negative?*/
1254                s  = -(i >= q);
1255                i -= q & s;
1256
1257                /*Count how many pulses were placed in this dimension.*/
1258                k0 = K;
1259                do p = celt_pvq_u_row[--K][N];
1260                while (p > i);
1261
1262                i    -= p;
1263                val   = (k0 - K + s) ^ s;
1264                norm += val * val;
1265                *y++  = val;
1266            }
1267        }
1268        N--;
1269    }
1270
1271    /* N == 2 */
1272    p  = 2 * K + 1;
1273    s  = -(i >= p);
1274    i -= p & s;
1275    k0 = K;
1276    K  = (i + 1) / 2;
1277
1278    if (K)
1279        i -= 2 * K - 1;
1280
1281    val   = (k0 - K + s) ^ s;
1282    norm += val * val;
1283    *y++  = val;
1284
1285    /* N==1 */
1286    s     = -i;
1287    val   = (K + s) ^ s;
1288    norm += val * val;
1289    *y    = val;
1290
1291    return norm;
1292}
1293
1294static inline float celt_decode_pulses(OpusRangeCoder *rc, int *y, unsigned int N, unsigned int K)
1295{
1296    unsigned int idx;
1297#define CELT_PVQ_U(n, k) (celt_pvq_u_row[FFMIN(n, k)][FFMAX(n, k)])
1298#define CELT_PVQ_V(n, k) (CELT_PVQ_U(n, k) + CELT_PVQ_U(n, (k) + 1))
1299    idx = opus_rc_unimodel(rc, CELT_PVQ_V(N, K));
1300    return celt_cwrsi(N, K, idx, y);
1301}
1302
1303/** Decode pulse vector and combine the result with the pitch vector to produce
1304    the final normalised signal in the current band. */
1305static inline unsigned int celt_alg_unquant(OpusRangeCoder *rc, float *X,
1306                                            unsigned int N, unsigned int K,
1307                                            enum CeltSpread spread,
1308                                            unsigned int blocks, float gain)
1309{
1310    int y[176];
1311
1312    gain /= sqrtf(celt_decode_pulses(rc, y, N, K));
1313    celt_normalize_residual(y, X, N, gain);
1314    celt_exp_rotation(X, N, blocks, K, spread);
1315    return celt_extract_collapse_mask(y, N, blocks);
1316}
1317
1318static unsigned int celt_decode_band(CeltContext *s, OpusRangeCoder *rc,
1319                                     const int band, float *X, float *Y,
1320                                     int N, int b, unsigned int blocks,
1321                                     float *lowband, int duration,
1322                                     float *lowband_out, int level,
1323                                     float gain, float *lowband_scratch,
1324                                     int fill)
1325{
1326    const uint8_t *cache;
1327    int dualstereo, split;
1328    int imid = 0, iside = 0;
1329    unsigned int N0 = N;
1330    int N_B;
1331    int N_B0;
1332    int B0 = blocks;
1333    int time_divide = 0;
1334    int recombine = 0;
1335    int inv = 0;
1336    float mid = 0, side = 0;
1337    int longblocks = (B0 == 1);
1338    unsigned int cm = 0;
1339
1340    N_B0 = N_B = N / blocks;
1341    split = dualstereo = (Y != NULL);
1342
1343    if (N == 1) {
1344        /* special case for one sample */
1345        int i;
1346        float *x = X;
1347        for (i = 0; i <= dualstereo; i++) {
1348            int sign = 0;
1349            if (s->remaining2 >= 1<<3) {
1350                sign           = opus_getrawbits(rc, 1);
1351                s->remaining2 -= 1 << 3;
1352                b             -= 1 << 3;
1353            }
1354            x[0] = sign ? -1.0f : 1.0f;
1355            x = Y;
1356        }
1357        if (lowband_out)
1358            lowband_out[0] = X[0];
1359        return 1;
1360    }
1361
1362    if (!dualstereo && level == 0) {
1363        int tf_change = s->tf_change[band];
1364        int k;
1365        if (tf_change > 0)
1366            recombine = tf_change;
1367        /* Band recombining to increase frequency resolution */
1368
1369        if (lowband &&
1370            (recombine || ((N_B & 1) == 0 && tf_change < 0) || B0 > 1)) {
1371            int j;
1372            for (j = 0; j < N; j++)
1373                lowband_scratch[j] = lowband[j];
1374            lowband = lowband_scratch;
1375        }
1376
1377        for (k = 0; k < recombine; k++) {
1378            if (lowband)
1379                celt_haar1(lowband, N >> k, 1 << k);
1380            fill = celt_bit_interleave[fill & 0xF] | celt_bit_interleave[fill >> 4] << 2;
1381        }
1382        blocks >>= recombine;
1383        N_B <<= recombine;
1384
1385        /* Increasing the time resolution */
1386        while ((N_B & 1) == 0 && tf_change < 0) {
1387            if (lowband)
1388                celt_haar1(lowband, N_B, blocks);
1389            fill |= fill << blocks;
1390            blocks <<= 1;
1391            N_B >>= 1;
1392            time_divide++;
1393            tf_change++;
1394        }
1395        B0 = blocks;
1396        N_B0 = N_B;
1397
1398        /* Reorganize the samples in time order instead of frequency order */
1399        if (B0 > 1 && lowband)
1400            celt_deinterleave_hadamard(s->scratch, lowband, N_B >> recombine,
1401                                       B0 << recombine, longblocks);
1402    }
1403
1404    /* If we need 1.5 more bit than we can produce, split the band in two. */
1405    cache = celt_cache_bits +
1406            celt_cache_index[(duration + 1) * CELT_MAX_BANDS + band];
1407    if (!dualstereo && duration >= 0 && b > cache[cache[0]] + 12 && N > 2) {
1408        N >>= 1;
1409        Y = X + N;
1410        split = 1;
1411        duration -= 1;
1412        if (blocks == 1)
1413            fill = (fill & 1) | (fill << 1);
1414        blocks = (blocks + 1) >> 1;
1415    }
1416
1417    if (split) {
1418        int qn;
1419        int itheta = 0;
1420        int mbits, sbits, delta;
1421        int qalloc;
1422        int pulse_cap;
1423        int offset;
1424        int orig_fill;
1425        int tell;
1426
1427        /* Decide on the resolution to give to the split parameter theta */
1428        pulse_cap = celt_log_freq_range[band] + duration * 8;
1429        offset = (pulse_cap >> 1) - (dualstereo && N == 2 ? CELT_QTHETA_OFFSET_TWOPHASE :
1430                                                          CELT_QTHETA_OFFSET);
1431        qn = (dualstereo && band >= s->intensitystereo) ? 1 :
1432             celt_compute_qn(N, b, offset, pulse_cap, dualstereo);
1433        tell = opus_rc_tell_frac(rc);
1434        if (qn != 1) {
1435            /* Entropy coding of the angle. We use a uniform pdf for the
1436            time split, a step for stereo, and a triangular one for the rest. */
1437            if (dualstereo && N > 2)
1438                itheta = opus_rc_stepmodel(rc, qn/2);
1439            else if (dualstereo || B0 > 1)
1440                itheta = opus_rc_unimodel(rc, qn+1);
1441            else
1442                itheta = opus_rc_trimodel(rc, qn);
1443            itheta = itheta * 16384 / qn;
1444            /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
1445            Let's do that at higher complexity */
1446        } else if (dualstereo) {
1447            inv = (b > 2 << 3 && s->remaining2 > 2 << 3) ? opus_rc_p2model(rc, 2) : 0;
1448            itheta = 0;
1449        }
1450        qalloc = opus_rc_tell_frac(rc) - tell;
1451        b -= qalloc;
1452
1453        orig_fill = fill;
1454        if (itheta == 0) {
1455            imid = 32767;
1456            iside = 0;
1457            fill &= (1 << blocks) - 1;
1458            delta = -16384;
1459        } else if (itheta == 16384) {
1460            imid = 0;
1461            iside = 32767;
1462            fill &= ((1 << blocks) - 1) << blocks;
1463            delta = 16384;
1464        } else {
1465            imid = celt_cos(itheta);
1466            iside = celt_cos(16384-itheta);
1467            /* This is the mid vs side allocation that minimizes squared error
1468            in that band. */
1469            delta = ROUND_MUL16((N - 1) << 7, celt_log2tan(iside, imid));
1470        }
1471
1472        mid  = imid  / 32768.0f;
1473        side = iside / 32768.0f;
1474
1475        /* This is a special case for N=2 that only works for stereo and takes
1476        advantage of the fact that mid and side are orthogonal to encode
1477        the side with just one bit. */
1478        if (N == 2 && dualstereo) {
1479            int c;
1480            int sign = 0;
1481            float tmp;
1482            float *x2, *y2;
1483            mbits = b;
1484            /* Only need one bit for the side */
1485            sbits = (itheta != 0 && itheta != 16384) ? 1 << 3 : 0;
1486            mbits -= sbits;
1487            c = (itheta > 8192);
1488            s->remaining2 -= qalloc+sbits;
1489
1490            x2 = c ? Y : X;
1491            y2 = c ? X : Y;
1492            if (sbits)
1493                sign = opus_getrawbits(rc, 1);
1494            sign = 1 - 2 * sign;
1495            /* We use orig_fill here because we want to fold the side, but if
1496            itheta==16384, we'll have cleared the low bits of fill. */
1497            cm = celt_decode_band(s, rc, band, x2, NULL, N, mbits, blocks,
1498                                  lowband, duration, lowband_out, level, gain,
1499                                  lowband_scratch, orig_fill);
1500            /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
1501            and there's no need to worry about mixing with the other channel. */
1502            y2[0] = -sign * x2[1];
1503            y2[1] =  sign * x2[0];
1504            X[0] *= mid;
1505            X[1] *= mid;
1506            Y[0] *= side;
1507            Y[1] *= side;
1508            tmp = X[0];
1509            X[0] = tmp - Y[0];
1510            Y[0] = tmp + Y[0];
1511            tmp = X[1];
1512            X[1] = tmp - Y[1];
1513            Y[1] = tmp + Y[1];
1514        } else {
1515            /* "Normal" split code */
1516            float *next_lowband2     = NULL;
1517            float *next_lowband_out1 = NULL;
1518            int next_level = 0;
1519            int rebalance;
1520
1521            /* Give more bits to low-energy MDCTs than they would
1522             * otherwise deserve */
1523            if (B0 > 1 && !dualstereo && (itheta & 0x3fff)) {
1524                if (itheta > 8192)
1525                    /* Rough approximation for pre-echo masking */
1526                    delta -= delta >> (4 - duration);
1527                else
1528                    /* Corresponds to a forward-masking slope of
1529                     * 1.5 dB per 10 ms */
1530                    delta = FFMIN(0, delta + (N << 3 >> (5 - duration)));
1531            }
1532            mbits = av_clip((b - delta) / 2, 0, b);
1533            sbits = b - mbits;
1534            s->remaining2 -= qalloc;
1535
1536            if (lowband && !dualstereo)
1537                next_lowband2 = lowband + N; /* >32-bit split case */
1538
1539            /* Only stereo needs to pass on lowband_out.
1540             * Otherwise, it's handled at the end */
1541            if (dualstereo)
1542                next_lowband_out1 = lowband_out;
1543            else
1544                next_level = level + 1;
1545
1546            rebalance = s->remaining2;
1547            if (mbits >= sbits) {
1548                /* In stereo mode, we do not apply a scaling to the mid
1549                 * because we need the normalized mid for folding later */
1550                cm = celt_decode_band(s, rc, band, X, NULL, N, mbits, blocks,
1551                                      lowband, duration, next_lowband_out1,
1552                                      next_level, dualstereo ? 1.0f : (gain * mid),
1553                                      lowband_scratch, fill);
1554
1555                rebalance = mbits - (rebalance - s->remaining2);
1556                if (rebalance > 3 << 3 && itheta != 0)
1557                    sbits += rebalance - (3 << 3);
1558
1559                /* For a stereo split, the high bits of fill are always zero,
1560                 * so no folding will be done to the side. */
1561                cm |= celt_decode_band(s, rc, band, Y, NULL, N, sbits, blocks,
1562                                       next_lowband2, duration, NULL,
1563                                       next_level, gain * side, NULL,
1564                                       fill >> blocks) << ((B0 >> 1) & (dualstereo - 1));
1565            } else {
1566                /* For a stereo split, the high bits of fill are always zero,
1567                 * so no folding will be done to the side. */
1568                cm = celt_decode_band(s, rc, band, Y, NULL, N, sbits, blocks,
1569                                      next_lowband2, duration, NULL,
1570                                      next_level, gain * side, NULL,
1571                                      fill >> blocks) << ((B0 >> 1) & (dualstereo - 1));
1572
1573                rebalance = sbits - (rebalance - s->remaining2);
1574                if (rebalance > 3 << 3 && itheta != 16384)
1575                    mbits += rebalance - (3 << 3);
1576
1577                /* In stereo mode, we do not apply a scaling to the mid because
1578                 * we need the normalized mid for folding later */
1579                cm |= celt_decode_band(s, rc, band, X, NULL, N, mbits, blocks,
1580                                       lowband, duration, next_lowband_out1,
1581                                       next_level, dualstereo ? 1.0f : (gain * mid),
1582                                       lowband_scratch, fill);
1583            }
1584        }
1585    } else {
1586        /* This is the basic no-split case */
1587        unsigned int q         = celt_bits2pulses(cache, b);
1588        unsigned int curr_bits = celt_pulses2bits(cache, q);
1589        s->remaining2 -= curr_bits;
1590
1591        /* Ensures we can never bust the budget */
1592        while (s->remaining2 < 0 && q > 0) {
1593            s->remaining2 += curr_bits;
1594            curr_bits      = celt_pulses2bits(cache, --q);
1595            s->remaining2 -= curr_bits;
1596        }
1597
1598        if (q != 0) {
1599            /* Finally do the actual quantization */
1600            cm = celt_alg_unquant(rc, X, N, (q < 8) ? q : (8 + (q & 7)) << ((q >> 3) - 1),
1601                                  s->spread, blocks, gain);
1602        } else {
1603            /* If there's no pulse, fill the band anyway */
1604            int j;
1605            unsigned int cm_mask = (1 << blocks) - 1;
1606            fill &= cm_mask;
1607            if (!fill) {
1608                for (j = 0; j < N; j++)
1609                    X[j] = 0.0f;
1610            } else {
1611                if (lowband == NULL) {
1612                    /* Noise */
1613                    for (j = 0; j < N; j++)
1614                        X[j] = (((int32_t)celt_rng(s)) >> 20);
1615                    cm = cm_mask;
1616                } else {
1617                    /* Folded spectrum */
1618                    for (j = 0; j < N; j++) {
1619                        /* About 48 dB below the "normal" folding level */
1620                        X[j] = lowband[j] + (((celt_rng(s)) & 0x8000) ? 1.0f / 256 : -1.0f / 256);
1621                    }
1622                    cm = fill;
1623                }
1624                celt_renormalize_vector(X, N, gain);
1625            }
1626        }
1627    }
1628
1629    /* This code is used by the decoder and by the resynthesis-enabled encoder */
1630    if (dualstereo) {
1631        int j;
1632        if (N != 2)
1633            celt_stereo_merge(X, Y, mid, N);
1634        if (inv) {
1635            for (j = 0; j < N; j++)
1636                Y[j] *= -1;
1637        }
1638    } else if (level == 0) {
1639        int k;
1640
1641        /* Undo the sample reorganization going from time order to frequency order */
1642        if (B0 > 1)
1643            celt_interleave_hadamard(s->scratch, X, N_B>>recombine,
1644                                     B0<<recombine, longblocks);
1645
1646        /* Undo time-freq changes that we did earlier */
1647        N_B = N_B0;
1648        blocks = B0;
1649        for (k = 0; k < time_divide; k++) {
1650            blocks >>= 1;
1651            N_B <<= 1;
1652            cm |= cm >> blocks;
1653            celt_haar1(X, N_B, blocks);
1654        }
1655
1656        for (k = 0; k < recombine; k++) {
1657            cm = celt_bit_deinterleave[cm];
1658            celt_haar1(X, N0>>k, 1<<k);
1659        }
1660        blocks <<= recombine;
1661
1662        /* Scale output for later folding */
1663        if (lowband_out) {
1664            int j;
1665            float n = sqrtf(N0);
1666            for (j = 0; j < N0; j++)
1667                lowband_out[j] = n * X[j];
1668        }
1669        cm &= (1 << blocks) - 1;
1670    }
1671    return cm;
1672}
1673
1674static void celt_denormalize(CeltContext *s, CeltFrame *frame, float *data)
1675{
1676    int i, j;
1677
1678    for (i = s->startband; i < s->endband; i++) {
1679        float *dst = data + (celt_freq_bands[i] << s->duration);
1680        float norm = pow(2, frame->energy[i] + celt_mean_energy[i]);
1681
1682        for (j = 0; j < celt_freq_range[i] << s->duration; j++)
1683            dst[j] *= norm;
1684    }
1685}
1686
1687static void celt_postfilter_apply_transition(CeltFrame *frame, float *data)
1688{
1689    const int T0 = frame->pf_period_old;
1690    const int T1 = frame->pf_period;
1691
1692    float g00, g01, g02;
1693    float g10, g11, g12;
1694
1695    float x0, x1, x2, x3, x4;
1696
1697    int i;
1698
1699    if (frame->pf_gains[0]     == 0.0 &&
1700        frame->pf_gains_old[0] == 0.0)
1701        return;
1702
1703    g00 = frame->pf_gains_old[0];
1704    g01 = frame->pf_gains_old[1];
1705    g02 = frame->pf_gains_old[2];
1706    g10 = frame->pf_gains[0];
1707    g11 = frame->pf_gains[1];
1708    g12 = frame->pf_gains[2];
1709
1710    x1 = data[-T1 + 1];
1711    x2 = data[-T1];
1712    x3 = data[-T1 - 1];
1713    x4 = data[-T1 - 2];
1714
1715    for (i = 0; i < CELT_OVERLAP; i++) {
1716        float w = ff_celt_window2[i];
1717        x0 = data[i - T1 + 2];
1718
1719        data[i] +=  (1.0 - w) * g00 * data[i - T0]                          +
1720                    (1.0 - w) * g01 * (data[i - T0 - 1] + data[i - T0 + 1]) +
1721                    (1.0 - w) * g02 * (data[i - T0 - 2] + data[i - T0 + 2]) +
1722                    w         * g10 * x2                                    +
1723                    w         * g11 * (x1 + x3)                             +
1724                    w         * g12 * (x0 + x4);
1725        x4 = x3;
1726        x3 = x2;
1727        x2 = x1;
1728        x1 = x0;
1729    }
1730}
1731
1732static void celt_postfilter_apply(CeltFrame *frame,
1733                                  float *data, int len)
1734{
1735    const int T = frame->pf_period;
1736    float g0, g1, g2;
1737    float x0, x1, x2, x3, x4;
1738    int i;
1739
1740    if (frame->pf_gains[0] == 0.0 || len <= 0)
1741        return;
1742
1743    g0 = frame->pf_gains[0];
1744    g1 = frame->pf_gains[1];
1745    g2 = frame->pf_gains[2];
1746
1747    x4 = data[-T - 2];
1748    x3 = data[-T - 1];
1749    x2 = data[-T];
1750    x1 = data[-T + 1];
1751
1752    for (i = 0; i < len; i++) {
1753        x0 = data[i - T + 2];
1754        data[i] += g0 * x2        +
1755                   g1 * (x1 + x3) +
1756                   g2 * (x0 + x4);
1757        x4 = x3;
1758        x3 = x2;
1759        x2 = x1;
1760        x1 = x0;
1761    }
1762}
1763
1764static void celt_postfilter(CeltContext *s, CeltFrame *frame)
1765{
1766    int len = s->blocksize * s->blocks;
1767
1768    celt_postfilter_apply_transition(frame, frame->buf + 1024);
1769
1770    frame->pf_period_old = frame->pf_period;
1771    memcpy(frame->pf_gains_old, frame->pf_gains, sizeof(frame->pf_gains));
1772
1773    frame->pf_period = frame->pf_period_new;
1774    memcpy(frame->pf_gains, frame->pf_gains_new, sizeof(frame->pf_gains));
1775
1776    if (len > CELT_OVERLAP) {
1777        celt_postfilter_apply_transition(frame, frame->buf + 1024 + CELT_OVERLAP);
1778        celt_postfilter_apply(frame, frame->buf + 1024 + 2 * CELT_OVERLAP,
1779                              len - 2 * CELT_OVERLAP);
1780
1781        frame->pf_period_old = frame->pf_period;
1782        memcpy(frame->pf_gains_old, frame->pf_gains, sizeof(frame->pf_gains));
1783    }
1784
1785    memmove(frame->buf, frame->buf + len, (1024 + CELT_OVERLAP / 2) * sizeof(float));
1786}
1787
1788static int parse_postfilter(CeltContext *s, OpusRangeCoder *rc, int consumed)
1789{
1790    static const float postfilter_taps[3][3] = {
1791        { 0.3066406250f, 0.2170410156f, 0.1296386719f },
1792        { 0.4638671875f, 0.2680664062f, 0.0           },
1793        { 0.7998046875f, 0.1000976562f, 0.0           }
1794    };
1795    int i;
1796
1797    memset(s->frame[0].pf_gains_new, 0, sizeof(s->frame[0].pf_gains_new));
1798    memset(s->frame[1].pf_gains_new, 0, sizeof(s->frame[1].pf_gains_new));
1799
1800    if (s->startband == 0 && consumed + 16 <= s->framebits) {
1801        int has_postfilter = opus_rc_p2model(rc, 1);
1802        if (has_postfilter) {
1803            float gain;
1804            int tapset, octave, period;
1805
1806            octave = opus_rc_unimodel(rc, 6);
1807            period = (16 << octave) + opus_getrawbits(rc, 4 + octave) - 1;
1808            gain   = 0.09375f * (opus_getrawbits(rc, 3) + 1);
1809            tapset = (opus_rc_tell(rc) + 2 <= s->framebits) ?
1810                     opus_rc_getsymbol(rc, celt_model_tapset) : 0;
1811
1812            for (i = 0; i < 2; i++) {
1813                CeltFrame *frame = &s->frame[i];
1814
1815                frame->pf_period_new = FFMAX(period, CELT_POSTFILTER_MINPERIOD);
1816                frame->pf_gains_new[0] = gain * postfilter_taps[tapset][0];
1817                frame->pf_gains_new[1] = gain * postfilter_taps[tapset][1];
1818                frame->pf_gains_new[2] = gain * postfilter_taps[tapset][2];
1819            }
1820        }
1821
1822        consumed = opus_rc_tell(rc);
1823    }
1824
1825    return consumed;
1826}
1827
1828static void process_anticollapse(CeltContext *s, CeltFrame *frame, float *X)
1829{
1830    int i, j, k;
1831
1832    for (i = s->startband; i < s->endband; i++) {
1833        int renormalize = 0;
1834        float *xptr;
1835        float prev[2];
1836        float Ediff, r;
1837        float thresh, sqrt_1;
1838        int depth;
1839
1840        /* depth in 1/8 bits */
1841        depth = (1 + s->pulses[i]) / (celt_freq_range[i] << s->duration);
1842        thresh = pow(2, -1.0 - 0.125f * depth);
1843        sqrt_1 = 1.0f / sqrtf(celt_freq_range[i] << s->duration);
1844
1845        xptr = X + (celt_freq_bands[i] << s->duration);
1846
1847        prev[0] = frame->prev_energy[0][i];
1848        prev[1] = frame->prev_energy[1][i];
1849        if (s->coded_channels == 1) {
1850            CeltFrame *frame1 = &s->frame[1];
1851
1852            prev[0] = FFMAX(prev[0], frame1->prev_energy[0][i]);
1853            prev[1] = FFMAX(prev[1], frame1->prev_energy[1][i]);
1854        }
1855        Ediff = frame->energy[i] - FFMIN(prev[0], prev[1]);
1856        Ediff = FFMAX(0, Ediff);
1857
1858        /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
1859        short blocks don't have the same energy as long */
1860        r = pow(2, 1 - Ediff);
1861        if (s->duration == 3)
1862            r *= M_SQRT2;
1863        r = FFMIN(thresh, r) * sqrt_1;
1864        for (k = 0; k < 1 << s->duration; k++) {
1865            /* Detect collapse */
1866            if (!(frame->collapse_masks[i] & 1 << k)) {
1867                /* Fill with noise */
1868                for (j = 0; j < celt_freq_range[i]; j++)
1869                    xptr[(j << s->duration) + k] = (celt_rng(s) & 0x8000) ? r : -r;
1870                renormalize = 1;
1871            }
1872        }
1873
1874        /* We just added some energy, so we need to renormalize */
1875        if (renormalize)
1876            celt_renormalize_vector(xptr, celt_freq_range[i] << s->duration, 1.0f);
1877    }
1878}
1879
1880static void celt_decode_bands(CeltContext *s, OpusRangeCoder *rc)
1881{
1882    float lowband_scratch[8 * 22];
1883    float norm[2 * 8 * 100];
1884
1885    int totalbits = (s->framebits << 3) - s->anticollapse_bit;
1886
1887    int update_lowband = 1;
1888    int lowband_offset = 0;
1889
1890    int i, j;
1891
1892    memset(s->coeffs, 0, sizeof(s->coeffs));
1893
1894    for (i = s->startband; i < s->endband; i++) {
1895        int band_offset = celt_freq_bands[i] << s->duration;
1896        int band_size   = celt_freq_range[i] << s->duration;
1897        float *X = s->coeffs[0] + band_offset;
1898        float *Y = (s->coded_channels == 2) ? s->coeffs[1] + band_offset : NULL;
1899
1900        int consumed = opus_rc_tell_frac(rc);
1901        float *norm2 = norm + 8 * 100;
1902        int effective_lowband = -1;
1903        unsigned int cm[2];
1904        int b;
1905
1906        /* Compute how many bits we want to allocate to this band */
1907        if (i != s->startband)
1908            s->remaining -= consumed;
1909        s->remaining2 = totalbits - consumed - 1;
1910        if (i <= s->codedbands - 1) {
1911            int curr_balance = s->remaining / FFMIN(3, s->codedbands-i);
1912            b = av_clip(FFMIN(s->remaining2 + 1, s->pulses[i] + curr_balance), 0, 16383);
1913        } else
1914            b = 0;
1915
1916        if (celt_freq_bands[i] - celt_freq_range[i] >= celt_freq_bands[s->startband] &&
1917            (update_lowband || lowband_offset == 0))
1918            lowband_offset = i;
1919
1920        /* Get a conservative estimate of the collapse_mask's for the bands we're
1921        going to be folding from. */
1922        if (lowband_offset != 0 && (s->spread != CELT_SPREAD_AGGRESSIVE ||
1923                                    s->blocks > 1 || s->tf_change[i] < 0)) {
1924            int foldstart, foldend;
1925
1926            /* This ensures we never repeat spectral content within one band */
1927            effective_lowband = FFMAX(celt_freq_bands[s->startband],
1928                                      celt_freq_bands[lowband_offset] - celt_freq_range[i]);
1929            foldstart = lowband_offset;
1930            while (celt_freq_bands[--foldstart] > effective_lowband);
1931            foldend = lowband_offset - 1;
1932            while (celt_freq_bands[++foldend] < effective_lowband + celt_freq_range[i]);
1933
1934            cm[0] = cm[1] = 0;
1935            for (j = foldstart; j < foldend; j++) {
1936                cm[0] |= s->frame[0].collapse_masks[j];
1937                cm[1] |= s->frame[s->coded_channels - 1].collapse_masks[j];
1938            }
1939        } else
1940            /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost
1941            always) be non-zero.*/
1942            cm[0] = cm[1] = (1 << s->blocks) - 1;
1943
1944        if (s->dualstereo && i == s->intensitystereo) {
1945            /* Switch off dual stereo to do intensity */
1946            s->dualstereo = 0;
1947            for (j = celt_freq_bands[s->startband] << s->duration; j < band_offset; j++)
1948                norm[j] = (norm[j] + norm2[j]) / 2;
1949        }
1950
1951        if (s->dualstereo) {
1952            cm[0] = celt_decode_band(s, rc, i, X, NULL, band_size, b / 2, s->blocks,
1953                                     effective_lowband != -1 ? norm + (effective_lowband << s->duration) : NULL, s->duration,
1954            norm + band_offset, 0, 1.0f, lowband_scratch, cm[0]);
1955
1956            cm[1] = celt_decode_band(s, rc, i, Y, NULL, band_size, b/2, s->blocks,
1957                                     effective_lowband != -1 ? norm2 + (effective_lowband << s->duration) : NULL, s->duration,
1958            norm2 + band_offset, 0, 1.0f, lowband_scratch, cm[1]);
1959        } else {
1960            cm[0] = celt_decode_band(s, rc, i, X, Y, band_size, b, s->blocks,
1961            effective_lowband != -1 ? norm + (effective_lowband << s->duration) : NULL, s->duration,
1962            norm + band_offset, 0, 1.0f, lowband_scratch, cm[0]|cm[1]);
1963
1964            cm[1] = cm[0];
1965        }
1966
1967        s->frame[0].collapse_masks[i]                     = (uint8_t)cm[0];
1968        s->frame[s->coded_channels - 1].collapse_masks[i] = (uint8_t)cm[1];
1969        s->remaining += s->pulses[i] + consumed;
1970
1971        /* Update the folding position only as long as we have 1 bit/sample depth */
1972        update_lowband = (b > band_size << 3);
1973    }
1974}
1975
1976int ff_celt_decode_frame(CeltContext *s, OpusRangeCoder *rc,
1977                         float **output, int coded_channels, int frame_size,
1978                         int startband,  int endband)
1979{
1980    int i, j;
1981
1982    int consumed;           // bits of entropy consumed thus far for this frame
1983    int silence = 0;
1984    int transient = 0;
1985    int anticollapse = 0;
1986    CeltIMDCTContext *imdct;
1987    float imdct_scale = 1.0;
1988
1989    if (coded_channels != 1 && coded_channels != 2) {
1990        av_log(s->avctx, AV_LOG_ERROR, "Invalid number of coded channels: %d\n",
1991               coded_channels);
1992        return AVERROR_INVALIDDATA;
1993    }
1994    if (startband < 0 || startband > endband || endband > CELT_MAX_BANDS) {
1995        av_log(s->avctx, AV_LOG_ERROR, "Invalid start/end band: %d %d\n",
1996               startband, endband);
1997        return AVERROR_INVALIDDATA;
1998    }
1999
2000    s->flushed        = 0;
2001    s->coded_channels = coded_channels;
2002    s->startband      = startband;
2003    s->endband        = endband;
2004    s->framebits      = rc->rb.bytes * 8;
2005
2006    s->duration = av_log2(frame_size / CELT_SHORT_BLOCKSIZE);
2007    if (s->duration > CELT_MAX_LOG_BLOCKS ||
2008        frame_size != CELT_SHORT_BLOCKSIZE * (1 << s->duration)) {
2009        av_log(s->avctx, AV_LOG_ERROR, "Invalid CELT frame size: %d\n",
2010               frame_size);
2011        return AVERROR_INVALIDDATA;
2012    }
2013
2014    if (!s->output_channels)
2015        s->output_channels = coded_channels;
2016
2017    memset(s->frame[0].collapse_masks, 0, sizeof(s->frame[0].collapse_masks));
2018    memset(s->frame[1].collapse_masks, 0, sizeof(s->frame[1].collapse_masks));
2019
2020    consumed = opus_rc_tell(rc);
2021
2022    /* obtain silence flag */
2023    if (consumed >= s->framebits)
2024        silence = 1;
2025    else if (consumed == 1)
2026        silence = opus_rc_p2model(rc, 15);
2027
2028
2029    if (silence) {
2030        consumed = s->framebits;
2031        rc->total_read_bits += s->framebits - opus_rc_tell(rc);
2032    }
2033
2034    /* obtain post-filter options */
2035    consumed = parse_postfilter(s, rc, consumed);
2036
2037    /* obtain transient flag */
2038    if (s->duration != 0 && consumed+3 <= s->framebits)
2039        transient = opus_rc_p2model(rc, 3);
2040
2041    s->blocks    = transient ? 1 << s->duration : 1;
2042    s->blocksize = frame_size / s->blocks;
2043
2044    imdct = s->imdct[transient ? 0 : s->duration];
2045
2046    if (coded_channels == 1) {
2047        for (i = 0; i < CELT_MAX_BANDS; i++)
2048            s->frame[0].energy[i] = FFMAX(s->frame[0].energy[i], s->frame[1].energy[i]);
2049    }
2050
2051    celt_decode_coarse_energy(s, rc);
2052    celt_decode_tf_changes   (s, rc, transient);
2053    celt_decode_allocation   (s, rc);
2054    celt_decode_fine_energy  (s, rc);
2055    celt_decode_bands        (s, rc);
2056
2057    if (s->anticollapse_bit)
2058        anticollapse = opus_getrawbits(rc, 1);
2059
2060    celt_decode_final_energy(s, rc, s->framebits - opus_rc_tell(rc));
2061
2062    /* apply anti-collapse processing and denormalization to
2063     * each coded channel */
2064    for (i = 0; i < s->coded_channels; i++) {
2065        CeltFrame *frame = &s->frame[i];
2066
2067        if (anticollapse)
2068            process_anticollapse(s, frame, s->coeffs[i]);
2069
2070        celt_denormalize(s, frame, s->coeffs[i]);
2071    }
2072
2073    /* stereo -> mono downmix */
2074    if (s->output_channels < s->coded_channels) {
2075        s->dsp.vector_fmac_scalar(s->coeffs[0], s->coeffs[1], 1.0, FFALIGN(frame_size, 16));
2076        imdct_scale = 0.5;
2077    } else if (s->output_channels > s->coded_channels)
2078        memcpy(s->coeffs[1], s->coeffs[0], frame_size * sizeof(float));
2079
2080    if (silence) {
2081        for (i = 0; i < 2; i++) {
2082            CeltFrame *frame = &s->frame[i];
2083
2084            for (j = 0; j < FF_ARRAY_ELEMS(frame->energy); j++)
2085                frame->energy[j] = CELT_ENERGY_SILENCE;
2086        }
2087        memset(s->coeffs, 0, sizeof(s->coeffs));
2088    }
2089
2090    /* transform and output for each output channel */
2091    for (i = 0; i < s->output_channels; i++) {
2092        CeltFrame *frame = &s->frame[i];
2093        float m = frame->deemph_coeff;
2094
2095        /* iMDCT and overlap-add */
2096        for (j = 0; j < s->blocks; j++) {
2097            float *dst  = frame->buf + 1024 + j * s->blocksize;
2098
2099            imdct->imdct_half(imdct, dst + CELT_OVERLAP / 2, s->coeffs[i] + j,
2100                              s->blocks, imdct_scale);
2101            s->dsp.vector_fmul_window(dst, dst, dst + CELT_OVERLAP / 2,
2102                                      celt_window, CELT_OVERLAP / 2);
2103        }
2104
2105        /* postfilter */
2106        celt_postfilter(s, frame);
2107
2108        /* deemphasis and output scaling */
2109        for (j = 0; j < frame_size; j++) {
2110            float tmp = frame->buf[1024 - frame_size + j] + m;
2111            m = tmp * CELT_DEEMPH_COEFF;
2112            output[i][j] = tmp / 32768.;
2113        }
2114        frame->deemph_coeff = m;
2115    }
2116
2117    if (coded_channels == 1)
2118        memcpy(s->frame[1].energy, s->frame[0].energy, sizeof(s->frame[0].energy));
2119
2120    for (i = 0; i < 2; i++ ) {
2121        CeltFrame *frame = &s->frame[i];
2122
2123        if (!transient) {
2124            memcpy(frame->prev_energy[1], frame->prev_energy[0], sizeof(frame->prev_energy[0]));
2125            memcpy(frame->prev_energy[0], frame->energy,         sizeof(frame->prev_energy[0]));
2126        } else {
2127            for (j = 0; j < CELT_MAX_BANDS; j++)
2128                frame->prev_energy[0][j] = FFMIN(frame->prev_energy[0][j], frame->energy[j]);
2129        }
2130
2131        for (j = 0; j < s->startband; j++) {
2132            frame->prev_energy[0][j] = CELT_ENERGY_SILENCE;
2133            frame->energy[j]         = 0.0;
2134        }
2135        for (j = s->endband; j < CELT_MAX_BANDS; j++) {
2136            frame->prev_energy[0][j] = CELT_ENERGY_SILENCE;
2137            frame->energy[j]         = 0.0;
2138        }
2139    }
2140
2141    s->seed = rc->range;
2142
2143    return 0;
2144}
2145
2146void ff_celt_flush(CeltContext *s)
2147{
2148    int i, j;
2149
2150    if (s->flushed)
2151        return;
2152
2153    for (i = 0; i < 2; i++) {
2154        CeltFrame *frame = &s->frame[i];
2155
2156        for (j = 0; j < CELT_MAX_BANDS; j++)
2157            frame->prev_energy[0][j] = frame->prev_energy[1][j] = CELT_ENERGY_SILENCE;
2158
2159        memset(frame->energy, 0, sizeof(frame->energy));
2160        memset(frame->buf,    0, sizeof(frame->buf));
2161
2162        memset(frame->pf_gains,     0, sizeof(frame->pf_gains));
2163        memset(frame->pf_gains_old, 0, sizeof(frame->pf_gains_old));
2164        memset(frame->pf_gains_new, 0, sizeof(frame->pf_gains_new));
2165
2166        frame->deemph_coeff = 0.0;
2167    }
2168    s->seed = 0;
2169
2170    s->flushed = 1;
2171}
2172
2173void ff_celt_free(CeltContext **ps)
2174{
2175    CeltContext *s = *ps;
2176    int i;
2177
2178    if (!s)
2179        return;
2180
2181    for (i = 0; i < FF_ARRAY_ELEMS(s->imdct); i++)
2182        ff_celt_imdct_uninit(&s->imdct[i]);
2183
2184    av_freep(ps);
2185}
2186
2187int ff_celt_init(AVCodecContext *avctx, CeltContext **ps, int output_channels)
2188{
2189    CeltContext *s;
2190    int i, ret;
2191
2192    if (output_channels != 1 && output_channels != 2) {
2193        av_log(avctx, AV_LOG_ERROR, "Invalid number of output channels: %d\n",
2194               output_channels);
2195        return AVERROR(EINVAL);
2196    }
2197
2198    s = av_mallocz(sizeof(*s));
2199    if (!s)
2200        return AVERROR(ENOMEM);
2201
2202    s->avctx           = avctx;
2203    s->output_channels = output_channels;
2204
2205    for (i = 0; i < FF_ARRAY_ELEMS(s->imdct); i++) {
2206        ret = ff_celt_imdct_init(&s->imdct[i], i + 3);
2207        if (ret < 0)
2208            goto fail;
2209    }
2210
2211    avpriv_float_dsp_init(&s->dsp, avctx->flags & CODEC_FLAG_BITEXACT);
2212
2213    ff_celt_flush(s);
2214
2215    *ps = s;
2216
2217    return 0;
2218fail:
2219    ff_celt_free(&s);
2220    return ret;
2221}
2222