1/* 2 * Opus decoder/demuxer common functions 3 * Copyright (c) 2012 Andrew D'Addesio 4 * Copyright (c) 2013-2014 Mozilla Corporation 5 * 6 * This file is part of FFmpeg. 7 * 8 * FFmpeg is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * FFmpeg is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with FFmpeg; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 */ 22 23#ifndef AVCODEC_OPUS_H 24#define AVCODEC_OPUS_H 25 26#include <stdint.h> 27 28#include "libavutil/audio_fifo.h" 29#include "libavutil/float_dsp.h" 30#include "libavutil/frame.h" 31 32#include "libswresample/swresample.h" 33 34#include "avcodec.h" 35#include "get_bits.h" 36 37#define MAX_FRAME_SIZE 1275 38#define MAX_FRAMES 48 39#define MAX_PACKET_DUR 5760 40 41#define CELT_SHORT_BLOCKSIZE 120 42#define CELT_OVERLAP CELT_SHORT_BLOCKSIZE 43#define CELT_MAX_LOG_BLOCKS 3 44#define CELT_MAX_FRAME_SIZE (CELT_SHORT_BLOCKSIZE * (1 << CELT_MAX_LOG_BLOCKS)) 45#define CELT_MAX_BANDS 21 46#define CELT_VECTORS 11 47#define CELT_ALLOC_STEPS 6 48#define CELT_FINE_OFFSET 21 49#define CELT_MAX_FINE_BITS 8 50#define CELT_NORM_SCALE 16384 51#define CELT_QTHETA_OFFSET 4 52#define CELT_QTHETA_OFFSET_TWOPHASE 16 53#define CELT_DEEMPH_COEFF 0.85000610f 54#define CELT_POSTFILTER_MINPERIOD 15 55#define CELT_ENERGY_SILENCE (-28.0f) 56 57#define SILK_HISTORY 322 58#define SILK_MAX_LPC 16 59 60#define ROUND_MULL(a,b,s) (((MUL64(a, b) >> ((s) - 1)) + 1) >> 1) 61#define ROUND_MUL16(a,b) ((MUL16(a, b) + 16384) >> 15) 62#define opus_ilog(i) (av_log2(i) + !!(i)) 63 64enum OpusMode { 65 OPUS_MODE_SILK, 66 OPUS_MODE_HYBRID, 67 OPUS_MODE_CELT 68}; 69 70enum OpusBandwidth { 71 OPUS_BANDWIDTH_NARROWBAND, 72 OPUS_BANDWIDTH_MEDIUMBAND, 73 OPUS_BANDWIDTH_WIDEBAND, 74 OPUS_BANDWIDTH_SUPERWIDEBAND, 75 OPUS_BANDWIDTH_FULLBAND 76}; 77 78typedef struct RawBitsContext { 79 const uint8_t *position; 80 unsigned int bytes; 81 unsigned int cachelen; 82 unsigned int cacheval; 83} RawBitsContext; 84 85typedef struct OpusRangeCoder { 86 GetBitContext gb; 87 RawBitsContext rb; 88 unsigned int range; 89 unsigned int value; 90 unsigned int total_read_bits; 91} OpusRangeCoder; 92 93typedef struct SilkContext SilkContext; 94 95typedef struct CeltContext CeltContext; 96 97typedef struct OpusPacket { 98 int packet_size; /**< packet size */ 99 int data_size; /**< size of the useful data -- packet size - padding */ 100 int code; /**< packet code: specifies the frame layout */ 101 int stereo; /**< whether this packet is mono or stereo */ 102 int vbr; /**< vbr flag */ 103 int config; /**< configuration: tells the audio mode, 104 ** bandwidth, and frame duration */ 105 int frame_count; /**< frame count */ 106 int frame_offset[MAX_FRAMES]; /**< frame offsets */ 107 int frame_size[MAX_FRAMES]; /**< frame sizes */ 108 int frame_duration; /**< frame duration, in samples @ 48kHz */ 109 enum OpusMode mode; /**< mode */ 110 enum OpusBandwidth bandwidth; /**< bandwidth */ 111} OpusPacket; 112 113typedef struct OpusStreamContext { 114 AVCodecContext *avctx; 115 int output_channels; 116 117 OpusRangeCoder rc; 118 OpusRangeCoder redundancy_rc; 119 SilkContext *silk; 120 CeltContext *celt; 121 AVFloatDSPContext *fdsp; 122 123 float silk_buf[2][960]; 124 float *silk_output[2]; 125 DECLARE_ALIGNED(32, float, celt_buf)[2][960]; 126 float *celt_output[2]; 127 128 float redundancy_buf[2][960]; 129 float *redundancy_output[2]; 130 131 /* data buffers for the final output data */ 132 float *out[2]; 133 int out_size; 134 135 float *out_dummy; 136 int out_dummy_allocated_size; 137 138 SwrContext *swr; 139 AVAudioFifo *celt_delay; 140 int silk_samplerate; 141 /* number of samples we still want to get from the resampler */ 142 int delayed_samples; 143 144 OpusPacket packet; 145 146 int redundancy_idx; 147} OpusStreamContext; 148 149// a mapping between an opus stream and an output channel 150typedef struct ChannelMap { 151 int stream_idx; 152 int channel_idx; 153 154 // when a single decoded channel is mapped to multiple output channels, we 155 // write to the first output directly and copy from it to the others 156 // this field is set to 1 for those copied output channels 157 int copy; 158 // this is the index of the output channel to copy from 159 int copy_idx; 160 161 // this channel is silent 162 int silence; 163} ChannelMap; 164 165typedef struct OpusContext { 166 OpusStreamContext *streams; 167 int nb_streams; 168 int nb_stereo_streams; 169 170 AVFloatDSPContext fdsp; 171 int16_t gain_i; 172 float gain; 173 174 ChannelMap *channel_maps; 175} OpusContext; 176 177static av_always_inline void opus_rc_normalize(OpusRangeCoder *rc) 178{ 179 while (rc->range <= 1<<23) { 180 rc->value = ((rc->value << 8) | (get_bits(&rc->gb, 8) ^ 0xFF)) & ((1u << 31) - 1); 181 rc->range <<= 8; 182 rc->total_read_bits += 8; 183 } 184} 185 186static av_always_inline void opus_rc_update(OpusRangeCoder *rc, unsigned int scale, 187 unsigned int low, unsigned int high, 188 unsigned int total) 189{ 190 rc->value -= scale * (total - high); 191 rc->range = low ? scale * (high - low) 192 : rc->range - scale * (total - high); 193 opus_rc_normalize(rc); 194} 195 196static av_always_inline unsigned int opus_rc_getsymbol(OpusRangeCoder *rc, const uint16_t *cdf) 197{ 198 unsigned int k, scale, total, symbol, low, high; 199 200 total = *cdf++; 201 202 scale = rc->range / total; 203 symbol = rc->value / scale + 1; 204 symbol = total - FFMIN(symbol, total); 205 206 for (k = 0; cdf[k] <= symbol; k++); 207 high = cdf[k]; 208 low = k ? cdf[k-1] : 0; 209 210 opus_rc_update(rc, scale, low, high, total); 211 212 return k; 213} 214 215static av_always_inline unsigned int opus_rc_p2model(OpusRangeCoder *rc, unsigned int bits) 216{ 217 unsigned int k, scale; 218 scale = rc->range >> bits; // in this case, scale = symbol 219 220 if (rc->value >= scale) { 221 rc->value -= scale; 222 rc->range -= scale; 223 k = 0; 224 } else { 225 rc->range = scale; 226 k = 1; 227 } 228 opus_rc_normalize(rc); 229 return k; 230} 231 232/** 233 * CELT: estimate bits of entropy that have thus far been consumed for the 234 * current CELT frame, to integer and fractional (1/8th bit) precision 235 */ 236static av_always_inline unsigned int opus_rc_tell(const OpusRangeCoder *rc) 237{ 238 return rc->total_read_bits - av_log2(rc->range) - 1; 239} 240 241static av_always_inline unsigned int opus_rc_tell_frac(const OpusRangeCoder *rc) 242{ 243 unsigned int i, total_bits, rcbuffer, range; 244 245 total_bits = rc->total_read_bits << 3; 246 rcbuffer = av_log2(rc->range) + 1; 247 range = rc->range >> (rcbuffer-16); 248 249 for (i = 0; i < 3; i++) { 250 int bit; 251 range = range * range >> 15; 252 bit = range >> 16; 253 rcbuffer = rcbuffer << 1 | bit; 254 range >>= bit; 255 } 256 257 return total_bits - rcbuffer; 258} 259 260/** 261 * CELT: read 1-25 raw bits at the end of the frame, backwards byte-wise 262 */ 263static av_always_inline unsigned int opus_getrawbits(OpusRangeCoder *rc, unsigned int count) 264{ 265 unsigned int value = 0; 266 267 while (rc->rb.bytes && rc->rb.cachelen < count) { 268 rc->rb.cacheval |= *--rc->rb.position << rc->rb.cachelen; 269 rc->rb.cachelen += 8; 270 rc->rb.bytes--; 271 } 272 273 value = rc->rb.cacheval & ((1<<count)-1); 274 rc->rb.cacheval >>= count; 275 rc->rb.cachelen -= count; 276 rc->total_read_bits += count; 277 278 return value; 279} 280 281/** 282 * CELT: read a uniform distribution 283 */ 284static av_always_inline unsigned int opus_rc_unimodel(OpusRangeCoder *rc, unsigned int size) 285{ 286 unsigned int bits, k, scale, total; 287 288 bits = opus_ilog(size - 1); 289 total = (bits > 8) ? ((size - 1) >> (bits - 8)) + 1 : size; 290 291 scale = rc->range / total; 292 k = rc->value / scale + 1; 293 k = total - FFMIN(k, total); 294 opus_rc_update(rc, scale, k, k + 1, total); 295 296 if (bits > 8) { 297 k = k << (bits - 8) | opus_getrawbits(rc, bits - 8); 298 return FFMIN(k, size - 1); 299 } else 300 return k; 301} 302 303static av_always_inline int opus_rc_laplace(OpusRangeCoder *rc, unsigned int symbol, int decay) 304{ 305 /* extends the range coder to model a Laplace distribution */ 306 int value = 0; 307 unsigned int scale, low = 0, center; 308 309 scale = rc->range >> 15; 310 center = rc->value / scale + 1; 311 center = (1 << 15) - FFMIN(center, 1 << 15); 312 313 if (center >= symbol) { 314 value++; 315 low = symbol; 316 symbol = 1 + ((32768 - 32 - symbol) * (16384-decay) >> 15); 317 318 while (symbol > 1 && center >= low + 2 * symbol) { 319 value++; 320 symbol *= 2; 321 low += symbol; 322 symbol = (((symbol - 2) * decay) >> 15) + 1; 323 } 324 325 if (symbol <= 1) { 326 int distance = (center - low) >> 1; 327 value += distance; 328 low += 2 * distance; 329 } 330 331 if (center < low + symbol) 332 value *= -1; 333 else 334 low += symbol; 335 } 336 337 opus_rc_update(rc, scale, low, FFMIN(low + symbol, 32768), 32768); 338 339 return value; 340} 341 342static av_always_inline unsigned int opus_rc_stepmodel(OpusRangeCoder *rc, int k0) 343{ 344 /* Use a probability of 3 up to itheta=8192 and then use 1 after */ 345 unsigned int k, scale, symbol, total = (k0+1)*3 + k0; 346 scale = rc->range / total; 347 symbol = rc->value / scale + 1; 348 symbol = total - FFMIN(symbol, total); 349 350 k = (symbol < (k0+1)*3) ? symbol/3 : symbol - (k0+1)*2; 351 352 opus_rc_update(rc, scale, (k <= k0) ? 3*(k+0) : (k-1-k0) + 3*(k0+1), 353 (k <= k0) ? 3*(k+1) : (k-0-k0) + 3*(k0+1), total); 354 return k; 355} 356 357static av_always_inline unsigned int opus_rc_trimodel(OpusRangeCoder *rc, int qn) 358{ 359 unsigned int k, scale, symbol, total, low, center; 360 361 total = ((qn>>1) + 1) * ((qn>>1) + 1); 362 scale = rc->range / total; 363 center = rc->value / scale + 1; 364 center = total - FFMIN(center, total); 365 366 if (center < total >> 1) { 367 k = (ff_sqrt(8 * center + 1) - 1) >> 1; 368 low = k * (k + 1) >> 1; 369 symbol = k + 1; 370 } else { 371 k = (2*(qn + 1) - ff_sqrt(8*(total - center - 1) + 1)) >> 1; 372 low = total - ((qn + 1 - k) * (qn + 2 - k) >> 1); 373 symbol = qn + 1 - k; 374 } 375 376 opus_rc_update(rc, scale, low, low + symbol, total); 377 378 return k; 379} 380 381int ff_opus_parse_packet(OpusPacket *pkt, const uint8_t *buf, int buf_size, 382 int self_delimited); 383 384int ff_opus_parse_extradata(AVCodecContext *avctx, OpusContext *s); 385 386int ff_silk_init(AVCodecContext *avctx, SilkContext **ps, int output_channels); 387void ff_silk_free(SilkContext **ps); 388void ff_silk_flush(SilkContext *s); 389 390/** 391 * Decode the LP layer of one Opus frame (which may correspond to several SILK 392 * frames). 393 */ 394int ff_silk_decode_superframe(SilkContext *s, OpusRangeCoder *rc, 395 float *output[2], 396 enum OpusBandwidth bandwidth, int coded_channels, 397 int duration_ms); 398 399int ff_celt_init(AVCodecContext *avctx, CeltContext **s, int output_channels); 400 401void ff_celt_free(CeltContext **s); 402 403void ff_celt_flush(CeltContext *s); 404 405int ff_celt_decode_frame(CeltContext *s, OpusRangeCoder *rc, 406 float **output, int coded_channels, int frame_size, 407 int startband, int endband); 408 409extern const float ff_celt_window2[120]; 410 411#endif /* AVCODEC_OPUS_H */ 412