1/* 2 * AAC definitions and structures 3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org ) 4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com ) 5 * 6 * This file is part of FFmpeg. 7 * 8 * FFmpeg is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * FFmpeg is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with FFmpeg; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 */ 22 23/** 24 * @file libavcodec/aac.h 25 * AAC definitions and structures 26 * @author Oded Shimon ( ods15 ods15 dyndns org ) 27 * @author Maxim Gavrilov ( maxim.gavrilov gmail com ) 28 */ 29 30#ifndef AVCODEC_AAC_H 31#define AVCODEC_AAC_H 32 33#include "libavutil/internal.h" 34#include "avcodec.h" 35#include "dsputil.h" 36#include "mpeg4audio.h" 37 38#include <stdint.h> 39 40#define AAC_INIT_VLC_STATIC(num, size) \ 41 INIT_VLC_STATIC(&vlc_spectral[num], 6, ff_aac_spectral_sizes[num], \ 42 ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \ 43 ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \ 44 size); 45 46#define MAX_CHANNELS 64 47#define MAX_ELEM_ID 16 48 49#define TNS_MAX_ORDER 20 50 51enum AudioObjectType { 52 AOT_NULL, 53 // Support? Name 54 AOT_AAC_MAIN, ///< Y Main 55 AOT_AAC_LC, ///< Y Low Complexity 56 AOT_AAC_SSR, ///< N (code in SoC repo) Scalable Sample Rate 57 AOT_AAC_LTP, ///< N (code in SoC repo) Long Term Prediction 58 AOT_SBR, ///< N (in progress) Spectral Band Replication 59 AOT_AAC_SCALABLE, ///< N Scalable 60 AOT_TWINVQ, ///< N Twin Vector Quantizer 61 AOT_CELP, ///< N Code Excited Linear Prediction 62 AOT_HVXC, ///< N Harmonic Vector eXcitation Coding 63 AOT_TTSI = 12, ///< N Text-To-Speech Interface 64 AOT_MAINSYNTH, ///< N Main Synthesis 65 AOT_WAVESYNTH, ///< N Wavetable Synthesis 66 AOT_MIDI, ///< N General MIDI 67 AOT_SAFX, ///< N Algorithmic Synthesis and Audio Effects 68 AOT_ER_AAC_LC, ///< N Error Resilient Low Complexity 69 AOT_ER_AAC_LTP = 19, ///< N Error Resilient Long Term Prediction 70 AOT_ER_AAC_SCALABLE, ///< N Error Resilient Scalable 71 AOT_ER_TWINVQ, ///< N Error Resilient Twin Vector Quantizer 72 AOT_ER_BSAC, ///< N Error Resilient Bit-Sliced Arithmetic Coding 73 AOT_ER_AAC_LD, ///< N Error Resilient Low Delay 74 AOT_ER_CELP, ///< N Error Resilient Code Excited Linear Prediction 75 AOT_ER_HVXC, ///< N Error Resilient Harmonic Vector eXcitation Coding 76 AOT_ER_HILN, ///< N Error Resilient Harmonic and Individual Lines plus Noise 77 AOT_ER_PARAM, ///< N Error Resilient Parametric 78 AOT_SSC, ///< N SinuSoidal Coding 79}; 80 81enum RawDataBlockType { 82 TYPE_SCE, 83 TYPE_CPE, 84 TYPE_CCE, 85 TYPE_LFE, 86 TYPE_DSE, 87 TYPE_PCE, 88 TYPE_FIL, 89 TYPE_END, 90}; 91 92enum ExtensionPayloadID { 93 EXT_FILL, 94 EXT_FILL_DATA, 95 EXT_DATA_ELEMENT, 96 EXT_DYNAMIC_RANGE = 0xb, 97 EXT_SBR_DATA = 0xd, 98 EXT_SBR_DATA_CRC = 0xe, 99}; 100 101enum WindowSequence { 102 ONLY_LONG_SEQUENCE, 103 LONG_START_SEQUENCE, 104 EIGHT_SHORT_SEQUENCE, 105 LONG_STOP_SEQUENCE, 106}; 107 108enum BandType { 109 ZERO_BT = 0, ///< Scalefactors and spectral data are all zero. 110 FIRST_PAIR_BT = 5, ///< This and later band types encode two values (rather than four) with one code word. 111 ESC_BT = 11, ///< Spectral data are coded with an escape sequence. 112 NOISE_BT = 13, ///< Spectral data are scaled white noise not coded in the bitstream. 113 INTENSITY_BT2 = 14, ///< Scalefactor data are intensity stereo positions. 114 INTENSITY_BT = 15, ///< Scalefactor data are intensity stereo positions. 115}; 116 117#define IS_CODEBOOK_UNSIGNED(x) ((x - 1) & 10) 118 119enum ChannelPosition { 120 AAC_CHANNEL_FRONT = 1, 121 AAC_CHANNEL_SIDE = 2, 122 AAC_CHANNEL_BACK = 3, 123 AAC_CHANNEL_LFE = 4, 124 AAC_CHANNEL_CC = 5, 125}; 126 127/** 128 * The point during decoding at which channel coupling is applied. 129 */ 130enum CouplingPoint { 131 BEFORE_TNS, 132 BETWEEN_TNS_AND_IMDCT, 133 AFTER_IMDCT = 3, 134}; 135 136/** 137 * Predictor State 138 */ 139typedef struct { 140 float cor0; 141 float cor1; 142 float var0; 143 float var1; 144 float r0; 145 float r1; 146} PredictorState; 147 148#define MAX_PREDICTORS 672 149 150/** 151 * Individual Channel Stream 152 */ 153typedef struct { 154 uint8_t max_sfb; ///< number of scalefactor bands per group 155 enum WindowSequence window_sequence[2]; 156 uint8_t use_kb_window[2]; ///< If set, use Kaiser-Bessel window, otherwise use a sinus window. 157 int num_window_groups; 158 uint8_t group_len[8]; 159 const uint16_t *swb_offset; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window 160 int num_swb; ///< number of scalefactor window bands 161 int num_windows; 162 int tns_max_bands; 163 int predictor_present; 164 int predictor_initialized; 165 int predictor_reset_group; 166 uint8_t prediction_used[41]; 167} IndividualChannelStream; 168 169/** 170 * Temporal Noise Shaping 171 */ 172typedef struct { 173 int present; 174 int n_filt[8]; 175 int length[8][4]; 176 int direction[8][4]; 177 int order[8][4]; 178 float coef[8][4][TNS_MAX_ORDER]; 179} TemporalNoiseShaping; 180 181/** 182 * Dynamic Range Control - decoded from the bitstream but not processed further. 183 */ 184typedef struct { 185 int pce_instance_tag; ///< Indicates with which program the DRC info is associated. 186 int dyn_rng_sgn[17]; ///< DRC sign information; 0 - positive, 1 - negative 187 int dyn_rng_ctl[17]; ///< DRC magnitude information 188 int exclude_mask[MAX_CHANNELS]; ///< Channels to be excluded from DRC processing. 189 int band_incr; ///< Number of DRC bands greater than 1 having DRC info. 190 int interpolation_scheme; ///< Indicates the interpolation scheme used in the SBR QMF domain. 191 int band_top[17]; ///< Indicates the top of the i-th DRC band in units of 4 spectral lines. 192 int prog_ref_level; /**< A reference level for the long-term program audio level for all 193 * channels combined. 194 */ 195} DynamicRangeControl; 196 197typedef struct { 198 int num_pulse; 199 int pos[4]; 200 int amp[4]; 201} Pulse; 202 203/** 204 * coupling parameters 205 */ 206typedef struct { 207 enum CouplingPoint coupling_point; ///< The point during decoding at which coupling is applied. 208 int num_coupled; ///< number of target elements 209 enum RawDataBlockType type[8]; ///< Type of channel element to be coupled - SCE or CPE. 210 int id_select[8]; ///< element id 211 int ch_select[8]; /**< [0] shared list of gains; [1] list of gains for right channel; 212 * [2] list of gains for left channel; [3] lists of gains for both channels 213 */ 214 float gain[16][120]; 215} ChannelCoupling; 216 217/** 218 * Single Channel Element - used for both SCE and LFE elements. 219 */ 220typedef struct { 221 IndividualChannelStream ics; 222 TemporalNoiseShaping tns; 223 enum BandType band_type[120]; ///< band types 224 int band_type_run_end[120]; ///< band type run end points 225 float sf[120]; ///< scalefactors 226 DECLARE_ALIGNED_16(float, coeffs[1024]); ///< coefficients for IMDCT 227 DECLARE_ALIGNED_16(float, saved[512]); ///< overlap 228 DECLARE_ALIGNED_16(float, ret[1024]); ///< PCM output 229 PredictorState predictor_state[MAX_PREDICTORS]; 230} SingleChannelElement; 231 232/** 233 * channel element - generic struct for SCE/CPE/CCE/LFE 234 */ 235typedef struct { 236 // CPE specific 237 uint8_t ms_mask[120]; ///< Set if mid/side stereo is used for each scalefactor window band 238 // shared 239 SingleChannelElement ch[2]; 240 // CCE specific 241 ChannelCoupling coup; 242} ChannelElement; 243 244/** 245 * main AAC context 246 */ 247typedef struct { 248 AVCodecContext * avccontext; 249 250 MPEG4AudioConfig m4ac; 251 252 int is_saved; ///< Set if elements have stored overlap from previous frame. 253 DynamicRangeControl che_drc; 254 255 /** 256 * @defgroup elements Channel element related data. 257 * @{ 258 */ 259 enum ChannelPosition che_pos[4][MAX_ELEM_ID]; /**< channel element channel mapping with the 260 * first index as the first 4 raw data block types 261 */ 262 ChannelElement * che[4][MAX_ELEM_ID]; 263 ChannelElement * tag_che_map[4][MAX_ELEM_ID]; 264 int tags_mapped; 265 /** @} */ 266 267 /** 268 * @defgroup temporary aligned temporary buffers (We do not want to have these on the stack.) 269 * @{ 270 */ 271 DECLARE_ALIGNED_16(float, buf_mdct[1024]); 272 /** @} */ 273 274 /** 275 * @defgroup tables Computed / set up during initialization. 276 * @{ 277 */ 278 MDCTContext mdct; 279 MDCTContext mdct_small; 280 DSPContext dsp; 281 int random_state; 282 /** @} */ 283 284 /** 285 * @defgroup output Members used for output interleaving. 286 * @{ 287 */ 288 float *output_data[MAX_CHANNELS]; ///< Points to each element's 'ret' buffer (PCM output). 289 float add_bias; ///< offset for dsp.float_to_int16 290 float sf_scale; ///< Pre-scale for correct IMDCT and dsp.float_to_int16. 291 int sf_offset; ///< offset into pow2sf_tab as appropriate for dsp.float_to_int16 292 /** @} */ 293 294 DECLARE_ALIGNED(16, float, temp[128]); 295} AACContext; 296 297#endif /* AVCODEC_AAC_H */ 298