1/* 2 * audio encoder psychoacoustic model 3 * Copyright (C) 2008 Konstantin Shishkov 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22#ifndef AVCODEC_PSYMODEL_H 23#define AVCODEC_PSYMODEL_H 24 25#include "avcodec.h" 26 27/** maximum possible number of bands */ 28#define PSY_MAX_BANDS 128 29 30/** 31 * single band psychoacoustic information 32 */ 33typedef struct FFPsyBand { 34 int bits; 35 float energy; 36 float threshold; 37 float distortion; 38 float perceptual_weight; 39} FFPsyBand; 40 41/** 42 * windowing related information 43 */ 44typedef struct FFPsyWindowInfo { 45 int window_type[3]; ///< window type (short/long/transitional, etc.) - current, previous and next 46 int window_shape; ///< window shape (sine/KBD/whatever) 47 int num_windows; ///< number of windows in a frame 48 int grouping[8]; ///< window grouping (for e.g. AAC) 49 int *window_sizes; ///< sequence of window sizes inside one frame (for eg. WMA) 50} FFPsyWindowInfo; 51 52/** 53 * context used by psychoacoustic model 54 */ 55typedef struct FFPsyContext { 56 AVCodecContext *avctx; ///< encoder context 57 const struct FFPsyModel *model; ///< encoder-specific model functions 58 59 FFPsyBand *psy_bands; ///< frame bands information 60 61 uint8_t **bands; ///< scalefactor band sizes for possible frame sizes 62 int *num_bands; ///< number of scalefactor bands for possible frame sizes 63 int num_lens; ///< number of scalefactor band sets 64 65 void* model_priv_data; ///< psychoacoustic model implementation private data 66} FFPsyContext; 67 68/** 69 * codec-specific psychoacoustic model implementation 70 */ 71typedef struct FFPsyModel { 72 const char *name; 73 int (*init) (FFPsyContext *apc); 74 FFPsyWindowInfo (*window)(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type); 75 void (*analyze)(FFPsyContext *ctx, int channel, const float *coeffs, FFPsyWindowInfo *wi); 76 void (*end) (FFPsyContext *apc); 77} FFPsyModel; 78 79/** 80 * Initialize psychoacoustic model. 81 * 82 * @param ctx model context 83 * @param avctx codec context 84 * @param num_lens number of possible frame lengths 85 * @param bands scalefactor band lengths for all frame lengths 86 * @param num_bands number of scalefactor bands for all frame lengths 87 * 88 * @return zero if successful, a negative value if not 89 */ 90av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, 91 int num_lens, 92 const uint8_t **bands, const int* num_bands); 93 94/** 95 * Suggest window sequence for channel. 96 * 97 * @param ctx model context 98 * @param audio samples for the current frame 99 * @param la lookahead samples (NULL when unavailable) 100 * @param channel number of channel element to analyze 101 * @param prev_type previous window type 102 * 103 * @return suggested window information in a structure 104 */ 105FFPsyWindowInfo ff_psy_suggest_window(FFPsyContext *ctx, 106 const int16_t *audio, const int16_t *la, 107 int channel, int prev_type); 108 109 110/** 111 * Perform psychoacoustic analysis and set band info (threshold, energy). 112 * 113 * @param ctx model context 114 * @param channel audio channel number 115 * @param coeffs pointer to the transformed coefficients 116 * @param wi window information 117 */ 118void ff_psy_set_band_info(FFPsyContext *ctx, int channel, const float *coeffs, 119 FFPsyWindowInfo *wi); 120 121/** 122 * Cleanup model context at the end. 123 * 124 * @param ctx model context 125 */ 126av_cold void ff_psy_end(FFPsyContext *ctx); 127 128 129/************************************************************************** 130 * Audio preprocessing stuff. * 131 * This should be moved into some audio filter eventually. * 132 **************************************************************************/ 133struct FFPsyPreprocessContext; 134 135/** 136 * psychoacoustic model audio preprocessing initialization 137 */ 138av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *avctx); 139 140/** 141 * Preprocess several channel in audio frame in order to compress it better. 142 * 143 * @param ctx preprocessing context 144 * @param audio samples to preprocess 145 * @param dest place to put filtered samples 146 * @param tag channel number 147 * @param channels number of channel to preprocess (some additional work may be done on stereo pair) 148 */ 149void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, 150 const int16_t *audio, int16_t *dest, 151 int tag, int channels); 152 153/** 154 * Cleanup audio preprocessing module. 155 */ 156av_cold void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx); 157 158#endif /* AVCODEC_PSYMODEL_H */ 159