/*
 * audio encoder psychoacoustic model
 * Copyright (C) 2008 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

22#ifndef AVCODEC_PSYMODEL_H
23#define AVCODEC_PSYMODEL_H
24
25#include "avcodec.h"
26
/** maximum possible number of bands */
#define PSY_MAX_BANDS 128

/**
 * single band psychoacoustic information
 *
 * ff_psy_set_band_info() is documented as setting the band info
 * (threshold, energy); this header does not state what fills in the
 * remaining members.
 */
typedef struct FFPsyBand {
    int   bits;              ///< NOTE(review): presumably a bit count for the band - confirm against the model implementation
    float energy;            ///< band energy
    float threshold;         ///< band threshold
    float distortion;        ///< NOTE(review): presumably distortion measured or allowed in the band - confirm
    float perceptual_weight; ///< NOTE(review): presumably a relative importance weight for the band - confirm
} FFPsyBand;

/**
 * windowing related information
 *
 * Returned by value from ff_psy_suggest_window() and handed back to the
 * model through ff_psy_set_band_info().
 */
typedef struct FFPsyWindowInfo {
    int window_type[3];               ///< window type (short/long/transitional, etc.) - current, previous and next
    int window_shape;                 ///< window shape (sine/KBD/whatever)
    int num_windows;                  ///< number of windows in a frame
    int grouping[8];                  ///< window grouping (e.g. for AAC)
    int *window_sizes;                ///< sequence of window sizes inside one frame (e.g. for WMA)
} FFPsyWindowInfo;

52/**
53 * context used by psychoacoustic model
54 */
55typedef struct FFPsyContext {
56    AVCodecContext *avctx;            ///< encoder context
57    const struct FFPsyModel *model;   ///< encoder-specific model functions
58
59    FFPsyBand *psy_bands;             ///< frame bands information
60
61    uint8_t **bands;                  ///< scalefactor band sizes for possible frame sizes
62    int     *num_bands;               ///< number of scalefactor bands for possible frame sizes
63    int num_lens;                     ///< number of scalefactor band sets
64
65    void* model_priv_data;            ///< psychoacoustic model implementation private data
66} FFPsyContext;
67
68/**
69 * codec-specific psychoacoustic model implementation
70 */
71typedef struct FFPsyModel {
72    const char *name;
73    int  (*init)   (FFPsyContext *apc);
74    FFPsyWindowInfo (*window)(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type);
75    void (*analyze)(FFPsyContext *ctx, int channel, const float *coeffs, FFPsyWindowInfo *wi);
76    void (*end)    (FFPsyContext *apc);
77} FFPsyModel;
78
79/**
80 * Initialize psychoacoustic model.
81 *
82 * @param ctx        model context
83 * @param avctx      codec context
84 * @param num_lens   number of possible frame lengths
85 * @param bands      scalefactor band lengths for all frame lengths
86 * @param num_bands  number of scalefactor bands for all frame lengths
87 *
88 * @return zero if successful, a negative value if not
89 */
90av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx,
91                        int num_lens,
92                        const uint8_t **bands, const int* num_bands);
93
94/**
95 * Suggest window sequence for channel.
96 *
97 * @param ctx       model context
98 * @param audio     samples for the current frame
99 * @param la        lookahead samples (NULL when unavailable)
100 * @param channel   number of channel element to analyze
101 * @param prev_type previous window type
102 *
103 * @return suggested window information in a structure
104 */
105FFPsyWindowInfo ff_psy_suggest_window(FFPsyContext *ctx,
106                                      const int16_t *audio, const int16_t *la,
107                                      int channel, int prev_type);
108
109
110/**
111 * Perform psychoacoustic analysis and set band info (threshold, energy).
112 *
113 * @param ctx     model context
114 * @param channel audio channel number
115 * @param coeffs  pointer to the transformed coefficients
116 * @param wi      window information
117 */
118void ff_psy_set_band_info(FFPsyContext *ctx, int channel, const float *coeffs,
119                          FFPsyWindowInfo *wi);
120
121/**
122 * Cleanup model context at the end.
123 *
124 * @param ctx model context
125 */
126av_cold void ff_psy_end(FFPsyContext *ctx);
127
128
129/**************************************************************************
130 *                       Audio preprocessing stuff.                       *
131 *       This should be moved into some audio filter eventually.          *
132 **************************************************************************/
133struct FFPsyPreprocessContext;
134
135/**
136 * psychoacoustic model audio preprocessing initialization
137 */
138av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *avctx);
139
/**
 * Preprocess several channels of an audio frame in order to compress it better.
 *
 * @param ctx      preprocessing context
 * @param audio    samples to preprocess
 * @param dest     place to put filtered samples
 * @param tag      channel number
 * @param channels number of channels to preprocess (some additional work may be done on stereo pair)
 */
void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx,
                       const int16_t *audio, int16_t *dest,
                       int tag, int channels);

153/**
154 * Cleanup audio preprocessing module.
155 */
156av_cold void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx);
157
158#endif /* AVCODEC_PSYMODEL_H */
159